src/rx/rx.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* RX:  Extended Remote Procedure Call */
  11
  12 #include <afsconfig.h>
  13 #ifdef  KERNEL
  14 #include "afs/param.h"
  15 #else
  16 #include <afs/param.h>
  17 #endif
  18
  19 RCSID
  20     ("$Header$");
  21
  22 #ifdef KERNEL
  23 #include "afs/sysincludes.h"
  24 #include "afsincludes.h"
  25 #ifndef UKERNEL
  26 #include "h/types.h"
  27 #include "h/time.h"
  28 #include "h/stat.h"
  29 #ifdef  AFS_OSF_ENV
  30 #include <net/net_globals.h>
  31 #endif /* AFS_OSF_ENV */
  32 #ifdef AFS_LINUX20_ENV
  33 #include "h/socket.h"
  34 #endif
  35 #include "netinet/in.h"
  36 #ifdef AFS_SUN57_ENV
  37 #include "inet/common.h"
  38 #include "inet/ip.h"
  39 #include "inet/ip_ire.h"
  40 #endif
  41 #include "afs/afs_args.h"
  42 #include "afs/afs_osi.h"
  43 #ifdef RX_KERNEL_TRACE
  44 #include "rx_kcommon.h"
  45 #endif
  46 #if     (defined(AFS_AUX_ENV) || defined(AFS_AIX_ENV))
  47 #include "h/systm.h"
  48 #endif
  49 #ifdef RXDEBUG
  50 #undef RXDEBUG                  /* turn off debugging */
  51 #endif /* RXDEBUG */
  52 #if defined(AFS_SGI_ENV)
  53 #include "sys/debug.h"
  54 #endif
  55 #include "afsint.h"
  56 #ifdef  AFS_OSF_ENV
  57 #undef kmem_alloc
  58 #undef kmem_free
  59 #undef mem_alloc
  60 #undef mem_free
  61 #undef register
  62 #endif /* AFS_OSF_ENV */
  63 #else /* !UKERNEL */
  64 #include "afs/sysincludes.h"
  65 #include "afsincludes.h"
  66 #endif /* !UKERNEL */
  67 #include "afs/lock.h"
  68 #include "rx_kmutex.h"
  69 #include "rx_kernel.h"
  70 #include "rx_clock.h"
  71 #include "rx_queue.h"
  72 #include "rx.h"
  73 #include "rx_globals.h"
  74 #include "rx_trace.h"
  75 #define AFSOP_STOP_RXCALLBACK   210     /* Stop CALLBACK process */
  76 #define AFSOP_STOP_AFS          211     /* Stop AFS process */
  77 #define AFSOP_STOP_BKG          212     /* Stop BKG process */
  78 #include "afsint.h"
  79 extern afs_int32 afs_termState;
  80 #ifdef AFS_AIX41_ENV
  81 #include "sys/lockl.h"
  82 #include "sys/lock_def.h"
  83 #endif /* AFS_AIX41_ENV */
  84 # include "rxgen_consts.h"
  85 #else /* KERNEL */
  86 # include <sys/types.h>
  87 # include <string.h>
  88 # include <errno.h>
  89 #ifdef AFS_NT40_ENV
  90 # include <stdlib.h>
  91 # include <fcntl.h>
  92 # include <afs/afsutil.h>
  93 # include <WINNT\afsreg.h>
  94 #else
  95 # include <sys/socket.h>
  96 # include <sys/file.h>
  97 # include <netdb.h>
  98 # include <sys/stat.h>
  99 # include <netinet/in.h>
 100 # include <sys/time.h>
 101 #endif
 102 # include "rx.h"
 103 # include "rx_user.h"
 104 # include "rx_clock.h"
 105 # include "rx_queue.h"
 106 # include "rx_globals.h"
 107 # include "rx_trace.h"
 108 # include <afs/rxgen_consts.h>
 109 #endif /* KERNEL */
 110
/* Optional hook.  When non-NULL, rx_StartServer calls it with the donated
 * thread's id and its generated "srv_N" name (user-space, non-NT builds). */
int (*registerProgram) () = 0;
/* Optional hook.  NOTE(review): not referenced in this part of the file;
 * presumably the renaming counterpart of registerProgram -- confirm. */
int (*swapNameProgram) () = 0;
 113
 114 /* Local static routines */
 115 static void rxi_DestroyConnectionNoLock(register struct rx_connection *conn);
 116 #ifdef RX_ENABLE_LOCKS
 117 static void rxi_SetAcksInTransmitQueue(register struct rx_call *call);
 118 #endif
 119
 120 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
/* Transmit-queue debug counters, compiled only when AFS_GLOBAL_RXLOCK_KERNEL
 * is defined.  rx_tq_debug is listed among the globals protected by
 * rx_stats_mutex. */
struct rx_tq_debug {
    afs_int32 rxi_start_aborted;        /* rxi_start awoke after rxi_Send in error. */
    /* NOTE(review): no update site is visible in this part of the file;
     * presumably counts rxi_start runs on a call already in error -- confirm. */
    afs_int32 rxi_start_in_error;
} rx_tq_debug;
 125 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
 126
 127 /*
 128  * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
 129  * currently allocated within rx.  This number is used to allocate the
 130  * memory required to return the statistics when queried.
 131  */
 132
 133 static unsigned int rxi_rpc_peer_stat_cnt;
 134
 135 /*
 136  * rxi_rpc_process_stat_cnt counts the total number of local process stat
 137  * structures currently allocated within rx.  The number is used to allocate
 138  * the memory required to return the statistics when queried.
 139  */
 140
 141 static unsigned int rxi_rpc_process_stat_cnt;
 142
 143 #if !defined(offsetof)
 144 #include <stddef.h>             /* for definition of offsetof() */
 145 #endif
 146
 147 #ifdef AFS_PTHREAD_ENV
 148 #include <assert.h>
 149
 150 /*
 151  * Use procedural initialization of mutexes/condition variables
 152  * to ease NT porting
 153  */
 154
 155 extern pthread_mutex_t rx_stats_mutex;
 156 extern pthread_mutex_t des_init_mutex;
 157 extern pthread_mutex_t des_random_mutex;
 158 extern pthread_mutex_t rx_clock_mutex;
 159 extern pthread_mutex_t rxi_connCacheMutex;
 160 extern pthread_mutex_t rx_event_mutex;
 161 extern pthread_mutex_t osi_malloc_mutex;
 162 extern pthread_mutex_t event_handler_mutex;
 163 extern pthread_mutex_t listener_mutex;
 164 extern pthread_mutex_t rx_if_init_mutex;
 165 extern pthread_mutex_t rx_if_mutex;
 166 extern pthread_mutex_t rxkad_client_uid_mutex;
 167 extern pthread_mutex_t rxkad_random_mutex;
 168
 169 extern pthread_cond_t rx_event_handler_cond;
 170 extern pthread_cond_t rx_listener_cond;
 171
 172 static pthread_mutex_t epoch_mutex;
 173 static pthread_mutex_t rx_init_mutex;
 174 static pthread_mutex_t rx_debug_mutex;
 175
 176 static void
 177 rxi_InitPthread(void)
 178 {
 179     assert(pthread_mutex_init(&rx_clock_mutex, (const pthread_mutexattr_t *)0)
 180            == 0);
 181     assert(pthread_mutex_init(&rx_stats_mutex, (const pthread_mutexattr_t *)0)
 182            == 0);
 183     assert(pthread_mutex_init
 184            (&rxi_connCacheMutex, (const pthread_mutexattr_t *)0) == 0);
 185     assert(pthread_mutex_init(&rx_init_mutex, (const pthread_mutexattr_t *)0)
 186            == 0);
 187     assert(pthread_mutex_init(&epoch_mutex, (const pthread_mutexattr_t *)0) ==
 188            0);
 189     assert(pthread_mutex_init(&rx_event_mutex, (const pthread_mutexattr_t *)0)
 190            == 0);
 191     assert(pthread_mutex_init(&des_init_mutex, (const pthread_mutexattr_t *)0)
 192            == 0);
 193     assert(pthread_mutex_init
 194            (&des_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 195     assert(pthread_mutex_init
 196            (&osi_malloc_mutex, (const pthread_mutexattr_t *)0) == 0);
 197     assert(pthread_mutex_init
 198            (&event_handler_mutex, (const pthread_mutexattr_t *)0) == 0);
 199     assert(pthread_mutex_init(&listener_mutex, (const pthread_mutexattr_t *)0)
 200            == 0);
 201     assert(pthread_mutex_init
 202            (&rx_if_init_mutex, (const pthread_mutexattr_t *)0) == 0);
 203     assert(pthread_mutex_init(&rx_if_mutex, (const pthread_mutexattr_t *)0) ==
 204            0);
 205     assert(pthread_mutex_init
 206            (&rxkad_client_uid_mutex, (const pthread_mutexattr_t *)0) == 0);
 207     assert(pthread_mutex_init
 208            (&rxkad_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 209     assert(pthread_mutex_init(&rx_debug_mutex, (const pthread_mutexattr_t *)0)
 210            == 0);
 211
 212     assert(pthread_cond_init
 213            (&rx_event_handler_cond, (const pthread_condattr_t *)0) == 0);
 214     assert(pthread_cond_init(&rx_listener_cond, (const pthread_condattr_t *)0)
 215            == 0);
 216     assert(pthread_key_create(&rx_thread_id_key, NULL) == 0);
 217     assert(pthread_key_create(&rx_ts_info_key, NULL) == 0);
 218
 219     rxkad_global_stats_init();
 220 }
 221
 222 pthread_once_t rx_once_init = PTHREAD_ONCE_INIT;
 223 #define INIT_PTHREAD_LOCKS \
 224 assert(pthread_once(&rx_once_init, rxi_InitPthread)==0)
 225 /*
 226  * The rx_stats_mutex mutex protects the following global variables:
 227  * rxi_dataQuota
 228  * rxi_minDeficit
 229  * rxi_availProcs
 230  * rxi_totalMin
 231  * rxi_lowConnRefCount
 232  * rxi_lowPeerRefCount
 233  * rxi_nCalls
 234  * rxi_Alloccnt
 235  * rxi_Allocsize
 236  * rx_nFreePackets
 237  * rx_tq_debug
 238  * rx_stats
 239  */
 240 #else
 241 #define INIT_PTHREAD_LOCKS
 242 #endif
 243
 244
 245 /* Variables for handling the minProcs implementation.  availProcs gives the
 246  * number of threads available in the pool at this moment (not counting dudes
 247  * executing right now).  totalMin gives the total number of procs required
 248  * for handling all minProcs requests.  minDeficit is a dynamic variable
 249  * tracking the # of procs required to satisfy all of the remaining minProcs
 250  * demands.
 251  * For fine grain locking to work, the quota check and the reservation of
 252  * a server thread has to come while rxi_availProcs and rxi_minDeficit
 253  * are locked. To this end, the code has been modified under #ifdef
 254  * RX_ENABLE_LOCKS so that quota checks and reservation occur at the
 255  * same time. A new function, ReturnToServerPool() returns the allocation.
 256  *
 257  * A call can be on several queue's (but only one at a time). When
 258  * rxi_ResetCall wants to remove the call from a queue, it has to ensure
 259  * that no one else is touching the queue. To this end, we store the address
 260  * of the queue lock in the call structure (under the call lock) when we
 261  * put the call on a queue, and we clear the call_queue_lock when the
 262  * call is removed from a queue (once the call lock has been obtained).
 263  * This allows rxi_ResetCall to safely synchronize with others wishing
 264  * to manipulate the queue.
 265  */
 266
 267 #ifdef RX_ENABLE_LOCKS
 268 static afs_kmutex_t rx_rpc_stats;
 269 void rxi_StartUnlocked();
 270 #endif
 271
 272 /* We keep a "last conn pointer" in rxi_FindConnection. The odds are
 273 ** pretty good that the next packet coming in is from the same connection
** as the last packet, since we send multiple packets in a transmit window.
 275 */
 276 struct rx_connection *rxLastConn = 0;
 277
 278 #ifdef RX_ENABLE_LOCKS
 279 /* The locking hierarchy for rx fine grain locking is composed of these
 280  * tiers:
 281  *
 282  * rx_connHashTable_lock - synchronizes conn creation, rx_connHashTable access
 * conn_call_lock - used to synchronize rx_EndCall and rx_NewCall
 284  * call->lock - locks call data fields.
 285  * These are independent of each other:
 286  *      rx_freeCallQueue_lock
 287  *      rxi_keyCreate_lock
 288  * rx_serverPool_lock
 289  * freeSQEList_lock
 290  *
 291  * serverQueueEntry->lock
 292  * rx_rpc_stats
 293  * rx_peerHashTable_lock - locked under rx_connHashTable_lock
 294  * peer->lock - locks peer data fields.
 295  * conn_data_lock - that more than one thread is not updating a conn data
 296  *                  field at the same time.
 297  * rx_freePktQ_lock
 298  *
 299  * lowest level:
 300  *      multi_handle->lock
 301  *      rxevent_lock
 302  *      rx_stats_mutex
 303  *
 304  * Do we need a lock to protect the peer field in the conn structure?
 305  *      conn->peer was previously a constant for all intents and so has no
 306  *      lock protecting this field. The multihomed client delta introduced
 307  *      a RX code change : change the peer field in the connection structure
 *      to that remote interface from which the last packet for this
 309  *      connection was sent out. This may become an issue if further changes
 310  *      are made.
 311  */
 312 #define SET_CALL_QUEUE_LOCK(C, L) (C)->call_queue_lock = (L)
 313 #define CLEAR_CALL_QUEUE_LOCK(C) (C)->call_queue_lock = NULL
 314 #ifdef RX_LOCKS_DB
 315 /* rxdb_fileID is used to identify the lock location, along with line#. */
 316 static int rxdb_fileID = RXDB_FILE_RX;
 317 #endif /* RX_LOCKS_DB */
 318 #else /* RX_ENABLE_LOCKS */
 319 #define SET_CALL_QUEUE_LOCK(C, L)
 320 #define CLEAR_CALL_QUEUE_LOCK(C)
 321 #endif /* RX_ENABLE_LOCKS */
 322 struct rx_serverQueueEntry *rx_waitForPacket = 0;
 323 struct rx_serverQueueEntry *rx_waitingForPacket = 0;
 324
 325 /* ------------Exported Interfaces------------- */
 326
 327 /* This function allows rxkad to set the epoch to a suitably random number
 * which rx_NewConnection will use in the future.  The principal purpose is to
 329  * get rxnull connections to use the same epoch as the rxkad connections do, at
 330  * least once the first rxkad connection is established.  This is important now
 331  * that the host/port addresses aren't used in FindConnection: the uniqueness
 332  * of epoch/cid matters and the start time won't do. */
 333
 334 #ifdef AFS_PTHREAD_ENV
 335 /*
 336  * This mutex protects the following global variables:
 337  * rx_epoch
 338  */
 339
 340 #define LOCK_EPOCH assert(pthread_mutex_lock(&epoch_mutex)==0)
 341 #define UNLOCK_EPOCH assert(pthread_mutex_unlock(&epoch_mutex)==0)
 342 #else
 343 #define LOCK_EPOCH
 344 #define UNLOCK_EPOCH
 345 #endif /* AFS_PTHREAD_ENV */
 346
 347 void
 348 rx_SetEpoch(afs_uint32 epoch)
 349 {
 350     LOCK_EPOCH;
 351     rx_epoch = epoch;
 352     UNLOCK_EPOCH;
 353 }
 354
 355 /* Initialize rx.  A port number may be mentioned, in which case this
 356  * becomes the default port number for any service installed later.
 357  * If 0 is provided for the port number, a random port will be chosen
 358  * by the kernel.  Whether this will ever overlap anything in
 359  * /etc/services is anybody's guess...  Returns 0 on success, -1 on
 360  * error. */
/* Initialization state flag: starts at 1 and is set to 0 by rx_InitHost once
 * initialization has completed.  File-local everywhere except NT builds. */
#ifndef AFS_NT40_ENV
static
#endif
int rxinit_status = 1;
 365 #ifdef AFS_PTHREAD_ENV
 366 /*
 367  * This mutex protects the following global variables:
 368  * rxinit_status
 369  */
 370
 371 #define LOCK_RX_INIT assert(pthread_mutex_lock(&rx_init_mutex)==0)
 372 #define UNLOCK_RX_INIT assert(pthread_mutex_unlock(&rx_init_mutex)==0)
 373 #else
 374 #define LOCK_RX_INIT
 375 #define UNLOCK_RX_INIT
 376 #endif
 377
 378 int
 379 rx_InitHost(u_int host, u_int port)
 380 {
 381 #ifdef KERNEL
 382     osi_timeval_t tv;
 383 #else /* KERNEL */
 384     struct timeval tv;
 385 #endif /* KERNEL */
 386     char *htable, *ptable;
 387     int tmp_status;
 388
 389     SPLVAR;
 390
 391     INIT_PTHREAD_LOCKS;
 392     LOCK_RX_INIT;
 393     if (rxinit_status == 0) {
 394         tmp_status = rxinit_status;
 395         UNLOCK_RX_INIT;
 396         return tmp_status;      /* Already started; return previous error code. */
 397     }
 398 #ifdef RXDEBUG
 399     rxi_DebugInit();
 400 #endif
 401 #ifdef AFS_NT40_ENV
 402     if (afs_winsockInit() < 0)
 403         return -1;
 404 #endif
 405
 406 #ifndef KERNEL
 407     /*
 408      * Initialize anything necessary to provide a non-premptive threading
 409      * environment.
 410      */
 411     rxi_InitializeThreadSupport();
 412 #endif
 413
 414     /* Allocate and initialize a socket for client and perhaps server
 415      * connections. */
 416
 417     rx_socket = rxi_GetHostUDPSocket(host, (u_short) port);
 418     if (rx_socket == OSI_NULLSOCKET) {
 419         UNLOCK_RX_INIT;
 420         return RX_ADDRINUSE;
 421     }
 422 #ifdef  RX_ENABLE_LOCKS
 423 #ifdef RX_LOCKS_DB
 424     rxdb_init();
 425 #endif /* RX_LOCKS_DB */
 426     MUTEX_INIT(&rx_stats_mutex, "rx_stats_mutex", MUTEX_DEFAULT, 0);
 427     MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
 428     MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
 429     MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
 430     MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
 431                0);
 432     CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
 433             0);
 434     MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
 435                0);
 436     MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
 437                0);
 438     MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
 439 #ifndef KERNEL
 440     MUTEX_INIT(&rxi_keyCreate_lock, "rxi_keyCreate_lock", MUTEX_DEFAULT, 0);
 441 #endif /* !KERNEL */
 442 #if defined(KERNEL) && defined(AFS_HPUX110_ENV)
 443     if (!uniprocessor)
 444         rx_sleepLock = alloc_spinlock(LAST_HELD_ORDER - 10, "rx_sleepLock");
 445 #endif /* KERNEL && AFS_HPUX110_ENV */
 446 #endif /* RX_ENABLE_LOCKS */
 447
 448     rxi_nCalls = 0;
 449     rx_connDeadTime = 12;
 450     rx_tranquil = 0;            /* reset flag */
 451     memset((char *)&rx_stats, 0, sizeof(struct rx_stats));
 452     htable = (char *)
 453         osi_Alloc(rx_hashTableSize * sizeof(struct rx_connection *));
 454     PIN(htable, rx_hashTableSize * sizeof(struct rx_connection *));     /* XXXXX */
 455     memset(htable, 0, rx_hashTableSize * sizeof(struct rx_connection *));
 456     ptable = (char *)osi_Alloc(rx_hashTableSize * sizeof(struct rx_peer *));
 457     PIN(ptable, rx_hashTableSize * sizeof(struct rx_peer *));   /* XXXXX */
 458     memset(ptable, 0, rx_hashTableSize * sizeof(struct rx_peer *));
 459
 460     /* Malloc up a bunch of packets & buffers */
 461     rx_nFreePackets = 0;
 462     queue_Init(&rx_freePacketQueue);
 463     rxi_NeedMorePackets = FALSE;
 464 #ifdef RX_ENABLE_TSFPQ
 465     rx_nPackets = 0;    /* in TSFPQ version, rx_nPackets is managed by rxi_MorePackets* */
 466     rxi_MorePacketsTSFPQ(rx_extraPackets + RX_MAX_QUOTA + 2, RX_TS_FPQ_FLUSH_GLOBAL, 0);
 467 #else /* RX_ENABLE_TSFPQ */
 468     rx_nPackets = rx_extraPackets + RX_MAX_QUOTA + 2;   /* fudge */
 469     rxi_MorePackets(rx_nPackets);
 470 #endif /* RX_ENABLE_TSFPQ */
 471     rx_CheckPackets();
 472
 473     NETPRI;
 474
 475     clock_Init();
 476
 477 #if defined(AFS_NT40_ENV) && !defined(AFS_PTHREAD_ENV)
 478     tv.tv_sec = clock_now.sec;
 479     tv.tv_usec = clock_now.usec;
 480     srand((unsigned int)tv.tv_usec);
 481 #else
 482     osi_GetTime(&tv);
 483 #endif
 484     if (port) {
 485         rx_port = port;
 486     } else {
 487 #if defined(KERNEL) && !defined(UKERNEL)
 488         /* Really, this should never happen in a real kernel */
 489         rx_port = 0;
 490 #else
 491         struct sockaddr_in addr;
 492         int addrlen = sizeof(addr);
 493         if (getsockname((int)rx_socket, (struct sockaddr *)&addr, &addrlen)) {
 494             rx_Finalize();
 495             return -1;
 496         }
 497         rx_port = addr.sin_port;
 498 #endif
 499     }
 500     rx_stats.minRtt.sec = 9999999;
 501 #ifdef  KERNEL
 502     rx_SetEpoch(tv.tv_sec | 0x80000000);
 503 #else
 504     rx_SetEpoch(tv.tv_sec);     /* Start time of this package, rxkad
 505                                  * will provide a randomer value. */
 506 #endif
 507     MUTEX_ENTER(&rx_stats_mutex);
 508     rxi_dataQuota += rx_extraQuota;     /* + extra pkts caller asked to rsrv */
 509     MUTEX_EXIT(&rx_stats_mutex);
 510     /* *Slightly* random start time for the cid.  This is just to help
 511      * out with the hashing function at the peer */
 512     rx_nextCid = ((tv.tv_sec ^ tv.tv_usec) << RX_CIDSHIFT);
 513     rx_connHashTable = (struct rx_connection **)htable;
 514     rx_peerHashTable = (struct rx_peer **)ptable;
 515
 516     rx_lastAckDelay.sec = 0;
 517     rx_lastAckDelay.usec = 400000;      /* 400 milliseconds */
 518     rx_hardAckDelay.sec = 0;
 519     rx_hardAckDelay.usec = 100000;      /* 100 milliseconds */
 520     rx_softAckDelay.sec = 0;
 521     rx_softAckDelay.usec = 100000;      /* 100 milliseconds */
 522
 523     rxevent_Init(20, rxi_ReScheduleEvents);
 524
 525     /* Initialize various global queues */
 526     queue_Init(&rx_idleServerQueue);
 527     queue_Init(&rx_incomingCallQueue);
 528     queue_Init(&rx_freeCallQueue);
 529
 530 #if defined(AFS_NT40_ENV) && !defined(KERNEL)
 531     /* Initialize our list of usable IP addresses. */
 532     rx_GetIFInfo();
 533 #endif
 534
 535     /* Start listener process (exact function is dependent on the
 536      * implementation environment--kernel or user space) */
 537     rxi_StartListener();
 538
 539     USERPRI;
 540     tmp_status = rxinit_status = 0;
 541     UNLOCK_RX_INIT;
 542     return tmp_status;
 543 }
 544
 545 int
 546 rx_Init(u_int port)
 547 {
 548     return rx_InitHost(htonl(INADDR_ANY), port);
 549 }
 550
 551 /* called with unincremented nRequestsRunning to see if it is OK to start
 552  * a new thread in this service.  Could be "no" for two reasons: over the
 553  * max quota, or would prevent others from reaching their min quota.
 554  */
 555 #ifdef RX_ENABLE_LOCKS
 556 /* This verion of QuotaOK reserves quota if it's ok while the
 557  * rx_serverPool_lock is held.  Return quota using ReturnToServerPool().
 558  */
 559 static int
 560 QuotaOK(register struct rx_service *aservice)
 561 {
 562     /* check if over max quota */
 563     if (aservice->nRequestsRunning >= aservice->maxProcs) {
 564         return 0;
 565     }
 566
 567     /* under min quota, we're OK */
 568     /* otherwise, can use only if there are enough to allow everyone
 569      * to go to their min quota after this guy starts.
 570      */
 571     MUTEX_ENTER(&rx_stats_mutex);
 572     if ((aservice->nRequestsRunning < aservice->minProcs)
 573         || (rxi_availProcs > rxi_minDeficit)) {
 574         aservice->nRequestsRunning++;
 575         /* just started call in minProcs pool, need fewer to maintain
 576          * guarantee */
 577         if (aservice->nRequestsRunning <= aservice->minProcs)
 578             rxi_minDeficit--;
 579         rxi_availProcs--;
 580         MUTEX_EXIT(&rx_stats_mutex);
 581         return 1;
 582     }
 583     MUTEX_EXIT(&rx_stats_mutex);
 584
 585     return 0;
 586 }
 587
 588 static void
 589 ReturnToServerPool(register struct rx_service *aservice)
 590 {
 591     aservice->nRequestsRunning--;
 592     MUTEX_ENTER(&rx_stats_mutex);
 593     if (aservice->nRequestsRunning < aservice->minProcs)
 594         rxi_minDeficit++;
 595     rxi_availProcs++;
 596     MUTEX_EXIT(&rx_stats_mutex);
 597 }
 598
 599 #else /* RX_ENABLE_LOCKS */
 600 static int
 601 QuotaOK(register struct rx_service *aservice)
 602 {
 603     int rc = 0;
 604     /* under min quota, we're OK */
 605     if (aservice->nRequestsRunning < aservice->minProcs)
 606         return 1;
 607
 608     /* check if over max quota */
 609     if (aservice->nRequestsRunning >= aservice->maxProcs)
 610         return 0;
 611
 612     /* otherwise, can use only if there are enough to allow everyone
 613      * to go to their min quota after this guy starts.
 614      */
 615     if (rxi_availProcs > rxi_minDeficit)
 616         rc = 1;
 617     return rc;
 618 }
 619 #endif /* RX_ENABLE_LOCKS */
 620
 621 #ifndef KERNEL
 622 /* Called by rx_StartServer to start up lwp's to service calls.
 623    NExistingProcs gives the number of procs already existing, and which
 624    therefore needn't be created. */
 625 void
 626 rxi_StartServerProcs(int nExistingProcs)
 627 {
 628     register struct rx_service *service;
 629     register int i;
 630     int maxdiff = 0;
 631     int nProcs = 0;
 632
 633     /* For each service, reserve N processes, where N is the "minimum"
 634      * number of processes that MUST be able to execute a request in parallel,
 635      * at any time, for that process.  Also compute the maximum difference
 636      * between any service's maximum number of processes that can run
 637      * (i.e. the maximum number that ever will be run, and a guarantee
 638      * that this number will run if other services aren't running), and its
 639      * minimum number.  The result is the extra number of processes that
 640      * we need in order to provide the latter guarantee */
 641     for (i = 0; i < RX_MAX_SERVICES; i++) {
 642         int diff;
 643         service = rx_services[i];
 644         if (service == (struct rx_service *)0)
 645             break;
 646         nProcs += service->minProcs;
 647         diff = service->maxProcs - service->minProcs;
 648         if (diff > maxdiff)
 649             maxdiff = diff;
 650     }
 651     nProcs += maxdiff;          /* Extra processes needed to allow max number requested to run in any given service, under good conditions */
 652     nProcs -= nExistingProcs;   /* Subtract the number of procs that were previously created for use as server procs */
 653     for (i = 0; i < nProcs; i++) {
 654         rxi_StartServerProc(rx_ServerProc, rx_stackSize);
 655     }
 656 }
 657 #endif /* KERNEL */
 658
 659 #ifdef AFS_NT40_ENV
 660 /* This routine is only required on Windows */
 661 void
 662 rx_StartClientThread(void)
 663 {
 664 #ifdef AFS_PTHREAD_ENV
 665     pthread_t pid;
 666     pid = pthread_self();
 667 #endif /* AFS_PTHREAD_ENV */
 668 }
 669 #endif /* AFS_NT40_ENV */
 670
 671 /* This routine must be called if any services are exported.  If the
 672  * donateMe flag is set, the calling process is donated to the server
 673  * process pool */
 674 void
 675 rx_StartServer(int donateMe)
 676 {
 677     register struct rx_service *service;
 678     register int i;
 679     SPLVAR;
 680     clock_NewTime();
 681
 682     NETPRI;
 683     /* Start server processes, if necessary (exact function is dependent
 684      * on the implementation environment--kernel or user space).  DonateMe
 685      * will be 1 if there is 1 pre-existing proc, i.e. this one.  In this
 686      * case, one less new proc will be created rx_StartServerProcs.
 687      */
 688     rxi_StartServerProcs(donateMe);
 689
 690     /* count up the # of threads in minProcs, and add set the min deficit to
 691      * be that value, too.
 692      */
 693     for (i = 0; i < RX_MAX_SERVICES; i++) {
 694         service = rx_services[i];
 695         if (service == (struct rx_service *)0)
 696             break;
 697         MUTEX_ENTER(&rx_stats_mutex);
 698         rxi_totalMin += service->minProcs;
 699         /* below works even if a thread is running, since minDeficit would
 700          * still have been decremented and later re-incremented.
 701          */
 702         rxi_minDeficit += service->minProcs;
 703         MUTEX_EXIT(&rx_stats_mutex);
 704     }
 705
 706     /* Turn on reaping of idle server connections */
 707     rxi_ReapConnections();
 708
 709     USERPRI;
 710
 711     if (donateMe) {
 712 #ifndef AFS_NT40_ENV
 713 #ifndef KERNEL
 714         char name[32];
 715         static int nProcs;
 716 #ifdef AFS_PTHREAD_ENV
 717         pid_t pid;
 718         pid = (pid_t) pthread_self();
 719 #else /* AFS_PTHREAD_ENV */
 720         PROCESS pid;
 721         LWP_CurrentProcess(&pid);
 722 #endif /* AFS_PTHREAD_ENV */
 723
 724         sprintf(name, "srv_%d", ++nProcs);
 725         if (registerProgram)
 726             (*registerProgram) (pid, name);
 727 #endif /* KERNEL */
 728 #endif /* AFS_NT40_ENV */
 729         rx_ServerProc(NULL);    /* Never returns */
 730     }
 731 #ifdef RX_ENABLE_TSFPQ
 732     /* no use leaving packets around in this thread's local queue if
 733      * it isn't getting donated to the server thread pool.
 734      */
 735     rxi_FlushLocalPacketsTSFPQ();
 736 #endif /* RX_ENABLE_TSFPQ */
 737     return;
 738 }
 739
 740 /* Create a new client connection to the specified service, using the
 741  * specified security object to implement the security model for this
 742  * connection. */
 743 struct rx_connection *
 744 rx_NewConnection(register afs_uint32 shost, u_short sport, u_short sservice,
 745                  register struct rx_securityClass *securityObject,
 746                  int serviceSecurityIndex)
 747 {
 748     int hashindex, i;
 749     afs_int32 cid, cix, nclones;
 750     register struct rx_connection *conn, *tconn, *ptconn;
 751
 752     SPLVAR;
 753
 754     clock_NewTime();
 755     dpf(("rx_NewConnection(host %x, port %u, service %u, securityObject %x, serviceSecurityIndex %d)\n", ntohl(shost), ntohs(sport), sservice, securityObject, serviceSecurityIndex));
 756
 757         conn = tconn = 0;
 758         nclones = rx_max_clones_per_connection;
 759
 760     /* Vasilsi said: "NETPRI protects Cid and Alloc", but can this be true in
 761      * the case of kmem_alloc? */
 762
 763     NETPRI;
 764     MUTEX_ENTER(&rx_connHashTable_lock);
 765
 766     /* send in the clones */
 767     for(cix = 0; cix <= nclones; ++cix) {
 768
 769           ptconn = tconn;
 770           tconn = rxi_AllocConnection();
 771           tconn->type = RX_CLIENT_CONNECTION;
 772           tconn->epoch = rx_epoch;
 773           tconn->peer = rxi_FindPeer(shost, sport, 0, 1);
 774           tconn->serviceId = sservice;
 775           tconn->securityObject = securityObject;
 776           tconn->securityData = (void *) 0;
 777           tconn->securityIndex = serviceSecurityIndex;
 778           tconn->ackRate = RX_FAST_ACK_RATE;
 779           tconn->nSpecific = 0;
 780           tconn->specific = NULL;
 781           tconn->challengeEvent = NULL;
 782           tconn->delayedAbortEvent = NULL;
 783           tconn->abortCount = 0;
 784           tconn->error = 0;
 785     for (i = 0; i < RX_MAXCALLS; i++) {
 786         tconn->twind[i] = rx_initSendWindow;
 787         tconn->rwind[i] = rx_initReceiveWindow;
 788     }
 789           tconn->parent = 0;
 790           tconn->next_clone = 0;
 791           tconn->nclones = nclones;
 792           rx_SetConnDeadTime(tconn, rx_connDeadTime);
 793
 794           if(cix == 0) {
 795                 conn = tconn;
 796           } else {
 797                 tconn->flags |= RX_CLONED_CONNECTION;
 798                 tconn->parent = conn;
 799                 ptconn->next_clone = tconn;
 800           }
 801
 802           /* generic connection setup */
 803 #ifdef  RX_ENABLE_LOCKS
 804           MUTEX_INIT(&tconn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
 805           MUTEX_INIT(&tconn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
 806           CV_INIT(&tconn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
 807 #endif
 808           cid = (rx_nextCid += RX_MAXCALLS);
 809           tconn->cid = cid;
 810           RXS_NewConnection(securityObject, tconn);
 811           hashindex =
 812                 CONN_HASH(shost, sport, tconn->cid, tconn->epoch,
 813                                   RX_CLIENT_CONNECTION);
 814           tconn->refCount++; /* no lock required since only this thread knows */
 815           tconn->next = rx_connHashTable[hashindex];
 816           rx_connHashTable[hashindex] = tconn;
 817           rx_MutexIncrement(rx_stats.nClientConns, rx_stats_mutex);
 818     }
 819
 820     MUTEX_EXIT(&rx_connHashTable_lock);
 821     USERPRI;
 822     return conn;
 823 }
 824
 825 void
 826 rx_SetConnDeadTime(register struct rx_connection *conn, register int seconds)
 827 {
 828   /* The idea is to set the dead time to a value that allows several
 829    * keepalives to be dropped without timing out the connection. */
 830   struct rx_connection *tconn;
 831   tconn = conn;
 832   do {
 833         tconn->secondsUntilDead = MAX(seconds, 6);
 834         tconn->secondsUntilPing = tconn->secondsUntilDead / 6;
 835   } while(tconn->next_clone && (tconn = tconn->next_clone));
 836 }
 837
/* Incremented by rxi_CleanupConnection each time it finds a peer whose
 * refCount is already below 1.  Updated under rx_stats_mutex. */
int rxi_lowPeerRefCount = 0;
/* Listed among the globals protected by rx_stats_mutex.
 * NOTE(review): no update site is visible in this part of the file;
 * presumably the connection-level counterpart of the counter above. */
int rxi_lowConnRefCount = 0;
 840
 841 /*
 842  * Cleanup a connection that was destroyed in rxi_DestroyConnectioNoLock.
 843  * NOTE: must not be called with rx_connHashTable_lock held.
 844  */
 845 void
 846 rxi_CleanupConnection(struct rx_connection *conn)
 847 {
 848     /* Notify the service exporter, if requested, that this connection
 849      * is being destroyed */
 850     if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
 851         (*conn->service->destroyConnProc) (conn);
 852
 853     /* Notify the security module that this connection is being destroyed */
 854     RXS_DestroyConnection(conn->securityObject, conn);
 855
 856     /* If this is the last connection using the rx_peer struct, set its
 857      * idle time to now. rxi_ReapConnections will reap it if it's still
 858      * idle (refCount == 0) after rx_idlePeerTime (60 seconds) have passed.
 859      */
 860     MUTEX_ENTER(&rx_peerHashTable_lock);
 861     if (conn->peer->refCount < 2) {
 862         conn->peer->idleWhen = clock_Sec();
 863         if (conn->peer->refCount < 1) {
 864             conn->peer->refCount = 1;
 865             MUTEX_ENTER(&rx_stats_mutex);
 866             rxi_lowPeerRefCount++;
 867             MUTEX_EXIT(&rx_stats_mutex);
 868         }
 869     }
 870     conn->peer->refCount--;
 871     MUTEX_EXIT(&rx_peerHashTable_lock);
 872
 873     if (conn->type == RX_SERVER_CONNECTION)
 874         rx_MutexDecrement(rx_stats.nServerConns, rx_stats_mutex);
 875     else
 876         rx_MutexDecrement(rx_stats.nClientConns, rx_stats_mutex);
 877 #ifndef KERNEL
 878     if (conn->specific) {
 879         int i;
 880         for (i = 0; i < conn->nSpecific; i++) {
 881             if (conn->specific[i] && rxi_keyCreate_destructor[i])
 882                 (*rxi_keyCreate_destructor[i]) (conn->specific[i]);
 883             conn->specific[i] = NULL;
 884         }
 885         free(conn->specific);
 886     }
 887     conn->specific = NULL;
 888     conn->nSpecific = 0;
 889 #endif /* !KERNEL */
 890
 891     MUTEX_DESTROY(&conn->conn_call_lock);
 892     MUTEX_DESTROY(&conn->conn_data_lock);
 893     CV_DESTROY(&conn->conn_call_cv);
 894
 895     rxi_FreeConnection(conn);
 896 }
 897
 898 /* Destroy the specified connection */
 899 void
 900 rxi_DestroyConnection(register struct rx_connection *conn)
 901 {
 902   register struct rx_connection *tconn, *dtconn;
 903
 904   MUTEX_ENTER(&rx_connHashTable_lock);
 905
 906   if(!(conn->flags & RX_CLONED_CONNECTION)) {
 907         tconn = conn->next_clone;
 908         conn->next_clone = 0; /* once */
 909         do {
 910           if(tconn) {
 911                 dtconn = tconn;
 912                 tconn = tconn->next_clone;
 913                 rxi_DestroyConnectionNoLock(dtconn);
 914                 /* destroyed? */
 915                 if (dtconn == rx_connCleanup_list) {
 916                   rx_connCleanup_list = rx_connCleanup_list->next;
 917                   MUTEX_EXIT(&rx_connHashTable_lock);
 918                   /* rxi_CleanupConnection will free tconn */
 919                   rxi_CleanupConnection(dtconn);
 920                   MUTEX_ENTER(&rx_connHashTable_lock);
 921                   (conn->nclones)--;
 922                 }
 923           }
 924         } while(tconn);
 925   }
 926
 927   rxi_DestroyConnectionNoLock(conn);
 928   /* conn should be at the head of the cleanup list */
 929   if (conn == rx_connCleanup_list) {
 930         rx_connCleanup_list = rx_connCleanup_list->next;
 931         MUTEX_EXIT(&rx_connHashTable_lock);
 932         rxi_CleanupConnection(conn);
 933   }
 934 #ifdef RX_ENABLE_LOCKS
 935   else {
 936         MUTEX_EXIT(&rx_connHashTable_lock);
 937   }
 938 #endif /* RX_ENABLE_LOCKS */
 939 }
 940
 941 static void
 942 rxi_DestroyConnectionNoLock(register struct rx_connection *conn)
 943 {
 944     register struct rx_connection **conn_ptr;
 945     register int havecalls = 0;
 946     struct rx_packet *packet;
 947     int i;
 948     SPLVAR;
 949
 950     clock_NewTime();
 951
 952     NETPRI;
 953     MUTEX_ENTER(&conn->conn_data_lock);
 954     if (conn->refCount > 0)
 955         conn->refCount--;
 956     else {
 957         MUTEX_ENTER(&rx_stats_mutex);
 958         rxi_lowConnRefCount++;
 959         MUTEX_EXIT(&rx_stats_mutex);
 960     }
 961
 962     if ((conn->refCount > 0) || (conn->flags & RX_CONN_BUSY)) {
 963         /* Busy; wait till the last guy before proceeding */
 964         MUTEX_EXIT(&conn->conn_data_lock);
 965         USERPRI;
 966         return;
 967     }
 968
 969     /* If the client previously called rx_NewCall, but it is still
 970      * waiting, treat this as a running call, and wait to destroy the
 971      * connection later when the call completes. */
 972     if ((conn->type == RX_CLIENT_CONNECTION)
 973         && (conn->flags & RX_CONN_MAKECALL_WAITING)) {
 974         conn->flags |= RX_CONN_DESTROY_ME;
 975         MUTEX_EXIT(&conn->conn_data_lock);
 976         USERPRI;
 977         return;
 978     }
 979     MUTEX_EXIT(&conn->conn_data_lock);
 980
 981     /* Check for extant references to this connection */
 982     for (i = 0; i < RX_MAXCALLS; i++) {
 983         register struct rx_call *call = conn->call[i];
 984         if (call) {
 985             havecalls = 1;
 986             if (conn->type == RX_CLIENT_CONNECTION) {
 987                 MUTEX_ENTER(&call->lock);
 988                 if (call->delayedAckEvent) {
 989                     /* Push the final acknowledgment out now--there
 990                      * won't be a subsequent call to acknowledge the
 991                      * last reply packets */
 992                     rxevent_Cancel(call->delayedAckEvent, call,
 993                                    RX_CALL_REFCOUNT_DELAY);
 994                     if (call->state == RX_STATE_PRECALL
 995                         || call->state == RX_STATE_ACTIVE) {
 996                         rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
 997                     } else {
 998                         rxi_AckAll(NULL, call, 0);
 999                     }
1000                 }
1001                 MUTEX_EXIT(&call->lock);
1002             }
1003         }
1004     }
1005 #ifdef RX_ENABLE_LOCKS
1006     if (!havecalls) {
1007         if (MUTEX_TRYENTER(&conn->conn_data_lock)) {
1008             MUTEX_EXIT(&conn->conn_data_lock);
1009         } else {
1010             /* Someone is accessing a packet right now. */
1011             havecalls = 1;
1012         }
1013     }
1014 #endif /* RX_ENABLE_LOCKS */
1015
1016     if (havecalls) {
1017         /* Don't destroy the connection if there are any call
1018          * structures still in use */
1019         MUTEX_ENTER(&conn->conn_data_lock);
1020         conn->flags |= RX_CONN_DESTROY_ME;
1021         MUTEX_EXIT(&conn->conn_data_lock);
1022         USERPRI;
1023         return;
1024     }
1025
1026     if (conn->delayedAbortEvent) {
1027         rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
1028         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1029         if (packet) {
1030             MUTEX_ENTER(&conn->conn_data_lock);
1031             rxi_SendConnectionAbort(conn, packet, 0, 1);
1032             MUTEX_EXIT(&conn->conn_data_lock);
1033             rxi_FreePacket(packet);
1034         }
1035     }
1036
1037     /* Remove from connection hash table before proceeding */
1038     conn_ptr =
1039         &rx_connHashTable[CONN_HASH
1040                           (peer->host, peer->port, conn->cid, conn->epoch,
1041                            conn->type)];
1042     for (; *conn_ptr; conn_ptr = &(*conn_ptr)->next) {
1043         if (*conn_ptr == conn) {
1044             *conn_ptr = conn->next;
1045             break;
1046         }
1047     }
1048     /* if the conn that we are destroying was the last connection, then we
1049      * clear rxLastConn as well */
1050     if (rxLastConn == conn)
1051         rxLastConn = 0;
1052
1053     /* Make sure the connection is completely reset before deleting it. */
1054     /* get rid of pending events that could zap us later */
1055     if (conn->challengeEvent)
1056         rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
1057     if (conn->checkReachEvent)
1058         rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
1059
1060     /* Add the connection to the list of destroyed connections that
1061      * need to be cleaned up. This is necessary to avoid deadlocks
1062      * in the routines we call to inform others that this connection is
1063      * being destroyed. */
1064     conn->next = rx_connCleanup_list;
1065     rx_connCleanup_list = conn;
1066 }
1067
1068 /* Externally available version */
1069 void
1070 rx_DestroyConnection(register struct rx_connection *conn)
1071 {
1072     SPLVAR;
1073
1074     NETPRI;
1075     rxi_DestroyConnection(conn);
1076     USERPRI;
1077 }
1078
1079 void
1080 rx_GetConnection(register struct rx_connection *conn)
1081 {
1082     SPLVAR;
1083
1084     NETPRI;
1085     MUTEX_ENTER(&conn->conn_data_lock);
1086     conn->refCount++;
1087     MUTEX_EXIT(&conn->conn_data_lock);
1088     USERPRI;
1089 }
1090
1091 /* Wait for the transmit queue to no longer be busy.
1092  * requires the call->lock to be held */
1093 static void rxi_WaitforTQBusy(struct rx_call *call) {
1094     while (call->flags & RX_CALL_TQ_BUSY) {
1095         call->flags |= RX_CALL_TQ_WAIT;
1096         call->tqWaiters++;
1097 #ifdef RX_ENABLE_LOCKS
1098         osirx_AssertMine(&call->lock, "rxi_WaitforTQ lock");
1099         CV_WAIT(&call->cv_tq, &call->lock);
1100 #else /* RX_ENABLE_LOCKS */
1101         osi_rxSleep(&call->tq);
1102 #endif /* RX_ENABLE_LOCKS */
1103         call->tqWaiters--;
1104         if (call->tqWaiters == 0) {
1105             call->flags &= ~RX_CALL_TQ_WAIT;
1106         }
1107     }
1108 }
1109 /* Start a new rx remote procedure call, on the specified connection.
1110  * If wait is set to 1, wait for a free call channel; otherwise return
1111  * 0.  Maxtime gives the maximum number of seconds this call may take,
1112  * after rx_NewCall returns.  After this time interval, a call to any
1113  * of rx_SendData, rx_ReadData, etc. will fail with RX_CALL_TIMEOUT.
1114  * For fine grain locking, we hold the conn_call_lock in order to
1115  * to ensure that we don't get signalle after we found a call in an active
1116  * state and before we go to sleep.
1117  */
1118 struct rx_call *
1119 rx_NewCall(register struct rx_connection *conn)
1120 {
1121     register int i;
1122     register struct rx_call *call;
1123         register struct rx_connection *tconn;
1124     struct clock queueTime;
1125     SPLVAR;
1126
1127     clock_NewTime();
1128     dpf(("rx_NewCall(conn %x)\n", conn));
1129
1130     NETPRI;
1131     clock_GetTime(&queueTime);
1132     MUTEX_ENTER(&conn->conn_call_lock);
1133
1134     /*
1135      * Check if there are others waiting for a new call.
1136      * If so, let them go first to avoid starving them.
1137      * This is a fairly simple scheme, and might not be
1138      * a complete solution for large numbers of waiters.
1139      *
1140      * makeCallWaiters keeps track of the number of
1141      * threads waiting to make calls and the
1142      * RX_CONN_MAKECALL_WAITING flag bit is used to
1143      * indicate that there are indeed calls waiting.
1144      * The flag is set when the waiter is incremented.
1145      * It is only cleared in rx_EndCall when
1146      * makeCallWaiters is 0.  This prevents us from
1147      * accidently destroying the connection while it
1148      * is potentially about to be used.
1149      */
1150     MUTEX_ENTER(&conn->conn_data_lock);
1151     if (conn->makeCallWaiters) {
1152         conn->flags |= RX_CONN_MAKECALL_WAITING;
1153         conn->makeCallWaiters++;
1154         MUTEX_EXIT(&conn->conn_data_lock);
1155
1156 #ifdef  RX_ENABLE_LOCKS
1157         CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1158 #else
1159         osi_rxSleep(conn);
1160 #endif
1161         MUTEX_ENTER(&conn->conn_data_lock);
1162         conn->makeCallWaiters--;
1163     }
1164     MUTEX_EXIT(&conn->conn_data_lock);
1165
1166         /* search for next free call on this connection or
1167          * its clones, if any */
1168     for (;;) {
1169                 tconn = conn;
1170                 do {
1171                         for (i = 0; i < RX_MAXCALLS; i++) {
1172                                 call = tconn->call[i];
1173                                 if (call) {
1174                                         MUTEX_ENTER(&call->lock);
1175                                         if (call->state == RX_STATE_DALLY) {
1176                                                 rxi_ResetCall(call, 0);
1177                                                 (*call->callNumber)++;
1178                                                 goto f_call;
1179                                         }
1180                                         MUTEX_EXIT(&call->lock);
1181                                 } else {
1182                                         call = rxi_NewCall(tconn, i);
1183                                         goto f_call;
1184                                 }
1185                         } /* for i < RX_MAXCALLS */
1186                 } while (tconn->next_clone && (tconn = tconn->next_clone));
1187
1188         f_call:
1189
1190                 if (i < RX_MAXCALLS) {
1191                         break;
1192                 }
1193
1194                 /* to be here, all available calls for this connection (and all
1195                  * its clones) must be in use */
1196
1197                 MUTEX_ENTER(&conn->conn_data_lock);
1198                 conn->flags |= RX_CONN_MAKECALL_WAITING;
1199                 conn->makeCallWaiters++;
1200                 MUTEX_EXIT(&conn->conn_data_lock);
1201
1202 #ifdef  RX_ENABLE_LOCKS
1203                 CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1204 #else
1205                 osi_rxSleep(conn);
1206 #endif
1207                 MUTEX_ENTER(&conn->conn_data_lock);
1208                 conn->makeCallWaiters--;
1209                 MUTEX_EXIT(&conn->conn_data_lock);
1210     } /* for ;; */
1211     /*
1212      * Wake up anyone else who might be giving us a chance to
1213      * run (see code above that avoids resource starvation).
1214      */
1215 #ifdef  RX_ENABLE_LOCKS
1216     CV_BROADCAST(&conn->conn_call_cv);
1217 #else
1218     osi_rxWakeup(conn);
1219 #endif
1220
1221     CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1222
1223     /* Client is initially in send mode */
1224     call->state = RX_STATE_ACTIVE;
1225     call->error = conn->error;
1226     if (call->error)
1227         call->mode = RX_MODE_ERROR;
1228     else
1229         call->mode = RX_MODE_SENDING;
1230
1231     /* remember start time for call in case we have hard dead time limit */
1232     call->queueTime = queueTime;
1233     clock_GetTime(&call->startTime);
1234     hzero(call->bytesSent);
1235     hzero(call->bytesRcvd);
1236
1237     /* Turn on busy protocol. */
1238     rxi_KeepAliveOn(call);
1239
1240     MUTEX_EXIT(&call->lock);
1241     MUTEX_EXIT(&conn->conn_call_lock);
1242     USERPRI;
1243
1244 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
1245     /* Now, if TQ wasn't cleared earlier, do it now. */
1246     MUTEX_ENTER(&call->lock);
1247     rxi_WaitforTQBusy(call);
1248     if (call->flags & RX_CALL_TQ_CLEARME) {
1249         rxi_ClearTransmitQueue(call, 0);
1250         queue_Init(&call->tq);
1251     }
1252     MUTEX_EXIT(&call->lock);
1253 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
1254
1255     dpf(("rx_NewCall(call %x)\n", call));
1256     return call;
1257 }
1258
1259 int
1260 rxi_HasActiveCalls(register struct rx_connection *aconn)
1261 {
1262     register int i;
1263     register struct rx_call *tcall;
1264     SPLVAR;
1265
1266     NETPRI;
1267     for (i = 0; i < RX_MAXCALLS; i++) {
1268         if ((tcall = aconn->call[i])) {
1269             if ((tcall->state == RX_STATE_ACTIVE)
1270                 || (tcall->state == RX_STATE_PRECALL)) {
1271                 USERPRI;
1272                 return 1;
1273             }
1274         }
1275     }
1276     USERPRI;
1277     return 0;
1278 }
1279
1280 int
1281 rxi_GetCallNumberVector(register struct rx_connection *aconn,
1282                         register afs_int32 * aint32s)
1283 {
1284     register int i;
1285     register struct rx_call *tcall;
1286     SPLVAR;
1287
1288     NETPRI;
1289     for (i = 0; i < RX_MAXCALLS; i++) {
1290         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1291             aint32s[i] = aconn->callNumber[i] + 1;
1292         else
1293             aint32s[i] = aconn->callNumber[i];
1294     }
1295     USERPRI;
1296     return 0;
1297 }
1298
1299 int
1300 rxi_SetCallNumberVector(register struct rx_connection *aconn,
1301                         register afs_int32 * aint32s)
1302 {
1303     register int i;
1304     register struct rx_call *tcall;
1305     SPLVAR;
1306
1307     NETPRI;
1308     for (i = 0; i < RX_MAXCALLS; i++) {
1309         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1310             aconn->callNumber[i] = aint32s[i] - 1;
1311         else
1312             aconn->callNumber[i] = aint32s[i];
1313     }
1314     USERPRI;
1315     return 0;
1316 }
1317
1318 /* Advertise a new service.  A service is named locally by a UDP port
1319  * number plus a 16-bit service id.  Returns (struct rx_service *) 0
1320  * on a failure.
1321  *
1322      char *serviceName;  Name for identification purposes (e.g. the
1323                          service name might be used for probing for
1324                          statistics) */
1325 struct rx_service *
1326 rx_NewServiceHost(afs_uint32 host, u_short port, u_short serviceId,
1327                   char *serviceName, struct rx_securityClass **securityObjects,
1328                   int nSecurityObjects,
1329                   afs_int32(*serviceProc) (struct rx_call * acall))
1330 {
1331     osi_socket socket = OSI_NULLSOCKET;
1332     register struct rx_service *tservice;
1333     register int i;
1334     SPLVAR;
1335
1336     clock_NewTime();
1337
1338     if (serviceId == 0) {
1339         (osi_Msg
1340          "rx_NewService:  service id for service %s is not non-zero.\n",
1341          serviceName);
1342         return 0;
1343     }
1344     if (port == 0) {
1345         if (rx_port == 0) {
1346             (osi_Msg
1347              "rx_NewService: A non-zero port must be specified on this call if a non-zero port was not provided at Rx initialization (service %s).\n",
1348              serviceName);
1349             return 0;
1350         }
1351         port = rx_port;
1352         socket = rx_socket;
1353     }
1354
1355     tservice = rxi_AllocService();
1356     NETPRI;
1357     for (i = 0; i < RX_MAX_SERVICES; i++) {
1358         register struct rx_service *service = rx_services[i];
1359         if (service) {
1360             if (port == service->servicePort && host == service->serviceHost) {
1361                 if (service->serviceId == serviceId) {
1362                     /* The identical service has already been
1363                      * installed; if the caller was intending to
1364                      * change the security classes used by this
1365                      * service, he/she loses. */
1366                     (osi_Msg
1367                      "rx_NewService: tried to install service %s with service id %d, which is already in use for service %s\n",
1368                      serviceName, serviceId, service->serviceName);
1369                     USERPRI;
1370                     rxi_FreeService(tservice);
1371                     return service;
1372                 }
1373                 /* Different service, same port: re-use the socket
1374                  * which is bound to the same port */
1375                 socket = service->socket;
1376             }
1377         } else {
1378             if (socket == OSI_NULLSOCKET) {
1379                 /* If we don't already have a socket (from another
1380                  * service on same port) get a new one */
1381                 socket = rxi_GetHostUDPSocket(htonl(INADDR_ANY), port);
1382                 if (socket == OSI_NULLSOCKET) {
1383                     USERPRI;
1384                     rxi_FreeService(tservice);
1385                     return 0;
1386                 }
1387             }
1388             service = tservice;
1389             service->socket = socket;
1390             service->serviceHost = host;
1391             service->servicePort = port;
1392             service->serviceId = serviceId;
1393             service->serviceName = serviceName;
1394             service->nSecurityObjects = nSecurityObjects;
1395             service->securityObjects = securityObjects;
1396             service->minProcs = 0;
1397             service->maxProcs = 1;
1398             service->idleDeadTime = 60;
1399             service->idleDeadErr = 0;
1400             service->connDeadTime = rx_connDeadTime;
1401             service->executeRequestProc = serviceProc;
1402             service->checkReach = 0;
1403             rx_services[i] = service;   /* not visible until now */
1404             USERPRI;
1405             return service;
1406         }
1407     }
1408     USERPRI;
1409     rxi_FreeService(tservice);
1410     (osi_Msg "rx_NewService: cannot support > %d services\n",
1411      RX_MAX_SERVICES);
1412     return 0;
1413 }
1414
1415 /* Set configuration options for all of a service's security objects */
1416
1417 afs_int32
1418 rx_SetSecurityConfiguration(struct rx_service *service,
1419                             rx_securityConfigVariables type,
1420                             void *value)
1421 {
1422     int i;
1423     for (i = 0; i<service->nSecurityObjects; i++) {
1424         if (service->securityObjects[i]) {
1425             RXS_SetConfiguration(service->securityObjects[i], NULL, type,
1426                                  value, NULL);
1427         }
1428     }
1429     return 0;
1430 }
1431
1432 struct rx_service *
1433 rx_NewService(u_short port, u_short serviceId, char *serviceName,
1434               struct rx_securityClass **securityObjects, int nSecurityObjects,
1435               afs_int32(*serviceProc) (struct rx_call * acall))
1436 {
1437     return rx_NewServiceHost(htonl(INADDR_ANY), port, serviceId, serviceName, securityObjects, nSecurityObjects, serviceProc);
1438 }
1439
1440 /* Generic request processing loop. This routine should be called
1441  * by the implementation dependent rx_ServerProc. If socketp is
1442  * non-null, it will be set to the file descriptor that this thread
1443  * is now listening on. If socketp is null, this routine will never
1444  * returns. */
1445 void
1446 rxi_ServerProc(int threadID, struct rx_call *newcall, osi_socket * socketp)
1447 {
1448     register struct rx_call *call;
1449     register afs_int32 code;
1450     register struct rx_service *tservice = NULL;
1451
1452     for (;;) {
1453         if (newcall) {
1454             call = newcall;
1455             newcall = NULL;
1456         } else {
1457             call = rx_GetCall(threadID, tservice, socketp);
1458             if (socketp && *socketp != OSI_NULLSOCKET) {
1459                 /* We are now a listener thread */
1460                 return;
1461             }
1462         }
1463
1464         /* if server is restarting( typically smooth shutdown) then do not
1465          * allow any new calls.
1466          */
1467
1468         if (rx_tranquil && (call != NULL)) {
1469             SPLVAR;
1470
1471             NETPRI;
1472             MUTEX_ENTER(&call->lock);
1473
1474             rxi_CallError(call, RX_RESTARTING);
1475             rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1476
1477             MUTEX_EXIT(&call->lock);
1478             USERPRI;
1479         }
1480 #ifdef  KERNEL
1481         if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1482 #ifdef RX_ENABLE_LOCKS
1483             AFS_GLOCK();
1484 #endif /* RX_ENABLE_LOCKS */
1485             afs_termState = AFSOP_STOP_AFS;
1486             afs_osi_Wakeup(&afs_termState);
1487 #ifdef RX_ENABLE_LOCKS
1488             AFS_GUNLOCK();
1489 #endif /* RX_ENABLE_LOCKS */
1490             return;
1491         }
1492 #endif
1493
1494         tservice = call->conn->service;
1495
1496         if (tservice->beforeProc)
1497             (*tservice->beforeProc) (call);
1498
1499         code = call->conn->service->executeRequestProc(call);
1500
1501         if (tservice->afterProc)
1502             (*tservice->afterProc) (call, code);
1503
1504         rx_EndCall(call, code);
1505         MUTEX_ENTER(&rx_stats_mutex);
1506         rxi_nCalls++;
1507         MUTEX_EXIT(&rx_stats_mutex);
1508     }
1509 }
1510
1511
1512 void
1513 rx_WakeupServerProcs(void)
1514 {
1515     struct rx_serverQueueEntry *np, *tqp;
1516     SPLVAR;
1517
1518     NETPRI;
1519     MUTEX_ENTER(&rx_serverPool_lock);
1520
1521 #ifdef RX_ENABLE_LOCKS
1522     if (rx_waitForPacket)
1523         CV_BROADCAST(&rx_waitForPacket->cv);
1524 #else /* RX_ENABLE_LOCKS */
1525     if (rx_waitForPacket)
1526         osi_rxWakeup(rx_waitForPacket);
1527 #endif /* RX_ENABLE_LOCKS */
1528     MUTEX_ENTER(&freeSQEList_lock);
1529     for (np = rx_FreeSQEList; np; np = tqp) {
1530         tqp = *(struct rx_serverQueueEntry **)np;
1531 #ifdef RX_ENABLE_LOCKS
1532         CV_BROADCAST(&np->cv);
1533 #else /* RX_ENABLE_LOCKS */
1534         osi_rxWakeup(np);
1535 #endif /* RX_ENABLE_LOCKS */
1536     }
1537     MUTEX_EXIT(&freeSQEList_lock);
1538     for (queue_Scan(&rx_idleServerQueue, np, tqp, rx_serverQueueEntry)) {
1539 #ifdef RX_ENABLE_LOCKS
1540         CV_BROADCAST(&np->cv);
1541 #else /* RX_ENABLE_LOCKS */
1542         osi_rxWakeup(np);
1543 #endif /* RX_ENABLE_LOCKS */
1544     }
1545     MUTEX_EXIT(&rx_serverPool_lock);
1546     USERPRI;
1547 }
1548
1549 /* meltdown:
1550  * One thing that seems to happen is that all the server threads get
1551  * tied up on some empty or slow call, and then a whole bunch of calls
1552  * arrive at once, using up the packet pool, so now there are more
1553  * empty calls.  The most critical resources here are server threads
1554  * and the free packet pool.  The "doreclaim" code seems to help in
1555  * general.  I think that eventually we arrive in this state: there
1556  * are lots of pending calls which do have all their packets present,
1557  * so they won't be reclaimed, are multi-packet calls, so they won't
1558  * be scheduled until later, and thus are tying up most of the free
1559  * packet pool for a very long time.
1560  * future options:
1561  * 1.  schedule multi-packet calls if all the packets are present.
1562  * Probably CPU-bound operation, useful to return packets to pool.
1563  * Do what if there is a full window, but the last packet isn't here?
1564  * 3.  preserve one thread which *only* runs "best" calls, otherwise
1565  * it sleeps and waits for that type of call.
1566  * 4.  Don't necessarily reserve a whole window for each thread.  In fact,
1567  * the current dataquota business is badly broken.  The quota isn't adjusted
1568  * to reflect how many packets are presently queued for a running call.
1569  * So, when we schedule a queued call with a full window of packets queued
1570  * up for it, that *should* free up a window full of packets for other 2d-class
1571  * calls to be able to use from the packet pool.  But it doesn't.
1572  *
1573  * NB.  Most of the time, this code doesn't run -- since idle server threads
1574  * sit on the idle server queue and are assigned by "...ReceivePacket" as soon
1575  * as a new call arrives.
1576  */
1577 /* Sleep until a call arrives.  Returns a pointer to the call, ready
1578  * for an rx_Read. */
1579 #ifdef RX_ENABLE_LOCKS
1580 struct rx_call *
1581 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1582 {
1583     struct rx_serverQueueEntry *sq;
1584     register struct rx_call *call = (struct rx_call *)0;
1585     struct rx_service *service = NULL;
1586     SPLVAR;
1587
1588     MUTEX_ENTER(&freeSQEList_lock);
1589
1590     if ((sq = rx_FreeSQEList)) {
1591         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1592         MUTEX_EXIT(&freeSQEList_lock);
1593     } else {                    /* otherwise allocate a new one and return that */
1594         MUTEX_EXIT(&freeSQEList_lock);
1595         sq = (struct rx_serverQueueEntry *)
1596             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1597         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1598         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1599     }
1600
1601     MUTEX_ENTER(&rx_serverPool_lock);
1602     if (cur_service != NULL) {
1603         ReturnToServerPool(cur_service);
1604     }
1605     while (1) {
1606         if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1607             register struct rx_call *tcall, *ncall, *choice2 = NULL;
1608
1609             /* Scan for eligible incoming calls.  A call is not eligible
1610              * if the maximum number of calls for its service type are
1611              * already executing */
1612             /* One thread will process calls FCFS (to prevent starvation),
1613              * while the other threads may run ahead looking for calls which
1614              * have all their input data available immediately.  This helps
1615              * keep threads from blocking, waiting for data from the client. */
1616             for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1617                 service = tcall->conn->service;
1618                 if (!QuotaOK(service)) {
1619                     continue;
1620                 }
1621                 if (tno == rxi_fcfs_thread_num
1622                     || !tcall->queue_item_header.next) {
1623                     /* If we're the fcfs thread , then  we'll just use
1624                      * this call. If we haven't been able to find an optimal
1625                      * choice, and we're at the end of the list, then use a
1626                      * 2d choice if one has been identified.  Otherwise... */
1627                     call = (choice2 ? choice2 : tcall);
1628                     service = call->conn->service;
1629                 } else if (!queue_IsEmpty(&tcall->rq)) {
1630                     struct rx_packet *rp;
1631                     rp = queue_First(&tcall->rq, rx_packet);
1632                     if (rp->header.seq == 1) {
1633                         if (!meltdown_1pkt
1634                             || (rp->header.flags & RX_LAST_PACKET)) {
1635                             call = tcall;
1636                         } else if (rxi_2dchoice && !choice2
1637                                    && !(tcall->flags & RX_CALL_CLEARED)
1638                                    && (tcall->rprev > rxi_HardAckRate)) {
1639                             choice2 = tcall;
1640                         } else
1641                             rxi_md2cnt++;
1642                     }
1643                 }
1644                 if (call) {
1645                     break;
1646                 } else {
1647                     ReturnToServerPool(service);
1648                 }
1649             }
1650         }
1651
1652         if (call) {
1653             queue_Remove(call);
1654             MUTEX_EXIT(&rx_serverPool_lock);
1655             MUTEX_ENTER(&call->lock);
1656
1657             if (call->flags & RX_CALL_WAIT_PROC) {
1658                 call->flags &= ~RX_CALL_WAIT_PROC;
1659                 MUTEX_ENTER(&rx_stats_mutex);
1660                 rx_nWaiting--;
1661                 MUTEX_EXIT(&rx_stats_mutex);
1662             }
1663
1664             if (call->state != RX_STATE_PRECALL || call->error) {
1665                 MUTEX_EXIT(&call->lock);
1666                 MUTEX_ENTER(&rx_serverPool_lock);
1667                 ReturnToServerPool(service);
1668                 call = NULL;
1669                 continue;
1670             }
1671
1672             if (queue_IsEmpty(&call->rq)
1673                 || queue_First(&call->rq, rx_packet)->header.seq != 1)
1674                 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1675
1676             CLEAR_CALL_QUEUE_LOCK(call);
1677             break;
1678         } else {
1679             /* If there are no eligible incoming calls, add this process
1680              * to the idle server queue, to wait for one */
1681             sq->newcall = 0;
1682             sq->tno = tno;
1683             if (socketp) {
1684                 *socketp = OSI_NULLSOCKET;
1685             }
1686             sq->socketp = socketp;
1687             queue_Append(&rx_idleServerQueue, sq);
1688 #ifndef AFS_AIX41_ENV
1689             rx_waitForPacket = sq;
1690 #else
1691             rx_waitingForPacket = sq;
1692 #endif /* AFS_AIX41_ENV */
1693             do {
1694                 CV_WAIT(&sq->cv, &rx_serverPool_lock);
1695 #ifdef  KERNEL
1696                 if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1697                     MUTEX_EXIT(&rx_serverPool_lock);
1698                     return (struct rx_call *)0;
1699                 }
1700 #endif
1701             } while (!(call = sq->newcall)
1702                      && !(socketp && *socketp != OSI_NULLSOCKET));
1703             MUTEX_EXIT(&rx_serverPool_lock);
1704             if (call) {
1705                 MUTEX_ENTER(&call->lock);
1706             }
1707             break;
1708         }
1709     }
1710
1711     MUTEX_ENTER(&freeSQEList_lock);
1712     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1713     rx_FreeSQEList = sq;
1714     MUTEX_EXIT(&freeSQEList_lock);
1715
1716     if (call) {
1717         clock_GetTime(&call->startTime);
1718         call->state = RX_STATE_ACTIVE;
1719         call->mode = RX_MODE_RECEIVING;
1720 #ifdef RX_KERNEL_TRACE
1721         if (ICL_SETACTIVE(afs_iclSetp)) {
1722             int glockOwner = ISAFS_GLOCK();
1723             if (!glockOwner)
1724                 AFS_GLOCK();
1725             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1726                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1727                        call);
1728             if (!glockOwner)
1729                 AFS_GUNLOCK();
1730         }
1731 #endif
1732
1733         rxi_calltrace(RX_CALL_START, call);
1734         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1735              call->conn->service->servicePort, call->conn->service->serviceId,
1736              call));
1737
1738         CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1739         MUTEX_EXIT(&call->lock);
1740     } else {
1741         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1742     }
1743
1744     return call;
1745 }
1746 #else /* RX_ENABLE_LOCKS */
1747 struct rx_call *
1748 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1749 {
1750     struct rx_serverQueueEntry *sq;
1751     register struct rx_call *call = (struct rx_call *)0, *choice2;
1752     struct rx_service *service = NULL;
1753     SPLVAR;
1754
1755     NETPRI;
1756     MUTEX_ENTER(&freeSQEList_lock);
1757
1758     if ((sq = rx_FreeSQEList)) {
1759         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1760         MUTEX_EXIT(&freeSQEList_lock);
1761     } else {                    /* otherwise allocate a new one and return that */
1762         MUTEX_EXIT(&freeSQEList_lock);
1763         sq = (struct rx_serverQueueEntry *)
1764             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1765         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1766         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1767     }
1768     MUTEX_ENTER(&sq->lock);
1769
1770     if (cur_service != NULL) {
1771         cur_service->nRequestsRunning--;
1772         if (cur_service->nRequestsRunning < cur_service->minProcs)
1773             rxi_minDeficit++;
1774         rxi_availProcs++;
1775     }
1776     if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1777         register struct rx_call *tcall, *ncall;
1778         /* Scan for eligible incoming calls.  A call is not eligible
1779          * if the maximum number of calls for its service type are
1780          * already executing */
1781         /* One thread will process calls FCFS (to prevent starvation),
1782          * while the other threads may run ahead looking for calls which
1783          * have all their input data available immediately.  This helps
1784          * keep threads from blocking, waiting for data from the client. */
1785         choice2 = (struct rx_call *)0;
1786         for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1787             service = tcall->conn->service;
1788             if (QuotaOK(service)) {
1789                 if (tno == rxi_fcfs_thread_num
1790                     || !tcall->queue_item_header.next) {
1791                     /* If we're the fcfs thread, then  we'll just use
1792                      * this call. If we haven't been able to find an optimal
1793                      * choice, and we're at the end of the list, then use a
1794                      * 2d choice if one has been identified.  Otherwise... */
1795                     call = (choice2 ? choice2 : tcall);
1796                     service = call->conn->service;
1797                 } else if (!queue_IsEmpty(&tcall->rq)) {
1798                     struct rx_packet *rp;
1799                     rp = queue_First(&tcall->rq, rx_packet);
1800                     if (rp->header.seq == 1
1801                         && (!meltdown_1pkt
1802                             || (rp->header.flags & RX_LAST_PACKET))) {
1803                         call = tcall;
1804                     } else if (rxi_2dchoice && !choice2
1805                                && !(tcall->flags & RX_CALL_CLEARED)
1806                                && (tcall->rprev > rxi_HardAckRate)) {
1807                         choice2 = tcall;
1808                     } else
1809                         rxi_md2cnt++;
1810                 }
1811             }
1812             if (call)
1813                 break;
1814         }
1815     }
1816
1817     if (call) {
1818         queue_Remove(call);
1819         /* we can't schedule a call if there's no data!!! */
1820         /* send an ack if there's no data, if we're missing the
1821          * first packet, or we're missing something between first
1822          * and last -- there's a "hole" in the incoming data. */
1823         if (queue_IsEmpty(&call->rq)
1824             || queue_First(&call->rq, rx_packet)->header.seq != 1
1825             || call->rprev != queue_Last(&call->rq, rx_packet)->header.seq)
1826             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1827
1828         call->flags &= (~RX_CALL_WAIT_PROC);
1829         service->nRequestsRunning++;
1830         /* just started call in minProcs pool, need fewer to maintain
1831          * guarantee */
1832         if (service->nRequestsRunning <= service->minProcs)
1833             rxi_minDeficit--;
1834         rxi_availProcs--;
1835         rx_nWaiting--;
1836         /* MUTEX_EXIT(&call->lock); */
1837     } else {
1838         /* If there are no eligible incoming calls, add this process
1839          * to the idle server queue, to wait for one */
1840         sq->newcall = 0;
1841         if (socketp) {
1842             *socketp = OSI_NULLSOCKET;
1843         }
1844         sq->socketp = socketp;
1845         queue_Append(&rx_idleServerQueue, sq);
1846         do {
1847             osi_rxSleep(sq);
1848 #ifdef  KERNEL
1849             if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1850                 USERPRI;
1851                 rxi_Free(sq, sizeof(struct rx_serverQueueEntry));
1852                 return (struct rx_call *)0;
1853             }
1854 #endif
1855         } while (!(call = sq->newcall)
1856                  && !(socketp && *socketp != OSI_NULLSOCKET));
1857     }
1858     MUTEX_EXIT(&sq->lock);
1859
1860     MUTEX_ENTER(&freeSQEList_lock);
1861     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1862     rx_FreeSQEList = sq;
1863     MUTEX_EXIT(&freeSQEList_lock);
1864
1865     if (call) {
1866         clock_GetTime(&call->startTime);
1867         call->state = RX_STATE_ACTIVE;
1868         call->mode = RX_MODE_RECEIVING;
1869 #ifdef RX_KERNEL_TRACE
1870         if (ICL_SETACTIVE(afs_iclSetp)) {
1871             int glockOwner = ISAFS_GLOCK();
1872             if (!glockOwner)
1873                 AFS_GLOCK();
1874             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1875                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1876                        call);
1877             if (!glockOwner)
1878                 AFS_GUNLOCK();
1879         }
1880 #endif
1881
1882         rxi_calltrace(RX_CALL_START, call);
1883         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1884              call->conn->service->servicePort, call->conn->service->serviceId,
1885              call));
1886     } else {
1887         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1888     }
1889
1890     USERPRI;
1891
1892     return call;
1893 }
1894 #endif /* RX_ENABLE_LOCKS */
1895
1896
1897
1898 /* Establish a procedure to be called when a packet arrives for a
1899  * call.  This routine will be called at most once after each call,
1900  * and will also be called if there is an error condition on the call or
1901  * the call is complete.  Used by multi rx to build a selection
1902  * function which determines which of several calls is likely to be a
1903  * good one to read from.
1904  * NOTE: the way this is currently implemented it is probably only a
1905  * good idea to (1) use it immediately after a newcall (clients only)
1906  * and (2) only use it once.  Other uses currently void your warranty
1907  */
1908 void
1909 rx_SetArrivalProc(register struct rx_call *call,
1910                   register void (*proc) (register struct rx_call * call,
1911                                         register void * mh,
1912                                         register int index),
1913                   register void * handle, register int arg)
1914 {
1915     call->arrivalProc = proc;
1916     call->arrivalProcHandle = handle;
1917     call->arrivalProcArg = arg;
1918 }
1919
1920 /* Call is finished (possibly prematurely).  Return rc to the peer, if
1921  * appropriate, and return the final error code from the conversation
1922  * to the caller */
1923
1924 afs_int32
1925 rx_EndCall(register struct rx_call *call, afs_int32 rc)
1926 {
1927     register struct rx_connection *conn = call->conn;
1928     register struct rx_service *service;
1929     afs_int32 error;
1930     SPLVAR;
1931
1932
1933
1934     dpf(("rx_EndCall(call %x rc %d error %d abortCode %d)\n", call, rc, call->error, call->abortCode));
1935
1936     NETPRI;
1937     MUTEX_ENTER(&call->lock);
1938
1939     if (rc == 0 && call->error == 0) {
1940         call->abortCode = 0;
1941         call->abortCount = 0;
1942     }
1943
1944     call->arrivalProc = (void (*)())0;
1945     if (rc && call->error == 0) {
1946         rxi_CallError(call, rc);
1947         /* Send an abort message to the peer if this error code has
1948          * only just been set.  If it was set previously, assume the
1949          * peer has already been sent the error code or will request it
1950          */
1951         rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1952     }
1953     if (conn->type == RX_SERVER_CONNECTION) {
1954         /* Make sure reply or at least dummy reply is sent */
1955         if (call->mode == RX_MODE_RECEIVING) {
1956             rxi_WriteProc(call, 0, 0);
1957         }
1958         if (call->mode == RX_MODE_SENDING) {
1959             rxi_FlushWrite(call);
1960         }
1961         service = conn->service;
1962         rxi_calltrace(RX_CALL_END, call);
1963         /* Call goes to hold state until reply packets are acknowledged */
1964         if (call->tfirst + call->nSoftAcked < call->tnext) {
1965             call->state = RX_STATE_HOLD;
1966         } else {
1967             call->state = RX_STATE_DALLY;
1968             rxi_ClearTransmitQueue(call, 0);
1969             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
1970             rxevent_Cancel(call->keepAliveEvent, call,
1971                            RX_CALL_REFCOUNT_ALIVE);
1972         }
1973     } else {                    /* Client connection */
1974         char dummy;
1975         /* Make sure server receives input packets, in the case where
1976          * no reply arguments are expected */
1977         if ((call->mode == RX_MODE_SENDING)
1978             || (call->mode == RX_MODE_RECEIVING && call->rnext == 1)) {
1979             (void)rxi_ReadProc(call, &dummy, 1);
1980         }
1981
1982         /* If we had an outstanding delayed ack, be nice to the server
1983          * and force-send it now.
1984          */
1985         if (call->delayedAckEvent) {
1986             rxevent_Cancel(call->delayedAckEvent, call,
1987                            RX_CALL_REFCOUNT_DELAY);
1988             call->delayedAckEvent = NULL;
1989             rxi_SendDelayedAck(NULL, call, NULL);
1990         }
1991
1992         /* We need to release the call lock since it's lower than the
1993          * conn_call_lock and we don't want to hold the conn_call_lock
1994          * over the rx_ReadProc call. The conn_call_lock needs to be held
1995          * here for the case where rx_NewCall is perusing the calls on
1996          * the connection structure. We don't want to signal until
1997          * rx_NewCall is in a stable state. Otherwise, rx_NewCall may
1998          * have checked this call, found it active and by the time it
1999          * goes to sleep, will have missed the signal.
2000          *
2001          * Do not clear the RX_CONN_MAKECALL_WAITING flag as long as
2002          * there are threads waiting to use the conn object.
2003          */
2004         MUTEX_EXIT(&call->lock);
2005         MUTEX_ENTER(&conn->conn_call_lock);
2006         MUTEX_ENTER(&call->lock);
2007         MUTEX_ENTER(&conn->conn_data_lock);
2008         conn->flags |= RX_CONN_BUSY;
2009         if (conn->flags & RX_CONN_MAKECALL_WAITING) {
2010             if (conn->makeCallWaiters == 0)
2011                 conn->flags &= (~RX_CONN_MAKECALL_WAITING);
2012             MUTEX_EXIT(&conn->conn_data_lock);
2013 #ifdef  RX_ENABLE_LOCKS
2014             CV_BROADCAST(&conn->conn_call_cv);
2015 #else
2016             osi_rxWakeup(conn);
2017 #endif
2018         }
2019 #ifdef RX_ENABLE_LOCKS
2020         else {
2021             MUTEX_EXIT(&conn->conn_data_lock);
2022         }
2023 #endif /* RX_ENABLE_LOCKS */
2024         call->state = RX_STATE_DALLY;
2025     }
2026     error = call->error;
2027
2028     /* currentPacket, nLeft, and NFree must be zeroed here, because
2029      * ResetCall cannot: ResetCall may be called at splnet(), in the
2030      * kernel version, and may interrupt the macros rx_Read or
2031      * rx_Write, which run at normal priority for efficiency. */
2032     if (call->currentPacket) {
2033         queue_Prepend(&call->iovq, call->currentPacket);
2034         call->currentPacket = (struct rx_packet *)0;
2035     }
2036
2037     call->nLeft = call->nFree = call->curlen = 0;
2038
2039     /* Free any packets from the last call to ReadvProc/WritevProc */
2040     rxi_FreePackets(0, &call->iovq);
2041
2042     CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
2043     MUTEX_EXIT(&call->lock);
2044     if (conn->type == RX_CLIENT_CONNECTION) {
2045         MUTEX_EXIT(&conn->conn_call_lock);
2046         conn->flags &= ~RX_CONN_BUSY;
2047     }
2048     USERPRI;
2049     /*
2050      * Map errors to the local host's errno.h format.
2051      */
2052     error = ntoh_syserr_conv(error);
2053     return error;
2054 }
2055
2056 #if !defined(KERNEL)
2057
2058 /* Call this routine when shutting down a server or client (especially
2059  * clients).  This will allow Rx to gracefully garbage collect server
2060  * connections, and reduce the number of retries that a server might
2061  * make to a dead client.
2062  * This is not quite right, since some calls may still be ongoing and
2063  * we can't lock them to destroy them. */
2064 void
2065 rx_Finalize(void)
2066 {
2067     register struct rx_connection **conn_ptr, **conn_end;
2068
2069     INIT_PTHREAD_LOCKS;
2070     LOCK_RX_INIT;
2071     if (rxinit_status == 1) {
2072         UNLOCK_RX_INIT;
2073         return;                 /* Already shutdown. */
2074     }
2075     rxi_DeleteCachedConnections();
2076     if (rx_connHashTable) {
2077         MUTEX_ENTER(&rx_connHashTable_lock);
2078         for (conn_ptr = &rx_connHashTable[0], conn_end =
2079              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
2080              conn_ptr++) {
2081             struct rx_connection *conn, *next;
2082             for (conn = *conn_ptr; conn; conn = next) {
2083                 next = conn->next;
2084                 if (conn->type == RX_CLIENT_CONNECTION) {
2085                     /* MUTEX_ENTER(&conn->conn_data_lock); when used in kernel */
2086                     conn->refCount++;
2087                     /* MUTEX_EXIT(&conn->conn_data_lock); when used in kernel */
2088 #ifdef RX_ENABLE_LOCKS
2089                     rxi_DestroyConnectionNoLock(conn);
2090 #else /* RX_ENABLE_LOCKS */
2091                     rxi_DestroyConnection(conn);
2092 #endif /* RX_ENABLE_LOCKS */
2093                 }
2094             }
2095         }
2096 #ifdef RX_ENABLE_LOCKS
2097         while (rx_connCleanup_list) {
2098             struct rx_connection *conn;
2099             conn = rx_connCleanup_list;
2100             rx_connCleanup_list = rx_connCleanup_list->next;
2101             MUTEX_EXIT(&rx_connHashTable_lock);
2102             rxi_CleanupConnection(conn);
2103             MUTEX_ENTER(&rx_connHashTable_lock);
2104         }
2105         MUTEX_EXIT(&rx_connHashTable_lock);
2106 #endif /* RX_ENABLE_LOCKS */
2107     }
2108     rxi_flushtrace();
2109
2110 #ifdef AFS_NT40_ENV
2111     afs_winsockCleanup();
2112 #endif
2113
2114     rxinit_status = 1;
2115     UNLOCK_RX_INIT;
2116 }
2117 #endif
2118
2119 /* if we wakeup packet waiter too often, can get in loop with two
2120     AllocSendPackets each waking each other up (from ReclaimPacket calls) */
2121 void
2122 rxi_PacketsUnWait(void)
2123 {
2124     if (!rx_waitingForPackets) {
2125         return;
2126     }
2127 #ifdef KERNEL
2128     if (rxi_OverQuota(RX_PACKET_CLASS_SEND)) {
2129         return;                 /* still over quota */
2130     }
2131 #endif /* KERNEL */
2132     rx_waitingForPackets = 0;
2133 #ifdef  RX_ENABLE_LOCKS
2134     CV_BROADCAST(&rx_waitingForPackets_cv);
2135 #else
2136     osi_rxWakeup(&rx_waitingForPackets);
2137 #endif
2138     return;
2139 }
2140
2141
2142 /* ------------------Internal interfaces------------------------- */
2143
2144 /* Return this process's service structure for the
2145  * specified socket and service */
2146 struct rx_service *
2147 rxi_FindService(register osi_socket socket, register u_short serviceId)
2148 {
2149     register struct rx_service **sp;
2150     for (sp = &rx_services[0]; *sp; sp++) {
2151         if ((*sp)->serviceId == serviceId && (*sp)->socket == socket)
2152             return *sp;
2153     }
2154     return 0;
2155 }
2156
2157 /* Allocate a call structure, for the indicated channel of the
2158  * supplied connection.  The mode and state of the call must be set by
2159  * the caller. Returns the call with mutex locked. */
2160 struct rx_call *
2161 rxi_NewCall(register struct rx_connection *conn, register int channel)
2162 {
2163     register struct rx_call *call;
2164 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2165     register struct rx_call *cp;        /* Call pointer temp */
2166     register struct rx_call *nxp;       /* Next call pointer, for queue_Scan */
2167 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2168
2169     dpf(("rxi_NewCall(conn %x, channel %d)\n", conn, channel));
2170
2171     /* Grab an existing call structure, or allocate a new one.
2172      * Existing call structures are assumed to have been left reset by
2173      * rxi_FreeCall */
2174     MUTEX_ENTER(&rx_freeCallQueue_lock);
2175
2176 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2177     /*
2178      * EXCEPT that the TQ might not yet be cleared out.
2179      * Skip over those with in-use TQs.
2180      */
2181     call = NULL;
2182     for (queue_Scan(&rx_freeCallQueue, cp, nxp, rx_call)) {
2183         if (!(cp->flags & RX_CALL_TQ_BUSY)) {
2184             call = cp;
2185             break;
2186         }
2187     }
2188     if (call) {
2189 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2190     if (queue_IsNotEmpty(&rx_freeCallQueue)) {
2191         call = queue_First(&rx_freeCallQueue, rx_call);
2192 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2193         queue_Remove(call);
2194         rx_MutexDecrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2195         MUTEX_EXIT(&rx_freeCallQueue_lock);
2196         MUTEX_ENTER(&call->lock);
2197         CLEAR_CALL_QUEUE_LOCK(call);
2198 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2199         /* Now, if TQ wasn't cleared earlier, do it now. */
2200         if (call->flags & RX_CALL_TQ_CLEARME) {
2201             rxi_ClearTransmitQueue(call, 0);
2202             queue_Init(&call->tq);
2203         }
2204 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2205         /* Bind the call to its connection structure */
2206         call->conn = conn;
2207         rxi_ResetCall(call, 1);
2208     } else {
2209         call = (struct rx_call *)rxi_Alloc(sizeof(struct rx_call));
2210
2211         MUTEX_EXIT(&rx_freeCallQueue_lock);
2212         MUTEX_INIT(&call->lock, "call lock", MUTEX_DEFAULT, NULL);
2213         MUTEX_ENTER(&call->lock);
2214         CV_INIT(&call->cv_twind, "call twind", CV_DEFAULT, 0);
2215         CV_INIT(&call->cv_rq, "call rq", CV_DEFAULT, 0);
2216         CV_INIT(&call->cv_tq, "call tq", CV_DEFAULT, 0);
2217
2218         rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2219         /* Initialize once-only items */
2220         queue_Init(&call->tq);
2221         queue_Init(&call->rq);
2222         queue_Init(&call->iovq);
2223         /* Bind the call to its connection structure (prereq for reset) */
2224         call->conn = conn;
2225         rxi_ResetCall(call, 1);
2226     }
2227     call->channel = channel;
2228     call->callNumber = &conn->callNumber[channel];
2229     call->rwind = conn->rwind[channel];
2230     call->twind = conn->twind[channel];
2231     /* Note that the next expected call number is retained (in
2232      * conn->callNumber[i]), even if we reallocate the call structure
2233      */
2234     conn->call[channel] = call;
2235     /* if the channel's never been used (== 0), we should start at 1, otherwise
2236      * the call number is valid from the last time this channel was used */
2237     if (*call->callNumber == 0)
2238         *call->callNumber = 1;
2239
2240     return call;
2241 }
2242
2243 /* A call has been inactive long enough that we can throw away
2244  * state, including the call structure, which is placed on the call
2245  * free list.
2246  * Call is locked upon entry.
2247  * haveCTLock set if called from rxi_ReapConnections
2248  */
2249 #ifdef RX_ENABLE_LOCKS
2250 void
2251 rxi_FreeCall(register struct rx_call *call, int haveCTLock)
2252 #else /* RX_ENABLE_LOCKS */
2253 void
2254 rxi_FreeCall(register struct rx_call *call)
2255 #endif                          /* RX_ENABLE_LOCKS */
2256 {
2257     register int channel = call->channel;
2258     register struct rx_connection *conn = call->conn;
2259
2260
2261     if (call->state == RX_STATE_DALLY || call->state == RX_STATE_HOLD)
2262         (*call->callNumber)++;
2263     rxi_ResetCall(call, 0);
2264     call->conn->call[channel] = (struct rx_call *)0;
2265
2266     MUTEX_ENTER(&rx_freeCallQueue_lock);
2267     SET_CALL_QUEUE_LOCK(call, &rx_freeCallQueue_lock);
2268 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2269     /* A call may be free even though its transmit queue is still in use.
2270      * Since we search the call list from head to tail, put busy calls at
2271      * the head of the list, and idle calls at the tail.
2272      */
2273     if (call->flags & RX_CALL_TQ_BUSY)
2274         queue_Prepend(&rx_freeCallQueue, call);
2275     else
2276         queue_Append(&rx_freeCallQueue, call);
2277 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2278     queue_Append(&rx_freeCallQueue, call);
2279 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2280     rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2281     MUTEX_EXIT(&rx_freeCallQueue_lock);
2282
2283     /* Destroy the connection if it was previously slated for
2284      * destruction, i.e. the Rx client code previously called
2285      * rx_DestroyConnection (client connections), or
2286      * rxi_ReapConnections called the same routine (server
2287      * connections).  Only do this, however, if there are no
2288      * outstanding calls. Note that for fine grain locking, there appears
2289      * to be a deadlock in that rxi_FreeCall has a call locked and
2290      * DestroyConnectionNoLock locks each call in the conn. But note a
2291      * few lines up where we have removed this call from the conn.
2292      * If someone else destroys a connection, they either have no
2293      * call lock held or are going through this section of code.
2294      */
2295     if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) {
2296         MUTEX_ENTER(&conn->conn_data_lock);
2297         conn->refCount++;
2298         MUTEX_EXIT(&conn->conn_data_lock);
2299 #ifdef RX_ENABLE_LOCKS
2300         if (haveCTLock)
2301             rxi_DestroyConnectionNoLock(conn);
2302         else
2303             rxi_DestroyConnection(conn);
2304 #else /* RX_ENABLE_LOCKS */
2305         rxi_DestroyConnection(conn);
2306 #endif /* RX_ENABLE_LOCKS */
2307     }
2308 }
2309
2310 afs_int32 rxi_Alloccnt = 0, rxi_Allocsize = 0;
2311 char *
2312 rxi_Alloc(register size_t size)
2313 {
2314     register char *p;
2315
2316     rx_MutexAdd1Increment2(rxi_Allocsize, (afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2317     p = (char *)osi_Alloc(size);
2318
2319     if (!p)
2320         osi_Panic("rxi_Alloc error");
2321     memset(p, 0, size);
2322     return p;
2323 }
2324
2325 void
2326 rxi_Free(void *addr, register size_t size)
2327 {
2328     rx_MutexAdd1Decrement2(rxi_Allocsize, -(afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2329     osi_Free(addr, size);
2330 }
2331
2332 void
2333 rxi_SetPeerMtu(register afs_uint32 host, register afs_uint32 port, int mtu)
2334 {
2335     struct rx_peer **peer_ptr, **peer_end;
2336     int hashIndex;
2337
2338     MUTEX_ENTER(&rx_peerHashTable_lock);
2339     if (port == 0) {
2340        for (peer_ptr = &rx_peerHashTable[0], peer_end =
2341                 &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
2342             peer_ptr++) {
2343            struct rx_peer *peer, *next;
2344            for (peer = *peer_ptr; peer; peer = next) {
2345                next = peer->next;
2346                if (host == peer->host) {
2347                    MUTEX_ENTER(&peer->peer_lock);
2348                    peer->ifMTU=MIN(mtu, peer->ifMTU);
2349                    peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2350                    MUTEX_EXIT(&peer->peer_lock);
2351                }
2352            }
2353        }
2354     } else {
2355        struct rx_peer *peer, *next;
2356        hashIndex = PEER_HASH(host, port);
2357        for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
2358            if ((peer->host == host) && (peer->port == port)) {
2359                MUTEX_ENTER(&peer->peer_lock);
2360                peer->ifMTU=MIN(mtu, peer->ifMTU);
2361                peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2362                MUTEX_EXIT(&peer->peer_lock);
2363            }
2364        }
2365     }
2366     MUTEX_EXIT(&rx_peerHashTable_lock);
2367 }
2368
2369 /* Find the peer process represented by the supplied (host,port)
2370  * combination.  If there is no appropriate active peer structure, a
2371  * new one will be allocated and initialized
2372  * The origPeer, if set, is a pointer to a peer structure on which the
2373  * refcount will be decremented. This is used to replace the peer
2374  * structure hanging off a connection structure */
2375 struct rx_peer *
2376 rxi_FindPeer(register afs_uint32 host, register u_short port,
2377              struct rx_peer *origPeer, int create)
2378 {
2379     register struct rx_peer *pp;
2380     int hashIndex;
2381     hashIndex = PEER_HASH(host, port);
2382     MUTEX_ENTER(&rx_peerHashTable_lock);
2383     for (pp = rx_peerHashTable[hashIndex]; pp; pp = pp->next) {
2384         if ((pp->host == host) && (pp->port == port))
2385             break;
2386     }
2387     if (!pp) {
2388         if (create) {
2389             pp = rxi_AllocPeer();       /* This bzero's *pp */
2390             pp->host = host;    /* set here or in InitPeerParams is zero */
2391             pp->port = port;
2392             MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0);
2393             queue_Init(&pp->congestionQueue);
2394             queue_Init(&pp->rpcStats);
2395             pp->next = rx_peerHashTable[hashIndex];
2396             rx_peerHashTable[hashIndex] = pp;
2397             rxi_InitPeerParams(pp);
2398             rx_MutexIncrement(rx_stats.nPeerStructs, rx_stats_mutex);
2399         }
2400     }
2401     if (pp && create) {
2402         pp->refCount++;
2403     }
2404     if (origPeer)
2405         origPeer->refCount--;
2406     MUTEX_EXIT(&rx_peerHashTable_lock);
2407     return pp;
2408 }
2409
2410
2411 /* Find the connection at (host, port) started at epoch, and with the
2412  * given connection id.  Creates the server connection if necessary.
2413  * The type specifies whether a client connection or a server
2414  * connection is desired.  In both cases, (host, port) specify the
2415  * peer's (host, port) pair.  Client connections are not made
2416  * automatically by this routine.  The parameter socket gives the
2417  * socket descriptor on which the packet was received.  This is used,
2418  * in the case of server connections, to check that *new* connections
2419  * come via a valid (port, serviceId).  Finally, the securityIndex
2420  * parameter must match the existing index for the connection.  If a
2421  * server connection is created, it will be created using the supplied
2422  * index, if the index is valid for this service */
2423 struct rx_connection *
2424 rxi_FindConnection(osi_socket socket, register afs_int32 host,
2425                    register u_short port, u_short serviceId, afs_uint32 cid,
2426                    afs_uint32 epoch, int type, u_int securityIndex)
2427 {
2428     /* Look up the connection matching (host, port, cid, epoch, type), probing
2429      * rxLastConn before the hash chain.  A missing server connection is
2430      * created from its service; every failure returns a null pointer.  The
2431      * connection handed back has had its refCount incremented. */
2432     register struct rx_connection *tconn;
2433     struct rx_service *tservice;
2434     int bucket, inChain, chan;
2435
2436     bucket = CONN_HASH(host, port, cid, epoch, type);
2437     MUTEX_ENTER(&rx_connHashTable_lock);
2438     if (rxLastConn) {
2439         tconn = rxLastConn;
2440         inChain = 0;
2441     } else {
2442         tconn = rx_connHashTable[bucket];
2443         inChain = 1;
2444     }
2445     while (tconn) {
2446         if ((tconn->type == type) && (tconn->cid == (cid & RX_CIDMASK))
2447             && (tconn->epoch == epoch)) {
2448             register struct rx_peer *tpeer = tconn->peer;
2449             if (tconn->securityIndex != securityIndex) {
2450                 /* Not supposed to happen, but a forged packet could look
2451                  * like this, and some CM bug seems to cause it from time
2452                  * to time -- in which case, the fileserver asserts. */
2453                 MUTEX_EXIT(&rx_connHashTable_lock);
2454                 return (struct rx_connection *)0;
2455             }
2456             /* Take an exact peer match, a port-only match on a client
2457              * connection, or any peer when the epoch's top bit is set.
2458              * (Open question: what about callback connections?) */
2459             if ((tpeer->host == host && tpeer->port == port)
2460                 || (type == RX_CLIENT_CONNECTION && tpeer->port == port)
2461                 || (tconn->epoch & 0x80000000))
2462                 break;
2463         }
2464         if (inChain) {
2465             tconn = tconn->next;
2466         } else {
2467             /* rxLastConn was not it: start over at the hash bucket */
2468             inChain = 1;
2469             tconn = rx_connHashTable[bucket];
2470         }
2471     }
2472     if (!tconn) {
2473         /* Nothing found: only a server may fabricate a connection */
2474         tservice = (type == RX_CLIENT_CONNECTION)
2475             ? NULL : rxi_FindService(socket, serviceId);
2476         if (!tservice || (securityIndex >= tservice->nSecurityObjects)
2477             || (tservice->securityObjects[securityIndex] == 0)) {
2478             MUTEX_EXIT(&rx_connHashTable_lock);
2479             return (struct rx_connection *)0;
2480         }
2481         tconn = rxi_AllocConnection();  /* comes back zeroed */
2482         MUTEX_INIT(&tconn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
2483         MUTEX_INIT(&tconn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
2484         CV_INIT(&tconn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
2485         tconn->next = rx_connHashTable[bucket];
2486         rx_connHashTable[bucket] = tconn;
2487         tconn->peer = rxi_FindPeer(host, port, 0, 1);
2488         tconn->type = RX_SERVER_CONNECTION;
2489         tconn->lastSendTime = clock_Sec();      /* don't GC immediately */
2490         tconn->epoch = epoch;
2491         tconn->cid = cid & RX_CIDMASK;
2492         tconn->ackRate = RX_FAST_ACK_RATE;
2493         tconn->service = tservice;
2494         tconn->serviceId = serviceId;
2495         tconn->securityIndex = securityIndex;
2496         tconn->securityObject = tservice->securityObjects[securityIndex];
2497         tconn->nSpecific = 0;
2498         tconn->specific = NULL;
2499         rx_SetConnDeadTime(tconn, tservice->connDeadTime);
2500         rx_SetConnIdleDeadTime(tconn, tservice->idleDeadTime);
2501         rx_SetServerConnIdleDeadErr(tconn, tservice->idleDeadErr);
2502         for (chan = 0; chan < RX_MAXCALLS; chan++) {
2503             tconn->twind[chan] = rx_initSendWindow;
2504             tconn->rwind[chan] = rx_initReceiveWindow;
2505         }
2506         /* Let the security object, then the service, see the newcomer */
2507         RXS_NewConnection(tconn->securityObject, tconn);
2508         if (tservice->newConnProc)
2509             (*tservice->newConnProc) (tconn);
2510         rx_MutexIncrement(rx_stats.nServerConns, rx_stats_mutex);
2511     }
2512     MUTEX_ENTER(&tconn->conn_data_lock);
2513     tconn->refCount++;
2514     MUTEX_EXIT(&tconn->conn_data_lock);
2515     rxLastConn = tconn;         /* remember it for the next lookup */
2516     MUTEX_EXIT(&rx_connHashTable_lock);
2517     return tconn;
2518 }
2519
2520 /* Two optional packet-tracing hooks are available for testing and monitoring
2521  * Rx.  rx_justReceived is called just after a packet is received, and
2522  * rx_almostSent just before a packet is sent.  Received packets have already
2523  * had their headers decoded; packets to be sent have not yet had their headers
2524  * encoded.  Each hook takes a pointer to the packet and a sockaddr holding the
2525  * network address, and may modify both.  A non-zero return value means the
2526  * packet should be dropped.  Both hooks are unset (0) by default.  */
2527
2528 int (*rx_justReceived) () = 0;
2529 int (*rx_almostSent) () = 0;
2530
2531 /* A packet has been received off the interface.  np is the packet, socket is
2532  * the socket it was received on (used to determine which service the packet
2533  * is for), and (host, port) identify the sender.  tnop and newcallp are only
2534  * handed on to rxi_ReceiveDataPacket.  A packet is returned to the caller when
2535  * this routine is finished with it, instead of freeing it (a small perf hack) */
2536
2537 struct rx_packet *
2538 rxi_ReceivePacket(register struct rx_packet *np, osi_socket socket,
2539                   afs_uint32 host, u_short port, int *tnop,
2540                   struct rx_call **newcallp)
2541 {
2542     register struct rx_call *call;
2543     register struct rx_connection *conn;
2544     int channel;
2545     afs_uint32 currentCallNumber;
2546     int type;
2547     int skew;
2548 #ifdef RXDEBUG
2549     char *packetType;
2550 #endif
2551     struct rx_packet *tnp;
2552
2553 #ifdef RXDEBUG
2554 /* We don't print out the packet until now because (1) the time may not be
2555  * accurate enough until now in the lwp implementation (rx_Listener only gets
2556  * the time after the packet is read) and (2) from a protocol point of view,
2557  * this is the first time the packet has been seen */
2558     packetType = (np->header.type > 0 && np->header.type < RX_N_PACKET_TYPES)
2559         ? rx_packetTypes[np->header.type - 1] : "*UNKNOWN*";
2560     dpf(("R %d %s: %x.%d.%d.%d.%d.%d.%d flags %d, packet %x",
2561          np->header.serial, packetType, ntohl(host), ntohs(port), np->header.serviceId,
2562          np->header.epoch, np->header.cid, np->header.callNumber,
2563          np->header.seq, np->header.flags, np));
2564 #endif
2565
2566     if (np->header.type == RX_PACKET_TYPE_VERSION) {
2567         return rxi_ReceiveVersionPacket(np, socket, host, port, 1);
2568     }
2569
2570     if (np->header.type == RX_PACKET_TYPE_DEBUG) {
2571         return rxi_ReceiveDebugPacket(np, socket, host, port, 1);
2572     }
2573 #ifdef RXDEBUG
2574     /* If an input tracer function is defined, call it with the packet and
2575      * network address.  Note this function may modify its arguments. */
2576     if (rx_justReceived) {
2577         struct sockaddr_in addr;
2578         int drop;
2579         addr.sin_family = AF_INET;
2580         addr.sin_port = port;
2581         addr.sin_addr.s_addr = host;
2582 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2583         addr.sin_len = sizeof(addr);
2584 #endif /* STRUCT_SOCKADDR_HAS_SA_LEN */
2585         drop = (*rx_justReceived) (np, &addr);
2586         /* drop packet if return value is non-zero */
2587         if (drop)
2588             return np;
2589         port = addr.sin_port;   /* in case fcn changed addr */
2590         host = addr.sin_addr.s_addr;
2591     }
2592 #endif
2593
2594     /* If packet was not sent by the client, then *we* must be the client */
2595     type = ((np->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
2596         ? RX_CLIENT_CONNECTION : RX_SERVER_CONNECTION;
2597
2598     /* Find the connection (or fabricate one, if we're the server & if
2599      * necessary) associated with this packet */
2600     conn =
2601         rxi_FindConnection(socket, host, port, np->header.serviceId,
2602                            np->header.cid, np->header.epoch, type,
2603                            np->header.securityIndex);
2604
2605     if (!conn) {
2606         /* If no connection found or fabricated, just ignore the packet.
2607          * (An argument could be made for sending an abort packet for
2608          * the conn) */
2609         return np;
2610     }
2611
2612     MUTEX_ENTER(&conn->conn_data_lock);
2613     if (conn->maxSerial < np->header.serial)
2614         conn->maxSerial = np->header.serial;    /* track the highest serial seen */
2615     MUTEX_EXIT(&conn->conn_data_lock);
2616
2617     /* If the connection is in an error state, send an abort packet and ignore
2618      * the incoming packet */
2619     if (conn->error) {
2620         /* Don't respond to an abort packet--we don't want loops! */
2621         MUTEX_ENTER(&conn->conn_data_lock);
2622         if (np->header.type != RX_PACKET_TYPE_ABORT)
2623             np = rxi_SendConnectionAbort(conn, np, 1, 0);
2624         conn->refCount--;       /* release the reference rxi_FindConnection took */
2625         MUTEX_EXIT(&conn->conn_data_lock);
2626         return np;
2627     }
2628
2629     /* Check for connection-only requests (i.e. not call specific). */
2630     if (np->header.callNumber == 0) {
2631         switch (np->header.type) {
2632         case RX_PACKET_TYPE_ABORT: {
2633             /* What if the supplied error is zero? */
2634             afs_int32 errcode = ntohl(rx_GetInt32(np, 0));
2635             dpf(("rxi_ReceivePacket ABORT rx_GetInt32 = %d", errcode));
2636             rxi_ConnectionError(conn, errcode);
2637             MUTEX_ENTER(&conn->conn_data_lock);
2638             conn->refCount--;
2639             MUTEX_EXIT(&conn->conn_data_lock);
2640             return np;
2641         }
2642         case RX_PACKET_TYPE_CHALLENGE:
2643             tnp = rxi_ReceiveChallengePacket(conn, np, 1);
2644             MUTEX_ENTER(&conn->conn_data_lock);
2645             conn->refCount--;
2646             MUTEX_EXIT(&conn->conn_data_lock);
2647             return tnp;
2648         case RX_PACKET_TYPE_RESPONSE:
2649             tnp = rxi_ReceiveResponsePacket(conn, np, 1);
2650             MUTEX_ENTER(&conn->conn_data_lock);
2651             conn->refCount--;
2652             MUTEX_EXIT(&conn->conn_data_lock);
2653             return tnp;
2654         case RX_PACKET_TYPE_PARAMS:
2655         case RX_PACKET_TYPE_PARAMS + 1:
2656         case RX_PACKET_TYPE_PARAMS + 2:
2657             /* ignore these packet types for now */
2658             MUTEX_ENTER(&conn->conn_data_lock);
2659             conn->refCount--;
2660             MUTEX_EXIT(&conn->conn_data_lock);
2661             return np;
2662
2663
2664         default:
2665             /* Should not reach here, unless the peer is broken: send an
2666              * abort packet */
2667             rxi_ConnectionError(conn, RX_PROTOCOL_ERROR);
2668             MUTEX_ENTER(&conn->conn_data_lock);
2669             tnp = rxi_SendConnectionAbort(conn, np, 1, 0);
2670             conn->refCount--;
2671             MUTEX_EXIT(&conn->conn_data_lock);
2672             return tnp;
2673         }
2674     }
2675
2676     channel = np->header.cid & RX_CHANNELMASK; /* the call channel is carried in the cid */
2677     call = conn->call[channel];
2678 #ifdef  RX_ENABLE_LOCKS
2679     if (call)
2680         MUTEX_ENTER(&call->lock);
2681     /* Test to see if call struct is still attached to conn. */
2682     if (call != conn->call[channel]) {
2683         if (call)
2684             MUTEX_EXIT(&call->lock);
2685         if (type == RX_SERVER_CONNECTION) {
2686             call = conn->call[channel];
2687             /* If we started with no call attached and there is one now,
2688              * another thread is also running this routine and has gotten
2689              * the connection channel. We should drop this packet in the tests
2690              * below. If there was a call on this connection and it's now
2691              * gone, then we'll be making a new call below.
2692              * If there was previously a call and it's now different then
2693              * the old call was freed and another thread running this routine
2694              * has created a call on this channel. One of these two threads
2695              * has a packet for the old call and the code below handles those
2696              * cases.
2697              */
2698             if (call)
2699                 MUTEX_ENTER(&call->lock);
2700         } else {
2701             /* This packet can't be for this call. If the new call address is
2702              * 0 then no call is running on this channel. If there is a call
2703              * then, since this is a client connection we're getting data for
2704              * it must be for the previous call.
2705              */
2706             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2707             MUTEX_ENTER(&conn->conn_data_lock);
2708             conn->refCount--;
2709             MUTEX_EXIT(&conn->conn_data_lock);
2710             return np;
2711         }
2712     }
2713 #endif
2714     currentCallNumber = conn->callNumber[channel];
2715
2716     if (type == RX_SERVER_CONNECTION) { /* We're the server */
2717         if (np->header.callNumber < currentCallNumber) {
2718             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2719 #ifdef  RX_ENABLE_LOCKS
2720             if (call)
2721                 MUTEX_EXIT(&call->lock);
2722 #endif
2723             MUTEX_ENTER(&conn->conn_data_lock);
2724             conn->refCount--;
2725             MUTEX_EXIT(&conn->conn_data_lock);
2726             return np;
2727         }
2728         if (!call) {
2729             MUTEX_ENTER(&conn->conn_call_lock);
2730             call = rxi_NewCall(conn, channel);
2731             MUTEX_EXIT(&conn->conn_call_lock);
2732             *call->callNumber = np->header.callNumber;
2733             if (np->header.callNumber == 0)     /* NOTE(review): callNumber 0 always returns in the connection-only switch above, so this looks unreachable */
2734                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2735
2736             call->state = RX_STATE_PRECALL;
2737             clock_GetTime(&call->queueTime);
2738             hzero(call->bytesSent);
2739             hzero(call->bytesRcvd);
2740             /*
2741              * If the number of queued calls exceeds the overload
2742              * threshold then abort this call.
2743              */
2744             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2745                 struct rx_packet *tp;
2746
2747                 rxi_CallError(call, rx_BusyError);
2748                 tp = rxi_SendCallAbort(call, np, 1, 0);
2749                 MUTEX_EXIT(&call->lock);
2750                 MUTEX_ENTER(&conn->conn_data_lock);
2751                 conn->refCount--;
2752                 MUTEX_EXIT(&conn->conn_data_lock);
2753                 rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
2754                 return tp;
2755             }
2756             rxi_KeepAliveOn(call);
2757         } else if (np->header.callNumber != currentCallNumber) {
2758             /* Wait until the transmit queue is idle before deciding
2759              * whether to reset the current call. Chances are that the
2760              * call will be in either DALLY or HOLD state once the TQ_BUSY
2761              * flag is cleared.
2762              */
2763 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2764             while ((call->state == RX_STATE_ACTIVE)
2765                    && (call->flags & RX_CALL_TQ_BUSY)) {
2766                 call->flags |= RX_CALL_TQ_WAIT;
2767                 call->tqWaiters++;
2768 #ifdef RX_ENABLE_LOCKS
2769                 osirx_AssertMine(&call->lock, "rxi_Start lock3");
2770                 CV_WAIT(&call->cv_tq, &call->lock);
2771 #else /* RX_ENABLE_LOCKS */
2772                 osi_rxSleep(&call->tq);
2773 #endif /* RX_ENABLE_LOCKS */
2774                 call->tqWaiters--;
2775                 if (call->tqWaiters == 0)
2776                     call->flags &= ~RX_CALL_TQ_WAIT;
2777             }
2778 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2779             /* If the new call cannot be taken right now send a busy and set
2780              * the error condition in this call, so that it terminates as
2781              * quickly as possible */
2782             if (call->state == RX_STATE_ACTIVE) {
2783                 struct rx_packet *tp;
2784
2785                 rxi_CallError(call, RX_CALL_DEAD);
2786                 tp = rxi_SendSpecial(call, conn, np, RX_PACKET_TYPE_BUSY,
2787                                      NULL, 0, 1);
2788                 MUTEX_EXIT(&call->lock);
2789                 MUTEX_ENTER(&conn->conn_data_lock);
2790                 conn->refCount--;
2791                 MUTEX_EXIT(&conn->conn_data_lock);
2792                 return tp;
2793             }
2794             rxi_ResetCall(call, 0);
2795             *call->callNumber = np->header.callNumber;
2796             if (np->header.callNumber == 0)     /* NOTE(review): looks unreachable, callNumber 0 packets return in the connection-only switch */
2797                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2798
2799             call->state = RX_STATE_PRECALL;
2800             clock_GetTime(&call->queueTime);
2801             hzero(call->bytesSent);
2802             hzero(call->bytesRcvd);
2803             /*
2804              * If the number of queued calls exceeds the overload
2805              * threshold then abort this call.
2806              */
2807             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2808                 struct rx_packet *tp;
2809
2810                 rxi_CallError(call, rx_BusyError);
2811                 tp = rxi_SendCallAbort(call, np, 1, 0);
2812                 MUTEX_EXIT(&call->lock);
2813                 MUTEX_ENTER(&conn->conn_data_lock);
2814                 conn->refCount--;
2815                 MUTEX_EXIT(&conn->conn_data_lock);
2816                 rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
2817                 return tp;
2818             }
2819             rxi_KeepAliveOn(call);
2820         } else {
2821             /* Continuing call; do nothing here. */
2822         }
2823     } else {                    /* we're the client */
2824         /* Ignore all incoming acknowledgements for calls in DALLY state */
2825         if (call && (call->state == RX_STATE_DALLY)
2826             && (np->header.type == RX_PACKET_TYPE_ACK)) {
2827             rx_MutexIncrement(rx_stats.ignorePacketDally, rx_stats_mutex);
2828 #ifdef  RX_ENABLE_LOCKS
2829             if (call) {
2830                 MUTEX_EXIT(&call->lock);
2831             }
2832 #endif
2833             MUTEX_ENTER(&conn->conn_data_lock);
2834             conn->refCount--;
2835             MUTEX_EXIT(&conn->conn_data_lock);
2836             return np;
2837         }
2838
2839         /* Ignore anything that's not relevant to the current call.  If there
2840          * isn't a current call, then no packet is relevant. */
2841         if (!call || (np->header.callNumber != currentCallNumber)) {
2842             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2843 #ifdef  RX_ENABLE_LOCKS
2844             if (call) {
2845                 MUTEX_EXIT(&call->lock);
2846             }
2847 #endif
2848             MUTEX_ENTER(&conn->conn_data_lock);
2849             conn->refCount--;
2850             MUTEX_EXIT(&conn->conn_data_lock);
2851             return np;
2852         }
2853         /* If the service security object index stamped in the packet does not
2854          * match the connection's security index, ignore the packet */
2855         if (np->header.securityIndex != conn->securityIndex) {
2856 #ifdef  RX_ENABLE_LOCKS
2857             MUTEX_EXIT(&call->lock);
2858 #endif
2859             MUTEX_ENTER(&conn->conn_data_lock);
2860             conn->refCount--;
2861             MUTEX_EXIT(&conn->conn_data_lock);
2862             return np;
2863         }
2864
2865         /* If we're receiving the response, then all transmit packets are
2866          * implicitly acknowledged.  Get rid of them. */
2867         if (np->header.type == RX_PACKET_TYPE_DATA) {
2868 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2869             /* XXX Hack. Because we must release the global rx lock when
2870              * sending packets (osi_NetSend) we drop all acks while we're
2871              * traversing the tq in rxi_Start sending packets out because
2872              * packets may move to the freePacketQueue as result of being here!
2873              * So we drop these packets until we're safely out of the
2874              * traversing. Really ugly!
2875              * For fine grain RX locking, we set the acked field in the
2876              * packets and let rxi_Start remove them from the transmit queue.
2877              */
2878             if (call->flags & RX_CALL_TQ_BUSY) {
2879 #ifdef  RX_ENABLE_LOCKS
2880                 rxi_SetAcksInTransmitQueue(call);
2881 #else
2882                 conn->refCount--;       /* NOTE(review): the only exit that drops the ref without taking conn_data_lock */
2883                 return np;      /* xmitting; drop packet */
2884 #endif
2885             } else {
2886                 rxi_ClearTransmitQueue(call, 0);
2887             }
2888 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2889             rxi_ClearTransmitQueue(call, 0);
2890 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2891         } else {
2892             if (np->header.type == RX_PACKET_TYPE_ACK) {
2893                 /* now check to see if this is an ack packet acknowledging that the
2894                  * server actually *lost* some hard-acked data.  If this happens we
2895                  * ignore this packet, as it may indicate that the server restarted in
2896                  * the middle of a call.  It is also possible that this is an old ack
2897                  * packet.  We don't abort the connection in this case, because this
2898                  * *might* just be an old ack packet.  The right way to detect a server
2899                  * restart in the midst of a call is to notice that the server epoch
2900                  * changed, btw.  */
2901                 /* XXX I'm not sure this is exactly right, since tfirst **IS**
2902                  * XXX unacknowledged.  I think that this is off-by-one, but
2903                  * XXX I don't dare change it just yet, since it will
2904                  * XXX interact badly with the server-restart detection
2905                  * XXX code in receiveackpacket.  */
2906                 if (ntohl(rx_GetInt32(np, FIRSTACKOFFSET)) < call->tfirst) {
2907                     rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2908                     MUTEX_EXIT(&call->lock);
2909                     MUTEX_ENTER(&conn->conn_data_lock);
2910                     conn->refCount--;
2911                     MUTEX_EXIT(&conn->conn_data_lock);
2912                     return np;
2913                 }
2914             }
2915         }                       /* else not a data packet */
2916     }
2917
2918     osirx_AssertMine(&call->lock, "rxi_ReceivePacket middle");
2919     /* Set remote user defined status from packet */
2920     call->remoteStatus = np->header.userStatus;
2921
2922     /* Note the gap between the expected next packet and the actual
2923      * packet that arrived, when the new packet has a smaller serial number
2924      * than expected.  Rioses frequently reorder packets all by themselves,
2925      * so this will be quite important with very large window sizes.
2926      * Skew is checked against 0 here to avoid any dependence on the type of
2927      * inPacketSkew (which may be unsigned).  In C, -1 > (unsigned) 0 is always
2928      * true!
2929      * The inPacketSkew should be a smoothed running value, not just a maximum.  MTUXXX
2930      * see CalculateRoundTripTime for an example of how to keep smoothed values.
2931      * I think using a beta of 1/8 is probably appropriate.  93.04.21
2932      */
2933     MUTEX_ENTER(&conn->conn_data_lock);
2934     skew = conn->lastSerial - np->header.serial;
2935     conn->lastSerial = np->header.serial;
2936     MUTEX_EXIT(&conn->conn_data_lock);
2937     if (skew > 0) {
2938         register struct rx_peer *peer;
2939         peer = conn->peer;
2940         if (skew > peer->inPacketSkew) {
2941             dpf(("*** In skew changed from %d to %d\n", peer->inPacketSkew,
2942                  skew));
2943             peer->inPacketSkew = skew;
2944         }
2945     }
2946
2947     /* Now do packet type-specific processing */
2948     switch (np->header.type) {
2949     case RX_PACKET_TYPE_DATA:
2950         np = rxi_ReceiveDataPacket(call, np, 1, socket, host, port, tnop,
2951                                    newcallp);
2952         break;
2953     case RX_PACKET_TYPE_ACK:
2954         /* Respond immediately to ack packets requesting acknowledgement
2955          * (ping packets) */
2956         if (np->header.flags & RX_REQUEST_ACK) {
2957             if (call->error)
2958                 (void)rxi_SendCallAbort(call, 0, 1, 0);
2959             else
2960                 (void)rxi_SendAck(call, 0, np->header.serial,
2961                                   RX_ACK_PING_RESPONSE, 1);
2962         }
2963         np = rxi_ReceiveAckPacket(call, np, 1);
2964         break;
2965     case RX_PACKET_TYPE_ABORT: {
2966         /* An abort packet: reset the call, passing the error up to the user. */
2967         /* What if error is zero? */
2968         /* What if the error is -1? the application will treat it as a timeout. */
2969         afs_int32 errdata = ntohl(*(afs_int32 *) rx_DataOf(np));
2970         dpf(("rxi_ReceivePacket ABORT rx_DataOf = %d", errdata));
2971         rxi_CallError(call, errdata);
2972         MUTEX_EXIT(&call->lock);
2973         MUTEX_ENTER(&conn->conn_data_lock);
2974         conn->refCount--;
2975         MUTEX_EXIT(&conn->conn_data_lock);
2976         return np;              /* call error set; packet goes back to the caller */
2977     }
2978     case RX_PACKET_TYPE_BUSY:
2979         /* XXXX */
2980         break;
2981     case RX_PACKET_TYPE_ACKALL:
2982         /* All packets acknowledged, so we can drop all packets previously
2983          * readied for sending */
2984 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2985         /* XXX Hack. Because we can't release the global rx lock when
2986          * sending packets (osi_NetSend) we drop all ack pkts while we're
2987          * traversing the tq in rxi_Start sending packets out because
2988          * packets may move to the freePacketQueue as result of being
2989          * here! So we drop these packets until we're safely out of the
2990          * traversing. Really ugly!
2991          * For fine grain RX locking, we set the acked field in the packets
2992          * and let rxi_Start remove the packets from the transmit queue.
2993          */
2994         if (call->flags & RX_CALL_TQ_BUSY) {
2995 #ifdef  RX_ENABLE_LOCKS
2996             rxi_SetAcksInTransmitQueue(call);
2997             break;
2998 #else /* RX_ENABLE_LOCKS */
2999             MUTEX_EXIT(&call->lock);
3000             MUTEX_ENTER(&conn->conn_data_lock);
3001             conn->refCount--;
3002             MUTEX_EXIT(&conn->conn_data_lock);
3003             return np;          /* xmitting; drop packet */
3004 #endif /* RX_ENABLE_LOCKS */
3005         }
3006 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3007         rxi_ClearTransmitQueue(call, 0);
3008         rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
3009         break;
3010     default:
3011         /* Should not reach here, unless the peer is broken: send an abort
3012          * packet */
3013         rxi_CallError(call, RX_PROTOCOL_ERROR);
3014         np = rxi_SendCallAbort(call, np, 1, 0);
3015         break;
3016     };
3017     /* Note when this last legitimate packet was received, for keep-alive
3018      * processing.  Note, we delay getting the time until now in the hope that
3019      * the packet will be delivered to the user before any get time is required
3020      * (if not, then the time won't actually be re-evaluated here). */
3021     call->lastReceiveTime = clock_Sec();
3022     MUTEX_EXIT(&call->lock);
3023     MUTEX_ENTER(&conn->conn_data_lock);
3024     conn->refCount--;
3025     MUTEX_EXIT(&conn->conn_data_lock);
3026     return np;
3027 }
3028
3029 /* Debugging aid: returns 1 if the connection looks "interesting", i.e. it has
3030  * RX_CONN_MAKECALL_WAITING or RX_CONN_DESTROY_ME set, or one of its calls is
3031  * in PRECALL/ACTIVE state or in SENDING/RECEIVING mode; returns 0 otherwise. */
3032 int
3033 rxi_IsConnInteresting(struct rx_connection *aconn)
3034 {
3035     register struct rx_call *c;
3036     int chan = 0;
3037
3038     /* Connection-level flags settle the question on their own */
3039     if ((aconn->flags & (RX_CONN_MAKECALL_WAITING | RX_CONN_DESTROY_ME)) != 0)
3040         return 1;
3041     /* Otherwise one busy call slot is enough */
3042     while (chan < RX_MAXCALLS) {
3043         c = aconn->call[chan++];
3044         if (c && ((c->state == RX_STATE_PRECALL)
3045                   || (c->state == RX_STATE_ACTIVE)
3046                   || (c->mode == RX_MODE_SENDING)
3047                   || (c->mode == RX_MODE_RECEIVING)))
3048             return 1;
3049     }
3050     return 0;
3051 }
3052
3053 #ifdef KERNEL
3054 /* Decide whether an incoming packet should be dropped to save packets.
3055  * Returns 1 for any packet but seq 1 on a call that was cleared while still in
3056  * PRECALL, or when fewer than rxi_dataQuota + 2 packets are free and the
3057  * packet would not soon satisfy a waiting reader; returns 0 otherwise. */
3058 static int
3059 TooLow(struct rx_packet *ap, struct rx_call *acall)
3060 {
3061     int drop;
3062
3063     MUTEX_ENTER(&rx_stats_mutex);
3064     if ((ap->header.seq != 1) && (acall->flags & RX_CALL_CLEARED)
3065         && (acall->state == RX_STATE_PRECALL)) {
3066         drop = 1;               /* cleared call not yet assigned to a thread */
3067     } else if (rx_nFreePackets >= rxi_dataQuota + 2) {
3068         drop = 0;               /* free packets are not running short */
3069     } else {
3070         drop = !((ap->header.seq < acall->rnext + rx_initSendWindow)
3071                  && (acall->flags & RX_CALL_READER_WAIT));
3072     }
3073     MUTEX_EXIT(&rx_stats_mutex);
3074     return drop;
3075 }
3076 #endif /* KERNEL */
3077
3078 /* Reachability probe for a connection; runs directly (event == NULL) or as
3079  * the rxevent callback it posts for itself.  While RX_CONN_ATTACHWAIT is set
3080  * it pings acall -- or, when acall is NULL, the first call found in PRECALL
3081  * state -- and, unless an event is already pending, posts itself again for
3082  * RX_CHECKREACH_TIMEOUT seconds ahead.  With no call to ping, the flag is cleared. */
3083 static void
3084 rxi_CheckReachEvent(struct rxevent *event, struct rx_connection *conn,
3085                     struct rx_call *acall)
3086 {
3087     struct rx_call *tcall = acall;
3088     struct clock fireAt, curTime;
3089     int attachWait, slot, ownLock;
3090
3091     MUTEX_ENTER(&conn->conn_data_lock);
3092     conn->checkReachEvent = NULL;
3093     attachWait = conn->flags & RX_CONN_ATTACHWAIT;
3094     if (event)
3095         conn->refCount--;       /* undo the increment made when posting */
3096     MUTEX_EXIT(&conn->conn_data_lock);
3097     if (!attachWait)
3098         return;
3099     if (!tcall) {
3100         MUTEX_ENTER(&conn->conn_call_lock);
3101         MUTEX_ENTER(&conn->conn_data_lock);
3102         for (slot = 0; !tcall && slot < RX_MAXCALLS; slot++) {
3103             struct rx_call *cand = conn->call[slot];
3104             if (cand && cand->state == RX_STATE_PRECALL)
3105                 tcall = cand;
3106         }
3107         if (!tcall) {
3108             /* Clearing the flag says rxi_CheckReachEvent is no longer running.
3109              * It must be atomic under conn_data_lock so a new call cannot slip
3110              * by: rxi_CheckConnReach tests the flag while holding that lock. */
3111             conn->flags &= ~RX_CONN_ATTACHWAIT;
3112         }
3113         MUTEX_EXIT(&conn->conn_data_lock);
3114         MUTEX_EXIT(&conn->conn_call_lock);
3115         if (!tcall)
3116             return;
3117     }
3118     /* Only a call picked here gets locked here; acall's lock is left alone */
3119     ownLock = (tcall != acall);
3120     if (ownLock)
3121         MUTEX_ENTER(&tcall->lock);
3122     rxi_SendAck(tcall, NULL, 0, RX_ACK_PING, 0);
3123     if (ownLock)
3124         MUTEX_EXIT(&tcall->lock);
3125     clock_GetTime(&curTime);
3126     fireAt = curTime;
3127     fireAt.sec += RX_CHECKREACH_TIMEOUT;
3128     MUTEX_ENTER(&conn->conn_data_lock);
3129     if (!conn->checkReachEvent) {
3130         conn->refCount++;
3131         conn->checkReachEvent =
3132             rxevent_PostNow(&fireAt, &curTime, rxi_CheckReachEvent, conn, NULL);
3133     }
3134     MUTEX_EXIT(&conn->conn_data_lock);
3135 }
3136
3137 /* Decide whether attaching a call must wait for a reachability check.
3138  * Returns 0 when the service has checkReach off or the peer's lastReachTime
3139  * is less than RX_CHECKREACH_TTL old.  Otherwise returns 1, after setting
3140  * RX_CONN_ATTACHWAIT and starting rxi_CheckReachEvent where that is needed. */
3141 static int
3142 rxi_CheckConnReach(struct rx_connection *conn, struct rx_call *call)
3143 {
3144     struct rx_peer *tpeer;
3145     afs_uint32 curSec, reachedAt;
3146     int alreadyWaiting;
3147
3148     if (!conn->service->checkReach)
3149         return 0;
3150     tpeer = conn->peer;
3151     curSec = clock_Sec();
3152     MUTEX_ENTER(&tpeer->peer_lock);
3153     reachedAt = tpeer->lastReachTime;
3154     MUTEX_EXIT(&tpeer->peer_lock);
3155     if (curSec - reachedAt < RX_CHECKREACH_TTL)
3156         return 0;
3157     MUTEX_ENTER(&conn->conn_data_lock);
3158     alreadyWaiting = (conn->flags & RX_CONN_ATTACHWAIT) != 0;
3159     if (!alreadyWaiting)
3160         conn->flags |= RX_CONN_ATTACHWAIT;
3161     MUTEX_EXIT(&conn->conn_data_lock);
3162     if (!alreadyWaiting && !conn->checkReachEvent)
3163         rxi_CheckReachEvent(NULL, conn, call);
3164     return 1;
3165 }
3166
3167 /* try to attach call, if authentication is complete */
3168 static void
3169 TryAttach(register struct rx_call *acall, register osi_socket socket,
3170           register int *tnop, register struct rx_call **newcallp,
3171           int reachOverride)
3172 {
3173     struct rx_connection *conn = acall->conn;
3174
3175     if (conn->type == RX_SERVER_CONNECTION
3176         && acall->state == RX_STATE_PRECALL) {
3177         /* Don't attach until we have any req'd. authentication. */
3178         if (RXS_CheckAuthentication(conn->securityObject, conn) == 0) {
3179             if (reachOverride || rxi_CheckConnReach(conn, acall) == 0)
3180                 rxi_AttachServerProc(acall, socket, tnop, newcallp);
3181             /* Note:  this does not necessarily succeed; there
3182              * may not any proc available
3183              */
3184         } else {
3185             rxi_ChallengeOn(acall->conn);
3186         }
3187     }
3188 }
3189
3190 /* A data packet has been received off the interface.  This packet is
3191  * appropriate to the call (the call is in the right state, etc.).  This
3192  * routine can return a packet to the caller, for re-use */
3193
3194 struct rx_packet *
3195 rxi_ReceiveDataPacket(register struct rx_call *call,
3196                       register struct rx_packet *np, int istack,
3197                       osi_socket socket, afs_uint32 host, u_short port,
3198                       int *tnop, struct rx_call **newcallp)
3199 {
3200     int ackNeeded = 0;          /* 0 means no, otherwise ack_reason */
3201     int newPackets = 0;
3202     int didHardAck = 0;
3203     int haveLast = 0;
3204     afs_uint32 seq, serial, flags;
3205     int isFirst;
3206     struct rx_packet *tnp;
3207     struct clock when, now;
3208     rx_MutexIncrement(rx_stats.dataPacketsRead, rx_stats_mutex);
3209
3210 #ifdef KERNEL
3211     /* If there are no packet buffers, drop this new packet, unless we can find
3212      * packet buffers from inactive calls */
3213     if (!call->error
3214         && (rxi_OverQuota(RX_PACKET_CLASS_RECEIVE) || TooLow(np, call))) {
3215         MUTEX_ENTER(&rx_freePktQ_lock);
3216         rxi_NeedMorePackets = TRUE;
3217         MUTEX_EXIT(&rx_freePktQ_lock);
3218         rx_MutexIncrement(rx_stats.noPacketBuffersOnRead, rx_stats_mutex);
3219         call->rprev = np->header.serial;
3220         rxi_calltrace(RX_TRACE_DROP, call);
3221         dpf(("packet %x dropped on receipt - quota problems", np));
3222         if (rxi_doreclaim)
3223             rxi_ClearReceiveQueue(call);
3224         clock_GetTime(&now);
3225         when = now;
3226         clock_Add(&when, &rx_softAckDelay);
3227         if (!call->delayedAckEvent
3228             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3229             rxevent_Cancel(call->delayedAckEvent, call,
3230                            RX_CALL_REFCOUNT_DELAY);
3231             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3232             call->delayedAckEvent =
3233                 rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
3234         }
3235         /* we've damaged this call already, might as well do it in. */
3236         return np;
3237     }
3238 #endif /* KERNEL */
3239
3240     /*
3241      * New in AFS 3.5, if the RX_JUMBO_PACKET flag is set then this
3242      * packet is one of several packets transmitted as a single
3243      * datagram. Do not send any soft or hard acks until all packets
3244      * in a jumbogram have been processed. Send negative acks right away.
3245      */
3246     for (isFirst = 1, tnp = NULL; isFirst || tnp; isFirst = 0) {
3247         /* tnp is non-null when there are more packets in the
3248          * current jumbo gram */
3249         if (tnp) {
3250             if (np)
3251                 rxi_FreePacket(np);
3252             np = tnp;
3253         }
3254
3255         seq = np->header.seq;
3256         serial = np->header.serial;
3257         flags = np->header.flags;
3258
3259         /* If the call is in an error state, send an abort message */
3260         if (call->error)
3261             return rxi_SendCallAbort(call, np, istack, 0);
3262
3263         /* The RX_JUMBO_PACKET is set in all but the last packet in each
3264          * AFS 3.5 jumbogram. */
3265         if (flags & RX_JUMBO_PACKET) {
3266             tnp = rxi_SplitJumboPacket(np, host, port, isFirst);
3267         } else {
3268             tnp = NULL;
3269         }
3270
3271         if (np->header.spare != 0) {
3272             MUTEX_ENTER(&call->conn->conn_data_lock);
3273             call->conn->flags |= RX_CONN_USING_PACKET_CKSUM;
3274             MUTEX_EXIT(&call->conn->conn_data_lock);
3275         }
3276
3277         /* The usual case is that this is the expected next packet */
3278         if (seq == call->rnext) {
3279
3280             /* Check to make sure it is not a duplicate of one already queued */
3281             if (queue_IsNotEmpty(&call->rq)
3282                 && queue_First(&call->rq, rx_packet)->header.seq == seq) {
3283                 rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3284                 dpf(("packet %x dropped on receipt - duplicate", np));
3285                 rxevent_Cancel(call->delayedAckEvent, call,
3286                                RX_CALL_REFCOUNT_DELAY);
3287                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3288                 ackNeeded = 0;
3289                 call->rprev = seq;
3290                 continue;
3291             }
3292
3293             /* It's the next packet. Stick it on the receive queue
3294              * for this call. Set newPackets to make sure we wake
3295              * the reader once all packets have been processed */
3296             queue_Prepend(&call->rq, np);
3297             call->nSoftAcks++;
3298             np = NULL;          /* We can't use this anymore */
3299             newPackets = 1;
3300
3301             /* If an ack is requested then set a flag to make sure we
3302              * send an acknowledgement for this packet */
3303             if (flags & RX_REQUEST_ACK) {
3304                 ackNeeded = RX_ACK_REQUESTED;
3305             }
3306
3307             /* Keep track of whether we have received the last packet */
3308             if (flags & RX_LAST_PACKET) {
3309                 call->flags |= RX_CALL_HAVE_LAST;
3310                 haveLast = 1;
3311             }
3312
3313             /* Check whether we have all of the packets for this call */
3314             if (call->flags & RX_CALL_HAVE_LAST) {
3315                 afs_uint32 tseq;        /* temporary sequence number */
3316                 struct rx_packet *tp;   /* Temporary packet pointer */
3317                 struct rx_packet *nxp;  /* Next pointer, for queue_Scan */
3318
3319                 for (tseq = seq, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3320                     if (tseq != tp->header.seq)
3321                         break;
3322                     if (tp->header.flags & RX_LAST_PACKET) {
3323                         call->flags |= RX_CALL_RECEIVE_DONE;
3324                         break;
3325                     }
3326                     tseq++;
3327                 }
3328             }
3329
3330             /* Provide asynchronous notification for those who want it
3331              * (e.g. multi rx) */
3332             if (call->arrivalProc) {
3333                 (*call->arrivalProc) (call, call->arrivalProcHandle,
3334                                       call->arrivalProcArg);
3335                 call->arrivalProc = (void (*)())0;
3336             }
3337
3338             /* Update last packet received */
3339             call->rprev = seq;
3340
3341             /* If there is no server process serving this call, grab
3342              * one, if available. We only need to do this once. If a
3343              * server thread is available, this thread becomes a server
3344              * thread and the server thread becomes a listener thread. */
3345             if (isFirst) {
3346                 TryAttach(call, socket, tnop, newcallp, 0);
3347             }
3348         }
3349         /* This is not the expected next packet. */
3350         else {
3351             /* Determine whether this is a new or old packet, and if it's
3352              * a new one, whether it fits into the current receive window.
3353              * Also figure out whether the packet was delivered in sequence.
3354              * We use the prev variable to determine whether the new packet
3355              * is the successor of its immediate predecessor in the
3356              * receive queue, and the missing flag to determine whether
3357              * any of this packets predecessors are missing.  */
3358
3359             afs_uint32 prev;    /* "Previous packet" sequence number */
3360             struct rx_packet *tp;       /* Temporary packet pointer */
3361             struct rx_packet *nxp;      /* Next pointer, for queue_Scan */
3362             int missing;        /* Are any predecessors missing? */
3363
3364             /* If the new packet's sequence number has been sent to the
3365              * application already, then this is a duplicate */
3366             if (seq < call->rnext) {
3367                 rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3368                 rxevent_Cancel(call->delayedAckEvent, call,
3369                                RX_CALL_REFCOUNT_DELAY);
3370                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3371                 ackNeeded = 0;
3372                 call->rprev = seq;
3373                 continue;
3374             }
3375
3376             /* If the sequence number is greater than what can be
3377              * accomodated by the current window, then send a negative
3378              * acknowledge and drop the packet */
3379             if ((call->rnext + call->rwind) <= seq) {
3380                 rxevent_Cancel(call->delayedAckEvent, call,
3381                                RX_CALL_REFCOUNT_DELAY);
3382                 np = rxi_SendAck(call, np, serial, RX_ACK_EXCEEDS_WINDOW,
3383                                  istack);
3384                 ackNeeded = 0;
3385                 call->rprev = seq;
3386                 continue;
3387             }
3388
3389             /* Look for the packet in the queue of old received packets */
3390             for (prev = call->rnext - 1, missing =
3391                  0, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3392                 /*Check for duplicate packet */
3393                 if (seq == tp->header.seq) {
3394                     rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3395                     rxevent_Cancel(call->delayedAckEvent, call,
3396                                    RX_CALL_REFCOUNT_DELAY);
3397                     np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE,
3398                                      istack);
3399                     ackNeeded = 0;
3400                     call->rprev = seq;
3401                     goto nextloop;
3402                 }
3403                 /* If we find a higher sequence packet, break out and
3404                  * insert the new packet here. */
3405                 if (seq < tp->header.seq)
3406                     break;
3407                 /* Check for missing packet */
3408                 if (tp->header.seq != prev + 1) {
3409                     missing = 1;
3410                 }
3411
3412                 prev = tp->header.seq;
3413             }
3414
3415             /* Keep track of whether we have received the last packet. */
3416             if (flags & RX_LAST_PACKET) {
3417                 call->flags |= RX_CALL_HAVE_LAST;
3418             }
3419
3420             /* It's within the window: add it to the the receive queue.
3421              * tp is left by the previous loop either pointing at the
3422              * packet before which to insert the new packet, or at the
3423              * queue head if the queue is empty or the packet should be
3424              * appended. */
3425             queue_InsertBefore(tp, np);
3426             call->nSoftAcks++;
3427             np = NULL;
3428
3429             /* Check whether we have all of the packets for this call */
3430             if ((call->flags & RX_CALL_HAVE_LAST)
3431                 && !(call->flags & RX_CALL_RECEIVE_DONE)) {
3432                 afs_uint32 tseq;        /* temporary sequence number */
3433
3434                 for (tseq =
3435                      call->rnext, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3436                     if (tseq != tp->header.seq)
3437                         break;
3438                     if (tp->header.flags & RX_LAST_PACKET) {
3439                         call->flags |= RX_CALL_RECEIVE_DONE;
3440                         break;
3441                     }
3442                     tseq++;
3443                 }
3444             }
3445
3446             /* We need to send an ack of the packet is out of sequence,
3447              * or if an ack was requested by the peer. */
3448             if (seq != prev + 1 || missing) {
3449                 ackNeeded = RX_ACK_OUT_OF_SEQUENCE;
3450             } else if (flags & RX_REQUEST_ACK) {
3451                 ackNeeded = RX_ACK_REQUESTED;
3452             }
3453
3454             /* Acknowledge the last packet for each call */
3455             if (flags & RX_LAST_PACKET) {
3456                 haveLast = 1;
3457             }
3458
3459             call->rprev = seq;
3460         }
3461       nextloop:;
3462     }
3463
3464     if (newPackets) {
3465         /*
3466          * If the receiver is waiting for an iovec, fill the iovec
3467          * using the data from the receive queue */
3468         if (call->flags & RX_CALL_IOVEC_WAIT) {
3469             didHardAck = rxi_FillReadVec(call, serial);
3470             /* the call may have been aborted */
3471             if (call->error) {
3472                 return NULL;
3473             }
3474             if (didHardAck) {
3475                 ackNeeded = 0;
3476             }
3477         }
3478
3479         /* Wakeup the reader if any */
3480         if ((call->flags & RX_CALL_READER_WAIT)
3481             && (!(call->flags & RX_CALL_IOVEC_WAIT) || !(call->iovNBytes)
3482                 || (call->iovNext >= call->iovMax)
3483                 || (call->flags & RX_CALL_RECEIVE_DONE))) {
3484             call->flags &= ~RX_CALL_READER_WAIT;
3485 #ifdef  RX_ENABLE_LOCKS
3486             CV_BROADCAST(&call->cv_rq);
3487 #else
3488             osi_rxWakeup(&call->rq);
3489 #endif
3490         }
3491     }
3492
3493     /*
3494      * Send an ack when requested by the peer, or once every
3495      * rxi_SoftAckRate packets until the last packet has been
3496      * received. Always send a soft ack for the last packet in
3497      * the server's reply. */
3498     if (ackNeeded) {
3499         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3500         np = rxi_SendAck(call, np, serial, ackNeeded, istack);
3501     } else if (call->nSoftAcks > (u_short) rxi_SoftAckRate) {
3502         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3503         np = rxi_SendAck(call, np, serial, RX_ACK_IDLE, istack);
3504     } else if (call->nSoftAcks) {
3505         clock_GetTime(&now);
3506         when = now;
3507         if (haveLast && !(flags & RX_CLIENT_INITIATED)) {
3508             clock_Add(&when, &rx_lastAckDelay);
3509         } else {
3510             clock_Add(&when, &rx_softAckDelay);
3511         }
3512         if (!call->delayedAckEvent
3513             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3514             rxevent_Cancel(call->delayedAckEvent, call,
3515                            RX_CALL_REFCOUNT_DELAY);
3516             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3517             call->delayedAckEvent =
3518                 rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
3519         }
3520     } else if (call->flags & RX_CALL_RECEIVE_DONE) {
3521         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3522     }
3523
3524     return np;
3525 }
3526
#if defined(ADAPT_WINDOW)
/* Forward declaration so the ack-processing code below can call
 * rxi_ComputeRate; only needed when ADAPT_WINDOW is defined. */
static void rxi_ComputeRate();
#endif /* ADAPT_WINDOW */
3530
3531 static void
3532 rxi_UpdatePeerReach(struct rx_connection *conn, struct rx_call *acall)
3533 {
3534     struct rx_peer *peer = conn->peer;
3535
3536     MUTEX_ENTER(&peer->peer_lock);
3537     peer->lastReachTime = clock_Sec();
3538     MUTEX_EXIT(&peer->peer_lock);
3539
3540     MUTEX_ENTER(&conn->conn_data_lock);
3541     if (conn->flags & RX_CONN_ATTACHWAIT) {
3542         int i;
3543
3544         conn->flags &= ~RX_CONN_ATTACHWAIT;
3545         MUTEX_EXIT(&conn->conn_data_lock);
3546
3547         for (i = 0; i < RX_MAXCALLS; i++) {
3548             struct rx_call *call = conn->call[i];
3549             if (call) {
3550                 if (call != acall)
3551                     MUTEX_ENTER(&call->lock);
3552                 /* tnop can be null if newcallp is null */
3553                 TryAttach(call, (osi_socket) - 1, NULL, NULL, 1);
3554                 if (call != acall)
3555                     MUTEX_EXIT(&call->lock);
3556             }
3557         }
3558     } else
3559         MUTEX_EXIT(&conn->conn_data_lock);
3560 }
3561
3562 static const char *
3563 rx_ack_reason(int reason)
3564 {
3565     switch (reason) {
3566     case RX_ACK_REQUESTED:
3567         return "requested";
3568     case RX_ACK_DUPLICATE:
3569         return "duplicate";
3570     case RX_ACK_OUT_OF_SEQUENCE:
3571         return "sequence";
3572     case RX_ACK_EXCEEDS_WINDOW:
3573         return "window";
3574     case RX_ACK_NOSPACE:
3575         return "nospace";
3576     case RX_ACK_PING:
3577         return "ping";
3578     case RX_ACK_PING_RESPONSE:
3579         return "response";
3580     case RX_ACK_DELAY:
3581         return "delay";
3582     case RX_ACK_IDLE:
3583         return "idle";
3584     default:
3585         return "unknown!!";
3586     }
3587 }
3588
3589
3590 /* rxi_ComputePeerNetStats
3591  *
3592  * Called exclusively by rxi_ReceiveAckPacket to compute network link
3593  * estimates (like RTT and throughput) based on ack packets.  Caller
3594  * must ensure that the packet in question is the right one (i.e.
3595  * serial number matches).
3596  */
3597 static void
3598 rxi_ComputePeerNetStats(struct rx_call *call, struct rx_packet *p,
3599                         struct rx_ackPacket *ap, struct rx_packet *np)
3600 {
3601     struct rx_peer *peer = call->conn->peer;
3602
3603     /* Use RTT if not delayed by client. */
3604     if (ap->reason != RX_ACK_DELAY)
3605         rxi_ComputeRoundTripTime(p, &p->timeSent, peer);
3606 #ifdef ADAPT_WINDOW
3607     rxi_ComputeRate(peer, call, p, np, ap->reason);
3608 #endif
3609 }
3610
3611 /* The real smarts of the whole thing.  */
3612 struct rx_packet *
3613 rxi_ReceiveAckPacket(register struct rx_call *call, struct rx_packet *np,
3614                      int istack)
3615 {
3616     struct rx_ackPacket *ap;
3617     int nAcks;
3618     register struct rx_packet *tp;
3619     register struct rx_packet *nxp;     /* Next packet pointer for queue_Scan */
3620     register struct rx_connection *conn = call->conn;
3621     struct rx_peer *peer = conn->peer;
3622     afs_uint32 first;
3623     afs_uint32 serial;
3624     /* because there are CM's that are bogus, sending weird values for this. */
3625     afs_uint32 skew = 0;
3626     int nbytes;
3627     int missing;
3628     int acked;
3629     int nNacked = 0;
3630     int newAckCount = 0;
3631     u_short maxMTU = 0;         /* Set if peer supports AFS 3.4a jumbo datagrams */
3632     int maxDgramPackets = 0;    /* Set if peer supports AFS 3.5 jumbo datagrams */
3633
3634     rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
3635     ap = (struct rx_ackPacket *)rx_DataOf(np);
3636     nbytes = rx_Contiguous(np) - (int)((ap->acks) - (u_char *) ap);
3637     if (nbytes < 0)
3638         return np;              /* truncated ack packet */
3639
3640     /* depends on ack packet struct */
3641     nAcks = MIN((unsigned)nbytes, (unsigned)ap->nAcks);
3642     first = ntohl(ap->firstPacket);
3643     serial = ntohl(ap->serial);
3644     /* temporarily disabled -- needs to degrade over time
3645      * skew = ntohs(ap->maxSkew); */
3646
3647     /* Ignore ack packets received out of order */
3648     if (first < call->tfirst) {
3649         return np;
3650     }
3651
3652     if (np->header.flags & RX_SLOW_START_OK) {
3653         call->flags |= RX_CALL_SLOW_START_OK;
3654     }
3655
3656     if (ap->reason == RX_ACK_PING_RESPONSE)
3657         rxi_UpdatePeerReach(conn, call);
3658
3659 #ifdef RXDEBUG
3660 #ifdef AFS_NT40_ENV
3661     if (rxdebug_active) {
3662         char msg[512];
3663         size_t len;
3664
3665         len = _snprintf(msg, sizeof(msg),
3666                         "tid[%d] RACK: reason %s serial %u previous %u seq %u skew %d first %u acks %u space %u ",
3667                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
3668                          ntohl(ap->serial), ntohl(ap->previousPacket),
3669                          (unsigned int)np->header.seq, (unsigned int)skew,
3670                          ntohl(ap->firstPacket), ap->nAcks, ntohs(ap->bufferSpace) );
3671         if (nAcks) {
3672             int offset;
3673
3674             for (offset = 0; offset < nAcks && len < sizeof(msg); offset++)
3675                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
3676         }
3677         msg[len++]='\n';
3678         msg[len] = '\0';
3679         OutputDebugString(msg);
3680     }
3681 #else /* AFS_NT40_ENV */
3682     if (rx_Log) {
3683         fprintf(rx_Log,
3684                 "RACK: reason %x previous %u seq %u serial %u skew %d first %u",
3685                 ap->reason, ntohl(ap->previousPacket),
3686                 (unsigned int)np->header.seq, (unsigned int)serial,
3687                 (unsigned int)skew, ntohl(ap->firstPacket));
3688         if (nAcks) {
3689             int offset;
3690             for (offset = 0; offset < nAcks; offset++)
3691                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
3692                      rx_Log);
3693         }
3694         putc('\n', rx_Log);
3695     }
3696 #endif /* AFS_NT40_ENV */
3697 #endif
3698
3699     /* Update the outgoing packet skew value to the latest value of
3700      * the peer's incoming packet skew value.  The ack packet, of
3701      * course, could arrive out of order, but that won't affect things
3702      * much */
3703     MUTEX_ENTER(&peer->peer_lock);
3704     peer->outPacketSkew = skew;
3705
3706     /* Check for packets that no longer need to be transmitted, and
3707      * discard them.  This only applies to packets positively
3708      * acknowledged as having been sent to the peer's upper level.
3709      * All other packets must be retained.  So only packets with
3710      * sequence numbers < ap->firstPacket are candidates. */
3711     for (queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3712         if (tp->header.seq >= first)
3713             break;
3714         call->tfirst = tp->header.seq + 1;
3715         if (serial
3716             && (tp->header.serial == serial || tp->firstSerial == serial))
3717             rxi_ComputePeerNetStats(call, tp, ap, np);
3718         if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3719             newAckCount++;
3720         }
3721 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3722         /* XXX Hack. Because we have to release the global rx lock when sending
3723          * packets (osi_NetSend) we drop all acks while we're traversing the tq
3724          * in rxi_Start sending packets out because packets may move to the
3725          * freePacketQueue as result of being here! So we drop these packets until
3726          * we're safely out of the traversing. Really ugly!
3727          * To make it even uglier, if we're using fine grain locking, we can
3728          * set the ack bits in the packets and have rxi_Start remove the packets
3729          * when it's done transmitting.
3730          */
3731         if (call->flags & RX_CALL_TQ_BUSY) {
3732 #ifdef RX_ENABLE_LOCKS
3733             tp->flags |= RX_PKTFLAG_ACKED;
3734             call->flags |= RX_CALL_TQ_SOME_ACKED;
3735 #else /* RX_ENABLE_LOCKS */
3736             break;
3737 #endif /* RX_ENABLE_LOCKS */
3738         } else
3739 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3740         {
3741             queue_Remove(tp);
3742             rxi_FreePacket(tp); /* rxi_FreePacket mustn't wake up anyone, preemptively. */
3743         }
3744     }
3745
3746 #ifdef ADAPT_WINDOW
3747     /* Give rate detector a chance to respond to ping requests */
3748     if (ap->reason == RX_ACK_PING_RESPONSE) {
3749         rxi_ComputeRate(peer, call, 0, np, ap->reason);
3750     }
3751 #endif
3752
3753     /* N.B. we don't turn off any timers here.  They'll go away by themselves, anyway */
3754
3755     /* Now go through explicit acks/nacks and record the results in
3756      * the waiting packets.  These are packets that can't be released
3757      * yet, even with a positive acknowledge.  This positive
3758      * acknowledge only means the packet has been received by the
3759      * peer, not that it will be retained long enough to be sent to
3760      * the peer's upper level.  In addition, reset the transmit timers
3761      * of any missing packets (those packets that must be missing
3762      * because this packet was out of sequence) */
3763
3764     call->nSoftAcked = 0;
3765     for (missing = 0, queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3766         /* Update round trip time if the ack was stimulated on receipt
3767          * of this packet */
3768 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
3769 #ifdef RX_ENABLE_LOCKS
3770         if (tp->header.seq >= first)
3771 #endif /* RX_ENABLE_LOCKS */
3772 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3773             if (serial
3774                 && (tp->header.serial == serial || tp->firstSerial == serial))
3775                 rxi_ComputePeerNetStats(call, tp, ap, np);
3776
3777         /* Set the acknowledge flag per packet based on the
3778          * information in the ack packet. An acknowlegded packet can
3779          * be downgraded when the server has discarded a packet it
3780          * soacked previously, or when an ack packet is received
3781          * out of sequence. */
3782         if (tp->header.seq < first) {
3783             /* Implicit ack information */
3784             if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3785                 newAckCount++;
3786             }
3787             tp->flags |= RX_PKTFLAG_ACKED;
3788         } else if (tp->header.seq < first + nAcks) {
3789             /* Explicit ack information:  set it in the packet appropriately */
3790             if (ap->acks[tp->header.seq - first] == RX_ACK_TYPE_ACK) {
3791                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3792                     newAckCount++;
3793                     tp->flags |= RX_PKTFLAG_ACKED;
3794                 }
3795                 if (missing) {
3796                     nNacked++;
3797                 } else {
3798                     call->nSoftAcked++;
3799                 }
3800             } else /* RX_ACK_TYPE_NACK */ {
3801                 tp->flags &= ~RX_PKTFLAG_ACKED;
3802                 missing = 1;
3803             }
3804         } else {
3805             tp->flags &= ~RX_PKTFLAG_ACKED;
3806             missing = 1;
3807         }
3808
3809         /* If packet isn't yet acked, and it has been transmitted at least
3810          * once, reset retransmit time using latest timeout
3811          * ie, this should readjust the retransmit timer for all outstanding
3812          * packets...  So we don't just retransmit when we should know better*/
3813
3814         if (!(tp->flags & RX_PKTFLAG_ACKED) && !clock_IsZero(&tp->retryTime)) {
3815             tp->retryTime = tp->timeSent;
3816             clock_Add(&tp->retryTime, &peer->timeout);
3817             /* shift by eight because one quarter-sec ~ 256 milliseconds */
3818             clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8);
3819         }
3820     }
3821
3822     /* If the window has been extended by this acknowledge packet,
3823      * then wakeup a sender waiting in alloc for window space, or try
3824      * sending packets now, if he's been sitting on packets due to
3825      * lack of window space */
3826     if (call->tnext < (call->tfirst + call->twind)) {
3827 #ifdef  RX_ENABLE_LOCKS
3828         CV_SIGNAL(&call->cv_twind);
3829 #else
3830         if (call->flags & RX_CALL_WAIT_WINDOW_ALLOC) {
3831             call->flags &= ~RX_CALL_WAIT_WINDOW_ALLOC;
3832             osi_rxWakeup(&call->twind);
3833         }
3834 #endif
3835         if (call->flags & RX_CALL_WAIT_WINDOW_SEND) {
3836             call->flags &= ~RX_CALL_WAIT_WINDOW_SEND;
3837         }
3838     }
3839
3840     /* if the ack packet has a receivelen field hanging off it,
3841      * update our state */
3842     if (np->length >= rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32)) {
3843         afs_uint32 tSize;
3844
3845         /* If the ack packet has a "recommended" size that is less than
3846          * what I am using now, reduce my size to match */
3847         rx_packetread(np, rx_AckDataSize(ap->nAcks) + sizeof(afs_int32),
3848                       (int)sizeof(afs_int32), &tSize);
3849         tSize = (afs_uint32) ntohl(tSize);
3850         peer->natMTU = rxi_AdjustIfMTU(MIN(tSize, peer->ifMTU));
3851
3852         /* Get the maximum packet size to send to this peer */
3853         rx_packetread(np, rx_AckDataSize(ap->nAcks), (int)sizeof(afs_int32),
3854                       &tSize);
3855         tSize = (afs_uint32) ntohl(tSize);
3856         tSize = (afs_uint32) MIN(tSize, rx_MyMaxSendSize);
3857         tSize = rxi_AdjustMaxMTU(peer->natMTU, tSize);
3858
3859         /* sanity check - peer might have restarted with different params.
3860          * If peer says "send less", dammit, send less...  Peer should never
3861          * be unable to accept packets of the size that prior AFS versions would
3862          * send without asking.  */
3863         if (peer->maxMTU != tSize) {
3864             if (peer->maxMTU > tSize) /* possible cong., maxMTU decreased */
3865                 peer->congestSeq++;
3866             peer->maxMTU = tSize;
3867             peer->MTU = MIN(tSize, peer->MTU);
3868             call->MTU = MIN(call->MTU, tSize);
3869         }
3870
3871         if (np->length == rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32)) {
3872             /* AFS 3.4a */
3873             rx_packetread(np,
3874                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3875                           (int)sizeof(afs_int32), &tSize);
3876             tSize = (afs_uint32) ntohl(tSize);  /* peer's receive window, if it's */
3877             if (tSize < call->twind) {  /* smaller than our send */
3878                 call->twind = tSize;    /* window, we must send less... */
3879                 call->ssthresh = MIN(call->twind, call->ssthresh);
3880                 call->conn->twind[call->channel] = call->twind;
3881             }
3882
3883             /* Only send jumbograms to 3.4a fileservers. 3.3a RX gets the
3884              * network MTU confused with the loopback MTU. Calculate the
3885              * maximum MTU here for use in the slow start code below.
3886              */
3887             maxMTU = peer->maxMTU;
3888             /* Did peer restart with older RX version? */
3889             if (peer->maxDgramPackets > 1) {
3890                 peer->maxDgramPackets = 1;
3891             }
3892         } else if (np->length >=
3893                    rx_AckDataSize(ap->nAcks) + 4 * sizeof(afs_int32)) {
3894             /* AFS 3.5 */
3895             rx_packetread(np,
3896                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3897                           sizeof(afs_int32), &tSize);
3898             tSize = (afs_uint32) ntohl(tSize);
3899             /*
3900              * As of AFS 3.5 we set the send window to match the receive window.
3901              */
3902             if (tSize < call->twind) {
3903                 call->twind = tSize;
3904                 call->conn->twind[call->channel] = call->twind;
3905                 call->ssthresh = MIN(call->twind, call->ssthresh);
3906             } else if (tSize > call->twind) {
3907                 call->twind = tSize;
3908                 call->conn->twind[call->channel] = call->twind;
3909             }
3910
3911             /*
3912              * As of AFS 3.5, a jumbogram is more than one fixed size
3913              * packet transmitted in a single UDP datagram. If the remote
3914              * MTU is smaller than our local MTU then never send a datagram
3915              * larger than the natural MTU.
3916              */
3917             rx_packetread(np,
3918                           rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32),
3919                           sizeof(afs_int32), &tSize);
3920             maxDgramPackets = (afs_uint32) ntohl(tSize);
3921             maxDgramPackets = MIN(maxDgramPackets, rxi_nDgramPackets);
3922             maxDgramPackets = MIN(maxDgramPackets, peer->ifDgramPackets);
3923             if (peer->natMTU < peer->ifMTU)
3924                 maxDgramPackets = MIN(maxDgramPackets, rxi_AdjustDgramPackets(1, peer->natMTU));
3925             if (maxDgramPackets > 1) {
3926                 peer->maxDgramPackets = maxDgramPackets;
3927                 call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
3928             } else {
3929                 peer->maxDgramPackets = 1;
3930                 call->MTU = peer->natMTU;
3931             }
3932         } else if (peer->maxDgramPackets > 1) {
3933             /* Restarted with lower version of RX */
3934             peer->maxDgramPackets = 1;
3935         }
3936     } else if (peer->maxDgramPackets > 1
3937                || peer->maxMTU != OLD_MAX_PACKET_SIZE) {
3938         /* Restarted with lower version of RX */
3939         peer->maxMTU = OLD_MAX_PACKET_SIZE;
3940         peer->natMTU = OLD_MAX_PACKET_SIZE;
3941         peer->MTU = OLD_MAX_PACKET_SIZE;
3942         peer->maxDgramPackets = 1;
3943         peer->nDgramPackets = 1;
3944         peer->congestSeq++;
3945         call->MTU = OLD_MAX_PACKET_SIZE;
3946     }
3947
3948     if (nNacked) {
3949         /*
3950          * Calculate how many datagrams were successfully received after
3951          * the first missing packet and adjust the negative ack counter
3952          * accordingly.
3953          */
3954         call->nAcks = 0;
3955         call->nNacks++;
3956         nNacked = (nNacked + call->nDgramPackets - 1) / call->nDgramPackets;
3957         if (call->nNacks < nNacked) {
3958             call->nNacks = nNacked;
3959         }
3960     } else {
3961         call->nAcks += newAckCount;
3962         call->nNacks = 0;
3963     }
3964
3965     if (call->flags & RX_CALL_FAST_RECOVER) {
3966         if (nNacked) {
3967             call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3968         } else {
3969             call->flags &= ~RX_CALL_FAST_RECOVER;
3970             call->cwind = call->nextCwind;
3971             call->nextCwind = 0;
3972             call->nAcks = 0;
3973         }
3974         call->nCwindAcks = 0;
3975     } else if (nNacked && call->nNacks >= (u_short) rx_nackThreshold) {
3976         /* Three negative acks in a row trigger congestion recovery */
3977 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3978         MUTEX_EXIT(&peer->peer_lock);
3979         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
3980             /* someone else is waiting to start recovery */
3981             return np;
3982         }
3983         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
3984         rxi_WaitforTQBusy(call);
3985         MUTEX_ENTER(&peer->peer_lock);
3986 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3987         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
3988         call->flags |= RX_CALL_FAST_RECOVER;
3989         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
3990         call->cwind =
3991             MIN((int)(call->ssthresh + rx_nackThreshold), rx_maxSendWindow);
3992         call->nDgramPackets = MAX(2, (int)call->nDgramPackets) >> 1;
3993         call->nextCwind = call->ssthresh;
3994         call->nAcks = 0;
3995         call->nNacks = 0;
3996         peer->MTU = call->MTU;
3997         peer->cwind = call->nextCwind;
3998         peer->nDgramPackets = call->nDgramPackets;
3999         peer->congestSeq++;
4000         call->congestSeq = peer->congestSeq;
4001         /* Reset the resend times on the packets that were nacked
4002          * so we will retransmit as soon as the window permits*/
4003         for (acked = 0, queue_ScanBackwards(&call->tq, tp, nxp, rx_packet)) {
4004             if (acked) {
4005                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
4006                     clock_Zero(&tp->retryTime);
4007                 }
4008             } else if (tp->flags & RX_PKTFLAG_ACKED) {
4009                 acked = 1;
4010             }
4011         }
4012     } else {
4013         /* If cwind is smaller than ssthresh, then increase
4014          * the window one packet for each ack we receive (exponential
4015          * growth).
4016          * If cwind is greater than or equal to ssthresh then increase
4017          * the congestion window by one packet for each cwind acks we
4018          * receive (linear growth).  */
4019         if (call->cwind < call->ssthresh) {
4020             call->cwind =
4021                 MIN((int)call->ssthresh, (int)(call->cwind + newAckCount));
4022             call->nCwindAcks = 0;
4023         } else {
4024             call->nCwindAcks += newAckCount;
4025             if (call->nCwindAcks >= call->cwind) {
4026                 call->nCwindAcks = 0;
4027                 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
4028             }
4029         }
4030         /*
4031          * If we have received several acknowledgements in a row then
4032          * it is time to increase the size of our datagrams
4033          */
4034         if ((int)call->nAcks > rx_nDgramThreshold) {
4035             if (peer->maxDgramPackets > 1) {
4036                 if (call->nDgramPackets < peer->maxDgramPackets) {
4037                     call->nDgramPackets++;
4038                 }
4039                 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4040             } else if (call->MTU < peer->maxMTU) {
4041                 call->MTU += peer->natMTU;
4042                 call->MTU = MIN(call->MTU, peer->maxMTU);
4043             }
4044             call->nAcks = 0;
4045         }
4046     }
4047
4048     MUTEX_EXIT(&peer->peer_lock);       /* rxi_Start will lock peer. */
4049
4050     /* Servers need to hold the call until all response packets have
4051      * been acknowledged. Soft acks are good enough since clients
4052      * are not allowed to clear their receive queues. */
4053     if (call->state == RX_STATE_HOLD
4054         && call->tfirst + call->nSoftAcked >= call->tnext) {
4055         call->state = RX_STATE_DALLY;
4056         rxi_ClearTransmitQueue(call, 0);
4057         rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
4058     } else if (!queue_IsEmpty(&call->tq)) {
4059         rxi_Start(0, call, 0, istack);
4060     }
4061     return np;
4062 }
4063
4064 /* Received a response to a challenge packet */
4065 struct rx_packet *
4066 rxi_ReceiveResponsePacket(register struct rx_connection *conn,
4067                           register struct rx_packet *np, int istack)
4068 {
4069     int error;
4070
4071     /* Ignore the packet if we're the client */
4072     if (conn->type == RX_CLIENT_CONNECTION)
4073         return np;
4074
4075     /* If already authenticated, ignore the packet (it's probably a retry) */
4076     if (RXS_CheckAuthentication(conn->securityObject, conn) == 0)
4077         return np;
4078
4079     /* Otherwise, have the security object evaluate the response packet */
4080     error = RXS_CheckResponse(conn->securityObject, conn, np);
4081     if (error) {
4082         /* If the response is invalid, reset the connection, sending
4083          * an abort to the peer */
4084 #ifndef KERNEL
4085         rxi_Delay(1);
4086 #endif
4087         rxi_ConnectionError(conn, error);
4088         MUTEX_ENTER(&conn->conn_data_lock);
4089         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4090         MUTEX_EXIT(&conn->conn_data_lock);
4091         return np;
4092     } else {
4093         /* If the response is valid, any calls waiting to attach
4094          * servers can now do so */
4095         int i;
4096
4097         for (i = 0; i < RX_MAXCALLS; i++) {
4098             struct rx_call *call = conn->call[i];
4099             if (call) {
4100                 MUTEX_ENTER(&call->lock);
4101                 if (call->state == RX_STATE_PRECALL)
4102                     rxi_AttachServerProc(call, (osi_socket) - 1, NULL, NULL);
4103                 /* tnop can be null if newcallp is null */
4104                 MUTEX_EXIT(&call->lock);
4105             }
4106         }
4107
4108         /* Update the peer reachability information, just in case
4109          * some calls went into attach-wait while we were waiting
4110          * for authentication..
4111          */
4112         rxi_UpdatePeerReach(conn, NULL);
4113     }
4114     return np;
4115 }
4116
4117 /* A client has received an authentication challenge: the security
4118  * object is asked to cough up a respectable response packet to send
4119  * back to the server.  The server is responsible for retrying the
4120  * challenge if it fails to get a response. */
4121
4122 struct rx_packet *
4123 rxi_ReceiveChallengePacket(register struct rx_connection *conn,
4124                            register struct rx_packet *np, int istack)
4125 {
4126     int error;
4127
4128     /* Ignore the challenge if we're the server */
4129     if (conn->type == RX_SERVER_CONNECTION)
4130         return np;
4131
4132     /* Ignore the challenge if the connection is otherwise idle; someone's
4133      * trying to use us as an oracle. */
4134     if (!rxi_HasActiveCalls(conn))
4135         return np;
4136
4137     /* Send the security object the challenge packet.  It is expected to fill
4138      * in the response. */
4139     error = RXS_GetResponse(conn->securityObject, conn, np);
4140
4141     /* If the security object is unable to return a valid response, reset the
4142      * connection and send an abort to the peer.  Otherwise send the response
4143      * packet to the peer connection. */
4144     if (error) {
4145         rxi_ConnectionError(conn, error);
4146         MUTEX_ENTER(&conn->conn_data_lock);
4147         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4148         MUTEX_EXIT(&conn->conn_data_lock);
4149     } else {
4150         np = rxi_SendSpecial((struct rx_call *)0, conn, np,
4151                              RX_PACKET_TYPE_RESPONSE, NULL, -1, istack);
4152     }
4153     return np;
4154 }
4155
4156
4157 /* Find an available server process to service the current request in
4158  * the given call structure.  If one isn't available, queue up this
4159  * call so it eventually gets one */
4160 void
4161 rxi_AttachServerProc(register struct rx_call *call,
4162                      register osi_socket socket, register int *tnop,
4163                      register struct rx_call **newcallp)
4164 {
4165     register struct rx_serverQueueEntry *sq;
4166     register struct rx_service *service = call->conn->service;
4167     register int haveQuota = 0;
4168
4169     /* May already be attached */
4170     if (call->state == RX_STATE_ACTIVE)
4171         return;
4172
4173     MUTEX_ENTER(&rx_serverPool_lock);
4174
4175     haveQuota = QuotaOK(service);
4176     if ((!haveQuota) || queue_IsEmpty(&rx_idleServerQueue)) {
4177         /* If there are no processes available to service this call,
4178          * put the call on the incoming call queue (unless it's
4179          * already on the queue).
4180          */
4181 #ifdef RX_ENABLE_LOCKS
4182         if (haveQuota)
4183             ReturnToServerPool(service);
4184 #endif /* RX_ENABLE_LOCKS */
4185
4186         if (!(call->flags & RX_CALL_WAIT_PROC)) {
4187             call->flags |= RX_CALL_WAIT_PROC;
4188             MUTEX_ENTER(&rx_stats_mutex);
4189             rx_nWaiting++;
4190             rx_nWaited++;
4191             MUTEX_EXIT(&rx_stats_mutex);
4192             rxi_calltrace(RX_CALL_ARRIVAL, call);
4193             SET_CALL_QUEUE_LOCK(call, &rx_serverPool_lock);
4194             queue_Append(&rx_incomingCallQueue, call);
4195         }
4196     } else {
4197         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
4198
4199         /* If hot threads are enabled, and both newcallp and sq->socketp
4200          * are non-null, then this thread will process the call, and the
4201          * idle server thread will start listening on this threads socket.
4202          */
4203         queue_Remove(sq);
4204         if (rx_enable_hot_thread && newcallp && sq->socketp) {
4205             *newcallp = call;
4206             *tnop = sq->tno;
4207             *sq->socketp = socket;
4208             clock_GetTime(&call->startTime);
4209             CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
4210         } else {
4211             sq->newcall = call;
4212         }
4213         if (call->flags & RX_CALL_WAIT_PROC) {
4214             /* Conservative:  I don't think this should happen */
4215             call->flags &= ~RX_CALL_WAIT_PROC;
4216             if (queue_IsOnQueue(call)) {
4217                 queue_Remove(call);
4218                 MUTEX_ENTER(&rx_stats_mutex);
4219                 rx_nWaiting--;
4220                 MUTEX_EXIT(&rx_stats_mutex);
4221             }
4222         }
4223         call->state = RX_STATE_ACTIVE;
4224         call->mode = RX_MODE_RECEIVING;
4225 #ifdef RX_KERNEL_TRACE
4226         {
4227             int glockOwner = ISAFS_GLOCK();
4228             if (!glockOwner)
4229                 AFS_GLOCK();
4230             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
4231                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
4232                        call);
4233             if (!glockOwner)
4234                 AFS_GUNLOCK();
4235         }
4236 #endif
4237         if (call->flags & RX_CALL_CLEARED) {
4238             /* send an ack now to start the packet flow up again */
4239             call->flags &= ~RX_CALL_CLEARED;
4240             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4241         }
4242 #ifdef  RX_ENABLE_LOCKS
4243         CV_SIGNAL(&sq->cv);
4244 #else
4245         service->nRequestsRunning++;
4246         if (service->nRequestsRunning <= service->minProcs)
4247             rxi_minDeficit--;
4248         rxi_availProcs--;
4249         osi_rxWakeup(sq);
4250 #endif
4251     }
4252     MUTEX_EXIT(&rx_serverPool_lock);
4253 }
4254
4255 /* Delay the sending of an acknowledge event for a short while, while
4256  * a new call is being prepared (in the case of a client) or a reply
4257  * is being prepared (in the case of a server).  Rather than sending
4258  * an ack packet, an ACKALL packet is sent. */
4259 void
4260 rxi_AckAll(struct rxevent *event, register struct rx_call *call, char *dummy)
4261 {
4262 #ifdef RX_ENABLE_LOCKS
4263     if (event) {
4264         MUTEX_ENTER(&call->lock);
4265         call->delayedAckEvent = NULL;
4266         CALL_RELE(call, RX_CALL_REFCOUNT_ACKALL);
4267     }
4268     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4269                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4270     if (event)
4271         MUTEX_EXIT(&call->lock);
4272 #else /* RX_ENABLE_LOCKS */
4273     if (event)
4274         call->delayedAckEvent = NULL;
4275     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4276                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4277 #endif /* RX_ENABLE_LOCKS */
4278 }
4279
4280 void
4281 rxi_SendDelayedAck(struct rxevent *event, register struct rx_call *call,
4282                    char *dummy)
4283 {
4284 #ifdef RX_ENABLE_LOCKS
4285     if (event) {
4286         MUTEX_ENTER(&call->lock);
4287         if (event == call->delayedAckEvent)
4288             call->delayedAckEvent = NULL;
4289         CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
4290     }
4291     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4292     if (event)
4293         MUTEX_EXIT(&call->lock);
4294 #else /* RX_ENABLE_LOCKS */
4295     if (event)
4296         call->delayedAckEvent = NULL;
4297     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4298 #endif /* RX_ENABLE_LOCKS */
4299 }
4300
4301
#ifdef RX_ENABLE_LOCKS
/* Mark every packet on the call's transmit queue as acknowledged rather
 * than freeing it; rxi_Start will deal with clearing them out.  Also
 * cancels the resend event, advances tfirst to tnext, leaves fast
 * recovery if it was in progress, and signals cv_twind.
 */
static void
rxi_SetAcksInTransmitQueue(register struct rx_call *call)
{
    register struct rx_packet *pkt, *nextPkt;
    int marked = 0;

    for (queue_Scan(&call->tq, pkt, nextPkt, rx_packet)) {
        pkt->flags |= RX_PKTFLAG_ACKED;
        marked = 1;
    }

    /* Only ask for a queue cleanup if something was actually marked */
    if (marked)
        call->flags |= (RX_CALL_TQ_CLEARME | RX_CALL_TQ_SOME_ACKED);

    rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);

    /* Everything sent so far now counts as acknowledged */
    call->tfirst = call->tnext;
    call->nSoftAcked = 0;

    if (call->flags & RX_CALL_FAST_RECOVER) {
        /* Leave fast recovery and adopt the saved congestion window */
        call->cwind = call->nextCwind;
        call->nextCwind = 0;
        call->flags &= ~RX_CALL_FAST_RECOVER;
    }

    CV_SIGNAL(&call->cv_twind);
}
#endif /* RX_ENABLE_LOCKS */
4334
4335 /* Clear out the transmit queue for the current call (all packets have
4336  * been received by peer) */
4337 void
4338 rxi_ClearTransmitQueue(register struct rx_call *call, register int force)
4339 {
4340 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4341     register struct rx_packet *p, *tp;
4342
4343     if (!force && (call->flags & RX_CALL_TQ_BUSY)) {
4344         int someAcked = 0;
4345         for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4346             p->flags |= RX_PKTFLAG_ACKED;
4347             someAcked = 1;
4348         }
4349         if (someAcked) {
4350             call->flags |= RX_CALL_TQ_CLEARME;
4351             call->flags |= RX_CALL_TQ_SOME_ACKED;
4352         }
4353     } else {
4354 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4355         rxi_FreePackets(0, &call->tq);
4356 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4357         call->flags &= ~RX_CALL_TQ_CLEARME;
4358     }
4359 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4360
4361     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4362     call->tfirst = call->tnext; /* implicitly acknowledge all data already sent */
4363     call->nSoftAcked = 0;
4364
4365     if (call->flags & RX_CALL_FAST_RECOVER) {
4366         call->flags &= ~RX_CALL_FAST_RECOVER;
4367         call->cwind = call->nextCwind;
4368     }
4369 #ifdef  RX_ENABLE_LOCKS
4370     CV_SIGNAL(&call->cv_twind);
4371 #else
4372     osi_rxWakeup(&call->twind);
4373 #endif
4374 }
4375
4376 void
4377 rxi_ClearReceiveQueue(register struct rx_call *call)
4378 {
4379     if (queue_IsNotEmpty(&call->rq)) {
4380         rx_packetReclaims += rxi_FreePackets(0, &call->rq);
4381         call->flags &= ~(RX_CALL_RECEIVE_DONE | RX_CALL_HAVE_LAST);
4382     }
4383     if (call->state == RX_STATE_PRECALL) {
4384         call->flags |= RX_CALL_CLEARED;
4385     }
4386 }
4387
4388 /* Send an abort packet for the specified call */
4389 struct rx_packet *
4390 rxi_SendCallAbort(register struct rx_call *call, struct rx_packet *packet,
4391                   int istack, int force)
4392 {
4393     afs_int32 error;
4394     struct clock when, now;
4395
4396     if (!call->error)
4397         return packet;
4398
4399     /* Clients should never delay abort messages */
4400     if (rx_IsClientConn(call->conn))
4401         force = 1;
4402
4403     if (call->abortCode != call->error) {
4404         call->abortCode = call->error;
4405         call->abortCount = 0;
4406     }
4407
4408     if (force || rxi_callAbortThreshhold == 0
4409         || call->abortCount < rxi_callAbortThreshhold) {
4410         if (call->delayedAbortEvent) {
4411             rxevent_Cancel(call->delayedAbortEvent, call,
4412                            RX_CALL_REFCOUNT_ABORT);
4413         }
4414         error = htonl(call->error);
4415         call->abortCount++;
4416         packet =
4417             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
4418                             (char *)&error, sizeof(error), istack);
4419     } else if (!call->delayedAbortEvent) {
4420         clock_GetTime(&now);
4421         when = now;
4422         clock_Addmsec(&when, rxi_callAbortDelay);
4423         CALL_HOLD(call, RX_CALL_REFCOUNT_ABORT);
4424         call->delayedAbortEvent =
4425             rxevent_PostNow(&when, &now, rxi_SendDelayedCallAbort, call, 0);
4426     }
4427     return packet;
4428 }
4429
4430 /* Send an abort packet for the specified connection.  Packet is an
4431  * optional pointer to a packet that can be used to send the abort.
4432  * Once the number of abort messages reaches the threshhold, an
4433  * event is scheduled to send the abort. Setting the force flag
4434  * overrides sending delayed abort messages.
4435  *
4436  * NOTE: Called with conn_data_lock held. conn_data_lock is dropped
4437  *       to send the abort packet.
4438  */
4439 struct rx_packet *
4440 rxi_SendConnectionAbort(register struct rx_connection *conn,
4441                         struct rx_packet *packet, int istack, int force)
4442 {
4443     afs_int32 error;
4444     struct clock when, now;
4445
4446     if (!conn->error)
4447         return packet;
4448
4449     /* Clients should never delay abort messages */
4450     if (rx_IsClientConn(conn))
4451         force = 1;
4452
4453     if (force || rxi_connAbortThreshhold == 0
4454         || conn->abortCount < rxi_connAbortThreshhold) {
4455         if (conn->delayedAbortEvent) {
4456             rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
4457         }
4458         error = htonl(conn->error);
4459         conn->abortCount++;
4460         MUTEX_EXIT(&conn->conn_data_lock);
4461         packet =
4462             rxi_SendSpecial((struct rx_call *)0, conn, packet,
4463                             RX_PACKET_TYPE_ABORT, (char *)&error,
4464                             sizeof(error), istack);
4465         MUTEX_ENTER(&conn->conn_data_lock);
4466     } else if (!conn->delayedAbortEvent) {
4467         clock_GetTime(&now);
4468         when = now;
4469         clock_Addmsec(&when, rxi_connAbortDelay);
4470         conn->delayedAbortEvent =
4471             rxevent_PostNow(&when, &now, rxi_SendDelayedConnAbort, conn, 0);
4472     }
4473     return packet;
4474 }
4475
4476 /* Associate an error all of the calls owned by a connection.  Called
4477  * with error non-zero.  This is only for really fatal things, like
4478  * bad authentication responses.  The connection itself is set in
4479  * error at this point, so that future packets received will be
4480  * rejected. */
4481 void
4482 rxi_ConnectionError(register struct rx_connection *conn,
4483                     register afs_int32 error)
4484 {
4485     if (error) {
4486         register int i;
4487
4488         dpf(("rxi_ConnectionError conn %x error %d", conn, error));
4489
4490         MUTEX_ENTER(&conn->conn_data_lock);
4491         if (conn->challengeEvent)
4492             rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
4493         if (conn->checkReachEvent) {
4494             rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
4495             conn->checkReachEvent = 0;
4496             conn->flags &= ~RX_CONN_ATTACHWAIT;
4497             conn->refCount--;
4498         }
4499         MUTEX_EXIT(&conn->conn_data_lock);
4500         for (i = 0; i < RX_MAXCALLS; i++) {
4501             struct rx_call *call = conn->call[i];
4502             if (call) {
4503                 MUTEX_ENTER(&call->lock);
4504                 rxi_CallError(call, error);
4505                 MUTEX_EXIT(&call->lock);
4506             }
4507         }
4508         conn->error = error;
4509         rx_MutexIncrement(rx_stats.fatalErrors, rx_stats_mutex);
4510     }
4511 }
4512
4513 void
4514 rxi_CallError(register struct rx_call *call, afs_int32 error)
4515 {
4516     dpf(("rxi_CallError call %x error %d call->error %d", call, error, call->error));
4517     if (call->error)
4518         error = call->error;
4519
4520 #ifdef RX_GLOBAL_RXLOCK_KERNEL
4521     if (!((call->flags & RX_CALL_TQ_BUSY) || (call->tqWaiters > 0))) {
4522         rxi_ResetCall(call, 0);
4523     }
4524 #else
4525     rxi_ResetCall(call, 0);
4526 #endif
4527     call->error = error;
4528     call->mode = RX_MODE_ERROR;
4529 }
4530
4531 /* Reset various fields in a call structure, and wakeup waiting
4532  * processes.  Some fields aren't changed: state & mode are not
4533  * touched (these must be set by the caller), and bufptr, nLeft, and
4534  * nFree are not reset, since these fields are manipulated by
4535  * unprotected macros, and may only be reset by non-interrupting code.
4536  */
4537 #ifdef ADAPT_WINDOW
4538 /* this code requires that call->conn be set properly as a pre-condition. */
4539 #endif /* ADAPT_WINDOW */
4540
4541 void
4542 rxi_ResetCall(register struct rx_call *call, register int newcall)
4543 {
4544     register int flags;
4545     register struct rx_peer *peer;
4546     struct rx_packet *packet;
4547
4548     dpf(("rxi_ResetCall(call %x, newcall %d)\n", call, newcall));
4549
4550     /* Notify anyone who is waiting for asynchronous packet arrival */
4551     if (call->arrivalProc) {
4552         (*call->arrivalProc) (call, call->arrivalProcHandle,
4553                               call->arrivalProcArg);
4554         call->arrivalProc = (void (*)())0;
4555     }
4556
4557     if (call->delayedAbortEvent) {
4558         rxevent_Cancel(call->delayedAbortEvent, call, RX_CALL_REFCOUNT_ABORT);
4559         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
4560         if (packet) {
4561             rxi_SendCallAbort(call, packet, 0, 1);
4562             rxi_FreePacket(packet);
4563         }
4564     }
4565
4566     /*
4567      * Update the peer with the congestion information in this call
4568      * so other calls on this connection can pick up where this call
4569      * left off. If the congestion sequence numbers don't match then
4570      * another call experienced a retransmission.
4571      */
4572     peer = call->conn->peer;
4573     MUTEX_ENTER(&peer->peer_lock);
4574     if (!newcall) {
4575         if (call->congestSeq == peer->congestSeq) {
4576             peer->cwind = MAX(peer->cwind, call->cwind);
4577             peer->MTU = MAX(peer->MTU, call->MTU);
4578             peer->nDgramPackets =
4579                 MAX(peer->nDgramPackets, call->nDgramPackets);
4580         }
4581     } else {
4582         call->abortCode = 0;
4583         call->abortCount = 0;
4584     }
4585     if (peer->maxDgramPackets > 1) {
4586         call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4587     } else {
4588         call->MTU = peer->MTU;
4589     }
4590     call->cwind = MIN((int)peer->cwind, (int)peer->nDgramPackets);
4591     call->ssthresh = rx_maxSendWindow;
4592     call->nDgramPackets = peer->nDgramPackets;
4593     call->congestSeq = peer->congestSeq;
4594     MUTEX_EXIT(&peer->peer_lock);
4595
4596     flags = call->flags;
4597     rxi_ClearReceiveQueue(call);
4598 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4599     if (flags & RX_CALL_TQ_BUSY) {
4600         call->flags = RX_CALL_TQ_CLEARME | RX_CALL_TQ_BUSY;
4601         call->flags |= (flags & RX_CALL_TQ_WAIT);
4602     } else
4603 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4604     {
4605         rxi_ClearTransmitQueue(call, 0);
4606         queue_Init(&call->tq);
4607         if (call->tqWaiters || (flags & RX_CALL_TQ_WAIT)) {
4608             dpf(("rcall %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
4609         }
4610         call->flags = 0;
4611         while (call->tqWaiters) {
4612 #ifdef RX_ENABLE_LOCKS
4613             CV_BROADCAST(&call->cv_tq);
4614 #else /* RX_ENABLE_LOCKS */
4615             osi_rxWakeup(&call->tq);
4616 #endif /* RX_ENABLE_LOCKS */
4617             call->tqWaiters--;
4618         }
4619     }
4620     queue_Init(&call->rq);
4621     call->error = 0;
4622     call->twind = call->conn->twind[call->channel];
4623     call->rwind = call->conn->rwind[call->channel];
4624     call->nSoftAcked = 0;
4625     call->nextCwind = 0;
4626     call->nAcks = 0;
4627     call->nNacks = 0;
4628     call->nCwindAcks = 0;
4629     call->nSoftAcks = 0;
4630     call->nHardAcks = 0;
4631
4632     call->tfirst = call->rnext = call->tnext = 1;
4633     call->rprev = 0;
4634     call->lastAcked = 0;
4635     call->localStatus = call->remoteStatus = 0;
4636
4637     if (flags & RX_CALL_READER_WAIT) {
4638 #ifdef  RX_ENABLE_LOCKS
4639         CV_BROADCAST(&call->cv_rq);
4640 #else
4641         osi_rxWakeup(&call->rq);
4642 #endif
4643     }
4644     if (flags & RX_CALL_WAIT_PACKETS) {
4645         MUTEX_ENTER(&rx_freePktQ_lock);
4646         rxi_PacketsUnWait();    /* XXX */
4647         MUTEX_EXIT(&rx_freePktQ_lock);
4648     }
4649 #ifdef  RX_ENABLE_LOCKS
4650     CV_SIGNAL(&call->cv_twind);
4651 #else
4652     if (flags & RX_CALL_WAIT_WINDOW_ALLOC)
4653         osi_rxWakeup(&call->twind);
4654 #endif
4655
4656 #ifdef RX_ENABLE_LOCKS
4657     /* The following ensures that we don't mess with any queue while some
4658      * other thread might also be doing so. The call_queue_lock field is
4659      * is only modified under the call lock. If the call is in the process
4660      * of being removed from a queue, the call is not locked until the
4661      * the queue lock is dropped and only then is the call_queue_lock field
4662      * zero'd out. So it's safe to lock the queue if call_queue_lock is set.
4663      * Note that any other routine which removes a call from a queue has to
4664      * obtain the queue lock before examing the queue and removing the call.
4665      */
4666     if (call->call_queue_lock) {
4667         MUTEX_ENTER(call->call_queue_lock);
4668         if (queue_IsOnQueue(call)) {
4669             queue_Remove(call);
4670             if (flags & RX_CALL_WAIT_PROC) {
4671                 MUTEX_ENTER(&rx_stats_mutex);
4672                 rx_nWaiting--;
4673                 MUTEX_EXIT(&rx_stats_mutex);
4674             }
4675         }
4676         MUTEX_EXIT(call->call_queue_lock);
4677         CLEAR_CALL_QUEUE_LOCK(call);
4678     }
4679 #else /* RX_ENABLE_LOCKS */
4680     if (queue_IsOnQueue(call)) {
4681         queue_Remove(call);
4682         if (flags & RX_CALL_WAIT_PROC)
4683             rx_nWaiting--;
4684     }
4685 #endif /* RX_ENABLE_LOCKS */
4686
4687     rxi_KeepAliveOff(call);
4688     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
4689 }
4690
4691 /* Send an acknowledge for the indicated packet (seq,serial) of the
4692  * indicated call, for the indicated reason (reason).  This
4693  * acknowledge will specifically acknowledge receiving the packet, and
4694  * will also specify which other packets for this call have been
4695  * received.  This routine returns the packet that was used to the
4696  * caller.  The caller is responsible for freeing it or re-using it.
4697  * This acknowledgement also returns the highest sequence number
4698  * actually read out by the higher level to the sender; the sender
4699  * promises to keep around packets that have not been read by the
4700  * higher level yet (unless, of course, the sender decides to abort
4701  * the call altogether).  Any of p, seq, serial, pflags, or reason may
4702  * be set to zero without ill effect.  That is, if they are zero, they
4703  * will not convey any information.
4704  * NOW there is a trailer field, after the ack where it will safely be
4705  * ignored by mundanes, which indicates the maximum size packet this
4706  * host can swallow.  */
4707 /*
4708     register struct rx_packet *optionalPacket;  use to send ack (or null)
4709     int seq;                     Sequence number of the packet we are acking
4710     int serial;                  Serial number of the packet
4711     int pflags;                  Flags field from packet header
4712     int reason;                  Reason an acknowledge was prompted
4713 */
4714
4715 struct rx_packet *
4716 rxi_SendAck(register struct rx_call *call,
4717             register struct rx_packet *optionalPacket, int serial, int reason,
4718             int istack)
4719 {
4720     struct rx_ackPacket *ap;
4721     register struct rx_packet *rqp;
4722     register struct rx_packet *nxp;     /* For queue_Scan */
4723     register struct rx_packet *p;
4724     u_char offset;
4725     afs_int32 templ;
4726 #ifdef RX_ENABLE_TSFPQ
4727     struct rx_ts_info_t * rx_ts_info;
4728 #endif
4729
4730     /*
4731      * Open the receive window once a thread starts reading packets
4732      */
4733     if (call->rnext > 1) {
4734         call->conn->rwind[call->channel] = call->rwind = rx_maxReceiveWindow;
4735     }
4736
4737     call->nHardAcks = 0;
4738     call->nSoftAcks = 0;
4739     if (call->rnext > call->lastAcked)
4740         call->lastAcked = call->rnext;
4741     p = optionalPacket;
4742
4743     if (p) {
4744         rx_computelen(p, p->length);    /* reset length, you never know */
4745     } /* where that's been...         */
4746 #ifdef RX_ENABLE_TSFPQ
4747     else {
4748         RX_TS_INFO_GET(rx_ts_info);
4749         if ((p = rx_ts_info->local_special_packet)) {
4750             rx_computelen(p, p->length);
4751         } else if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4752             rx_ts_info->local_special_packet = p;
4753         } else { /* We won't send the ack, but don't panic. */
4754             return optionalPacket;
4755         }
4756     }
4757 #else
4758     else if (!(p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4759         /* We won't send the ack, but don't panic. */
4760         return optionalPacket;
4761     }
4762 #endif
4763
4764     templ =
4765         rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) -
4766         rx_GetDataSize(p);
4767     if (templ > 0) {
4768         if (rxi_AllocDataBuf(p, templ, RX_PACKET_CLASS_SPECIAL) > 0) {
4769 #ifndef RX_ENABLE_TSFPQ
4770             if (!optionalPacket)
4771                 rxi_FreePacket(p);
4772 #endif
4773             return optionalPacket;
4774         }
4775         templ = rx_AckDataSize(call->rwind) + 2 * sizeof(afs_int32);
4776         if (rx_Contiguous(p) < templ) {
4777 #ifndef RX_ENABLE_TSFPQ
4778             if (!optionalPacket)
4779                 rxi_FreePacket(p);
4780 #endif
4781             return optionalPacket;
4782         }
4783     }
4784
4785
4786     /* MTUXXX failing to send an ack is very serious.  We should */
4787     /* try as hard as possible to send even a partial ack; it's */
4788     /* better than nothing. */
4789     ap = (struct rx_ackPacket *)rx_DataOf(p);
4790     ap->bufferSpace = htonl(0); /* Something should go here, sometime */
4791     ap->reason = reason;
4792
4793     /* The skew computation used to be bogus, I think it's better now. */
4794     /* We should start paying attention to skew.    XXX  */
4795     ap->serial = htonl(serial);
4796     ap->maxSkew = 0;            /* used to be peer->inPacketSkew */
4797
4798     ap->firstPacket = htonl(call->rnext);       /* First packet not yet forwarded to reader */
4799     ap->previousPacket = htonl(call->rprev);    /* Previous packet received */
4800
4801     /* No fear of running out of ack packet here because there can only be at most
4802      * one window full of unacknowledged packets.  The window size must be constrained
4803      * to be less than the maximum ack size, of course.  Also, an ack should always
4804      * fit into a single packet -- it should not ever be fragmented.  */
4805     for (offset = 0, queue_Scan(&call->rq, rqp, nxp, rx_packet)) {
4806         if (!rqp || !call->rq.next
4807             || (rqp->header.seq > (call->rnext + call->rwind))) {
4808 #ifndef RX_ENABLE_TSFPQ
4809             if (!optionalPacket)
4810                 rxi_FreePacket(p);
4811 #endif
4812             rxi_CallError(call, RX_CALL_DEAD);
4813             return optionalPacket;
4814         }
4815
4816         while (rqp->header.seq > call->rnext + offset)
4817             ap->acks[offset++] = RX_ACK_TYPE_NACK;
4818         ap->acks[offset++] = RX_ACK_TYPE_ACK;
4819
4820         if ((offset > (u_char) rx_maxReceiveWindow) || (offset > call->rwind)) {
4821 #ifndef RX_ENABLE_TSFPQ
4822             if (!optionalPacket)
4823                 rxi_FreePacket(p);
4824 #endif
4825             rxi_CallError(call, RX_CALL_DEAD);
4826             return optionalPacket;
4827         }
4828     }
4829
4830     ap->nAcks = offset;
4831     p->length = rx_AckDataSize(offset) + 4 * sizeof(afs_int32);
4832
4833     /* these are new for AFS 3.3 */
4834     templ = rxi_AdjustMaxMTU(call->conn->peer->ifMTU, rx_maxReceiveSize);
4835     templ = htonl(templ);
4836     rx_packetwrite(p, rx_AckDataSize(offset), sizeof(afs_int32), &templ);
4837     templ = htonl(call->conn->peer->ifMTU);
4838     rx_packetwrite(p, rx_AckDataSize(offset) + sizeof(afs_int32),
4839                    sizeof(afs_int32), &templ);
4840
4841     /* new for AFS 3.4 */
4842     templ = htonl(call->rwind);
4843     rx_packetwrite(p, rx_AckDataSize(offset) + 2 * sizeof(afs_int32),
4844                    sizeof(afs_int32), &templ);
4845
4846     /* new for AFS 3.5 */
4847     templ = htonl(call->conn->peer->ifDgramPackets);
4848     rx_packetwrite(p, rx_AckDataSize(offset) + 3 * sizeof(afs_int32),
4849                    sizeof(afs_int32), &templ);
4850
4851     p->header.serviceId = call->conn->serviceId;
4852     p->header.cid = (call->conn->cid | call->channel);
4853     p->header.callNumber = *call->callNumber;
4854     p->header.seq = 0;
4855     p->header.securityIndex = call->conn->securityIndex;
4856     p->header.epoch = call->conn->epoch;
4857     p->header.type = RX_PACKET_TYPE_ACK;
4858     p->header.flags = RX_SLOW_START_OK;
4859     if (reason == RX_ACK_PING) {
4860         p->header.flags |= RX_REQUEST_ACK;
4861 #ifdef ADAPT_WINDOW
4862         clock_GetTime(&call->pingRequestTime);
4863 #endif
4864     }
4865     if (call->conn->type == RX_CLIENT_CONNECTION)
4866         p->header.flags |= RX_CLIENT_INITIATED;
4867
4868 #ifdef RXDEBUG
4869 #ifdef AFS_NT40_ENV
4870     if (rxdebug_active) {
4871         char msg[512];
4872         size_t len;
4873
4874         len = _snprintf(msg, sizeof(msg),
4875                         "tid[%d] SACK: reason %s serial %u previous %u seq %u first %u acks %u space %u ",
4876                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
4877                          ntohl(ap->serial), ntohl(ap->previousPacket),
4878                          (unsigned int)p->header.seq, ntohl(ap->firstPacket),
4879                          ap->nAcks, ntohs(ap->bufferSpace) );
4880         if (ap->nAcks) {
4881             int offset;
4882
4883             for (offset = 0; offset < ap->nAcks && len < sizeof(msg); offset++)
4884                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
4885         }
4886         msg[len++]='\n';
4887         msg[len] = '\0';
4888         OutputDebugString(msg);
4889     }
4890 #else /* AFS_NT40_ENV */
4891     if (rx_Log) {
4892         fprintf(rx_Log, "SACK: reason %x previous %u seq %u first %u ",
4893                 ap->reason, ntohl(ap->previousPacket),
4894                 (unsigned int)p->header.seq, ntohl(ap->firstPacket));
4895         if (ap->nAcks) {
4896             for (offset = 0; offset < ap->nAcks; offset++)
4897                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
4898                      rx_Log);
4899         }
4900         putc('\n', rx_Log);
4901     }
4902 #endif /* AFS_NT40_ENV */
4903 #endif
4904     {
4905         register int i, nbytes = p->length;
4906
4907         for (i = 1; i < p->niovecs; i++) {      /* vec 0 is ALWAYS header */
4908             if (nbytes <= p->wirevec[i].iov_len) {
4909                 register int savelen, saven;
4910
4911                 savelen = p->wirevec[i].iov_len;
4912                 saven = p->niovecs;
4913                 p->wirevec[i].iov_len = nbytes;
4914                 p->niovecs = i + 1;
4915                 rxi_Send(call, p, istack);
4916                 p->wirevec[i].iov_len = savelen;
4917                 p->niovecs = saven;
4918                 break;
4919             } else
4920                 nbytes -= p->wirevec[i].iov_len;
4921         }
4922     }
4923     rx_MutexIncrement(rx_stats.ackPacketsSent, rx_stats_mutex);
4924 #ifndef RX_ENABLE_TSFPQ
4925     if (!optionalPacket)
4926         rxi_FreePacket(p);
4927 #endif
4928     return optionalPacket;      /* Return packet for re-use by caller */
4929 }
4930
4931 /* Send all of the packets in the list in single datagram */
4932 static void
4933 rxi_SendList(struct rx_call *call, struct rx_packet **list, int len,
4934              int istack, int moreFlag, struct clock *now,
4935              struct clock *retryTime, int resending)
4936 {
4937     int i;
4938     int requestAck = 0;
4939     int lastPacket = 0;
4940     struct rx_connection *conn = call->conn;
4941     struct rx_peer *peer = conn->peer;
4942
4943     MUTEX_ENTER(&peer->peer_lock);
4944     peer->nSent += len;
4945     if (resending)
4946         peer->reSends += len;
4947     rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4948     MUTEX_EXIT(&peer->peer_lock);
4949
4950     if (list[len - 1]->header.flags & RX_LAST_PACKET) {
4951         lastPacket = 1;
4952     }
4953
4954     /* Set the packet flags and schedule the resend events */
4955     /* Only request an ack for the last packet in the list */
4956     for (i = 0; i < len; i++) {
4957         list[i]->retryTime = *retryTime;
4958         if (list[i]->header.serial) {
4959             /* Exponentially backoff retry times */
4960             if (list[i]->backoff < MAXBACKOFF) {
4961                 /* so it can't stay == 0 */
4962                 list[i]->backoff = (list[i]->backoff << 1) + 1;
4963             } else
4964                 list[i]->backoff++;
4965             clock_Addmsec(&(list[i]->retryTime),
4966                           ((afs_uint32) list[i]->backoff) << 8);
4967         }
4968
4969         /* Wait a little extra for the ack on the last packet */
4970         if (lastPacket && !(list[i]->header.flags & RX_CLIENT_INITIATED)) {
4971             clock_Addmsec(&(list[i]->retryTime), 400);
4972         }
4973
4974         /* Record the time sent */
4975         list[i]->timeSent = *now;
4976
4977         /* Ask for an ack on retransmitted packets,  on every other packet
4978          * if the peer doesn't support slow start. Ask for an ack on every
4979          * packet until the congestion window reaches the ack rate. */
4980         if (list[i]->header.serial) {
4981             requestAck = 1;
4982             rx_MutexIncrement(rx_stats.dataPacketsReSent, rx_stats_mutex);
4983         } else {
4984             /* improved RTO calculation- not Karn */
4985             list[i]->firstSent = *now;
4986             if (!lastPacket && (call->cwind <= (u_short) (conn->ackRate + 1)
4987                                 || (!(call->flags & RX_CALL_SLOW_START_OK)
4988                                     && (list[i]->header.seq & 1)))) {
4989                 requestAck = 1;
4990             }
4991         }
4992
4993         MUTEX_ENTER(&peer->peer_lock);
4994         peer->nSent++;
4995         if (resending)
4996             peer->reSends++;
4997         rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4998         MUTEX_EXIT(&peer->peer_lock);
4999
5000         /* Tag this packet as not being the last in this group,
5001          * for the receiver's benefit */
5002         if (i < len - 1 || moreFlag) {
5003             list[i]->header.flags |= RX_MORE_PACKETS;
5004         }
5005
5006         /* Install the new retransmit time for the packet, and
5007          * record the time sent */
5008         list[i]->timeSent = *now;
5009     }
5010
5011     if (requestAck) {
5012         list[len - 1]->header.flags |= RX_REQUEST_ACK;
5013     }
5014
5015     /* Since we're about to send a data packet to the peer, it's
5016      * safe to nuke any scheduled end-of-packets ack */
5017     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5018
5019     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5020     MUTEX_EXIT(&call->lock);
5021     if (len > 1) {
5022         rxi_SendPacketList(call, conn, list, len, istack);
5023     } else {
5024         rxi_SendPacket(call, conn, list[0], istack);
5025     }
5026     MUTEX_ENTER(&call->lock);
5027     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5028
5029     /* Update last send time for this call (for keep-alive
5030      * processing), and for the connection (so that we can discover
5031      * idle connections) */
5032     call->lastSendData = conn->lastSendTime = call->lastSendTime = clock_Sec();
5033 }
5034
5035 /* When sending packets we need to follow these rules:
5036  * 1. Never send more than maxDgramPackets in a jumbogram.
5037  * 2. Never send a packet with more than two iovecs in a jumbogram.
5038  * 3. Never send a retransmitted packet in a jumbogram.
5039  * 4. Never send more than cwind/4 packets in a jumbogram
5040  * We always keep the last list we should have sent so we
5041  * can set the RX_MORE_PACKETS flags correctly.
5042  */
5043 static void
5044 rxi_SendXmitList(struct rx_call *call, struct rx_packet **list, int len,
5045                  int istack, struct clock *now, struct clock *retryTime,
5046                  int resending)
5047 {
5048     int i, cnt, lastCnt = 0;
5049     struct rx_packet **listP, **lastP = 0;
5050     struct rx_peer *peer = call->conn->peer;
5051     int morePackets = 0;
5052
5053     for (cnt = 0, listP = &list[0], i = 0; i < len; i++) {
5054         /* Does the current packet force us to flush the current list? */
5055         if (cnt > 0
5056             && (list[i]->header.serial || (list[i]->flags & RX_PKTFLAG_ACKED)
5057                 || list[i]->length > RX_JUMBOBUFFERSIZE)) {
5058             if (lastCnt > 0) {
5059                 rxi_SendList(call, lastP, lastCnt, istack, 1, now, retryTime,
5060                              resending);
5061                 /* If the call enters an error state stop sending, or if
5062                  * we entered congestion recovery mode, stop sending */
5063                 if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5064                     return;
5065             }
5066             lastP = listP;
5067             lastCnt = cnt;
5068             listP = &list[i];
5069             cnt = 0;
5070         }
5071         /* Add the current packet to the list if it hasn't been acked.
5072          * Otherwise adjust the list pointer to skip the current packet.  */
5073         if (!(list[i]->flags & RX_PKTFLAG_ACKED)) {
5074             cnt++;
5075             /* Do we need to flush the list? */
5076             if (cnt >= (int)peer->maxDgramPackets
5077                 || cnt >= (int)call->nDgramPackets || cnt >= (int)call->cwind
5078                 || list[i]->header.serial
5079                 || list[i]->length != RX_JUMBOBUFFERSIZE) {
5080                 if (lastCnt > 0) {
5081                     rxi_SendList(call, lastP, lastCnt, istack, 1, now,
5082                                  retryTime, resending);
5083                     /* If the call enters an error state stop sending, or if
5084                      * we entered congestion recovery mode, stop sending */
5085                     if (call->error
5086                         || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5087                         return;
5088                 }
5089                 lastP = listP;
5090                 lastCnt = cnt;
5091                 listP = &list[i + 1];
5092                 cnt = 0;
5093             }
5094         } else {
5095             if (cnt != 0) {
5096                 osi_Panic("rxi_SendList error");
5097             }
5098             listP = &list[i + 1];
5099         }
5100     }
5101
5102     /* Send the whole list when the call is in receive mode, when
5103      * the call is in eof mode, when we are in fast recovery mode,
5104      * and when we have the last packet */
5105     if ((list[len - 1]->header.flags & RX_LAST_PACKET)
5106         || call->mode == RX_MODE_RECEIVING || call->mode == RX_MODE_EOF
5107         || (call->flags & RX_CALL_FAST_RECOVER)) {
5108         /* Check for the case where the current list contains
5109          * an acked packet. Since we always send retransmissions
5110          * in a separate packet, we only need to check the first
5111          * packet in the list */
5112         if (cnt > 0 && !(listP[0]->flags & RX_PKTFLAG_ACKED)) {
5113             morePackets = 1;
5114         }
5115         if (lastCnt > 0) {
5116             rxi_SendList(call, lastP, lastCnt, istack, morePackets, now,
5117                          retryTime, resending);
5118             /* If the call enters an error state stop sending, or if
5119              * we entered congestion recovery mode, stop sending */
5120             if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5121                 return;
5122         }
5123         if (morePackets) {
5124             rxi_SendList(call, listP, cnt, istack, 0, now, retryTime,
5125                          resending);
5126         }
5127     } else if (lastCnt > 0) {
5128         rxi_SendList(call, lastP, lastCnt, istack, 0, now, retryTime,
5129                      resending);
5130     }
5131 }
5132
5133 #ifdef  RX_ENABLE_LOCKS
5134 /* Call rxi_Start, below, but with the call lock held. */
5135 void
5136 rxi_StartUnlocked(struct rxevent *event, register struct rx_call *call,
5137                   void *arg1, int istack)
5138 {
5139     MUTEX_ENTER(&call->lock);
5140     rxi_Start(event, call, arg1, istack);
5141     MUTEX_EXIT(&call->lock);
5142 }
5143 #endif /* RX_ENABLE_LOCKS */
5144
5145 /* This routine is called when new packets are readied for
5146  * transmission and when retransmission may be necessary, or when the
5147  * transmission window or burst count are favourable.  This should be
5148  * better optimized for new packets, the usual case, now that we've
5149  * got rid of queues of send packets. XXXXXXXXXXX */
5150 void
5151 rxi_Start(struct rxevent *event, register struct rx_call *call,
5152           void *arg1, int istack)
5153 {
5154     struct rx_packet *p;
5155     register struct rx_packet *nxp;     /* Next pointer for queue_Scan */
5156     struct rx_peer *peer = call->conn->peer;
5157     struct clock now, usenow, retryTime;
5158     int haveEvent;
5159     int nXmitPackets;
5160     int maxXmitPackets;
5161     struct rx_packet **xmitList;
5162     int resending = 0;
5163
5164     /* If rxi_Start is being called as a result of a resend event,
5165      * then make sure that the event pointer is removed from the call
5166      * structure, since there is no longer a per-call retransmission
5167      * event pending. */
5168     if (event && event == call->resendEvent) {
5169         CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
5170         call->resendEvent = NULL;
5171         resending = 1;
5172         if (queue_IsEmpty(&call->tq)) {
5173             /* Nothing to do */
5174             return;
5175         }
5176         /* Timeouts trigger congestion recovery */
5177 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5178         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5179             /* someone else is waiting to start recovery */
5180             return;
5181         }
5182         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
5183         rxi_WaitforTQBusy(call);
5184 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5185         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
5186         call->flags |= RX_CALL_FAST_RECOVER;
5187         if (peer->maxDgramPackets > 1) {
5188             call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
5189         } else {
5190             call->MTU = MIN(peer->natMTU, peer->maxMTU);
5191         }
5192         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
5193         call->nDgramPackets = 1;
5194         call->cwind = 1;
5195         call->nextCwind = 1;
5196         call->nAcks = 0;
5197         call->nNacks = 0;
5198         MUTEX_ENTER(&peer->peer_lock);
5199         peer->MTU = call->MTU;
5200         peer->cwind = call->cwind;
5201         peer->nDgramPackets = 1;
5202         peer->congestSeq++;
5203         call->congestSeq = peer->congestSeq;
5204         MUTEX_EXIT(&peer->peer_lock);
5205         /* Clear retry times on packets. Otherwise, it's possible for
5206          * some packets in the queue to force resends at rates faster
5207          * than recovery rates.
5208          */
5209         for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5210             if (!(p->flags & RX_PKTFLAG_ACKED)) {
5211                 clock_Zero(&p->retryTime);
5212             }
5213         }
5214     }
5215     if (call->error) {
5216 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5217         rx_MutexIncrement(rx_tq_debug.rxi_start_in_error, rx_stats_mutex);
5218 #endif
5219         return;
5220     }
5221
5222     if (queue_IsNotEmpty(&call->tq)) {  /* If we have anything to send */
5223         /* Get clock to compute the re-transmit time for any packets
5224          * in this burst.  Note, if we back off, it's reasonable to
5225          * back off all of the packets in the same manner, even if
5226          * some of them have been retransmitted more times than more
5227          * recent additions.
5228          * Do a dance to avoid blocking after setting now. */
5229         clock_Zero(&retryTime);
5230         MUTEX_ENTER(&peer->peer_lock);
5231         clock_Add(&retryTime, &peer->timeout);
5232         MUTEX_EXIT(&peer->peer_lock);
5233         clock_GetTime(&now);
5234         clock_Add(&retryTime, &now);
5235         usenow = now;
5236         /* Send (or resend) any packets that need it, subject to
5237          * window restrictions and congestion burst control
5238          * restrictions.  Ask for an ack on the last packet sent in
5239          * this burst.  For now, we're relying upon the window being
5240          * considerably bigger than the largest number of packets that
5241          * are typically sent at once by one initial call to
5242          * rxi_Start.  This is probably bogus (perhaps we should ask
5243          * for an ack when we're half way through the current
5244          * window?).  Also, for non file transfer applications, this
5245          * may end up asking for an ack for every packet.  Bogus. XXXX
5246          */
5247         /*
5248          * But check whether we're here recursively, and let the other guy
5249          * do the work.
5250          */
5251 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5252         if (!(call->flags & RX_CALL_TQ_BUSY)) {
5253             call->flags |= RX_CALL_TQ_BUSY;
5254             do {
5255 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5256             restart:
5257 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5258                 call->flags &= ~RX_CALL_NEED_START;
5259 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5260                 nXmitPackets = 0;
5261                 maxXmitPackets = MIN(call->twind, call->cwind);
5262                 xmitList = (struct rx_packet **)
5263                     osi_Alloc(maxXmitPackets * sizeof(struct rx_packet *));
5264                 if (xmitList == NULL)
5265                     osi_Panic("rxi_Start, failed to allocate xmit list");
5266                 for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5267                     if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5268                         /* We shouldn't be sending packets if a thread is waiting
5269                          * to initiate congestion recovery */
5270                         break;
5271                     }
5272                     if ((nXmitPackets)
5273                         && (call->flags & RX_CALL_FAST_RECOVER)) {
5274                         /* Only send one packet during fast recovery */
5275                         break;
5276                     }
5277                     if ((p->flags & RX_PKTFLAG_FREE)
5278                         || (!queue_IsEnd(&call->tq, nxp)
5279                             && (nxp->flags & RX_PKTFLAG_FREE))
5280                         || (p == (struct rx_packet *)&rx_freePacketQueue)
5281                         || (nxp == (struct rx_packet *)&rx_freePacketQueue)) {
5282                         osi_Panic("rxi_Start: xmit queue clobbered");
5283                     }
5284                     if (p->flags & RX_PKTFLAG_ACKED) {
5285                         /* Since we may block, don't trust this */
5286                         usenow.sec = usenow.usec = 0;
5287                         rx_MutexIncrement(rx_stats.ignoreAckedPacket, rx_stats_mutex);
5288                         continue;       /* Ignore this packet if it has been acknowledged */
5289                     }
5290
5291                     /* Turn off all flags except these ones, which are the same
5292                      * on each transmission */
5293                     p->header.flags &= RX_PRESET_FLAGS;
5294
5295                     if (p->header.seq >=
5296                         call->tfirst + MIN((int)call->twind,
5297                                            (int)(call->nSoftAcked +
5298                                                  call->cwind))) {
5299                         call->flags |= RX_CALL_WAIT_WINDOW_SEND;        /* Wait for transmit window */
5300                         /* Note: if we're waiting for more window space, we can
5301                          * still send retransmits; hence we don't return here, but
5302                          * break out to schedule a retransmit event */
5303                         dpf(("call %d waiting for window",
5304                              *(call->callNumber)));
5305                         break;
5306                     }
5307
5308                     /* Transmit the packet if it needs to be sent. */
5309                     if (!clock_Lt(&now, &p->retryTime)) {
5310                         if (nXmitPackets == maxXmitPackets) {
5311                             rxi_SendXmitList(call, xmitList, nXmitPackets,
5312                                              istack, &now, &retryTime,
5313                                              resending);
5314                             osi_Free(xmitList, maxXmitPackets *
5315                                      sizeof(struct rx_packet *));
5316                             goto restart;
5317                         }
5318                         xmitList[nXmitPackets++] = p;
5319                     }
5320                 }
5321
5322                 /* xmitList now hold pointers to all of the packets that are
5323                  * ready to send. Now we loop to send the packets */
5324                 if (nXmitPackets > 0) {
5325                     rxi_SendXmitList(call, xmitList, nXmitPackets, istack,
5326                                      &now, &retryTime, resending);
5327                 }
5328                 osi_Free(xmitList,
5329                          maxXmitPackets * sizeof(struct rx_packet *));
5330
5331 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5332                 /*
5333                  * TQ references no longer protected by this flag; they must remain
5334                  * protected by the global lock.
5335                  */
5336                 if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5337                     call->flags &= ~RX_CALL_TQ_BUSY;
5338                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5339                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5340 #ifdef RX_ENABLE_LOCKS
5341                         osirx_AssertMine(&call->lock, "rxi_Start start");
5342                         CV_BROADCAST(&call->cv_tq);
5343 #else /* RX_ENABLE_LOCKS */
5344                         osi_rxWakeup(&call->tq);
5345 #endif /* RX_ENABLE_LOCKS */
5346                     }
5347                     return;
5348                 }
5349                 if (call->error) {
5350                     /* We went into the error state while sending packets. Now is
5351                      * the time to reset the call. This will also inform the using
5352                      * process that the call is in an error state.
5353                      */
5354                     rx_MutexIncrement(rx_tq_debug.rxi_start_aborted, rx_stats_mutex);
5355                     call->flags &= ~RX_CALL_TQ_BUSY;
5356                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5357                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5358 #ifdef RX_ENABLE_LOCKS
5359                         osirx_AssertMine(&call->lock, "rxi_Start middle");
5360                         CV_BROADCAST(&call->cv_tq);
5361 #else /* RX_ENABLE_LOCKS */
5362                         osi_rxWakeup(&call->tq);
5363 #endif /* RX_ENABLE_LOCKS */
5364                     }
5365                     rxi_CallError(call, call->error);
5366                     return;
5367                 }
5368 #ifdef RX_ENABLE_LOCKS
5369                 if (call->flags & RX_CALL_TQ_SOME_ACKED) {
5370                     register int missing;
5371                     call->flags &= ~RX_CALL_TQ_SOME_ACKED;
5372                     /* Some packets have received acks. If they all have, we can clear
5373                      * the transmit queue.
5374                      */
5375                     for (missing =
5376                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5377                         if (p->header.seq < call->tfirst
5378                             && (p->flags & RX_PKTFLAG_ACKED)) {
5379                             queue_Remove(p);
5380                             rxi_FreePacket(p);
5381                         } else
5382                             missing = 1;
5383                     }
5384                     if (!missing)
5385                         call->flags |= RX_CALL_TQ_CLEARME;
5386                 }
5387 #endif /* RX_ENABLE_LOCKS */
5388                 /* Don't bother doing retransmits if the TQ is cleared. */
5389                 if (call->flags & RX_CALL_TQ_CLEARME) {
5390                     rxi_ClearTransmitQueue(call, 1);
5391                 } else
5392 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5393                 {
5394
5395                     /* Always post a resend event, if there is anything in the
5396                      * queue, and resend is possible.  There should be at least
5397                      * one unacknowledged packet in the queue ... otherwise none
5398                      * of these packets should be on the queue in the first place.
5399                      */
5400                     if (call->resendEvent) {
5401                         /* Cancel the existing event and post a new one */
5402                         rxevent_Cancel(call->resendEvent, call,
5403                                        RX_CALL_REFCOUNT_RESEND);
5404                     }
5405
5406                     /* The retry time is the retry time on the first unacknowledged
5407                      * packet inside the current window */
5408                     for (haveEvent =
5409                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5410                         /* Don't set timers for packets outside the window */
5411                         if (p->header.seq >= call->tfirst + call->twind) {
5412                             break;
5413                         }
5414
5415                         if (!(p->flags & RX_PKTFLAG_ACKED)
5416                             && !clock_IsZero(&p->retryTime)) {
5417                             haveEvent = 1;
5418                             retryTime = p->retryTime;
5419                             break;
5420                         }
5421                     }
5422
5423                     /* Post a new event to re-run rxi_Start when retries may be needed */
5424                     if (haveEvent && !(call->flags & RX_CALL_NEED_START)) {
5425 #ifdef RX_ENABLE_LOCKS
5426                         CALL_HOLD(call, RX_CALL_REFCOUNT_RESEND);
5427                         call->resendEvent =
5428                             rxevent_PostNow2(&retryTime, &usenow,
5429                                              rxi_StartUnlocked,
5430                                              (void *)call, 0, istack);
5431 #else /* RX_ENABLE_LOCKS */
5432                         call->resendEvent =
5433                             rxevent_PostNow2(&retryTime, &usenow, rxi_Start,
5434                                              (void *)call, 0, istack);
5435 #endif /* RX_ENABLE_LOCKS */
5436                     }
5437                 }
5438 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5439             } while (call->flags & RX_CALL_NEED_START);
5440             /*
5441              * TQ references no longer protected by this flag; they must remain
5442              * protected by the global lock.
5443              */
5444             call->flags &= ~RX_CALL_TQ_BUSY;
5445             if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5446                 dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5447 #ifdef RX_ENABLE_LOCKS
5448                 osirx_AssertMine(&call->lock, "rxi_Start end");
5449                 CV_BROADCAST(&call->cv_tq);
5450 #else /* RX_ENABLE_LOCKS */
5451                 osi_rxWakeup(&call->tq);
5452 #endif /* RX_ENABLE_LOCKS */
5453             }
5454         } else {
5455             call->flags |= RX_CALL_NEED_START;
5456         }
5457 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5458     } else {
5459         if (call->resendEvent) {
5460             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5461         }
5462     }
5463 }
5464
5465 /* Also adjusts the keep alive parameters for the call, to reflect
5466  * that we have just sent a packet (so keep alives aren't sent
5467  * immediately) */
5468 void
5469 rxi_Send(register struct rx_call *call, register struct rx_packet *p,
5470          int istack)
5471 {
5472     register struct rx_connection *conn = call->conn;
5473
5474     /* Stamp each packet with the user supplied status */
5475     p->header.userStatus = call->localStatus;
5476
5477     /* Allow the security object controlling this call's security to
5478      * make any last-minute changes to the packet */
5479     RXS_SendPacket(conn->securityObject, call, p);
5480
5481     /* Since we're about to send SOME sort of packet to the peer, it's
5482      * safe to nuke any scheduled end-of-packets ack */
5483     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5484
5485     /* Actually send the packet, filling in more connection-specific fields */
5486     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5487     MUTEX_EXIT(&call->lock);
5488     rxi_SendPacket(call, conn, p, istack);
5489     MUTEX_ENTER(&call->lock);
5490     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5491
5492     /* Update last send time for this call (for keep-alive
5493      * processing), and for the connection (so that we can discover
5494      * idle connections) */
5495     conn->lastSendTime = call->lastSendTime = clock_Sec();
5496     /* Don't count keepalives here, so idleness can be tracked. */
5497     if (p->header.type != RX_PACKET_TYPE_ACK)
5498         call->lastSendData = call->lastSendTime;
5499 }
5500
5501
5502 /* Check if a call needs to be destroyed.  Called by keep-alive code to ensure
5503  * that things are fine.  Also called periodically to guarantee that nothing
5504  * falls through the cracks (e.g. (error + dally) connections have keepalive
5505  * turned off.  Returns 0 if conn is well, -1 otherwise.  If otherwise, call
5506  *  may be freed!
5507  * haveCTLock Set if calling from rxi_ReapConnections
5508  */
5509 #ifdef RX_ENABLE_LOCKS
5510 int
5511 rxi_CheckCall(register struct rx_call *call, int haveCTLock)
5512 #else /* RX_ENABLE_LOCKS */
5513 int
5514 rxi_CheckCall(register struct rx_call *call)
5515 #endif                          /* RX_ENABLE_LOCKS */
5516 {
5517     register struct rx_connection *conn = call->conn;
5518     afs_uint32 now;
5519     afs_uint32 deadTime;
5520
5521 #ifdef RX_GLOBAL_RXLOCK_KERNEL
5522     if (call->flags & RX_CALL_TQ_BUSY) {
5523         /* Call is active and will be reset by rxi_Start if it's
5524          * in an error state.
5525          */
5526         return 0;
5527     }
5528 #endif
5529     /* dead time + RTT + 8*MDEV, rounded up to next second. */
5530     deadTime =
5531         (((afs_uint32) conn->secondsUntilDead << 10) +
5532          ((afs_uint32) conn->peer->rtt >> 3) +
5533          ((afs_uint32) conn->peer->rtt_dev << 1) + 1023) >> 10;
5534     now = clock_Sec();
5535     /* These are computed to the second (+- 1 second).  But that's
5536      * good enough for these values, which should be a significant
5537      * number of seconds. */
5538     if (now > (call->lastReceiveTime + deadTime)) {
5539         if (call->state == RX_STATE_ACTIVE) {
5540 #ifdef ADAPT_PMTU
5541 #if defined(KERNEL) && defined(AFS_SUN57_ENV)
5542             ire_t *ire;
5543 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5544             netstack_t *ns =  netstack_find_by_stackid(GLOBAL_NETSTACKID);
5545             ip_stack_t *ipst = ns->netstack_ip;
5546 #endif
5547             ire = ire_cache_lookup(call->conn->peer->host
5548 #if defined(AFS_SUN510_ENV) && defined(ALL_ZONES)
5549                                    , ALL_ZONES
5550 #if defined(AFS_SUN510_ENV) && (defined(ICL_3_ARG) || defined(GLOBAL_NETSTACKID))
5551                                    , NULL
5552 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5553                                    , ipst
5554 #endif
5555 #endif
5556 #endif
5557                 );
5558
5559             if (ire && ire->ire_max_frag > 0)
5560                 rxi_SetPeerMtu(call->conn->peer->host, 0, ire->ire_max_frag);
5561 #if defined(GLOBAL_NETSTACKID)
5562             netstack_rele(ns);
5563 #endif
5564 #endif
5565 #endif /* ADAPT_PMTU */
5566             rxi_CallError(call, RX_CALL_DEAD);
5567             return -1;
5568         } else {
5569 #ifdef RX_ENABLE_LOCKS
5570             /* Cancel pending events */
5571             rxevent_Cancel(call->delayedAckEvent, call,
5572                            RX_CALL_REFCOUNT_DELAY);
5573             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5574             rxevent_Cancel(call->keepAliveEvent, call,
5575                            RX_CALL_REFCOUNT_ALIVE);
5576             if (call->refCount == 0) {
5577                 rxi_FreeCall(call, haveCTLock);
5578                 return -2;
5579             }
5580             return -1;
5581 #else /* RX_ENABLE_LOCKS */
5582             rxi_FreeCall(call);
5583             return -2;
5584 #endif /* RX_ENABLE_LOCKS */
5585         }
5586         /* Non-active calls are destroyed if they are not responding
5587          * to pings; active calls are simply flagged in error, so the
5588          * attached process can die reasonably gracefully. */
5589     }
5590     /* see if we have a non-activity timeout */
5591     if (call->startWait && conn->idleDeadTime
5592         && ((call->startWait + conn->idleDeadTime) < now)) {
5593         if (call->state == RX_STATE_ACTIVE) {
5594             rxi_CallError(call, RX_CALL_TIMEOUT);
5595             return -1;
5596         }
5597     }
5598     if (call->lastSendData && conn->idleDeadTime && (conn->idleDeadErr != 0)
5599         && ((call->lastSendData + conn->idleDeadTime) < now)) {
5600         if (call->state == RX_STATE_ACTIVE) {
5601             rxi_CallError(call, conn->idleDeadErr);
5602             return -1;
5603         }
5604     }
5605     /* see if we have a hard timeout */
5606     if (conn->hardDeadTime
5607         && (now > (conn->hardDeadTime + call->startTime.sec))) {
5608         if (call->state == RX_STATE_ACTIVE)
5609             rxi_CallError(call, RX_CALL_TIMEOUT);
5610         return -1;
5611     }
5612     return 0;
5613 }
5614
5615
5616 /* When a call is in progress, this routine is called occasionally to
5617  * make sure that some traffic has arrived (or been sent to) the peer.
5618  * If nothing has arrived in a reasonable amount of time, the call is
5619  * declared dead; if nothing has been sent for a while, we send a
5620  * keep-alive packet (if we're actually trying to keep the call alive)
5621  */
5622 void
5623 rxi_KeepAliveEvent(struct rxevent *event, register struct rx_call *call,
5624                    char *dummy)
5625 {
5626     struct rx_connection *conn;
5627     afs_uint32 now;
5628
5629     MUTEX_ENTER(&call->lock);
5630     CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
5631     if (event == call->keepAliveEvent)
5632         call->keepAliveEvent = NULL;
5633     now = clock_Sec();
5634
5635 #ifdef RX_ENABLE_LOCKS
5636     if (rxi_CheckCall(call, 0)) {
5637         MUTEX_EXIT(&call->lock);
5638         return;
5639     }
5640 #else /* RX_ENABLE_LOCKS */
5641     if (rxi_CheckCall(call))
5642         return;
5643 #endif /* RX_ENABLE_LOCKS */
5644
5645     /* Don't try to keep alive dallying calls */
5646     if (call->state == RX_STATE_DALLY) {
5647         MUTEX_EXIT(&call->lock);
5648         return;
5649     }
5650
5651     conn = call->conn;
5652     if ((now - call->lastSendTime) > conn->secondsUntilPing) {
5653         /* Don't try to send keepalives if there is unacknowledged data */
5654         /* the rexmit code should be good enough, this little hack
5655          * doesn't quite work XXX */
5656         (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
5657     }
5658     rxi_ScheduleKeepAliveEvent(call);
5659     MUTEX_EXIT(&call->lock);
5660 }
5661
5662
5663 void
5664 rxi_ScheduleKeepAliveEvent(register struct rx_call *call)
5665 {
5666     if (!call->keepAliveEvent) {
5667         struct clock when, now;
5668         clock_GetTime(&now);
5669         when = now;
5670         when.sec += call->conn->secondsUntilPing;
5671         CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE);
5672         call->keepAliveEvent =
5673             rxevent_PostNow(&when, &now, rxi_KeepAliveEvent, call, 0);
5674     }
5675 }
5676
5677 /* N.B. rxi_KeepAliveOff:  is defined earlier as a macro */
5678 void
5679 rxi_KeepAliveOn(register struct rx_call *call)
5680 {
5681     /* Pretend last packet received was received now--i.e. if another
5682      * packet isn't received within the keep alive time, then the call
5683      * will die; Initialize last send time to the current time--even
5684      * if a packet hasn't been sent yet.  This will guarantee that a
5685      * keep-alive is sent within the ping time */
5686     call->lastReceiveTime = call->lastSendTime = clock_Sec();
5687     rxi_ScheduleKeepAliveEvent(call);
5688 }
5689
5690 /* This routine is called to send connection abort messages
5691  * that have been delayed to throttle looping clients. */
5692 void
5693 rxi_SendDelayedConnAbort(struct rxevent *event,
5694                          register struct rx_connection *conn, char *dummy)
5695 {
5696     afs_int32 error;
5697     struct rx_packet *packet;
5698
5699     MUTEX_ENTER(&conn->conn_data_lock);
5700     conn->delayedAbortEvent = NULL;
5701     error = htonl(conn->error);
5702     conn->abortCount++;
5703     MUTEX_EXIT(&conn->conn_data_lock);
5704     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5705     if (packet) {
5706         packet =
5707             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5708                             RX_PACKET_TYPE_ABORT, (char *)&error,
5709                             sizeof(error), 0);
5710         rxi_FreePacket(packet);
5711     }
5712 }
5713
5714 /* This routine is called to send call abort messages
5715  * that have been delayed to throttle looping clients. */
5716 void
5717 rxi_SendDelayedCallAbort(struct rxevent *event, register struct rx_call *call,
5718                          char *dummy)
5719 {
5720     afs_int32 error;
5721     struct rx_packet *packet;
5722
5723     MUTEX_ENTER(&call->lock);
5724     call->delayedAbortEvent = NULL;
5725     error = htonl(call->error);
5726     call->abortCount++;
5727     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5728     if (packet) {
5729         packet =
5730             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
5731                             (char *)&error, sizeof(error), 0);
5732         rxi_FreePacket(packet);
5733     }
5734     CALL_RELE(call, RX_CALL_REFCOUNT_ABORT);
5735     MUTEX_EXIT(&call->lock);
5736 }
5737
5738 /* This routine is called periodically (every RX_AUTH_REQUEST_TIMEOUT
5739  * seconds) to ask the client to authenticate itself.  The routine
5740  * issues a challenge to the client, which is obtained from the
5741  * security object associated with the connection */
5742 void
5743 rxi_ChallengeEvent(struct rxevent *event, register struct rx_connection *conn,
5744                    void *arg1, int tries)
5745 {
5746     conn->challengeEvent = NULL;
5747     if (RXS_CheckAuthentication(conn->securityObject, conn) != 0) {
5748         register struct rx_packet *packet;
5749         struct clock when, now;
5750
5751         if (tries <= 0) {
5752             /* We've failed to authenticate for too long.
5753              * Reset any calls waiting for authentication;
5754              * they are all in RX_STATE_PRECALL.
5755              */
5756             int i;
5757
5758             MUTEX_ENTER(&conn->conn_call_lock);
5759             for (i = 0; i < RX_MAXCALLS; i++) {
5760                 struct rx_call *call = conn->call[i];
5761                 if (call) {
5762                     MUTEX_ENTER(&call->lock);
5763                     if (call->state == RX_STATE_PRECALL) {
5764                         rxi_CallError(call, RX_CALL_DEAD);
5765                         rxi_SendCallAbort(call, NULL, 0, 0);
5766                     }
5767                     MUTEX_EXIT(&call->lock);
5768                 }
5769             }
5770             MUTEX_EXIT(&conn->conn_call_lock);
5771             return;
5772         }
5773
5774         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5775         if (packet) {
5776             /* If there's no packet available, do this later. */
5777             RXS_GetChallenge(conn->securityObject, conn, packet);
5778             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5779                             RX_PACKET_TYPE_CHALLENGE, NULL, -1, 0);
5780             rxi_FreePacket(packet);
5781         }
5782         clock_GetTime(&now);
5783         when = now;
5784         when.sec += RX_CHALLENGE_TIMEOUT;
5785         conn->challengeEvent =
5786             rxevent_PostNow2(&when, &now, rxi_ChallengeEvent, conn, 0,
5787                          (tries - 1));
5788     }
5789 }
5790
5791 /* Call this routine to start requesting the client to authenticate
5792  * itself.  This will continue until authentication is established,
5793  * the call times out, or an invalid response is returned.  The
5794  * security object associated with the connection is asked to create
5795  * the challenge at this time.  N.B.  rxi_ChallengeOff is a macro,
5796  * defined earlier. */
5797 void
5798 rxi_ChallengeOn(register struct rx_connection *conn)
5799 {
5800     if (!conn->challengeEvent) {
5801         RXS_CreateChallenge(conn->securityObject, conn);
5802         rxi_ChallengeEvent(NULL, conn, 0, RX_CHALLENGE_MAXTRIES);
5803     };
5804 }
5805
5806
5807 /* Compute round trip time of the packet provided, in *rttp.
5808  */
5809
5810 /* rxi_ComputeRoundTripTime is called with peer locked. */
5811 /* sentp and/or peer may be null */
5812 void
5813 rxi_ComputeRoundTripTime(register struct rx_packet *p,
5814                          register struct clock *sentp,
5815                          register struct rx_peer *peer)
5816 {
5817     struct clock thisRtt, *rttp = &thisRtt;
5818
5819     register int rtt_timeout;
5820
5821     clock_GetTime(rttp);
5822
5823     if (clock_Lt(rttp, sentp)) {
5824         clock_Zero(rttp);
5825         return;                 /* somebody set the clock back, don't count this time. */
5826     }
5827     clock_Sub(rttp, sentp);
5828     MUTEX_ENTER(&rx_stats_mutex);
5829     if (clock_Lt(rttp, &rx_stats.minRtt))
5830         rx_stats.minRtt = *rttp;
5831     if (clock_Gt(rttp, &rx_stats.maxRtt)) {
5832         if (rttp->sec > 60) {
5833             MUTEX_EXIT(&rx_stats_mutex);
5834             return;             /* somebody set the clock ahead */
5835         }
5836         rx_stats.maxRtt = *rttp;
5837     }
5838     clock_Add(&rx_stats.totalRtt, rttp);
5839     rx_stats.nRttSamples++;
5840     MUTEX_EXIT(&rx_stats_mutex);
5841
5842     /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */
5843
5844     /* Apply VanJacobson round-trip estimations */
5845     if (peer->rtt) {
5846         register int delta;
5847
5848         /*
5849          * srtt (peer->rtt) is in units of one-eighth-milliseconds.
5850          * srtt is stored as fixed point with 3 bits after the binary
5851          * point (i.e., scaled by 8). The following magic is
5852          * equivalent to the smoothing algorithm in rfc793 with an
5853          * alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed point).
5854          * srtt*8 = srtt*8 + rtt - srtt
5855          * srtt = srtt + rtt/8 - srtt/8
5856          */
5857
5858         delta = MSEC(rttp) - (peer->rtt >> 3);
5859         peer->rtt += delta;
5860
5861         /*
5862          * We accumulate a smoothed rtt variance (actually, a smoothed
5863          * mean difference), then set the retransmit timer to smoothed
5864          * rtt + 4 times the smoothed variance (was 2x in van's original
5865          * paper, but 4x works better for me, and apparently for him as
5866          * well).
5867          * rttvar is stored as
5868          * fixed point with 2 bits after the binary point (scaled by
5869          * 4).  The following is equivalent to rfc793 smoothing with
5870          * an alpha of .75 (rttvar = rttvar*3/4 + |delta| / 4).  This
5871          * replaces rfc793's wired-in beta.
5872          * dev*4 = dev*4 + (|actual - expected| - dev)
5873          */
5874
5875         if (delta < 0)
5876             delta = -delta;
5877
5878         delta -= (peer->rtt_dev >> 2);
5879         peer->rtt_dev += delta;
5880     } else {
5881         /* I don't have a stored RTT so I start with this value.  Since I'm
5882          * probably just starting a call, and will be pushing more data down
5883          * this, I expect congestion to increase rapidly.  So I fudge a
5884          * little, and I set deviance to half the rtt.  In practice,
5885          * deviance tends to approach something a little less than
5886          * half the smoothed rtt. */
5887         peer->rtt = (MSEC(rttp) << 3) + 8;
5888         peer->rtt_dev = peer->rtt >> 2; /* rtt/2: they're scaled differently */
5889     }
5890     /* the timeout is RTT + 4*MDEV + 0.35 sec   This is because one end or
5891      * the other of these connections is usually in a user process, and can
5892      * be switched and/or swapped out.  So on fast, reliable networks, the
5893      * timeout would otherwise be too short.
5894      */
5895     rtt_timeout = (peer->rtt >> 3) + peer->rtt_dev + 350;
5896     clock_Zero(&(peer->timeout));
5897     clock_Addmsec(&(peer->timeout), rtt_timeout);
5898
5899     dpf(("rxi_ComputeRoundTripTime(rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%0.3d sec)\n", MSEC(rttp), peer->rtt >> 3, peer->rtt_dev >> 2, (peer->timeout.sec), (peer->timeout.usec)));
5900 }
5901
5902
/* Periodic garbage collector for connections and peers.
 *
 * Each run does four things:
 *   1. walks rx_connHashTable, running rxi_CheckCall on every call and
 *      destroying idle server connections;
 *   2. walks rx_peerHashTable, freeing peers that are unreferenced and
 *      have been idle for longer than rx_idlePeerTime;
 *   3. wakes anybody waiting for free packets;
 *   4. re-posts itself to run again RX_REAP_TIME seconds later.
 */
void
rxi_ReapConnections(void)
{
    struct clock now, when;
    clock_GetTime(&now);

    /* Find server connection structures that haven't been used for
     * greater than rx_idleConnectionTime */
    {
        struct rx_connection **conn_ptr, **conn_end;
        int i, havecalls = 0;
        /* The whole hash-table walk runs under rx_connHashTable_lock. */
        MUTEX_ENTER(&rx_connHashTable_lock);
        for (conn_ptr = &rx_connHashTable[0], conn_end =
             &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
             conn_ptr++) {
            struct rx_connection *conn, *next;
            struct rx_call *call;
            int result;

            /* Restart point for the current bucket; jumped to whenever
             * a call was freed underneath us. */
          rereap:
            for (conn = *conn_ptr; conn; conn = next) {
                /* XXX -- Shouldn't the connection be locked? */
                /* Remember the successor first: conn may be destroyed
                 * further down. */
                next = conn->next;
                havecalls = 0;
                for (i = 0; i < RX_MAXCALLS; i++) {
                    call = conn->call[i];
                    if (call) {
                        havecalls = 1;
                        MUTEX_ENTER(&call->lock);
#ifdef RX_ENABLE_LOCKS
                        /* haveCTLock = 1: we are the reaper. */
                        result = rxi_CheckCall(call, 1);
#else /* RX_ENABLE_LOCKS */
                        result = rxi_CheckCall(call);
#endif /* RX_ENABLE_LOCKS */
                        MUTEX_EXIT(&call->lock);
                        if (result == -2) {
                            /* If CheckCall freed the call, it might
                             * have destroyed  the connection as well,
                             * which screws up the linked lists.
                             */
                            goto rereap;
                        }
                    }
                }
                if (conn->type == RX_SERVER_CONNECTION) {
                    /* This only actually destroys the connection if
                     * there are no outstanding calls */
                    MUTEX_ENTER(&conn->conn_data_lock);
                    if (!havecalls && !conn->refCount
                        && ((conn->lastSendTime + rx_idleConnectionTime) <
                            now.sec)) {
                        conn->refCount++;       /* it will be decr in rx_DestroyConn */
                        MUTEX_EXIT(&conn->conn_data_lock);
#ifdef RX_ENABLE_LOCKS
                        rxi_DestroyConnectionNoLock(conn);
#else /* RX_ENABLE_LOCKS */
                        rxi_DestroyConnection(conn);
#endif /* RX_ENABLE_LOCKS */
                    }
                    /* NOTE(review): the unlock on the "keep it" path is
                     * compiled only with RX_ENABLE_LOCKS; presumably
                     * the MUTEX_* macros expand to nothing otherwise --
                     * confirm. */
#ifdef RX_ENABLE_LOCKS
                    else {
                        MUTEX_EXIT(&conn->conn_data_lock);
                    }
#endif /* RX_ENABLE_LOCKS */
                }
            }
        }
#ifdef RX_ENABLE_LOCKS
        /* Drain rx_connCleanup_list (presumably filled by
         * rxi_DestroyConnectionNoLock above -- confirm).  The hash
         * table lock is dropped around each rxi_CleanupConnection call
         * and re-taken before the list head is looked at again. */
        while (rx_connCleanup_list) {
            struct rx_connection *conn;
            conn = rx_connCleanup_list;
            rx_connCleanup_list = rx_connCleanup_list->next;
            MUTEX_EXIT(&rx_connHashTable_lock);
            rxi_CleanupConnection(conn);
            MUTEX_ENTER(&rx_connHashTable_lock);
        }
        MUTEX_EXIT(&rx_connHashTable_lock);
#endif /* RX_ENABLE_LOCKS */
    }

    /* Find any peer structures that haven't been used (haven't had an
     * associated connection) for greater than rx_idlePeerTime */
    {
        struct rx_peer **peer_ptr, **peer_end;
        int code;
        /* Lock order here: rx_rpc_stats first, then
         * rx_peerHashTable_lock. */
        MUTEX_ENTER(&rx_rpc_stats);
        MUTEX_ENTER(&rx_peerHashTable_lock);
        for (peer_ptr = &rx_peerHashTable[0], peer_end =
             &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
             peer_ptr++) {
            struct rx_peer *peer, *next, *prev;
            /* prev trails peer so that a freed peer can be unlinked
             * from the singly linked bucket chain. */
            for (prev = peer = *peer_ptr; peer; peer = next) {
                next = peer->next;
                /* Only try the peer lock; a peer whose lock cannot be
                 * taken is left alone this round. */
                code = MUTEX_TRYENTER(&peer->peer_lock);
                if ((code) && (peer->refCount == 0)
                    && ((peer->idleWhen + rx_idlePeerTime) < now.sec)) {
                    rx_interface_stat_p rpc_stat, nrpc_stat;
                    size_t space;
                    MUTEX_EXIT(&peer->peer_lock);
                    MUTEX_DESTROY(&peer->peer_lock);
                    /* Release every per-interface RPC statistics record
                     * hanging off this peer. */
                    for (queue_Scan
                         (&peer->rpcStats, rpc_stat, nrpc_stat,
                          rx_interface_stat)) {
                        unsigned int num_funcs;
                        if (!rpc_stat)
                            break;
                        /* The record sits on two queues; take it off
                         * both. */
                        queue_Remove(&rpc_stat->queue_header);
                        queue_Remove(&rpc_stat->all_peers);
                        num_funcs = rpc_stat->stats[0].func_total;
                        /* Size handed to rxi_Free: the header plus one
                         * rx_function_entry_v1_t per function. */
                        space =
                            sizeof(rx_interface_stat_t) +
                            rpc_stat->stats[0].func_total *
                            sizeof(rx_function_entry_v1_t);

                        rxi_Free(rpc_stat, space);
                        rxi_rpc_peer_stat_cnt -= num_funcs;
                    }
                    rxi_FreePeer(peer);
                    rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
                    /* Unlink: either advance the bucket head or splice
                     * around the freed entry. */
                    if (peer == *peer_ptr) {
                        *peer_ptr = next;
                        prev = next;
                    } else
                        prev->next = next;
                } else {
                    /* Peer is kept; release its lock only if the
                     * try-enter actually got it. */
                    if (code) {
                        MUTEX_EXIT(&peer->peer_lock);
                    }
                    prev = peer;
                }
            }
        }
        MUTEX_EXIT(&rx_peerHashTable_lock);
        MUTEX_EXIT(&rx_rpc_stats);
    }

    /* THIS HACK IS A TEMPORARY HACK.  The idea is that the race condition in
     * rxi_AllocSendPacket, if it hits, will be handled at the next conn
     * GC, just below.  Really, we shouldn't have to keep moving packets from
     * one place to another, but instead ought to always know if we can
     * afford to hold onto a packet in its particular use.  */
    MUTEX_ENTER(&rx_freePktQ_lock);
    if (rx_waitingForPackets) {
        rx_waitingForPackets = 0;
#ifdef  RX_ENABLE_LOCKS
        CV_BROADCAST(&rx_waitingForPackets_cv);
#else
        osi_rxWakeup(&rx_waitingForPackets);
#endif
    }
    MUTEX_EXIT(&rx_freePktQ_lock);

    /* Schedule the next run, measured from the time sampled on entry. */
    when = now;
    when.sec += RX_REAP_TIME;   /* Check every RX_REAP_TIME seconds */
    rxevent_Post(&when, rxi_ReapConnections, 0, 0);
}
6061
6062
/* rxs_Release - Release a security object before it is discarded.
 *
 * This function isn't strictly necessary, but it gives callers a
 * better name than the somewhat strange macro from rx.h that it wraps.
 * Each connection using a security object has its own refcount to the
 * object, so it won't actually be freed until the last connection is
 * destroyed.
 *
 * This is the only rxs module call.  A hold could also be written but
 * no one needs it.
 *
 * Returns whatever RXS_Close yields for aobj. */

int
rxs_Release(struct rx_securityClass *aobj)
{
    int code;

    code = RXS_Close(aobj);
    return code;
}
6077
6078 #ifdef ADAPT_WINDOW
/* Packet-size constants, defined only when ADAPT_WINDOW is set. */
/* Per-packet overhead: RX_HEADER_SIZE plus RX_IPUDP_SIZE. */
#define RXRATE_PKT_OH   (RX_HEADER_SIZE + RX_IPUDP_SIZE)
/* Overhead plus one complete struct rx_ackPacket. */
#define RXRATE_SMALL_PKT    (RXRATE_PKT_OH + sizeof(struct rx_ackPacket))
/* Overhead plus half of a struct rx_ackPacket. */
#define RXRATE_AVG_SMALL_PKT    (RXRATE_PKT_OH + (sizeof(struct rx_ackPacket)/2))
/* A "small" packet plus 256 bytes. */
#define RXRATE_LARGE_PKT    (RXRATE_SMALL_PKT + 256)
6083
6084 /* Adjust our estimate of the transmission rate to this peer, given
6085  * that the packet p was just acked. We can adjust peer->timeout and
6086  * call->twind. Pragmatically, this is called
6087  * only with packets of maximal length.
6088  * Called with peer and call locked.
6089  */
6090
6091 static void
6092 rxi_ComputeRate(register struct rx_peer *peer, register struct rx_call *call,
6093                 struct rx_packet *p, struct rx_packet *ackp, u_char ackReason)
6094 {
6095     afs_int32 xferSize, xferMs;
6096     register afs_int32 minTime;
6097     struct clock newTO;
6098
6099     /* Count down packets */
6100     if (peer->rateFlag > 0)
6101         peer->rateFlag--;
6102     /* Do nothing until we're enabled */
6103     if (peer->rateFlag != 0)
6104         return;
6105     if (!call->conn)
6106         return;
6107
6108     /* Count only when the ack seems legitimate */
6109     switch (ackReason) {
6110     case RX_ACK_REQUESTED:
6111         xferSize =
6112             p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize;
6113         xferMs = peer->rtt;
6114         break;
6115
6116     case RX_ACK_PING_RESPONSE:
6117         if (p)                  /* want the response to ping-request, not data send */
6118             return;
6119         clock_GetTime(&newTO);
6120         if (clock_Gt(&newTO, &call->pingRequestTime)) {
6121             clock_Sub(&newTO, &call->pingRequestTime);
6122             xferMs = (newTO.sec * 1000) + (newTO.usec / 1000);
6123         } else {
6124             return;
6125         }
6126         xferSize = rx_AckDataSize(rx_Window) + RX_HEADER_SIZE;
6127         break;
6128
6129     default:
6130         return;
6131     }
6132
6133     dpf(("CONG peer %lx/%u: sample (%s) size %ld, %ld ms (to %lu.%06lu, rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), (ackReason == RX_ACK_REQUESTED ? "dataack" : "pingack"), xferSize, xferMs, peer->timeout.sec, peer->timeout.usec, peer->smRtt, peer->ifMTU));
6134
6135     /* Track only packets that are big enough. */
6136     if ((p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize) <
6137         peer->ifMTU)
6138         return;
6139
6140     /* absorb RTT data (in milliseconds) for these big packets */
6141     if (peer->smRtt == 0) {
6142         peer->smRtt = xferMs;
6143     } else {
6144         peer->smRtt = ((peer->smRtt * 15) + xferMs + 4) >> 4;
6145         if (!peer->smRtt)
6146             peer->smRtt = 1;
6147     }
6148
6149     if (peer->countDown) {
6150         peer->countDown--;
6151         return;
6152     }
6153     peer->countDown = 10;       /* recalculate only every so often */
6154
6155     /* In practice, we can measure only the RTT for full packets,
6156      * because of the way Rx acks the data that it receives.  (If it's
6157      * smaller than a full packet, it often gets implicitly acked
6158      * either by the call response (from a server) or by the next call
6159      * (from a client), and either case confuses transmission times
6160      * with processing times.)  Therefore, replace the above
6161      * more-sophisticated processing with a simpler version, where the
6162      * smoothed RTT is kept for full-size packets, and the time to
6163      * transmit a windowful of full-size packets is simply RTT *
6164      * windowSize. Again, we take two steps:
6165      - ensure the timeout is large enough for a single packet's RTT;
6166      - ensure that the window is small enough to fit in the desired timeout.*/
6167
6168     /* First, the timeout check. */
6169     minTime = peer->smRtt;
6170     /* Get a reasonable estimate for a timeout period */
6171     minTime += minTime;
6172     newTO.sec = minTime / 1000;
6173     newTO.usec = (minTime - (newTO.sec * 1000)) * 1000;
6174
6175     /* Increase the timeout period so that we can always do at least
6176      * one packet exchange */
6177     if (clock_Gt(&newTO, &peer->timeout)) {
6178
6179         dpf(("CONG peer %lx/%u: timeout %lu.%06lu ==> %lu.%06lu (rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), peer->timeout.sec, peer->timeout.usec, newTO.sec, newTO.usec, peer->smRtt, peer->packetSize));
6180
6181         peer->timeout = newTO;
6182     }
6183
6184     /* Now, get an estimate for the transmit window size. */
6185     minTime = peer->timeout.sec * 1000 + (peer->timeout.usec / 1000);
6186     /* Now, convert to the number of full packets that could fit in a
6187      * reasonable fraction of that interval */
6188     minTime /= (peer->smRtt << 1);
6189     xferSize = minTime;         /* (make a copy) */
6190
6191     /* Now clamp the size to reasonable bounds. */
6192     if (minTime <= 1)
6193         minTime = 1;
6194     else if (minTime > rx_Window)
6195         minTime = rx_Window;
6196 /*    if (minTime != peer->maxWindow) {
6197       dpf(("CONG peer %lx/%u: windowsize %lu ==> %lu (to %lu.%06lu, rtt %u, ps %u)",
6198              ntohl(peer->host), ntohs(peer->port), peer->maxWindow, minTime,
6199              peer->timeout.sec, peer->timeout.usec, peer->smRtt,
6200              peer->packetSize));
6201       peer->maxWindow = minTime;
6202         elide... call->twind = minTime;
6203     }
6204 */
6205
6206     /* Cut back on the peer timeout if it had earlier grown unreasonably.
6207      * Discern this by calculating the timeout necessary for rx_Window
6208      * packets. */
6209     if ((xferSize > rx_Window) && (peer->timeout.sec >= 3)) {
6210         /* calculate estimate for transmission interval in milliseconds */
6211         minTime = rx_Window * peer->smRtt;
6212         if (minTime < 1000) {
6213             dpf(("CONG peer %lx/%u: cut TO %lu.%06lu by 0.5 (rtt %u, ps %u)",
6214                  ntohl(peer->host), ntohs(peer->port), peer->timeout.sec,
6215                  peer->timeout.usec, peer->smRtt, peer->packetSize));
6216
6217             newTO.sec = 0;      /* cut back on timeout by half a second */
6218             newTO.usec = 500000;
6219             clock_Sub(&peer->timeout, &newTO);
6220         }
6221     }
6222
6223     return;
6224 }                               /* end of rxi_ComputeRate */
6225 #endif /* ADAPT_WINDOW */
6226
6227
6228 #ifdef RXDEBUG
/*
 * rxi_DebugInit - establish the initial state of rx debug output.
 *
 * On AFS_NT40_ENV builds debug output starts out disabled and is
 * switched on only if the "TraceOption" value stored under
 * AFSREG_CLT_SVC_PARAM_SUBKEY can be read and has the
 * TRACE_OPTION_DEBUGLOG bit set.  On every other build this routine
 * has an empty body.
 */
void
rxi_DebugInit(void)
{
#ifdef AFS_NT40_ENV
#define TRACE_OPTION_DEBUGLOG 4
    HKEY svcKey;
    DWORD traceFlags;
    DWORD valueLen = sizeof(traceFlags);
    long status;

    /* default: off, unless the registry says otherwise */
    rxdebug_active = 0;

    status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
                          0, KEY_QUERY_VALUE, &svcKey);
    if (status == ERROR_SUCCESS) {
        status = RegQueryValueEx(svcKey, "TraceOption", NULL, NULL,
                                 (BYTE *) &traceFlags, &valueLen);
        if (status == ERROR_SUCCESS)
            rxdebug_active = ((traceFlags & TRACE_OPTION_DEBUGLOG) != 0);
        RegCloseKey(svcKey);
    }
#endif /* AFS_NT40_ENV */
}
6255
6256 #ifdef AFS_NT40_ENV
6257 void
6258 rx_DebugOnOff(int on)
6259 {
6260     rxdebug_active = on;
6261 }
6262 #endif /* AFS_NT40_ENV */
6263
6264
6265 /* Don't call this debugging routine directly; use dpf */
6266 void
6267 rxi_DebugPrint(char *format, int a1, int a2, int a3, int a4, int a5, int a6,
6268                int a7, int a8, int a9, int a10, int a11, int a12, int a13,
6269                int a14, int a15)
6270 {
6271 #ifdef AFS_NT40_ENV
6272     char msg[512];
6273     char tformat[256];
6274     size_t len;
6275
6276     len = _snprintf(tformat, sizeof(tformat), "tid[%d] %s", GetCurrentThreadId(), format);
6277
6278     if (len > 0) {
6279         len = _snprintf(msg, sizeof(msg)-2,
6280                         tformat, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
6281                         a11, a12, a13, a14, a15);
6282         if (len > 0) {
6283             if (msg[len-1] != '\n') {
6284                 msg[len] = '\n';
6285                 msg[len+1] = '\0';
6286             }
6287             OutputDebugString(msg);
6288         }
6289     }
6290 #else
6291     struct clock now;
6292     clock_GetTime(&now);
6293     fprintf(rx_Log, " %u.%.3u:", (unsigned int)now.sec,
6294             (unsigned int)now.usec / 1000);
6295     fprintf(rx_Log, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
6296             a13, a14, a15);
6297     putc('\n', rx_Log);
6298 #endif
6299 }
6300
6301 /*
6302  * This function is used to process the rx_stats structure that is local
6303  * to a process as well as an rx_stats structure received from a remote
6304  * process (via rxdebug).  Therefore, it needs to do minimal version
6305  * checking.
6306  */
6307 void
6308 rx_PrintTheseStats(FILE * file, struct rx_stats *s, int size,
6309                    afs_int32 freePackets, char version)
6310 {
6311     int i;
6312
6313     if (size != sizeof(struct rx_stats)) {
6314         fprintf(file,
6315                 "Unexpected size of stats structure: was %d, expected %d\n",
6316                 size, sizeof(struct rx_stats));
6317     }
6318
6319     fprintf(file, "rx stats: free packets %d, allocs %d, ", (int)freePackets,
6320             s->packetRequests);
6321
6322     if (version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6323         fprintf(file, "alloc-failures(rcv %d/%d,send %d/%d,ack %d)\n",
6324                 s->receivePktAllocFailures, s->receiveCbufPktAllocFailures,
6325                 s->sendPktAllocFailures, s->sendCbufPktAllocFailures,
6326                 s->specialPktAllocFailures);
6327     } else {
6328         fprintf(file, "alloc-failures(rcv %d,send %d,ack %d)\n",
6329                 s->receivePktAllocFailures, s->sendPktAllocFailures,
6330                 s->specialPktAllocFailures);
6331     }
6332
6333     fprintf(file,
6334             "   greedy %d, " "bogusReads %d (last from host %x), "
6335             "noPackets %d, " "noBuffers %d, " "selects %d, "
6336             "sendSelects %d\n", s->socketGreedy, s->bogusPacketOnRead,
6337             s->bogusHost, s->noPacketOnRead, s->noPacketBuffersOnRead,
6338             s->selects, s->sendSelects);
6339
6340     fprintf(file, "   packets read: ");
6341     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6342         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsRead[i]);
6343     }
6344     fprintf(file, "\n");
6345
6346     fprintf(file,
6347             "   other read counters: data %d, " "ack %d, " "dup %d "
6348             "spurious %d " "dally %d\n", s->dataPacketsRead,
6349             s->ackPacketsRead, s->dupPacketsRead, s->spuriousPacketsRead,
6350             s->ignorePacketDally);
6351
6352     fprintf(file, "   packets sent: ");
6353     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6354         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsSent[i]);
6355     }
6356     fprintf(file, "\n");
6357
6358     fprintf(file,
6359             "   other send counters: ack %d, " "data %d (not resends), "
6360             "resends %d, " "pushed %d, " "acked&ignored %d\n",
6361             s->ackPacketsSent, s->dataPacketsSent, s->dataPacketsReSent,
6362             s->dataPacketsPushed, s->ignoreAckedPacket);
6363
6364     fprintf(file,
6365             "   \t(these should be small) sendFailed %d, " "fatalErrors %d\n",
6366             s->netSendFailures, (int)s->fatalErrors);
6367
6368     if (s->nRttSamples) {
6369         fprintf(file, "   Average rtt is %0.3f, with %d samples\n",
6370                 clock_Float(&s->totalRtt) / s->nRttSamples, s->nRttSamples);
6371
6372         fprintf(file, "   Minimum rtt is %0.3f, maximum is %0.3f\n",
6373                 clock_Float(&s->minRtt), clock_Float(&s->maxRtt));
6374     }
6375
6376     fprintf(file,
6377             "   %d server connections, " "%d client connections, "
6378             "%d peer structs, " "%d call structs, " "%d free call structs\n",
6379             s->nServerConns, s->nClientConns, s->nPeerStructs,
6380             s->nCallStructs, s->nFreeCallStructs);
6381
6382 #if     !defined(AFS_PTHREAD_ENV) && !defined(AFS_USE_GETTIMEOFDAY)
6383     fprintf(file, "   %d clock updates\n", clock_nUpdates);
6384 #endif
6385
6386 }
6387
6388 /* for backward compatibility */
6389 void
6390 rx_PrintStats(FILE * file)
6391 {
6392     MUTEX_ENTER(&rx_stats_mutex);
6393     rx_PrintTheseStats(file, &rx_stats, sizeof(rx_stats), rx_nFreePackets,
6394                        RX_DEBUGI_VERSION);
6395     MUTEX_EXIT(&rx_stats_mutex);
6396 }
6397
6398 void
6399 rx_PrintPeerStats(FILE * file, struct rx_peer *peer)
6400 {
6401     fprintf(file, "Peer %x.%d.  " "Burst size %d, " "burst wait %u.%d.\n",
6402             ntohl(peer->host), (int)peer->port, (int)peer->burstSize,
6403             (int)peer->burstWait.sec, (int)peer->burstWait.usec);
6404
6405     fprintf(file,
6406             "   Rtt %d, " "retry time %u.%06d, " "total sent %d, "
6407             "resent %d\n", peer->rtt, (int)peer->timeout.sec,
6408             (int)peer->timeout.usec, peer->nSent, peer->reSends);
6409
6410     fprintf(file,
6411             "   Packet size %d, " "max in packet skew %d, "
6412             "max out packet skew %d\n", peer->ifMTU, (int)peer->inPacketSkew,
6413             (int)peer->outPacketSkew);
6414 }
6415
6416 #ifdef AFS_PTHREAD_ENV
6417 /*
6418  * This mutex protects the following static variables:
6419  * counter
6420  */
6421
6422 #define LOCK_RX_DEBUG assert(pthread_mutex_lock(&rx_debug_mutex)==0)
6423 #define UNLOCK_RX_DEBUG assert(pthread_mutex_unlock(&rx_debug_mutex)==0)
6424 #else
6425 #define LOCK_RX_DEBUG
6426 #define UNLOCK_RX_DEBUG
6427 #endif /* AFS_PTHREAD_ENV */
6428
6429 static int
6430 MakeDebugCall(osi_socket socket, afs_uint32 remoteAddr, afs_uint16 remotePort,
6431               u_char type, void *inputData, size_t inputLength,
6432               void *outputData, size_t outputLength)
6433 {
6434     static afs_int32 counter = 100;
6435     time_t waitTime, waitCount, startTime;
6436     struct rx_header theader;
6437     char tbuffer[1500];
6438     register afs_int32 code;
6439     struct timeval tv_now, tv_wake, tv_delta;
6440     struct sockaddr_in taddr, faddr;
6441     int faddrLen;
6442     fd_set imask;
6443     register char *tp;
6444
6445     startTime = time(0);
6446     waitTime = 1;
6447     waitCount = 5;
6448     LOCK_RX_DEBUG;
6449     counter++;
6450     UNLOCK_RX_DEBUG;
6451     tp = &tbuffer[sizeof(struct rx_header)];
6452     taddr.sin_family = AF_INET;
6453     taddr.sin_port = remotePort;
6454     taddr.sin_addr.s_addr = remoteAddr;
6455 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
6456     taddr.sin_len = sizeof(struct sockaddr_in);
6457 #endif
6458     while (1) {
6459         memset(&theader, 0, sizeof(theader));
6460         theader.epoch = htonl(999);
6461         theader.cid = 0;
6462         theader.callNumber = htonl(counter);
6463         theader.seq = 0;
6464         theader.serial = 0;
6465         theader.type = type;
6466         theader.flags = RX_CLIENT_INITIATED | RX_LAST_PACKET;
6467         theader.serviceId = 0;
6468
6469         memcpy(tbuffer, &theader, sizeof(theader));
6470         memcpy(tp, inputData, inputLength);
6471         code =
6472             sendto(socket, tbuffer, inputLength + sizeof(struct rx_header), 0,
6473                    (struct sockaddr *)&taddr, sizeof(struct sockaddr_in));
6474
6475         /* see if there's a packet available */
6476         gettimeofday(&tv_wake,0);
6477         tv_wake.tv_sec += waitTime;
6478         for (;;) {
6479             FD_ZERO(&imask);
6480             FD_SET(socket, &imask);
6481             tv_delta.tv_sec = tv_wake.tv_sec;
6482             tv_delta.tv_usec = tv_wake.tv_usec;
6483             gettimeofday(&tv_now, 0);
6484
6485             if (tv_delta.tv_usec < tv_now.tv_usec) {
6486                 /* borrow */
6487                 tv_delta.tv_usec += 1000000;
6488                 tv_delta.tv_sec--;
6489             }
6490             tv_delta.tv_usec -= tv_now.tv_usec;
6491
6492             if (tv_delta.tv_sec < tv_now.tv_sec) {
6493                 /* time expired */
6494                 break;
6495             }
6496             tv_delta.tv_sec -= tv_now.tv_sec;
6497
6498             code = select(socket + 1, &imask, 0, 0, &tv_delta);
6499             if (code == 1 && FD_ISSET(socket, &imask)) {
6500                 /* now receive a packet */
6501                 faddrLen = sizeof(struct sockaddr_in);
6502                 code =
6503                     recvfrom(socket, tbuffer, sizeof(tbuffer), 0,
6504                              (struct sockaddr *)&faddr, &faddrLen);
6505
6506                 if (code > 0) {
6507                     memcpy(&theader, tbuffer, sizeof(struct rx_header));
6508                     if (counter == ntohl(theader.callNumber))
6509                         goto success;
6510                     continue;
6511                 }
6512             }
6513             break;
6514         }
6515
6516         /* see if we've timed out */
6517         if (!--waitCount) {
6518             return -1;
6519         }
6520         waitTime <<= 1;
6521     }
6522
6523  success:
6524     code -= sizeof(struct rx_header);
6525     if (code > outputLength)
6526         code = outputLength;
6527     memcpy(outputData, tp, code);
6528     return code;
6529 }
6530
6531 afs_int32
6532 rx_GetServerDebug(osi_socket socket, afs_uint32 remoteAddr,
6533                   afs_uint16 remotePort, struct rx_debugStats * stat,
6534                   afs_uint32 * supportedValues)
6535 {
6536     struct rx_debugIn in;
6537     afs_int32 rc = 0;
6538
6539     *supportedValues = 0;
6540     in.type = htonl(RX_DEBUGI_GETSTATS);
6541     in.index = 0;
6542
6543     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6544                        &in, sizeof(in), stat, sizeof(*stat));
6545
6546     /*
6547      * If the call was successful, fixup the version and indicate
6548      * what contents of the stat structure are valid.
6549      * Also do net to host conversion of fields here.
6550      */
6551
6552     if (rc >= 0) {
6553         if (stat->version >= RX_DEBUGI_VERSION_W_SECSTATS) {
6554             *supportedValues |= RX_SERVER_DEBUG_SEC_STATS;
6555         }
6556         if (stat->version >= RX_DEBUGI_VERSION_W_GETALLCONN) {
6557             *supportedValues |= RX_SERVER_DEBUG_ALL_CONN;
6558         }
6559         if (stat->version >= RX_DEBUGI_VERSION_W_RXSTATS) {
6560             *supportedValues |= RX_SERVER_DEBUG_RX_STATS;
6561         }
6562         if (stat->version >= RX_DEBUGI_VERSION_W_WAITERS) {
6563             *supportedValues |= RX_SERVER_DEBUG_WAITER_CNT;
6564         }
6565         if (stat->version >= RX_DEBUGI_VERSION_W_IDLETHREADS) {
6566             *supportedValues |= RX_SERVER_DEBUG_IDLE_THREADS;
6567         }
6568         if (stat->version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6569             *supportedValues |= RX_SERVER_DEBUG_NEW_PACKETS;
6570         }
6571         if (stat->version >= RX_DEBUGI_VERSION_W_GETPEER) {
6572             *supportedValues |= RX_SERVER_DEBUG_ALL_PEER;
6573         }
6574         if (stat->version >= RX_DEBUGI_VERSION_W_WAITED) {
6575             *supportedValues |= RX_SERVER_DEBUG_WAITED_CNT;
6576         }
6577
6578         stat->nFreePackets = ntohl(stat->nFreePackets);
6579         stat->packetReclaims = ntohl(stat->packetReclaims);
6580         stat->callsExecuted = ntohl(stat->callsExecuted);
6581         stat->nWaiting = ntohl(stat->nWaiting);
6582         stat->idleThreads = ntohl(stat->idleThreads);
6583     }
6584
6585     return rc;
6586 }
6587
6588 afs_int32
6589 rx_GetServerStats(osi_socket socket, afs_uint32 remoteAddr,
6590                   afs_uint16 remotePort, struct rx_stats * stat,
6591                   afs_uint32 * supportedValues)
6592 {
6593     struct rx_debugIn in;
6594     afs_int32 *lp = (afs_int32 *) stat;
6595     int i;
6596     afs_int32 rc = 0;
6597
6598     /*
6599      * supportedValues is currently unused, but added to allow future
6600      * versioning of this function.
6601      */
6602
6603     *supportedValues = 0;
6604     in.type = htonl(RX_DEBUGI_RXSTATS);
6605     in.index = 0;
6606     memset(stat, 0, sizeof(*stat));
6607
6608     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6609                        &in, sizeof(in), stat, sizeof(*stat));
6610
6611     if (rc >= 0) {
6612
6613         /*
6614          * Do net to host conversion here
6615          */
6616
6617         for (i = 0; i < sizeof(*stat) / sizeof(afs_int32); i++, lp++) {
6618             *lp = ntohl(*lp);
6619         }
6620     }
6621
6622     return rc;
6623 }
6624
6625 afs_int32
6626 rx_GetServerVersion(osi_socket socket, afs_uint32 remoteAddr,
6627                     afs_uint16 remotePort, size_t version_length,
6628                     char *version)
6629 {
6630     char a[1] = { 0 };
6631     return MakeDebugCall(socket, remoteAddr, remotePort,
6632                          RX_PACKET_TYPE_VERSION, a, 1, version,
6633                          version_length);
6634 }
6635
6636 afs_int32
6637 rx_GetServerConnections(osi_socket socket, afs_uint32 remoteAddr,
6638                         afs_uint16 remotePort, afs_int32 * nextConnection,
6639                         int allConnections, afs_uint32 debugSupportedValues,
6640                         struct rx_debugConn * conn,
6641                         afs_uint32 * supportedValues)
6642 {
6643     struct rx_debugIn in;
6644     afs_int32 rc = 0;
6645     int i;
6646
6647     /*
6648      * supportedValues is currently unused, but added to allow future
6649      * versioning of this function.
6650      */
6651
6652     *supportedValues = 0;
6653     if (allConnections) {
6654         in.type = htonl(RX_DEBUGI_GETALLCONN);
6655     } else {
6656         in.type = htonl(RX_DEBUGI_GETCONN);
6657     }
6658     in.index = htonl(*nextConnection);
6659     memset(conn, 0, sizeof(*conn));
6660
6661     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6662                        &in, sizeof(in), conn, sizeof(*conn));
6663
6664     if (rc >= 0) {
6665         *nextConnection += 1;
6666
6667         /*
6668          * Convert old connection format to new structure.
6669          */
6670
6671         if (debugSupportedValues & RX_SERVER_DEBUG_OLD_CONN) {
6672             struct rx_debugConn_vL *vL = (struct rx_debugConn_vL *)conn;
6673 #define MOVEvL(a) (conn->a = vL->a)
6674
6675             /* any old or unrecognized version... */
6676             for (i = 0; i < RX_MAXCALLS; i++) {
6677                 MOVEvL(callState[i]);
6678                 MOVEvL(callMode[i]);
6679                 MOVEvL(callFlags[i]);
6680                 MOVEvL(callOther[i]);
6681             }
6682             if (debugSupportedValues & RX_SERVER_DEBUG_SEC_STATS) {
6683                 MOVEvL(secStats.type);
6684                 MOVEvL(secStats.level);
6685                 MOVEvL(secStats.flags);
6686                 MOVEvL(secStats.expires);
6687                 MOVEvL(secStats.packetsReceived);
6688                 MOVEvL(secStats.packetsSent);
6689                 MOVEvL(secStats.bytesReceived);
6690                 MOVEvL(secStats.bytesSent);
6691             }
6692         }
6693
6694         /*
6695          * Do net to host conversion here
6696          * NOTE:
6697          *    I don't convert host or port since we are most likely
6698          *    going to want these in NBO.
6699          */
6700         conn->cid = ntohl(conn->cid);
6701         conn->serial = ntohl(conn->serial);
6702         for (i = 0; i < RX_MAXCALLS; i++) {
6703             conn->callNumber[i] = ntohl(conn->callNumber[i]);
6704         }
6705         conn->error = ntohl(conn->error);
6706         conn->secStats.flags = ntohl(conn->secStats.flags);
6707         conn->secStats.expires = ntohl(conn->secStats.expires);
6708         conn->secStats.packetsReceived =
6709             ntohl(conn->secStats.packetsReceived);
6710         conn->secStats.packetsSent = ntohl(conn->secStats.packetsSent);
6711         conn->secStats.bytesReceived = ntohl(conn->secStats.bytesReceived);
6712         conn->secStats.bytesSent = ntohl(conn->secStats.bytesSent);
6713         conn->epoch = ntohl(conn->epoch);
6714         conn->natMTU = ntohl(conn->natMTU);
6715     }
6716
6717     return rc;
6718 }
6719
6720 afs_int32
6721 rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr,
6722                   afs_uint16 remotePort, afs_int32 * nextPeer,
6723                   afs_uint32 debugSupportedValues, struct rx_debugPeer * peer,
6724                   afs_uint32 * supportedValues)
6725 {
6726     struct rx_debugIn in;
6727     afs_int32 rc = 0;
6728
6729     /*
6730      * supportedValues is currently unused, but added to allow future
6731      * versioning of this function.
6732      */
6733
6734     *supportedValues = 0;
6735     in.type = htonl(RX_DEBUGI_GETPEER);
6736     in.index = htonl(*nextPeer);
6737     memset(peer, 0, sizeof(*peer));
6738
6739     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6740                        &in, sizeof(in), peer, sizeof(*peer));
6741
6742     if (rc >= 0) {
6743         *nextPeer += 1;
6744
6745         /*
6746          * Do net to host conversion here
6747          * NOTE:
6748          *    I don't convert host or port since we are most likely
6749          *    going to want these in NBO.
6750          */
6751         peer->ifMTU = ntohs(peer->ifMTU);
6752         peer->idleWhen = ntohl(peer->idleWhen);
6753         peer->refCount = ntohs(peer->refCount);
6754         peer->burstWait.sec = ntohl(peer->burstWait.sec);
6755         peer->burstWait.usec = ntohl(peer->burstWait.usec);
6756         peer->rtt = ntohl(peer->rtt);
6757         peer->rtt_dev = ntohl(peer->rtt_dev);
6758         peer->timeout.sec = ntohl(peer->timeout.sec);
6759         peer->timeout.usec = ntohl(peer->timeout.usec);
6760         peer->nSent = ntohl(peer->nSent);
6761         peer->reSends = ntohl(peer->reSends);
6762         peer->inPacketSkew = ntohl(peer->inPacketSkew);
6763         peer->outPacketSkew = ntohl(peer->outPacketSkew);
6764         peer->rateFlag = ntohl(peer->rateFlag);
6765         peer->natMTU = ntohs(peer->natMTU);
6766         peer->maxMTU = ntohs(peer->maxMTU);
6767         peer->maxDgramPackets = ntohs(peer->maxDgramPackets);
6768         peer->ifDgramPackets = ntohs(peer->ifDgramPackets);
6769         peer->MTU = ntohs(peer->MTU);
6770         peer->cwind = ntohs(peer->cwind);
6771         peer->nDgramPackets = ntohs(peer->nDgramPackets);
6772         peer->congestSeq = ntohs(peer->congestSeq);
6773         peer->bytesSent.high = ntohl(peer->bytesSent.high);
6774         peer->bytesSent.low = ntohl(peer->bytesSent.low);
6775         peer->bytesReceived.high = ntohl(peer->bytesReceived.high);
6776         peer->bytesReceived.low = ntohl(peer->bytesReceived.low);
6777     }
6778
6779     return rc;
6780 }
6781 #endif /* RXDEBUG */
6782
6783 void
6784 shutdown_rx(void)
6785 {
6786     struct rx_serverQueueEntry *np;
6787     register int i, j;
6788 #ifndef KERNEL
6789     register struct rx_call *call;
6790     register struct rx_serverQueueEntry *sq;
6791 #endif /* KERNEL */
6792
6793     LOCK_RX_INIT;
6794     if (rxinit_status == 1) {
6795         UNLOCK_RX_INIT;
6796         return;                 /* Already shutdown. */
6797     }
6798 #ifndef KERNEL
6799     rx_port = 0;
6800 #ifndef AFS_PTHREAD_ENV
6801     FD_ZERO(&rx_selectMask);
6802 #endif /* AFS_PTHREAD_ENV */
6803     rxi_dataQuota = RX_MAX_QUOTA;
6804 #ifndef AFS_PTHREAD_ENV
6805     rxi_StopListener();
6806 #endif /* AFS_PTHREAD_ENV */
6807     shutdown_rxevent();
6808     rx_SetEpoch(0);
6809 #ifndef AFS_PTHREAD_ENV
6810 #ifndef AFS_USE_GETTIMEOFDAY
6811     clock_UnInit();
6812 #endif /* AFS_USE_GETTIMEOFDAY */
6813 #endif /* AFS_PTHREAD_ENV */
6814
6815     while (!queue_IsEmpty(&rx_freeCallQueue)) {
6816         call = queue_First(&rx_freeCallQueue, rx_call);
6817         queue_Remove(call);
6818         rxi_Free(call, sizeof(struct rx_call));
6819     }
6820
6821     while (!queue_IsEmpty(&rx_idleServerQueue)) {
6822         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
6823         queue_Remove(sq);
6824     }
6825 #endif /* KERNEL */
6826
6827     {
6828         struct rx_peer **peer_ptr, **peer_end;
6829         for (peer_ptr = &rx_peerHashTable[0], peer_end =
6830              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
6831              peer_ptr++) {
6832             struct rx_peer *peer, *next;
6833             for (peer = *peer_ptr; peer; peer = next) {
6834                 rx_interface_stat_p rpc_stat, nrpc_stat;
6835                 size_t space;
6836                 for (queue_Scan
6837                      (&peer->rpcStats, rpc_stat, nrpc_stat,
6838                       rx_interface_stat)) {
6839                     unsigned int num_funcs;
6840                     if (!rpc_stat)
6841                         break;
6842                     queue_Remove(&rpc_stat->queue_header);
6843                     queue_Remove(&rpc_stat->all_peers);
6844                     num_funcs = rpc_stat->stats[0].func_total;
6845                     space =
6846                         sizeof(rx_interface_stat_t) +
6847                         rpc_stat->stats[0].func_total *
6848                         sizeof(rx_function_entry_v1_t);
6849
6850                     rxi_Free(rpc_stat, space);
6851                     MUTEX_ENTER(&rx_rpc_stats);
6852                     rxi_rpc_peer_stat_cnt -= num_funcs;
6853                     MUTEX_EXIT(&rx_rpc_stats);
6854                 }
6855                 next = peer->next;
6856                 rxi_FreePeer(peer);
6857                 rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
6858             }
6859         }
6860     }
6861     for (i = 0; i < RX_MAX_SERVICES; i++) {
6862         if (rx_services[i])
6863             rxi_Free(rx_services[i], sizeof(*rx_services[i]));
6864     }
6865     for (i = 0; i < rx_hashTableSize; i++) {
6866         register struct rx_connection *tc, *ntc;
6867         MUTEX_ENTER(&rx_connHashTable_lock);
6868         for (tc = rx_connHashTable[i]; tc; tc = ntc) {
6869             ntc = tc->next;
6870             for (j = 0; j < RX_MAXCALLS; j++) {
6871                 if (tc->call[j]) {
6872                     rxi_Free(tc->call[j], sizeof(*tc->call[j]));
6873                 }
6874             }
6875             rxi_Free(tc, sizeof(*tc));
6876         }
6877         MUTEX_EXIT(&rx_connHashTable_lock);
6878     }
6879
6880     MUTEX_ENTER(&freeSQEList_lock);
6881
6882     while ((np = rx_FreeSQEList)) {
6883         rx_FreeSQEList = *(struct rx_serverQueueEntry **)np;
6884         MUTEX_DESTROY(&np->lock);
6885         rxi_Free(np, sizeof(*np));
6886     }
6887
6888     MUTEX_EXIT(&freeSQEList_lock);
6889     MUTEX_DESTROY(&freeSQEList_lock);
6890     MUTEX_DESTROY(&rx_freeCallQueue_lock);
6891     MUTEX_DESTROY(&rx_connHashTable_lock);
6892     MUTEX_DESTROY(&rx_peerHashTable_lock);
6893     MUTEX_DESTROY(&rx_serverPool_lock);
6894
6895     osi_Free(rx_connHashTable,
6896              rx_hashTableSize * sizeof(struct rx_connection *));
6897     osi_Free(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6898
6899     UNPIN(rx_connHashTable,
6900           rx_hashTableSize * sizeof(struct rx_connection *));
6901     UNPIN(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6902
6903     rxi_FreeAllPackets();
6904
6905     MUTEX_ENTER(&rx_stats_mutex);
6906     rxi_dataQuota = RX_MAX_QUOTA;
6907     rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0;
6908     MUTEX_EXIT(&rx_stats_mutex);
6909
6910     rxinit_status = 1;
6911     UNLOCK_RX_INIT;
6912 }
6913
6914 #ifdef RX_ENABLE_LOCKS
6915 void
6916 osirx_AssertMine(afs_kmutex_t * lockaddr, char *msg)
6917 {
6918     if (!MUTEX_ISMINE(lockaddr))
6919         osi_Panic("Lock not held: %s", msg);
6920 }
6921 #endif /* RX_ENABLE_LOCKS */
6922
6923 #ifndef KERNEL
6924
6925 /*
6926  * Routines to implement connection specific data.
6927  */
6928
6929 int
6930 rx_KeyCreate(rx_destructor_t rtn)
6931 {
6932     int key;
6933     MUTEX_ENTER(&rxi_keyCreate_lock);
6934     key = rxi_keyCreate_counter++;
6935     rxi_keyCreate_destructor = (rx_destructor_t *)
6936         realloc((void *)rxi_keyCreate_destructor,
6937                 (key + 1) * sizeof(rx_destructor_t));
6938     rxi_keyCreate_destructor[key] = rtn;
6939     MUTEX_EXIT(&rxi_keyCreate_lock);
6940     return key;
6941 }
6942
6943 void
6944 rx_SetSpecific(struct rx_connection *conn, int key, void *ptr)
6945 {
6946     int i;
6947     MUTEX_ENTER(&conn->conn_data_lock);
6948     if (!conn->specific) {
6949         conn->specific = (void **)malloc((key + 1) * sizeof(void *));
6950         for (i = 0; i < key; i++)
6951             conn->specific[i] = NULL;
6952         conn->nSpecific = key + 1;
6953         conn->specific[key] = ptr;
6954     } else if (key >= conn->nSpecific) {
6955         conn->specific = (void **)
6956             realloc(conn->specific, (key + 1) * sizeof(void *));
6957         for (i = conn->nSpecific; i < key; i++)
6958             conn->specific[i] = NULL;
6959         conn->nSpecific = key + 1;
6960         conn->specific[key] = ptr;
6961     } else {
6962         if (conn->specific[key] && rxi_keyCreate_destructor[key])
6963             (*rxi_keyCreate_destructor[key]) (conn->specific[key]);
6964         conn->specific[key] = ptr;
6965     }
6966     MUTEX_EXIT(&conn->conn_data_lock);
6967 }
6968
6969 void *
6970 rx_GetSpecific(struct rx_connection *conn, int key)
6971 {
6972     void *ptr;
6973     MUTEX_ENTER(&conn->conn_data_lock);
6974     if (key >= conn->nSpecific)
6975         ptr = NULL;
6976     else
6977         ptr = conn->specific[key];
6978     MUTEX_EXIT(&conn->conn_data_lock);
6979     return ptr;
6980 }
6981
6982 #endif /* !KERNEL */
6983
6984 /*
6985  * processStats is a queue used to store the statistics for the local
6986  * process.  Its contents are similar to the contents of the rpcStats
6987  * queue on a rx_peer structure, but the actual data stored within
6988  * this queue contains totals across the lifetime of the process (assuming
6989  * the stats have not been reset) - unlike the per peer structures
6990  * which can come and go based upon the peer lifetime.
6991  */
6992
6993 static struct rx_queue processStats = { &processStats, &processStats };
6994
6995 /*
6996  * peerStats is a queue used to store the statistics for all peer structs.
6997  * Its contents are the union of all the peer rpcStats queues.
6998  */
6999
7000 static struct rx_queue peerStats = { &peerStats, &peerStats };
7001
7002 /*
7003  * rxi_monitor_processStats is used to turn process wide stat collection
7004  * on and off
7005  */
7006
7007 static int rxi_monitor_processStats = 0;
7008
7009 /*
7010  * rxi_monitor_peerStats is used to turn per peer stat collection on and off
7011  */
7012
7013 static int rxi_monitor_peerStats = 0;
7014
7015 /*
7016  * rxi_AddRpcStat - given all of the information for a particular rpc
7017  * call, create (if needed) and update the stat totals for the rpc.
7018  *
7019  * PARAMETERS
7020  *
7021  * IN stats - the queue of stats that will be updated with the new value
7022  *
7023  * IN rxInterface - a unique number that identifies the rpc interface
7024  *
7025  * IN currentFunc - the index of the function being invoked
7026  *
7027  * IN totalFunc - the total number of functions in this interface
7028  *
7029  * IN queueTime - the amount of time this function waited for a thread
7030  *
7031  * IN execTime - the amount of time this function invocation took to execute
7032  *
7033  * IN bytesSent - the number bytes sent by this invocation
7034  *
7035  * IN bytesRcvd - the number bytes received by this invocation
7036  *
7037  * IN isServer - if true, this invocation was made to a server
7038  *
7039  * IN remoteHost - the ip address of the remote host
7040  *
7041  * IN remotePort - the port of the remote host
7042  *
7043  * IN addToPeerList - if != 0, add newly created stat to the global peer list
7044  *
7045  * INOUT counter - if a new stats structure is allocated, the counter will
7046  * be updated with the new number of allocated stat structures
7047  *
7048  * RETURN CODES
7049  *
7050  * Returns void.
7051  */
7052
7053 static int
7054 rxi_AddRpcStat(struct rx_queue *stats, afs_uint32 rxInterface,
7055                afs_uint32 currentFunc, afs_uint32 totalFunc,
7056                struct clock *queueTime, struct clock *execTime,
7057                afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd, int isServer,
7058                afs_uint32 remoteHost, afs_uint32 remotePort,
7059                int addToPeerList, unsigned int *counter)
7060 {
7061     int rc = 0;
7062     rx_interface_stat_p rpc_stat, nrpc_stat;
7063
7064     /*
7065      * See if there's already a structure for this interface
7066      */
7067
7068     for (queue_Scan(stats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7069         if ((rpc_stat->stats[0].interfaceId == rxInterface)
7070             && (rpc_stat->stats[0].remote_is_server == isServer))
7071             break;
7072     }
7073
7074     /*
7075      * Didn't find a match so allocate a new structure and add it to the
7076      * queue.
7077      */
7078
7079     if (queue_IsEnd(stats, rpc_stat) || (rpc_stat == NULL)
7080         || (rpc_stat->stats[0].interfaceId != rxInterface)
7081         || (rpc_stat->stats[0].remote_is_server != isServer)) {
7082         int i;
7083         size_t space;
7084
7085         space =
7086             sizeof(rx_interface_stat_t) +
7087             totalFunc * sizeof(rx_function_entry_v1_t);
7088
7089         rpc_stat = (rx_interface_stat_p) rxi_Alloc(space);
7090         if (rpc_stat == NULL) {
7091             rc = 1;
7092             goto fail;
7093         }
7094         *counter += totalFunc;
7095         for (i = 0; i < totalFunc; i++) {
7096             rpc_stat->stats[i].remote_peer = remoteHost;
7097             rpc_stat->stats[i].remote_port = remotePort;
7098             rpc_stat->stats[i].remote_is_server = isServer;
7099             rpc_stat->stats[i].interfaceId = rxInterface;
7100             rpc_stat->stats[i].func_total = totalFunc;
7101             rpc_stat->stats[i].func_index = i;
7102             hzero(rpc_stat->stats[i].invocations);
7103             hzero(rpc_stat->stats[i].bytes_sent);
7104             hzero(rpc_stat->stats[i].bytes_rcvd);
7105             rpc_stat->stats[i].queue_time_sum.sec = 0;
7106             rpc_stat->stats[i].queue_time_sum.usec = 0;
7107             rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7108             rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7109             rpc_stat->stats[i].queue_time_min.sec = 9999999;
7110             rpc_stat->stats[i].queue_time_min.usec = 9999999;
7111             rpc_stat->stats[i].queue_time_max.sec = 0;
7112             rpc_stat->stats[i].queue_time_max.usec = 0;
7113             rpc_stat->stats[i].execution_time_sum.sec = 0;
7114             rpc_stat->stats[i].execution_time_sum.usec = 0;
7115             rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7116             rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7117             rpc_stat->stats[i].execution_time_min.sec = 9999999;
7118             rpc_stat->stats[i].execution_time_min.usec = 9999999;
7119             rpc_stat->stats[i].execution_time_max.sec = 0;
7120             rpc_stat->stats[i].execution_time_max.usec = 0;
7121         }
7122         queue_Prepend(stats, rpc_stat);
7123         if (addToPeerList) {
7124             queue_Prepend(&peerStats, &rpc_stat->all_peers);
7125         }
7126     }
7127
7128     /*
7129      * Increment the stats for this function
7130      */
7131
7132     hadd32(rpc_stat->stats[currentFunc].invocations, 1);
7133     hadd(rpc_stat->stats[currentFunc].bytes_sent, *bytesSent);
7134     hadd(rpc_stat->stats[currentFunc].bytes_rcvd, *bytesRcvd);
7135     clock_Add(&rpc_stat->stats[currentFunc].queue_time_sum, queueTime);
7136     clock_AddSq(&rpc_stat->stats[currentFunc].queue_time_sum_sqr, queueTime);
7137     if (clock_Lt(queueTime, &rpc_stat->stats[currentFunc].queue_time_min)) {
7138         rpc_stat->stats[currentFunc].queue_time_min = *queueTime;
7139     }
7140     if (clock_Gt(queueTime, &rpc_stat->stats[currentFunc].queue_time_max)) {
7141         rpc_stat->stats[currentFunc].queue_time_max = *queueTime;
7142     }
7143     clock_Add(&rpc_stat->stats[currentFunc].execution_time_sum, execTime);
7144     clock_AddSq(&rpc_stat->stats[currentFunc].execution_time_sum_sqr,
7145                 execTime);
7146     if (clock_Lt(execTime, &rpc_stat->stats[currentFunc].execution_time_min)) {
7147         rpc_stat->stats[currentFunc].execution_time_min = *execTime;
7148     }
7149     if (clock_Gt(execTime, &rpc_stat->stats[currentFunc].execution_time_max)) {
7150         rpc_stat->stats[currentFunc].execution_time_max = *execTime;
7151     }
7152
7153   fail:
7154     return rc;
7155 }
7156
7157 /*
7158  * rx_IncrementTimeAndCount - increment the times and count for a particular
7159  * rpc function.
7160  *
7161  * PARAMETERS
7162  *
7163  * IN peer - the peer who invoked the rpc
7164  *
7165  * IN rxInterface - a unique number that identifies the rpc interface
7166  *
7167  * IN currentFunc - the index of the function being invoked
7168  *
7169  * IN totalFunc - the total number of functions in this interface
7170  *
7171  * IN queueTime - the amount of time this function waited for a thread
7172  *
7173  * IN execTime - the amount of time this function invocation took to execute
7174  *
7175  * IN bytesSent - the number bytes sent by this invocation
7176  *
7177  * IN bytesRcvd - the number bytes received by this invocation
7178  *
7179  * IN isServer - if true, this invocation was made to a server
7180  *
7181  * RETURN CODES
7182  *
7183  * Returns void.
7184  */
7185
7186 void
7187 rx_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
7188                          afs_uint32 currentFunc, afs_uint32 totalFunc,
7189                          struct clock *queueTime, struct clock *execTime,
7190                          afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd,
7191                          int isServer)
7192 {
7193
7194     if (!(rxi_monitor_peerStats || rxi_monitor_processStats))
7195         return;
7196
7197     MUTEX_ENTER(&rx_rpc_stats);
7198     MUTEX_ENTER(&peer->peer_lock);
7199
7200     if (rxi_monitor_peerStats) {
7201         rxi_AddRpcStat(&peer->rpcStats, rxInterface, currentFunc, totalFunc,
7202                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7203                        peer->host, peer->port, 1, &rxi_rpc_peer_stat_cnt);
7204     }
7205
7206     if (rxi_monitor_processStats) {
7207         rxi_AddRpcStat(&processStats, rxInterface, currentFunc, totalFunc,
7208                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7209                        0xffffffff, 0xffffffff, 0, &rxi_rpc_process_stat_cnt);
7210     }
7211
7212     MUTEX_EXIT(&peer->peer_lock);
7213     MUTEX_EXIT(&rx_rpc_stats);
7214
7215 }
7216
7217 /*
7218  * rx_MarshallProcessRPCStats - marshall an array of rpc statistics
7219  *
7220  * PARAMETERS
7221  *
7222  * IN callerVersion - the rpc stat version of the caller.
7223  *
7224  * IN count - the number of entries to marshall.
7225  *
7226  * IN stats - pointer to stats to be marshalled.
7227  *
7228  * OUT ptr - Where to store the marshalled data.
7229  *
7230  * RETURN CODES
7231  *
7232  * Returns void.
7233  */
7234 void
7235 rx_MarshallProcessRPCStats(afs_uint32 callerVersion, int count,
7236                            rx_function_entry_v1_t * stats, afs_uint32 ** ptrP)
7237 {
7238     int i;
7239     afs_uint32 *ptr;
7240
7241     /*
7242      * We only support the first version
7243      */
7244     for (ptr = *ptrP, i = 0; i < count; i++, stats++) {
7245         *(ptr++) = stats->remote_peer;
7246         *(ptr++) = stats->remote_port;
7247         *(ptr++) = stats->remote_is_server;
7248         *(ptr++) = stats->interfaceId;
7249         *(ptr++) = stats->func_total;
7250         *(ptr++) = stats->func_index;
7251         *(ptr++) = hgethi(stats->invocations);
7252         *(ptr++) = hgetlo(stats->invocations);
7253         *(ptr++) = hgethi(stats->bytes_sent);
7254         *(ptr++) = hgetlo(stats->bytes_sent);
7255         *(ptr++) = hgethi(stats->bytes_rcvd);
7256         *(ptr++) = hgetlo(stats->bytes_rcvd);
7257         *(ptr++) = stats->queue_time_sum.sec;
7258         *(ptr++) = stats->queue_time_sum.usec;
7259         *(ptr++) = stats->queue_time_sum_sqr.sec;
7260         *(ptr++) = stats->queue_time_sum_sqr.usec;
7261         *(ptr++) = stats->queue_time_min.sec;
7262         *(ptr++) = stats->queue_time_min.usec;
7263         *(ptr++) = stats->queue_time_max.sec;
7264         *(ptr++) = stats->queue_time_max.usec;
7265         *(ptr++) = stats->execution_time_sum.sec;
7266         *(ptr++) = stats->execution_time_sum.usec;
7267         *(ptr++) = stats->execution_time_sum_sqr.sec;
7268         *(ptr++) = stats->execution_time_sum_sqr.usec;
7269         *(ptr++) = stats->execution_time_min.sec;
7270         *(ptr++) = stats->execution_time_min.usec;
7271         *(ptr++) = stats->execution_time_max.sec;
7272         *(ptr++) = stats->execution_time_max.usec;
7273     }
7274     *ptrP = ptr;
7275 }
7276
7277 /*
7278  * rx_RetrieveProcessRPCStats - retrieve all of the rpc statistics for
7279  * this process
7280  *
7281  * PARAMETERS
7282  *
7283  * IN callerVersion - the rpc stat version of the caller
7284  *
7285  * OUT myVersion - the rpc stat version of this function
7286  *
7287  * OUT clock_sec - local time seconds
7288  *
7289  * OUT clock_usec - local time microseconds
7290  *
7291  * OUT allocSize - the number of bytes allocated to contain stats
7292  *
7293  * OUT statCount - the number stats retrieved from this process.
7294  *
7295  * OUT stats - the actual stats retrieved from this process.
7296  *
7297  * RETURN CODES
7298  *
7299  * Returns void.  If successful, stats will != NULL.
7300  */
7301
7302 int
7303 rx_RetrieveProcessRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7304                            afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7305                            size_t * allocSize, afs_uint32 * statCount,
7306                            afs_uint32 ** stats)
7307 {
7308     size_t space = 0;
7309     afs_uint32 *ptr;
7310     struct clock now;
7311     int rc = 0;
7312
7313     *stats = 0;
7314     *allocSize = 0;
7315     *statCount = 0;
7316     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7317
7318     /*
7319      * Check to see if stats are enabled
7320      */
7321
7322     MUTEX_ENTER(&rx_rpc_stats);
7323     if (!rxi_monitor_processStats) {
7324         MUTEX_EXIT(&rx_rpc_stats);
7325         return rc;
7326     }
7327
7328     clock_GetTime(&now);
7329     *clock_sec = now.sec;
7330     *clock_usec = now.usec;
7331
7332     /*
7333      * Allocate the space based upon the caller version
7334      *
7335      * If the client is at an older version than we are,
7336      * we return the statistic data in the older data format, but
7337      * we still return our version number so the client knows we
7338      * are maintaining more data than it can retrieve.
7339      */
7340
7341     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7342         space = rxi_rpc_process_stat_cnt * sizeof(rx_function_entry_v1_t);
7343         *statCount = rxi_rpc_process_stat_cnt;
7344     } else {
7345         /*
7346          * This can't happen yet, but in the future version changes
7347          * can be handled by adding additional code here
7348          */
7349     }
7350
7351     if (space > (size_t) 0) {
7352         *allocSize = space;
7353         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7354
7355         if (ptr != NULL) {
7356             rx_interface_stat_p rpc_stat, nrpc_stat;
7357
7358
7359             for (queue_Scan
7360                  (&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7361                 /*
7362                  * Copy the data based upon the caller version
7363                  */
7364                 rx_MarshallProcessRPCStats(callerVersion,
7365                                            rpc_stat->stats[0].func_total,
7366                                            rpc_stat->stats, &ptr);
7367             }
7368         } else {
7369             rc = ENOMEM;
7370         }
7371     }
7372     MUTEX_EXIT(&rx_rpc_stats);
7373     return rc;
7374 }
7375
7376 /*
7377  * rx_RetrievePeerRPCStats - retrieve all of the rpc statistics for the peers
7378  *
7379  * PARAMETERS
7380  *
7381  * IN callerVersion - the rpc stat version of the caller
7382  *
7383  * OUT myVersion - the rpc stat version of this function
7384  *
7385  * OUT clock_sec - local time seconds
7386  *
7387  * OUT clock_usec - local time microseconds
7388  *
7389  * OUT allocSize - the number of bytes allocated to contain stats
7390  *
7391  * OUT statCount - the number of stats retrieved from the individual
7392  * peer structures.
7393  *
7394  * OUT stats - the actual stats retrieved from the individual peer structures.
7395  *
7396  * RETURN CODES
7397  *
7398  * Returns void.  If successful, stats will != NULL.
7399  */
7400
7401 int
7402 rx_RetrievePeerRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7403                         afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7404                         size_t * allocSize, afs_uint32 * statCount,
7405                         afs_uint32 ** stats)
7406 {
7407     size_t space = 0;
7408     afs_uint32 *ptr;
7409     struct clock now;
7410     int rc = 0;
7411
7412     *stats = 0;
7413     *statCount = 0;
7414     *allocSize = 0;
7415     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7416
7417     /*
7418      * Check to see if stats are enabled
7419      */
7420
7421     MUTEX_ENTER(&rx_rpc_stats);
7422     if (!rxi_monitor_peerStats) {
7423         MUTEX_EXIT(&rx_rpc_stats);
7424         return rc;
7425     }
7426
7427     clock_GetTime(&now);
7428     *clock_sec = now.sec;
7429     *clock_usec = now.usec;
7430
7431     /*
7432      * Allocate the space based upon the caller version
7433      *
7434      * If the client is at an older version than we are,
7435      * we return the statistic data in the older data format, but
7436      * we still return our version number so the client knows we
7437      * are maintaining more data than it can retrieve.
7438      */
7439
7440     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7441         space = rxi_rpc_peer_stat_cnt * sizeof(rx_function_entry_v1_t);
7442         *statCount = rxi_rpc_peer_stat_cnt;
7443     } else {
7444         /*
7445          * This can't happen yet, but in the future version changes
7446          * can be handled by adding additional code here
7447          */
7448     }
7449
7450     if (space > (size_t) 0) {
7451         *allocSize = space;
7452         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7453
7454         if (ptr != NULL) {
7455             rx_interface_stat_p rpc_stat, nrpc_stat;
7456             char *fix_offset;
7457
7458             for (queue_Scan
7459                  (&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7460                 /*
7461                  * We have to fix the offset of rpc_stat since we are
7462                  * keeping this structure on two rx_queues.  The rx_queue
7463                  * package assumes that the rx_queue member is the first
7464                  * member of the structure.  That is, rx_queue assumes that
7465                  * any one item is only on one queue at a time.  We are
7466                  * breaking that assumption and so we have to do a little
7467                  * math to fix our pointers.
7468                  */
7469
7470                 fix_offset = (char *)rpc_stat;
7471                 fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7472                 rpc_stat = (rx_interface_stat_p) fix_offset;
7473
7474                 /*
7475                  * Copy the data based upon the caller version
7476                  */
7477                 rx_MarshallProcessRPCStats(callerVersion,
7478                                            rpc_stat->stats[0].func_total,
7479                                            rpc_stat->stats, &ptr);
7480             }
7481         } else {
7482             rc = ENOMEM;
7483         }
7484     }
7485     MUTEX_EXIT(&rx_rpc_stats);
7486     return rc;
7487 }
7488
7489 /*
7490  * rx_FreeRPCStats - free memory allocated by
7491  *                   rx_RetrieveProcessRPCStats and rx_RetrievePeerRPCStats
7492  *
7493  * PARAMETERS
7494  *
7495  * IN stats - stats previously returned by rx_RetrieveProcessRPCStats or
7496  * rx_RetrievePeerRPCStats
7497  *
7498  * IN allocSize - the number of bytes in stats.
7499  *
7500  * RETURN CODES
7501  *
7502  * Returns void.
7503  */
7504
7505 void
7506 rx_FreeRPCStats(afs_uint32 * stats, size_t allocSize)
7507 {
7508     rxi_Free(stats, allocSize);
7509 }
7510
7511 /*
7512  * rx_queryProcessRPCStats - see if process rpc stat collection is
7513  * currently enabled.
7514  *
7515  * PARAMETERS
7516  *
7517  * RETURN CODES
7518  *
7519  * Returns 0 if stats are not enabled != 0 otherwise
7520  */
7521
7522 int
7523 rx_queryProcessRPCStats(void)
7524 {
7525     int rc;
7526     MUTEX_ENTER(&rx_rpc_stats);
7527     rc = rxi_monitor_processStats;
7528     MUTEX_EXIT(&rx_rpc_stats);
7529     return rc;
7530 }
7531
7532 /*
7533  * rx_queryPeerRPCStats - see if peer stat collection is currently enabled.
7534  *
7535  * PARAMETERS
7536  *
7537  * RETURN CODES
7538  *
7539  * Returns 0 if stats are not enabled != 0 otherwise
7540  */
7541
7542 int
7543 rx_queryPeerRPCStats(void)
7544 {
7545     int rc;
7546     MUTEX_ENTER(&rx_rpc_stats);
7547     rc = rxi_monitor_peerStats;
7548     MUTEX_EXIT(&rx_rpc_stats);
7549     return rc;
7550 }
7551
7552 /*
7553  * rx_enableProcessRPCStats - begin rpc stat collection for entire process
7554  *
7555  * PARAMETERS
7556  *
7557  * RETURN CODES
7558  *
7559  * Returns void.
7560  */
7561
7562 void
7563 rx_enableProcessRPCStats(void)
7564 {
7565     MUTEX_ENTER(&rx_rpc_stats);
7566     rx_enable_stats = 1;
7567     rxi_monitor_processStats = 1;
7568     MUTEX_EXIT(&rx_rpc_stats);
7569 }
7570
7571 /*
7572  * rx_enablePeerRPCStats - begin rpc stat collection per peer structure
7573  *
7574  * PARAMETERS
7575  *
7576  * RETURN CODES
7577  *
7578  * Returns void.
7579  */
7580
7581 void
7582 rx_enablePeerRPCStats(void)
7583 {
7584     MUTEX_ENTER(&rx_rpc_stats);
7585     rx_enable_stats = 1;
7586     rxi_monitor_peerStats = 1;
7587     MUTEX_EXIT(&rx_rpc_stats);
7588 }
7589
7590 /*
7591  * rx_disableProcessRPCStats - stop rpc stat collection for entire process
7592  *
7593  * PARAMETERS
7594  *
7595  * RETURN CODES
7596  *
7597  * Returns void.
7598  */
7599
7600 void
7601 rx_disableProcessRPCStats(void)
7602 {
7603     rx_interface_stat_p rpc_stat, nrpc_stat;
7604     size_t space;
7605
7606     MUTEX_ENTER(&rx_rpc_stats);
7607
7608     /*
7609      * Turn off process statistics and if peer stats is also off, turn
7610      * off everything
7611      */
7612
7613     rxi_monitor_processStats = 0;
7614     if (rxi_monitor_peerStats == 0) {
7615         rx_enable_stats = 0;
7616     }
7617
7618     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7619         unsigned int num_funcs = 0;
7620         if (!rpc_stat)
7621             break;
7622         queue_Remove(rpc_stat);
7623         num_funcs = rpc_stat->stats[0].func_total;
7624         space =
7625             sizeof(rx_interface_stat_t) +
7626             rpc_stat->stats[0].func_total * sizeof(rx_function_entry_v1_t);
7627
7628         rxi_Free(rpc_stat, space);
7629         rxi_rpc_process_stat_cnt -= num_funcs;
7630     }
7631     MUTEX_EXIT(&rx_rpc_stats);
7632 }
7633
7634 /*
7635  * rx_disablePeerRPCStats - stop rpc stat collection for peers
7636  *
7637  * PARAMETERS
7638  *
7639  * RETURN CODES
7640  *
7641  * Returns void.
7642  */
7643
7644 void
7645 rx_disablePeerRPCStats(void)
7646 {
7647     struct rx_peer **peer_ptr, **peer_end;
7648     int code;
7649
7650     MUTEX_ENTER(&rx_rpc_stats);
7651
7652     /*
7653      * Turn off peer statistics and if process stats is also off, turn
7654      * off everything
7655      */
7656
7657     rxi_monitor_peerStats = 0;
7658     if (rxi_monitor_processStats == 0) {
7659         rx_enable_stats = 0;
7660     }
7661
7662     MUTEX_ENTER(&rx_peerHashTable_lock);
7663     for (peer_ptr = &rx_peerHashTable[0], peer_end =
7664          &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
7665          peer_ptr++) {
7666         struct rx_peer *peer, *next, *prev;
7667         for (prev = peer = *peer_ptr; peer; peer = next) {
7668             next = peer->next;
7669             code = MUTEX_TRYENTER(&peer->peer_lock);
7670             if (code) {
7671                 rx_interface_stat_p rpc_stat, nrpc_stat;
7672                 size_t space;
7673                 for (queue_Scan
7674                      (&peer->rpcStats, rpc_stat, nrpc_stat,
7675                       rx_interface_stat)) {
7676                     unsigned int num_funcs = 0;
7677                     if (!rpc_stat)
7678                         break;
7679                     queue_Remove(&rpc_stat->queue_header);
7680                     queue_Remove(&rpc_stat->all_peers);
7681                     num_funcs = rpc_stat->stats[0].func_total;
7682                     space =
7683                         sizeof(rx_interface_stat_t) +
7684                         rpc_stat->stats[0].func_total *
7685                         sizeof(rx_function_entry_v1_t);
7686
7687                     rxi_Free(rpc_stat, space);
7688                     rxi_rpc_peer_stat_cnt -= num_funcs;
7689                 }
7690                 MUTEX_EXIT(&peer->peer_lock);
7691                 if (prev == *peer_ptr) {
7692                     *peer_ptr = next;
7693                     prev = next;
7694                 } else
7695                     prev->next = next;
7696             } else {
7697                 prev = peer;
7698             }
7699         }
7700     }
7701     MUTEX_EXIT(&rx_peerHashTable_lock);
7702     MUTEX_EXIT(&rx_rpc_stats);
7703 }
7704
7705 /*
7706  * rx_clearProcessRPCStats - clear the contents of the rpc stats according
7707  * to clearFlag
7708  *
7709  * PARAMETERS
7710  *
7711  * IN clearFlag - flag indicating which stats to clear
7712  *
7713  * RETURN CODES
7714  *
7715  * Returns void.
7716  */
7717
7718 void
7719 rx_clearProcessRPCStats(afs_uint32 clearFlag)
7720 {
7721     rx_interface_stat_p rpc_stat, nrpc_stat;
7722
7723     MUTEX_ENTER(&rx_rpc_stats);
7724
7725     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7726         unsigned int num_funcs = 0, i;
7727         num_funcs = rpc_stat->stats[0].func_total;
7728         for (i = 0; i < num_funcs; i++) {
7729             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7730                 hzero(rpc_stat->stats[i].invocations);
7731             }
7732             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7733                 hzero(rpc_stat->stats[i].bytes_sent);
7734             }
7735             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7736                 hzero(rpc_stat->stats[i].bytes_rcvd);
7737             }
7738             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7739                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7740                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7741             }
7742             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7743                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7744                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7745             }
7746             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7747                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7748                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7749             }
7750             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7751                 rpc_stat->stats[i].queue_time_max.sec = 0;
7752                 rpc_stat->stats[i].queue_time_max.usec = 0;
7753             }
7754             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7755                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7756                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7757             }
7758             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7759                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7760                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7761             }
7762             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7763                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7764                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7765             }
7766             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7767                 rpc_stat->stats[i].execution_time_max.sec = 0;
7768                 rpc_stat->stats[i].execution_time_max.usec = 0;
7769             }
7770         }
7771     }
7772
7773     MUTEX_EXIT(&rx_rpc_stats);
7774 }
7775
7776 /*
7777  * rx_clearPeerRPCStats - clear the contents of the rpc stats according
7778  * to clearFlag
7779  *
7780  * PARAMETERS
7781  *
7782  * IN clearFlag - flag indicating which stats to clear
7783  *
7784  * RETURN CODES
7785  *
7786  * Returns void.
7787  */
7788
7789 void
7790 rx_clearPeerRPCStats(afs_uint32 clearFlag)
7791 {
7792     rx_interface_stat_p rpc_stat, nrpc_stat;
7793
7794     MUTEX_ENTER(&rx_rpc_stats);
7795
7796     for (queue_Scan(&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7797         unsigned int num_funcs = 0, i;
7798         char *fix_offset;
7799         /*
7800          * We have to fix the offset of rpc_stat since we are
7801          * keeping this structure on two rx_queues.  The rx_queue
7802          * package assumes that the rx_queue member is the first
7803          * member of the structure.  That is, rx_queue assumes that
7804          * any one item is only on one queue at a time.  We are
7805          * breaking that assumption and so we have to do a little
7806          * math to fix our pointers.
7807          */
7808
7809         fix_offset = (char *)rpc_stat;
7810         fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7811         rpc_stat = (rx_interface_stat_p) fix_offset;
7812
7813         num_funcs = rpc_stat->stats[0].func_total;
7814         for (i = 0; i < num_funcs; i++) {
7815             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7816                 hzero(rpc_stat->stats[i].invocations);
7817             }
7818             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7819                 hzero(rpc_stat->stats[i].bytes_sent);
7820             }
7821             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7822                 hzero(rpc_stat->stats[i].bytes_rcvd);
7823             }
7824             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7825                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7826                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7827             }
7828             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7829                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7830                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7831             }
7832             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7833                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7834                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7835             }
7836             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7837                 rpc_stat->stats[i].queue_time_max.sec = 0;
7838                 rpc_stat->stats[i].queue_time_max.usec = 0;
7839             }
7840             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7841                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7842                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7843             }
7844             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7845                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7846                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7847             }
7848             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7849                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7850                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7851             }
7852             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7853                 rpc_stat->stats[i].execution_time_max.sec = 0;
7854                 rpc_stat->stats[i].execution_time_max.usec = 0;
7855             }
7856         }
7857     }
7858
7859     MUTEX_EXIT(&rx_rpc_stats);
7860 }
7861
/*
 * rxi_rxstat_userok - hook deciding whether the caller of an rpc may
 * enable, disable or clear RX statistics; the routine returns 1 when the
 * caller is authorized.  NULL until one is installed through
 * rx_SetRxStatUserOk.
 */
static int (*rxi_rxstat_userok) (struct rx_call * call) = NULL;
7867
7868 void
7869 rx_SetRxStatUserOk(int (*proc) (struct rx_call * call))
7870 {
7871     rxi_rxstat_userok = proc;
7872 }
7873
7874 int
7875 rx_RxStatUserOk(struct rx_call *call)
7876 {
7877     if (!rxi_rxstat_userok)
7878         return 0;
7879     return rxi_rxstat_userok(call);
7880 }
7881
7882 #ifdef AFS_NT40_ENV
7883 /*
7884  * DllMain() -- Entry-point function called by the DllMainCRTStartup()
7885  *     function in the MSVC runtime DLL (msvcrt.dll).
7886  *
7887  *     Note: the system serializes calls to this function.
7888  */
7889 BOOL WINAPI
7890 DllMain(HINSTANCE dllInstHandle,        /* instance handle for this DLL module */
7891         DWORD reason,                   /* reason function is being called */
7892         LPVOID reserved)                /* reserved for future use */
7893 {
7894     switch (reason) {
7895     case DLL_PROCESS_ATTACH:
7896         /* library is being attached to a process */
7897         INIT_PTHREAD_LOCKS;
7898         return TRUE;
7899
7900     case DLL_PROCESS_DETACH:
7901         return TRUE;
7902
7903     default:
7904         return FALSE;
7905     }
7906 }
7907 #endif
7908