src/rx/rx.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* RX:  Extended Remote Procedure Call */
  11
  12 #include <afsconfig.h>
  13 #ifdef  KERNEL
  14 #include "afs/param.h"
  15 #else
  16 #include <afs/param.h>
  17 #endif
  18
  19 RCSID
  20     ("$Header$");
  21
  22 #ifdef KERNEL
  23 #include "afs/sysincludes.h"
  24 #include "afsincludes.h"
  25 #ifndef UKERNEL
  26 #include "h/types.h"
  27 #include "h/time.h"
  28 #include "h/stat.h"
  29 #ifdef  AFS_OSF_ENV
  30 #include <net/net_globals.h>
  31 #endif /* AFS_OSF_ENV */
  32 #ifdef AFS_LINUX20_ENV
  33 #include "h/socket.h"
  34 #endif
  35 #include "netinet/in.h"
  36 #ifdef AFS_SUN57_ENV
  37 #include "inet/common.h"
  38 #include "inet/ip.h"
  39 #include "inet/ip_ire.h"
  40 #endif
  41 #include "afs/afs_args.h"
  42 #include "afs/afs_osi.h"
  43 #ifdef RX_KERNEL_TRACE
  44 #include "rx_kcommon.h"
  45 #endif
  46 #if     (defined(AFS_AUX_ENV) || defined(AFS_AIX_ENV))
  47 #include "h/systm.h"
  48 #endif
  49 #ifdef RXDEBUG
  50 #undef RXDEBUG                  /* turn off debugging */
  51 #endif /* RXDEBUG */
  52 #if defined(AFS_SGI_ENV)
  53 #include "sys/debug.h"
  54 #endif
  55 #include "afsint.h"
  56 #ifdef  AFS_OSF_ENV
  57 #undef kmem_alloc
  58 #undef kmem_free
  59 #undef mem_alloc
  60 #undef mem_free
  61 #undef register
  62 #endif /* AFS_OSF_ENV */
  63 #else /* !UKERNEL */
  64 #include "afs/sysincludes.h"
  65 #include "afsincludes.h"
  66 #endif /* !UKERNEL */
  67 #include "afs/lock.h"
  68 #include "rx_kmutex.h"
  69 #include "rx_kernel.h"
  70 #include "rx_clock.h"
  71 #include "rx_queue.h"
  72 #include "rx.h"
  73 #include "rx_globals.h"
  74 #include "rx_trace.h"
  75 #define AFSOP_STOP_RXCALLBACK   210     /* Stop CALLBACK process */
  76 #define AFSOP_STOP_AFS          211     /* Stop AFS process */
  77 #define AFSOP_STOP_BKG          212     /* Stop BKG process */
  78 #include "afsint.h"
  79 extern afs_int32 afs_termState;
  80 #ifdef AFS_AIX41_ENV
  81 #include "sys/lockl.h"
  82 #include "sys/lock_def.h"
  83 #endif /* AFS_AIX41_ENV */
  84 # include "rxgen_consts.h"
  85 #else /* KERNEL */
  86 # include <sys/types.h>
  87 # include <string.h>
  88 # include <errno.h>
  89 #ifdef AFS_NT40_ENV
  90 # include <stdlib.h>
  91 # include <fcntl.h>
  92 # include <afs/afsutil.h>
  93 # include <WINNT\afsreg.h>
  94 #else
  95 # include <sys/socket.h>
  96 # include <sys/file.h>
  97 # include <netdb.h>
  98 # include <sys/stat.h>
  99 # include <netinet/in.h>
 100 # include <sys/time.h>
 101 #endif
 102 # include "rx.h"
 103 # include "rx_user.h"
 104 # include "rx_clock.h"
 105 # include "rx_queue.h"
 106 # include "rx_globals.h"
 107 # include "rx_trace.h"
 108 # include <afs/rxgen_consts.h>
 109 #endif /* KERNEL */
 110
 111 int (*registerProgram) () = 0;	/* optional hook: when non-NULL, rx_StartServer() calls it with the donated thread's id and its "srv_N" name */
 112 int (*swapNameProgram) () = 0;	/* optional hook; not called in this part of the file -- NOTE(review): purpose not visible here, confirm at its call sites */
 113
 114 /* Local static routines */
 115 static void rxi_DestroyConnectionNoLock(register struct rx_connection *conn);
 116 #ifdef RX_ENABLE_LOCKS
 117 static void rxi_SetAcksInTransmitQueue(register struct rx_call *call);
 118 #endif
 119
 120 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
 121 struct rx_tq_debug {
 122     afs_int32 rxi_start_aborted;        /* rxi_start awoke after rxi_Send in error. */
 123     afs_int32 rxi_start_in_error;
 124 } rx_tq_debug;
 125 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
 126
 127 /*
 128  * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
 129  * currently allocated within rx.  This number is used to allocate the
 130  * memory required to return the statistics when queried.
 131  */
 132
 133 static unsigned int rxi_rpc_peer_stat_cnt;
 134
 135 /*
 136  * rxi_rpc_process_stat_cnt counts the total number of local process stat
 137  * structures currently allocated within rx.  The number is used to allocate
 138  * the memory required to return the statistics when queried.
 139  */
 140
 141 static unsigned int rxi_rpc_process_stat_cnt;
 142
 143 #if !defined(offsetof)
 144 #include <stddef.h>             /* for definition of offsetof() */
 145 #endif
 146
 147 #ifdef AFS_PTHREAD_ENV
 148 #include <assert.h>
 149
 150 /*
 151  * Use procedural initialization of mutexes/condition variables
 152  * to ease NT porting
 153  */
 154
 155 extern pthread_mutex_t rx_stats_mutex;
 156 extern pthread_mutex_t des_init_mutex;
 157 extern pthread_mutex_t des_random_mutex;
 158 extern pthread_mutex_t rx_clock_mutex;
 159 extern pthread_mutex_t rxi_connCacheMutex;
 160 extern pthread_mutex_t rx_event_mutex;
 161 extern pthread_mutex_t osi_malloc_mutex;
 162 extern pthread_mutex_t event_handler_mutex;
 163 extern pthread_mutex_t listener_mutex;
 164 extern pthread_mutex_t rx_if_init_mutex;
 165 extern pthread_mutex_t rx_if_mutex;
 166 extern pthread_mutex_t rxkad_client_uid_mutex;
 167 extern pthread_mutex_t rxkad_random_mutex;
 168
 169 extern pthread_cond_t rx_event_handler_cond;
 170 extern pthread_cond_t rx_listener_cond;
 171
 172 static pthread_mutex_t epoch_mutex;
 173 static pthread_mutex_t rx_init_mutex;
 174 static pthread_mutex_t rx_debug_mutex;
 175
 176 static void
 177 rxi_InitPthread(void)
 178 {
 179     assert(pthread_mutex_init(&rx_clock_mutex, (const pthread_mutexattr_t *)0)
 180            == 0);
 181     assert(pthread_mutex_init(&rx_stats_mutex, (const pthread_mutexattr_t *)0)
 182            == 0);
 183     assert(pthread_mutex_init
 184            (&rxi_connCacheMutex, (const pthread_mutexattr_t *)0) == 0);
 185     assert(pthread_mutex_init(&rx_init_mutex, (const pthread_mutexattr_t *)0)
 186            == 0);
 187     assert(pthread_mutex_init(&epoch_mutex, (const pthread_mutexattr_t *)0) ==
 188            0);
 189     assert(pthread_mutex_init(&rx_event_mutex, (const pthread_mutexattr_t *)0)
 190            == 0);
 191     assert(pthread_mutex_init(&des_init_mutex, (const pthread_mutexattr_t *)0)
 192            == 0);
 193     assert(pthread_mutex_init
 194            (&des_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 195     assert(pthread_mutex_init
 196            (&osi_malloc_mutex, (const pthread_mutexattr_t *)0) == 0);
 197     assert(pthread_mutex_init
 198            (&event_handler_mutex, (const pthread_mutexattr_t *)0) == 0);
 199     assert(pthread_mutex_init(&listener_mutex, (const pthread_mutexattr_t *)0)
 200            == 0);
 201     assert(pthread_mutex_init
 202            (&rx_if_init_mutex, (const pthread_mutexattr_t *)0) == 0);
 203     assert(pthread_mutex_init(&rx_if_mutex, (const pthread_mutexattr_t *)0) ==
 204            0);
 205     assert(pthread_mutex_init
 206            (&rxkad_client_uid_mutex, (const pthread_mutexattr_t *)0) == 0);
 207     assert(pthread_mutex_init
 208            (&rxkad_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 209     assert(pthread_mutex_init(&rx_debug_mutex, (const pthread_mutexattr_t *)0)
 210            == 0);
 211
 212     assert(pthread_cond_init
 213            (&rx_event_handler_cond, (const pthread_condattr_t *)0) == 0);
 214     assert(pthread_cond_init(&rx_listener_cond, (const pthread_condattr_t *)0)
 215            == 0);
 216     assert(pthread_key_create(&rx_thread_id_key, NULL) == 0);
 217     assert(pthread_key_create(&rx_ts_info_key, NULL) == 0);
 218
 219     rxkad_global_stats_init();
 220 }
 221
 222 pthread_once_t rx_once_init = PTHREAD_ONCE_INIT;
 223 #define INIT_PTHREAD_LOCKS \
 224 assert(pthread_once(&rx_once_init, rxi_InitPthread)==0)
 225 /*
 226  * The rx_stats_mutex mutex protects the following global variables:
 227  * rxi_dataQuota
 228  * rxi_minDeficit
 229  * rxi_availProcs
 230  * rxi_totalMin
 231  * rxi_lowConnRefCount
 232  * rxi_lowPeerRefCount
 233  * rxi_nCalls
 234  * rxi_Alloccnt
 235  * rxi_Allocsize
 236  * rx_nFreePackets
 237  * rx_tq_debug
 238  * rx_stats
 239  */
 240 #else
 241 #define INIT_PTHREAD_LOCKS
 242 #endif
 243
 244
 245 /* Variables for handling the minProcs implementation.  availProcs gives the
 246  * number of threads available in the pool at this moment (not counting dudes
 247  * executing right now).  totalMin gives the total number of procs required
 248  * for handling all minProcs requests.  minDeficit is a dynamic variable
 249  * tracking the # of procs required to satisfy all of the remaining minProcs
 250  * demands.
 251  * For fine grain locking to work, the quota check and the reservation of
 252  * a server thread has to come while rxi_availProcs and rxi_minDeficit
 253  * are locked. To this end, the code has been modified under #ifdef
 254  * RX_ENABLE_LOCKS so that quota checks and reservation occur at the
 255  * same time. A new function, ReturnToServerPool() returns the allocation.
 256  *
 257  * A call can be on several queues (but only one at a time). When
 258  * rxi_ResetCall wants to remove the call from a queue, it has to ensure
 259  * that no one else is touching the queue. To this end, we store the address
 260  * of the queue lock in the call structure (under the call lock) when we
 261  * put the call on a queue, and we clear the call_queue_lock when the
 262  * call is removed from a queue (once the call lock has been obtained).
 263  * This allows rxi_ResetCall to safely synchronize with others wishing
 264  * to manipulate the queue.
 265  */
 266
 267 #ifdef RX_ENABLE_LOCKS
 268 static afs_kmutex_t rx_rpc_stats;
 269 void rxi_StartUnlocked();
 270 #endif
 271
 272 /* We keep a "last conn pointer" in rxi_FindConnection. The odds are
 273 ** pretty good that the next packet coming in is from the same connection
 274 ** as the last packet, since we send multiple packets in a transmit window.
 275 */
 276 struct rx_connection *rxLastConn = 0;
 277
 278 #ifdef RX_ENABLE_LOCKS
 279 /* The locking hierarchy for rx fine grain locking is composed of these
 280  * tiers:
 281  *
 282  * rx_connHashTable_lock - synchronizes conn creation, rx_connHashTable access
 283  * conn_call_lock - used to synchronize rx_EndCall and rx_NewCall
 284  * call->lock - locks call data fields.
 285  * These are independent of each other:
 286  *      rx_freeCallQueue_lock
 287  *      rxi_keyCreate_lock
 288  * rx_serverPool_lock
 289  * freeSQEList_lock
 290  *
 291  * serverQueueEntry->lock
 292  * rx_rpc_stats
 293  * rx_peerHashTable_lock - locked under rx_connHashTable_lock
 294  * peer->lock - locks peer data fields.
 295  * conn_data_lock - ensures that no more than one thread is updating a conn
 296  *                  data field at the same time.
 297  * rx_freePktQ_lock
 298  *
 299  * lowest level:
 300  *      multi_handle->lock
 301  *      rxevent_lock
 302  *      rx_stats_mutex
 303  *
 304  * Do we need a lock to protect the peer field in the conn structure?
 305  *      conn->peer was previously a constant for all intents and so has no
 306  *      lock protecting this field. The multihomed client delta introduced
 307  *      a RX code change : change the peer field in the connection structure
 308  *      to that remote interface from which the last packet for this
 309  *      connection was sent out. This may become an issue if further changes
 310  *      are made.
 311  */
 312 #define SET_CALL_QUEUE_LOCK(C, L) (C)->call_queue_lock = (L)
 313 #define CLEAR_CALL_QUEUE_LOCK(C) (C)->call_queue_lock = NULL
 314 #ifdef RX_LOCKS_DB
 315 /* rxdb_fileID is used to identify the lock location, along with line#. */
 316 static int rxdb_fileID = RXDB_FILE_RX;
 317 #endif /* RX_LOCKS_DB */
 318 #else /* RX_ENABLE_LOCKS */
 319 #define SET_CALL_QUEUE_LOCK(C, L)
 320 #define CLEAR_CALL_QUEUE_LOCK(C)
 321 #endif /* RX_ENABLE_LOCKS */
 322 struct rx_serverQueueEntry *rx_waitForPacket = 0;
 323 struct rx_serverQueueEntry *rx_waitingForPacket = 0;
 324
 325 /* ------------Exported Interfaces------------- */
 326
 327 /* This function allows rxkad to set the epoch to a suitably random number
 328  * which rx_NewConnection will use in the future.  The principal purpose is to
 329  * get rxnull connections to use the same epoch as the rxkad connections do, at
 330  * least once the first rxkad connection is established.  This is important now
 331  * that the host/port addresses aren't used in FindConnection: the uniqueness
 332  * of epoch/cid matters and the start time won't do. */
 333
 334 #ifdef AFS_PTHREAD_ENV
 335 /*
 336  * This mutex protects the following global variables:
 337  * rx_epoch
 338  */
 339
 340 #define LOCK_EPOCH assert(pthread_mutex_lock(&epoch_mutex)==0)
 341 #define UNLOCK_EPOCH assert(pthread_mutex_unlock(&epoch_mutex)==0)
 342 #else
 343 #define LOCK_EPOCH
 344 #define UNLOCK_EPOCH
 345 #endif /* AFS_PTHREAD_ENV */
 346
 347 void
 348 rx_SetEpoch(afs_uint32 epoch)
 349 {
 350     LOCK_EPOCH;
 351     rx_epoch = epoch;
 352     UNLOCK_EPOCH;
 353 }
 354
 355 /* Initialize rx.  A port number may be mentioned, in which case this
 356  * becomes the default port number for any service installed later.
 357  * If 0 is provided for the port number, a random port will be chosen
 358  * by the kernel.  Whether this will ever overlap anything in
 359  * /etc/services is anybody's guess...  Returns 0 on success, -1 on
 360  * error. */
 361 static int rxinit_status = 1;
 362 #ifdef AFS_PTHREAD_ENV
 363 /*
 364  * This mutex protects the following global variables:
 365  * rxinit_status
 366  */
 367
 368 #define LOCK_RX_INIT assert(pthread_mutex_lock(&rx_init_mutex)==0)
 369 #define UNLOCK_RX_INIT assert(pthread_mutex_unlock(&rx_init_mutex)==0)
 370 #else
 371 #define LOCK_RX_INIT
 372 #define UNLOCK_RX_INIT
 373 #endif
 374
 375 int
 376 rx_InitHost(u_int host, u_int port)
 377 {
 378 #ifdef KERNEL
 379     osi_timeval_t tv;
 380 #else /* KERNEL */
 381     struct timeval tv;
 382 #endif /* KERNEL */
 383     char *htable, *ptable;
 384     int tmp_status;
 385
 386 #if defined(AFS_DJGPP_ENV) && !defined(DEBUG)
 387     __djgpp_set_quiet_socket(1);
 388 #endif
 389
 390     SPLVAR;
 391
 392     INIT_PTHREAD_LOCKS;
 393     LOCK_RX_INIT;
 394     if (rxinit_status == 0) {
 395         tmp_status = rxinit_status;
 396         UNLOCK_RX_INIT;
 397         return tmp_status;      /* Already started; return previous error code. */
 398     }
 399 #ifdef RXDEBUG
 400     rxi_DebugInit();
 401 #endif
 402 #ifdef AFS_NT40_ENV
 403     if (afs_winsockInit() < 0)
 404         return -1;
 405 #endif
 406
 407 #ifndef KERNEL
 408     /*
 409      * Initialize anything necessary to provide a non-premptive threading
 410      * environment.
 411      */
 412     rxi_InitializeThreadSupport();
 413 #endif
 414
 415     /* Allocate and initialize a socket for client and perhaps server
 416      * connections. */
 417
 418     rx_socket = rxi_GetHostUDPSocket(host, (u_short) port);
 419     if (rx_socket == OSI_NULLSOCKET) {
 420         UNLOCK_RX_INIT;
 421         return RX_ADDRINUSE;
 422     }
 423 #ifdef  RX_ENABLE_LOCKS
 424 #ifdef RX_LOCKS_DB
 425     rxdb_init();
 426 #endif /* RX_LOCKS_DB */
 427     MUTEX_INIT(&rx_stats_mutex, "rx_stats_mutex", MUTEX_DEFAULT, 0);
 428     MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
 429     MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
 430     MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
 431     MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
 432                0);
 433     CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
 434             0);
 435     MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
 436                0);
 437     MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
 438                0);
 439     MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
 440 #ifndef KERNEL
 441     MUTEX_INIT(&rxi_keyCreate_lock, "rxi_keyCreate_lock", MUTEX_DEFAULT, 0);
 442 #endif /* !KERNEL */
 443 #if defined(KERNEL) && defined(AFS_HPUX110_ENV)
 444     if (!uniprocessor)
 445         rx_sleepLock = alloc_spinlock(LAST_HELD_ORDER - 10, "rx_sleepLock");
 446 #endif /* KERNEL && AFS_HPUX110_ENV */
 447 #endif /* RX_ENABLE_LOCKS */
 448
 449     rxi_nCalls = 0;
 450     rx_connDeadTime = 12;
 451     rx_tranquil = 0;            /* reset flag */
 452     memset((char *)&rx_stats, 0, sizeof(struct rx_stats));
 453     htable = (char *)
 454         osi_Alloc(rx_hashTableSize * sizeof(struct rx_connection *));
 455     PIN(htable, rx_hashTableSize * sizeof(struct rx_connection *));     /* XXXXX */
 456     memset(htable, 0, rx_hashTableSize * sizeof(struct rx_connection *));
 457     ptable = (char *)osi_Alloc(rx_hashTableSize * sizeof(struct rx_peer *));
 458     PIN(ptable, rx_hashTableSize * sizeof(struct rx_peer *));   /* XXXXX */
 459     memset(ptable, 0, rx_hashTableSize * sizeof(struct rx_peer *));
 460
 461     /* Malloc up a bunch of packets & buffers */
 462     rx_nFreePackets = 0;
 463     queue_Init(&rx_freePacketQueue);
 464     rxi_NeedMorePackets = FALSE;
 465 #ifdef RX_ENABLE_TSFPQ
 466     rx_nPackets = 0;    /* in TSFPQ version, rx_nPackets is managed by rxi_MorePackets* */
 467     rxi_MorePacketsTSFPQ(rx_extraPackets + RX_MAX_QUOTA + 2, RX_TS_FPQ_FLUSH_GLOBAL, 0);
 468 #else /* RX_ENABLE_TSFPQ */
 469     rx_nPackets = rx_extraPackets + RX_MAX_QUOTA + 2;   /* fudge */
 470     rxi_MorePackets(rx_nPackets);
 471 #endif /* RX_ENABLE_TSFPQ */
 472     rx_CheckPackets();
 473
 474     NETPRI;
 475
 476     clock_Init();
 477
 478 #if defined(AFS_NT40_ENV) && !defined(AFS_PTHREAD_ENV)
 479     tv.tv_sec = clock_now.sec;
 480     tv.tv_usec = clock_now.usec;
 481     srand((unsigned int)tv.tv_usec);
 482 #else
 483     osi_GetTime(&tv);
 484 #endif
 485     if (port) {
 486         rx_port = port;
 487     } else {
 488 #if defined(KERNEL) && !defined(UKERNEL)
 489         /* Really, this should never happen in a real kernel */
 490         rx_port = 0;
 491 #else
 492         struct sockaddr_in addr;
 493         int addrlen = sizeof(addr);
 494         if (getsockname((int)rx_socket, (struct sockaddr *)&addr, &addrlen)) {
 495             rx_Finalize();
 496             return -1;
 497         }
 498         rx_port = addr.sin_port;
 499 #endif
 500     }
 501     rx_stats.minRtt.sec = 9999999;
 502 #ifdef  KERNEL
 503     rx_SetEpoch(tv.tv_sec | 0x80000000);
 504 #else
 505     rx_SetEpoch(tv.tv_sec);     /* Start time of this package, rxkad
 506                                  * will provide a randomer value. */
 507 #endif
 508     MUTEX_ENTER(&rx_stats_mutex);
 509     rxi_dataQuota += rx_extraQuota;     /* + extra pkts caller asked to rsrv */
 510     MUTEX_EXIT(&rx_stats_mutex);
 511     /* *Slightly* random start time for the cid.  This is just to help
 512      * out with the hashing function at the peer */
 513     rx_nextCid = ((tv.tv_sec ^ tv.tv_usec) << RX_CIDSHIFT);
 514     rx_connHashTable = (struct rx_connection **)htable;
 515     rx_peerHashTable = (struct rx_peer **)ptable;
 516
 517     rx_lastAckDelay.sec = 0;
 518     rx_lastAckDelay.usec = 400000;      /* 400 milliseconds */
 519     rx_hardAckDelay.sec = 0;
 520     rx_hardAckDelay.usec = 100000;      /* 100 milliseconds */
 521     rx_softAckDelay.sec = 0;
 522     rx_softAckDelay.usec = 100000;      /* 100 milliseconds */
 523
 524     rxevent_Init(20, rxi_ReScheduleEvents);
 525
 526     /* Initialize various global queues */
 527     queue_Init(&rx_idleServerQueue);
 528     queue_Init(&rx_incomingCallQueue);
 529     queue_Init(&rx_freeCallQueue);
 530
 531 #if defined(AFS_NT40_ENV) && !defined(KERNEL)
 532     /* Initialize our list of usable IP addresses. */
 533     rx_GetIFInfo();
 534 #endif
 535
 536     /* Start listener process (exact function is dependent on the
 537      * implementation environment--kernel or user space) */
 538     rxi_StartListener();
 539
 540     USERPRI;
 541     tmp_status = rxinit_status = 0;
 542     UNLOCK_RX_INIT;
 543     return tmp_status;
 544 }
 545
 546 int
 547 rx_Init(u_int port)
 548 {
 549     return rx_InitHost(htonl(INADDR_ANY), port);
 550 }
 551
 552 /* called with unincremented nRequestsRunning to see if it is OK to start
 553  * a new thread in this service.  Could be "no" for two reasons: over the
 554  * max quota, or would prevent others from reaching their min quota.
 555  */
 556 #ifdef RX_ENABLE_LOCKS
 557 /* This verion of QuotaOK reserves quota if it's ok while the
 558  * rx_serverPool_lock is held.  Return quota using ReturnToServerPool().
 559  */
 560 static int
 561 QuotaOK(register struct rx_service *aservice)
 562 {
 563     /* check if over max quota */
 564     if (aservice->nRequestsRunning >= aservice->maxProcs) {
 565         return 0;
 566     }
 567
 568     /* under min quota, we're OK */
 569     /* otherwise, can use only if there are enough to allow everyone
 570      * to go to their min quota after this guy starts.
 571      */
 572     MUTEX_ENTER(&rx_stats_mutex);
 573     if ((aservice->nRequestsRunning < aservice->minProcs)
 574         || (rxi_availProcs > rxi_minDeficit)) {
 575         aservice->nRequestsRunning++;
 576         /* just started call in minProcs pool, need fewer to maintain
 577          * guarantee */
 578         if (aservice->nRequestsRunning <= aservice->minProcs)
 579             rxi_minDeficit--;
 580         rxi_availProcs--;
 581         MUTEX_EXIT(&rx_stats_mutex);
 582         return 1;
 583     }
 584     MUTEX_EXIT(&rx_stats_mutex);
 585
 586     return 0;
 587 }
 588
 589 static void
 590 ReturnToServerPool(register struct rx_service *aservice)
 591 {
 592     aservice->nRequestsRunning--;
 593     MUTEX_ENTER(&rx_stats_mutex);
 594     if (aservice->nRequestsRunning < aservice->minProcs)
 595         rxi_minDeficit++;
 596     rxi_availProcs++;
 597     MUTEX_EXIT(&rx_stats_mutex);
 598 }
 599
 600 #else /* RX_ENABLE_LOCKS */
 601 static int
 602 QuotaOK(register struct rx_service *aservice)
 603 {
 604     int rc = 0;
 605     /* under min quota, we're OK */
 606     if (aservice->nRequestsRunning < aservice->minProcs)
 607         return 1;
 608
 609     /* check if over max quota */
 610     if (aservice->nRequestsRunning >= aservice->maxProcs)
 611         return 0;
 612
 613     /* otherwise, can use only if there are enough to allow everyone
 614      * to go to their min quota after this guy starts.
 615      */
 616     if (rxi_availProcs > rxi_minDeficit)
 617         rc = 1;
 618     return rc;
 619 }
 620 #endif /* RX_ENABLE_LOCKS */
 621
 622 #ifndef KERNEL
 623 /* Called by rx_StartServer to start up lwp's to service calls.
 624    NExistingProcs gives the number of procs already existing, and which
 625    therefore needn't be created. */
 626 void
 627 rxi_StartServerProcs(int nExistingProcs)
 628 {
 629     register struct rx_service *service;
 630     register int i;
 631     int maxdiff = 0;
 632     int nProcs = 0;
 633
 634     /* For each service, reserve N processes, where N is the "minimum"
 635      * number of processes that MUST be able to execute a request in parallel,
 636      * at any time, for that process.  Also compute the maximum difference
 637      * between any service's maximum number of processes that can run
 638      * (i.e. the maximum number that ever will be run, and a guarantee
 639      * that this number will run if other services aren't running), and its
 640      * minimum number.  The result is the extra number of processes that
 641      * we need in order to provide the latter guarantee */
 642     for (i = 0; i < RX_MAX_SERVICES; i++) {
 643         int diff;
 644         service = rx_services[i];
 645         if (service == (struct rx_service *)0)
 646             break;
 647         nProcs += service->minProcs;
 648         diff = service->maxProcs - service->minProcs;
 649         if (diff > maxdiff)
 650             maxdiff = diff;
 651     }
 652     nProcs += maxdiff;          /* Extra processes needed to allow max number requested to run in any given service, under good conditions */
 653     nProcs -= nExistingProcs;   /* Subtract the number of procs that were previously created for use as server procs */
 654     for (i = 0; i < nProcs; i++) {
 655         rxi_StartServerProc(rx_ServerProc, rx_stackSize);
 656     }
 657 }
 658 #endif /* KERNEL */
 659
 660 #ifdef AFS_NT40_ENV
 661 /* This routine is only required on Windows */
 662 void
 663 rx_StartClientThread(void)
 664 {
 665 #ifdef AFS_PTHREAD_ENV
 666     pthread_t pid;
 667     pid = pthread_self();
 668 #endif /* AFS_PTHREAD_ENV */
 669 }
 670 #endif /* AFS_NT40_ENV */
 671
 672 /* This routine must be called if any services are exported.  If the
 673  * donateMe flag is set, the calling process is donated to the server
 674  * process pool */
 675 void
 676 rx_StartServer(int donateMe)
 677 {
 678     register struct rx_service *service;
 679     register int i;
 680     SPLVAR;
 681     clock_NewTime();
 682
 683     NETPRI;
 684     /* Start server processes, if necessary (exact function is dependent
 685      * on the implementation environment--kernel or user space).  DonateMe
 686      * will be 1 if there is 1 pre-existing proc, i.e. this one.  In this
 687      * case, one less new proc will be created rx_StartServerProcs.
 688      */
 689     rxi_StartServerProcs(donateMe);
 690
 691     /* count up the # of threads in minProcs, and add set the min deficit to
 692      * be that value, too.
 693      */
 694     for (i = 0; i < RX_MAX_SERVICES; i++) {
 695         service = rx_services[i];
 696         if (service == (struct rx_service *)0)
 697             break;
 698         MUTEX_ENTER(&rx_stats_mutex);
 699         rxi_totalMin += service->minProcs;
 700         /* below works even if a thread is running, since minDeficit would
 701          * still have been decremented and later re-incremented.
 702          */
 703         rxi_minDeficit += service->minProcs;
 704         MUTEX_EXIT(&rx_stats_mutex);
 705     }
 706
 707     /* Turn on reaping of idle server connections */
 708     rxi_ReapConnections();
 709
 710     USERPRI;
 711
 712     if (donateMe) {
 713 #ifndef AFS_NT40_ENV
 714 #ifndef KERNEL
 715         char name[32];
 716         static int nProcs;
 717 #ifdef AFS_PTHREAD_ENV
 718         pid_t pid;
 719         pid = (pid_t) pthread_self();
 720 #else /* AFS_PTHREAD_ENV */
 721         PROCESS pid;
 722         LWP_CurrentProcess(&pid);
 723 #endif /* AFS_PTHREAD_ENV */
 724
 725         sprintf(name, "srv_%d", ++nProcs);
 726         if (registerProgram)
 727             (*registerProgram) (pid, name);
 728 #endif /* KERNEL */
 729 #endif /* AFS_NT40_ENV */
 730         rx_ServerProc(NULL);    /* Never returns */
 731     }
 732 #ifdef RX_ENABLE_TSFPQ
 733     /* no use leaving packets around in this thread's local queue if
 734      * it isn't getting donated to the server thread pool.
 735      */
 736     rxi_FlushLocalPacketsTSFPQ();
 737 #endif /* RX_ENABLE_TSFPQ */
 738     return;
 739 }
 740
 741 /* Create a new client connection to the specified service, using the
 742  * specified security object to implement the security model for this
 743  * connection. */
 744 struct rx_connection *
 745 rx_NewConnection(register afs_uint32 shost, u_short sport, u_short sservice,
 746                  register struct rx_securityClass *securityObject,
 747                  int serviceSecurityIndex)
 748 {
 749     int hashindex, i;
 750     afs_int32 cid, cix, nclones;
 751     register struct rx_connection *conn, *tconn, *ptconn;
 752
 753     SPLVAR;
 754
 755     clock_NewTime();
 756     dpf(("rx_NewConnection(host %x, port %u, service %u, securityObject %x, serviceSecurityIndex %d)\n", ntohl(shost), ntohs(sport), sservice, securityObject, serviceSecurityIndex));
 757
 758         conn = tconn = 0;
 759         nclones = rx_max_clones_per_connection;
 760
 761     /* Vasilsi said: "NETPRI protects Cid and Alloc", but can this be true in
 762      * the case of kmem_alloc? */
 763
 764     NETPRI;
 765     MUTEX_ENTER(&rx_connHashTable_lock);
 766
 767     /* send in the clones */
 768     for(cix = 0; cix <= nclones; ++cix) {
 769
 770           ptconn = tconn;
 771           tconn = rxi_AllocConnection();
 772           tconn->type = RX_CLIENT_CONNECTION;
 773           tconn->epoch = rx_epoch;
 774           tconn->peer = rxi_FindPeer(shost, sport, 0, 1);
 775           tconn->serviceId = sservice;
 776           tconn->securityObject = securityObject;
 777           tconn->securityData = (void *) 0;
 778           tconn->securityIndex = serviceSecurityIndex;
 779           tconn->ackRate = RX_FAST_ACK_RATE;
 780           tconn->nSpecific = 0;
 781           tconn->specific = NULL;
 782           tconn->challengeEvent = NULL;
 783           tconn->delayedAbortEvent = NULL;
 784           tconn->abortCount = 0;
 785           tconn->error = 0;
 786     for (i = 0; i < RX_MAXCALLS; i++) {
 787         tconn->twind[i] = rx_initSendWindow;
 788         tconn->rwind[i] = rx_initReceiveWindow;
 789     }
 790           tconn->parent = 0;
 791           tconn->next_clone = 0;
 792           tconn->nclones = nclones;
 793           rx_SetConnDeadTime(tconn, rx_connDeadTime);
 794
 795           if(cix == 0) {
 796                 conn = tconn;
 797           } else {
 798                 tconn->flags |= RX_CLONED_CONNECTION;
 799                 tconn->parent = conn;
 800                 ptconn->next_clone = tconn;
 801           }
 802
 803           /* generic connection setup */
 804 #ifdef  RX_ENABLE_LOCKS
 805           MUTEX_INIT(&tconn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
 806           MUTEX_INIT(&tconn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
 807           CV_INIT(&tconn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
 808 #endif
 809           cid = (rx_nextCid += RX_MAXCALLS);
 810           tconn->cid = cid;
 811           RXS_NewConnection(securityObject, tconn);
 812           hashindex =
 813                 CONN_HASH(shost, sport, tconn->cid, tconn->epoch,
 814                                   RX_CLIENT_CONNECTION);
 815           tconn->refCount++; /* no lock required since only this thread knows */
 816           tconn->next = rx_connHashTable[hashindex];
 817           rx_connHashTable[hashindex] = tconn;
 818           rx_MutexIncrement(rx_stats.nClientConns, rx_stats_mutex);
 819     }
 820
 821     MUTEX_EXIT(&rx_connHashTable_lock);
 822     USERPRI;
 823     return conn;
 824 }
 825
 826 void
 827 rx_SetConnDeadTime(register struct rx_connection *conn, register int seconds)
 828 {
 829   /* The idea is to set the dead time to a value that allows several
 830    * keepalives to be dropped without timing out the connection. */
 831   struct rx_connection *tconn;
 832   tconn = conn;
 833   do {
 834         tconn->secondsUntilDead = MAX(seconds, 6);
 835         tconn->secondsUntilPing = tconn->secondsUntilDead / 6;
 836   } while(tconn->next_clone && (tconn = tconn->next_clone));
 837 }
 838
 839 int rxi_lowPeerRefCount = 0;	/* bumped (under rx_stats_mutex) by rxi_CleanupConnection each time it finds a peer whose refCount is already below 1 */
 840 int rxi_lowConnRefCount = 0;	/* NOTE(review): presumably the connection-side analogue; its updates are not in this part of the file */
 841
 842 /*
 843  * Cleanup a connection that was destroyed in rxi_DestroyConnectioNoLock.
 844  * NOTE: must not be called with rx_connHashTable_lock held.
 845  */
 846 void
 847 rxi_CleanupConnection(struct rx_connection *conn)
 848 {
 849     /* Notify the service exporter, if requested, that this connection
 850      * is being destroyed */
 851     if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
 852         (*conn->service->destroyConnProc) (conn);
 853
 854     /* Notify the security module that this connection is being destroyed */
 855     RXS_DestroyConnection(conn->securityObject, conn);
 856
 857     /* If this is the last connection using the rx_peer struct, set its
 858      * idle time to now. rxi_ReapConnections will reap it if it's still
 859      * idle (refCount == 0) after rx_idlePeerTime (60 seconds) have passed.
 860      */
 861     MUTEX_ENTER(&rx_peerHashTable_lock);
 862     if (conn->peer->refCount < 2) {
 863         conn->peer->idleWhen = clock_Sec();
 864         if (conn->peer->refCount < 1) {
 865             conn->peer->refCount = 1;
 866             MUTEX_ENTER(&rx_stats_mutex);
 867             rxi_lowPeerRefCount++;
 868             MUTEX_EXIT(&rx_stats_mutex);
 869         }
 870     }
 871     conn->peer->refCount--;
 872     MUTEX_EXIT(&rx_peerHashTable_lock);
 873
 874     if (conn->type == RX_SERVER_CONNECTION)
 875         rx_MutexDecrement(rx_stats.nServerConns, rx_stats_mutex);
 876     else
 877         rx_MutexDecrement(rx_stats.nClientConns, rx_stats_mutex);
 878 #ifndef KERNEL
 879     if (conn->specific) {
 880         int i;
 881         for (i = 0; i < conn->nSpecific; i++) {
 882             if (conn->specific[i] && rxi_keyCreate_destructor[i])
 883                 (*rxi_keyCreate_destructor[i]) (conn->specific[i]);
 884             conn->specific[i] = NULL;
 885         }
 886         free(conn->specific);
 887     }
 888     conn->specific = NULL;
 889     conn->nSpecific = 0;
 890 #endif /* !KERNEL */
 891
 892     MUTEX_DESTROY(&conn->conn_call_lock);
 893     MUTEX_DESTROY(&conn->conn_data_lock);
 894     CV_DESTROY(&conn->conn_call_cv);
 895
 896     rxi_FreeConnection(conn);
 897 }
 898
 899 /* Destroy the specified connection */
 900 void
 901 rxi_DestroyConnection(register struct rx_connection *conn)
 902 {
 903   register struct rx_connection *tconn, *dtconn;
 904
 905   MUTEX_ENTER(&rx_connHashTable_lock);
 906
 907   if(!(conn->flags & RX_CLONED_CONNECTION)) {
 908         tconn = conn->next_clone;
 909         conn->next_clone = 0; /* once */
 910         do {
 911           if(tconn) {
 912                 dtconn = tconn;
 913                 tconn = tconn->next_clone;
 914                 rxi_DestroyConnectionNoLock(dtconn);
 915                 /* destroyed? */
 916                 if (dtconn == rx_connCleanup_list) {
 917                   rx_connCleanup_list = rx_connCleanup_list->next;
 918                   MUTEX_EXIT(&rx_connHashTable_lock);
 919                   /* rxi_CleanupConnection will free tconn */
 920                   rxi_CleanupConnection(dtconn);
 921                   MUTEX_ENTER(&rx_connHashTable_lock);
 922                   (conn->nclones)--;
 923                 }
 924           }
 925         } while(tconn);
 926   }
 927
 928   rxi_DestroyConnectionNoLock(conn);
 929   /* conn should be at the head of the cleanup list */
 930   if (conn == rx_connCleanup_list) {
 931         rx_connCleanup_list = rx_connCleanup_list->next;
 932         MUTEX_EXIT(&rx_connHashTable_lock);
 933         rxi_CleanupConnection(conn);
 934   }
 935 #ifdef RX_ENABLE_LOCKS
 936   else {
 937         MUTEX_EXIT(&rx_connHashTable_lock);
 938   }
 939 #endif /* RX_ENABLE_LOCKS */
 940 }
 941
 942 static void
 943 rxi_DestroyConnectionNoLock(register struct rx_connection *conn)
 944 {
 945     register struct rx_connection **conn_ptr;
 946     register int havecalls = 0;
 947     struct rx_packet *packet;
 948     int i;
 949     SPLVAR;
 950
 951     clock_NewTime();
 952
 953     NETPRI;
 954     MUTEX_ENTER(&conn->conn_data_lock);
 955     if (conn->refCount > 0)
 956         conn->refCount--;
 957     else {
 958         MUTEX_ENTER(&rx_stats_mutex);
 959         rxi_lowConnRefCount++;
 960         MUTEX_EXIT(&rx_stats_mutex);
 961     }
 962
 963     if ((conn->refCount > 0) || (conn->flags & RX_CONN_BUSY)) {
 964         /* Busy; wait till the last guy before proceeding */
 965         MUTEX_EXIT(&conn->conn_data_lock);
 966         USERPRI;
 967         return;
 968     }
 969
 970     /* If the client previously called rx_NewCall, but it is still
 971      * waiting, treat this as a running call, and wait to destroy the
 972      * connection later when the call completes. */
 973     if ((conn->type == RX_CLIENT_CONNECTION)
 974         && (conn->flags & RX_CONN_MAKECALL_WAITING)) {
 975         conn->flags |= RX_CONN_DESTROY_ME;
 976         MUTEX_EXIT(&conn->conn_data_lock);
 977         USERPRI;
 978         return;
 979     }
 980     MUTEX_EXIT(&conn->conn_data_lock);
 981
 982     /* Check for extant references to this connection */
 983     for (i = 0; i < RX_MAXCALLS; i++) {
 984         register struct rx_call *call = conn->call[i];
 985         if (call) {
 986             havecalls = 1;
 987             if (conn->type == RX_CLIENT_CONNECTION) {
 988                 MUTEX_ENTER(&call->lock);
 989                 if (call->delayedAckEvent) {
 990                     /* Push the final acknowledgment out now--there
 991                      * won't be a subsequent call to acknowledge the
 992                      * last reply packets */
 993                     rxevent_Cancel(call->delayedAckEvent, call,
 994                                    RX_CALL_REFCOUNT_DELAY);
 995                     if (call->state == RX_STATE_PRECALL
 996                         || call->state == RX_STATE_ACTIVE) {
 997                         rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
 998                     } else {
 999                         rxi_AckAll(NULL, call, 0);
1000                     }
1001                 }
1002                 MUTEX_EXIT(&call->lock);
1003             }
1004         }
1005     }
1006 #ifdef RX_ENABLE_LOCKS
1007     if (!havecalls) {
1008         if (MUTEX_TRYENTER(&conn->conn_data_lock)) {
1009             MUTEX_EXIT(&conn->conn_data_lock);
1010         } else {
1011             /* Someone is accessing a packet right now. */
1012             havecalls = 1;
1013         }
1014     }
1015 #endif /* RX_ENABLE_LOCKS */
1016
1017     if (havecalls) {
1018         /* Don't destroy the connection if there are any call
1019          * structures still in use */
1020         MUTEX_ENTER(&conn->conn_data_lock);
1021         conn->flags |= RX_CONN_DESTROY_ME;
1022         MUTEX_EXIT(&conn->conn_data_lock);
1023         USERPRI;
1024         return;
1025     }
1026
1027     if (conn->delayedAbortEvent) {
1028         rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
1029         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
1030         if (packet) {
1031             MUTEX_ENTER(&conn->conn_data_lock);
1032             rxi_SendConnectionAbort(conn, packet, 0, 1);
1033             MUTEX_EXIT(&conn->conn_data_lock);
1034             rxi_FreePacket(packet);
1035         }
1036     }
1037
1038     /* Remove from connection hash table before proceeding */
1039     conn_ptr =
1040         &rx_connHashTable[CONN_HASH
1041                           (peer->host, peer->port, conn->cid, conn->epoch,
1042                            conn->type)];
1043     for (; *conn_ptr; conn_ptr = &(*conn_ptr)->next) {
1044         if (*conn_ptr == conn) {
1045             *conn_ptr = conn->next;
1046             break;
1047         }
1048     }
1049     /* if the conn that we are destroying was the last connection, then we
1050      * clear rxLastConn as well */
1051     if (rxLastConn == conn)
1052         rxLastConn = 0;
1053
1054     /* Make sure the connection is completely reset before deleting it. */
1055     /* get rid of pending events that could zap us later */
1056     if (conn->challengeEvent)
1057         rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
1058     if (conn->checkReachEvent)
1059         rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
1060
1061     /* Add the connection to the list of destroyed connections that
1062      * need to be cleaned up. This is necessary to avoid deadlocks
1063      * in the routines we call to inform others that this connection is
1064      * being destroyed. */
1065     conn->next = rx_connCleanup_list;
1066     rx_connCleanup_list = conn;
1067 }
1068
/* Externally available version: calls rxi_DestroyConnection on conn,
 * bracketed by NETPRI/USERPRI. */
void
rx_DestroyConnection(register struct rx_connection *conn)
{
    SPLVAR;

    NETPRI;
    rxi_DestroyConnection(conn);
    USERPRI;
}
1079
/* Take an additional reference on conn: increments conn->refCount while
 * holding conn->conn_data_lock. */
void
rx_GetConnection(register struct rx_connection *conn)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&conn->conn_data_lock);
    conn->refCount++;
    MUTEX_EXIT(&conn->conn_data_lock);
    USERPRI;
}
1091
1092 /* Wait for the transmit queue to no longer be busy.
1093  * requires the call->lock to be held */
1094 static void rxi_WaitforTQBusy(struct rx_call *call) {
1095     while (call->flags & RX_CALL_TQ_BUSY) {
1096         call->flags |= RX_CALL_TQ_WAIT;
1097         call->tqWaiters++;
1098 #ifdef RX_ENABLE_LOCKS
1099         osirx_AssertMine(&call->lock, "rxi_WaitforTQ lock");
1100         CV_WAIT(&call->cv_tq, &call->lock);
1101 #else /* RX_ENABLE_LOCKS */
1102         osi_rxSleep(&call->tq);
1103 #endif /* RX_ENABLE_LOCKS */
1104         call->tqWaiters--;
1105         if (call->tqWaiters == 0) {
1106             call->flags &= ~RX_CALL_TQ_WAIT;
1107         }
1108     }
1109 }
/* Start a new rx remote procedure call, on the specified connection.
 *
 * A free call channel is looked for on conn and then on each connection in
 * its next_clone chain; if none is available the caller sleeps on
 * conn->conn_call_cv (or via osi_rxSleep without RX_ENABLE_LOCKS) and
 * retries.  The returned call is in RX_STATE_ACTIVE, in send mode unless
 * the connection already carries an error.
 *
 * NOTE(review): earlier wording of this comment described a `wait' flag and
 * a `maxtime' limit; neither is a parameter of the current signature.
 *
 * For fine grain locking, we hold the conn_call_lock in order to
 * ensure that we don't get signalled after we found a call in an active
 * state and before we go to sleep.
 */
struct rx_call *
rx_NewCall(register struct rx_connection *conn)
{
    register int i;
    register struct rx_call *call;
        register struct rx_connection *tconn;
    struct clock queueTime;
    SPLVAR;

    clock_NewTime();
    dpf(("rx_NewCall(conn %x)\n", conn));

    NETPRI;
    /* Captured before any waiting; stored into call->queueTime below. */
    clock_GetTime(&queueTime);
    MUTEX_ENTER(&conn->conn_call_lock);

    /*
     * Check if there are others waiting for a new call.
     * If so, let them go first to avoid starving them.
     * This is a fairly simple scheme, and might not be
     * a complete solution for large numbers of waiters.
     *
     * makeCallWaiters keeps track of the number of
     * threads waiting to make calls and the
     * RX_CONN_MAKECALL_WAITING flag bit is used to
     * indicate that there are indeed calls waiting.
     * The flag is set when the waiter is incremented.
     * It is only cleared in rx_EndCall when
     * makeCallWaiters is 0.  This prevents us from
     * accidentally destroying the connection while it
     * is potentially about to be used.
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    if (conn->makeCallWaiters) {
        conn->flags |= RX_CONN_MAKECALL_WAITING;
        conn->makeCallWaiters++;
        MUTEX_EXIT(&conn->conn_data_lock);

#ifdef  RX_ENABLE_LOCKS
        CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
#else
        osi_rxSleep(conn);
#endif
        MUTEX_ENTER(&conn->conn_data_lock);
        conn->makeCallWaiters--;
    }
    MUTEX_EXIT(&conn->conn_data_lock);

        /* search for next free call on this connection or
         * its clones, if any */
    for (;;) {
                tconn = conn;
                do {
                        for (i = 0; i < RX_MAXCALLS; i++) {
                                call = tconn->call[i];
                                if (call) {
                                        MUTEX_ENTER(&call->lock);
                                        /* A dallying call can be reset and
                                         * reused; call->lock stays held
                                         * across the goto. */
                                        if (call->state == RX_STATE_DALLY) {
                                                rxi_ResetCall(call, 0);
                                                (*call->callNumber)++;
                                                goto f_call;
                                        }
                                        MUTEX_EXIT(&call->lock);
                                } else {
                                        /* Empty slot: create a call in it.
                                         * NOTE(review): presumably returned
                                         * with call->lock held, since it is
                                         * released below -- confirm. */
                                        call = rxi_NewCall(tconn, i);
                                        goto f_call;
                                }
                        } /* for i < RX_MAXCALLS */
                } while (tconn->next_clone && (tconn = tconn->next_clone));

        f_call:

                /* i < RX_MAXCALLS only if one of the gotos above was taken;
                 * falling out of the do/while leaves i == RX_MAXCALLS. */
                if (i < RX_MAXCALLS) {
                        break;
                }

                /* to be here, all available calls for this connection (and all
                 * its clones) must be in use */

                MUTEX_ENTER(&conn->conn_data_lock);
                conn->flags |= RX_CONN_MAKECALL_WAITING;
                conn->makeCallWaiters++;
                MUTEX_EXIT(&conn->conn_data_lock);

#ifdef  RX_ENABLE_LOCKS
                CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
#else
                osi_rxSleep(conn);
#endif
                MUTEX_ENTER(&conn->conn_data_lock);
                conn->makeCallWaiters--;
                MUTEX_EXIT(&conn->conn_data_lock);
    } /* for ;; */
    /*
     * Wake up anyone else who might be giving us a chance to
     * run (see code above that avoids resource starvation).
     */
#ifdef  RX_ENABLE_LOCKS
    CV_BROADCAST(&conn->conn_call_cv);
#else
    osi_rxWakeup(conn);
#endif

    CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);

    /* Client is initially in send mode */
    call->state = RX_STATE_ACTIVE;
    /* The error is taken from conn (the connection passed in), even when
     * the call was found on one of its clones. */
    call->error = conn->error;
    if (call->error)
        call->mode = RX_MODE_ERROR;
    else
        call->mode = RX_MODE_SENDING;

    /* remember start time for call in case we have hard dead time limit */
    call->queueTime = queueTime;
    clock_GetTime(&call->startTime);
    hzero(call->bytesSent);
    hzero(call->bytesRcvd);

    /* Turn on busy protocol. */
    rxi_KeepAliveOn(call);

    MUTEX_EXIT(&call->lock);
    MUTEX_EXIT(&conn->conn_call_lock);
    USERPRI;

#ifdef  AFS_GLOBAL_RXLOCK_KERNEL
    /* Now, if TQ wasn't cleared earlier, do it now. */
    MUTEX_ENTER(&call->lock);
    rxi_WaitforTQBusy(call);
    if (call->flags & RX_CALL_TQ_CLEARME) {
        rxi_ClearTransmitQueue(call, 0);
        queue_Init(&call->tq);
    }
    MUTEX_EXIT(&call->lock);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */

    dpf(("rx_NewCall(call %x)\n", call));
    return call;
}
1259
1260 int
1261 rxi_HasActiveCalls(register struct rx_connection *aconn)
1262 {
1263     register int i;
1264     register struct rx_call *tcall;
1265     SPLVAR;
1266
1267     NETPRI;
1268     for (i = 0; i < RX_MAXCALLS; i++) {
1269         if ((tcall = aconn->call[i])) {
1270             if ((tcall->state == RX_STATE_ACTIVE)
1271                 || (tcall->state == RX_STATE_PRECALL)) {
1272                 USERPRI;
1273                 return 1;
1274             }
1275         }
1276     }
1277     USERPRI;
1278     return 0;
1279 }
1280
1281 int
1282 rxi_GetCallNumberVector(register struct rx_connection *aconn,
1283                         register afs_int32 * aint32s)
1284 {
1285     register int i;
1286     register struct rx_call *tcall;
1287     SPLVAR;
1288
1289     NETPRI;
1290     for (i = 0; i < RX_MAXCALLS; i++) {
1291         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1292             aint32s[i] = aconn->callNumber[i] + 1;
1293         else
1294             aint32s[i] = aconn->callNumber[i];
1295     }
1296     USERPRI;
1297     return 0;
1298 }
1299
1300 int
1301 rxi_SetCallNumberVector(register struct rx_connection *aconn,
1302                         register afs_int32 * aint32s)
1303 {
1304     register int i;
1305     register struct rx_call *tcall;
1306     SPLVAR;
1307
1308     NETPRI;
1309     for (i = 0; i < RX_MAXCALLS; i++) {
1310         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1311             aconn->callNumber[i] = aint32s[i] - 1;
1312         else
1313             aconn->callNumber[i] = aint32s[i];
1314     }
1315     USERPRI;
1316     return 0;
1317 }
1318
1319 /* Advertise a new service.  A service is named locally by a UDP port
1320  * number plus a 16-bit service id.  Returns (struct rx_service *) 0
1321  * on a failure.
1322  *
1323      char *serviceName;  Name for identification purposes (e.g. the
1324                          service name might be used for probing for
1325                          statistics) */
1326 struct rx_service *
1327 rx_NewServiceHost(afs_uint32 host, u_short port, u_short serviceId,
1328                   char *serviceName, struct rx_securityClass **securityObjects,
1329                   int nSecurityObjects,
1330                   afs_int32(*serviceProc) (struct rx_call * acall))
1331 {
1332     osi_socket socket = OSI_NULLSOCKET;
1333     register struct rx_service *tservice;
1334     register int i;
1335     SPLVAR;
1336
1337     clock_NewTime();
1338
1339     if (serviceId == 0) {
1340         (osi_Msg
1341          "rx_NewService:  service id for service %s is not non-zero.\n",
1342          serviceName);
1343         return 0;
1344     }
1345     if (port == 0) {
1346         if (rx_port == 0) {
1347             (osi_Msg
1348              "rx_NewService: A non-zero port must be specified on this call if a non-zero port was not provided at Rx initialization (service %s).\n",
1349              serviceName);
1350             return 0;
1351         }
1352         port = rx_port;
1353         socket = rx_socket;
1354     }
1355
1356     tservice = rxi_AllocService();
1357     NETPRI;
1358     for (i = 0; i < RX_MAX_SERVICES; i++) {
1359         register struct rx_service *service = rx_services[i];
1360         if (service) {
1361             if (port == service->servicePort && host == service->serviceHost) {
1362                 if (service->serviceId == serviceId) {
1363                     /* The identical service has already been
1364                      * installed; if the caller was intending to
1365                      * change the security classes used by this
1366                      * service, he/she loses. */
1367                     (osi_Msg
1368                      "rx_NewService: tried to install service %s with service id %d, which is already in use for service %s\n",
1369                      serviceName, serviceId, service->serviceName);
1370                     USERPRI;
1371                     rxi_FreeService(tservice);
1372                     return service;
1373                 }
1374                 /* Different service, same port: re-use the socket
1375                  * which is bound to the same port */
1376                 socket = service->socket;
1377             }
1378         } else {
1379             if (socket == OSI_NULLSOCKET) {
1380                 /* If we don't already have a socket (from another
1381                  * service on same port) get a new one */
1382                 socket = rxi_GetHostUDPSocket(host, port);
1383                 if (socket == OSI_NULLSOCKET) {
1384                     USERPRI;
1385                     rxi_FreeService(tservice);
1386                     return 0;
1387                 }
1388             }
1389             service = tservice;
1390             service->socket = socket;
1391             service->serviceHost = host;
1392             service->servicePort = port;
1393             service->serviceId = serviceId;
1394             service->serviceName = serviceName;
1395             service->nSecurityObjects = nSecurityObjects;
1396             service->securityObjects = securityObjects;
1397             service->minProcs = 0;
1398             service->maxProcs = 1;
1399             service->idleDeadTime = 60;
1400             service->idleDeadErr = 0;
1401             service->connDeadTime = rx_connDeadTime;
1402             service->executeRequestProc = serviceProc;
1403             service->checkReach = 0;
1404             rx_services[i] = service;   /* not visible until now */
1405             USERPRI;
1406             return service;
1407         }
1408     }
1409     USERPRI;
1410     rxi_FreeService(tservice);
1411     (osi_Msg "rx_NewService: cannot support > %d services\n",
1412      RX_MAX_SERVICES);
1413     return 0;
1414 }
1415
1416 /* Set configuration options for all of a service's security objects */
1417
1418 afs_int32
1419 rx_SetSecurityConfiguration(struct rx_service *service,
1420                             rx_securityConfigVariables type,
1421                             void *value)
1422 {
1423     int i;
1424     for (i = 0; i<service->nSecurityObjects; i++) {
1425         if (service->securityObjects[i]) {
1426             RXS_SetConfiguration(service->securityObjects[i], NULL, type,
1427                                  value, NULL);
1428         }
1429     }
1430     return 0;
1431 }
1432
/* Convenience wrapper around rx_NewServiceHost: advertises the service with
 * htonl(INADDR_ANY) as the host and passes every other argument through
 * unchanged.  Returns whatever rx_NewServiceHost returns. */
struct rx_service *
rx_NewService(u_short port, u_short serviceId, char *serviceName,
              struct rx_securityClass **securityObjects, int nSecurityObjects,
              afs_int32(*serviceProc) (struct rx_call * acall))
{
    return rx_NewServiceHost(htonl(INADDR_ANY), port, serviceId, serviceName, securityObjects, nSecurityObjects, serviceProc);
}
1440
/* Generic request processing loop. This routine should be called
 * by the implementation dependent rx_ServerProc.
 *
 *   threadID  passed through to rx_GetCall
 *   newcall   if non-NULL, the first call to service, before any call is
 *             fetched with rx_GetCall
 *   socketp   if non-null, it will be set to the file descriptor that this
 *             thread is now listening on, and the routine returns once that
 *             happens.  If socketp is null, this routine never returns
 *             (except, in KERNEL builds, on the shutdown path below).
 */
void
rxi_ServerProc(int threadID, struct rx_call *newcall, osi_socket * socketp)
{
    register struct rx_call *call;
    register afs_int32 code;
    /* Service of the previously handled call; handed back to rx_GetCall on
     * the next iteration (NULL the first time round). */
    register struct rx_service *tservice = NULL;

    for (;;) {
        if (newcall) {
            call = newcall;
            newcall = NULL;
        } else {
            call = rx_GetCall(threadID, tservice, socketp);
            if (socketp && *socketp != OSI_NULLSOCKET) {
                /* We are now a listener thread */
                return;
            }
        }

        /* if server is restarting( typically smooth shutdown) then do not
         * allow any new calls.
         */

        /* The call is marked with RX_RESTARTING and an abort is sent; note
         * that control still falls through to the dispatch code below. */
        if (rx_tranquil && (call != NULL)) {
            SPLVAR;

            NETPRI;
            MUTEX_ENTER(&call->lock);

            rxi_CallError(call, RX_RESTARTING);
            rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);

            MUTEX_EXIT(&call->lock);
            USERPRI;
        }
#ifdef  KERNEL
        /* Shutdown handshake: advance afs_termState, wake whoever sleeps on
         * it, and leave the loop. */
        if (afs_termState == AFSOP_STOP_RXCALLBACK) {
#ifdef RX_ENABLE_LOCKS
            AFS_GLOCK();
#endif /* RX_ENABLE_LOCKS */
            afs_termState = AFSOP_STOP_AFS;
            afs_osi_Wakeup(&afs_termState);
#ifdef RX_ENABLE_LOCKS
            AFS_GUNLOCK();
#endif /* RX_ENABLE_LOCKS */
            return;
        }
#endif

        /* call is dereferenced unconditionally from here on. */
        tservice = call->conn->service;

        /* Optional per-service hook run before the request. */
        if (tservice->beforeProc)
            (*tservice->beforeProc) (call);

        code = call->conn->service->executeRequestProc(call);

        /* Optional per-service hook run after the request, given its code. */
        if (tservice->afterProc)
            (*tservice->afterProc) (call, code);

        rx_EndCall(call, code);
        MUTEX_ENTER(&rx_stats_mutex);
        rxi_nCalls++;
        MUTEX_EXIT(&rx_stats_mutex);
    }
}
1511
1512
1513 void
1514 rx_WakeupServerProcs(void)
1515 {
1516     struct rx_serverQueueEntry *np, *tqp;
1517     SPLVAR;
1518
1519     NETPRI;
1520     MUTEX_ENTER(&rx_serverPool_lock);
1521
1522 #ifdef RX_ENABLE_LOCKS
1523     if (rx_waitForPacket)
1524         CV_BROADCAST(&rx_waitForPacket->cv);
1525 #else /* RX_ENABLE_LOCKS */
1526     if (rx_waitForPacket)
1527         osi_rxWakeup(rx_waitForPacket);
1528 #endif /* RX_ENABLE_LOCKS */
1529     MUTEX_ENTER(&freeSQEList_lock);
1530     for (np = rx_FreeSQEList; np; np = tqp) {
1531         tqp = *(struct rx_serverQueueEntry **)np;
1532 #ifdef RX_ENABLE_LOCKS
1533         CV_BROADCAST(&np->cv);
1534 #else /* RX_ENABLE_LOCKS */
1535         osi_rxWakeup(np);
1536 #endif /* RX_ENABLE_LOCKS */
1537     }
1538     MUTEX_EXIT(&freeSQEList_lock);
1539     for (queue_Scan(&rx_idleServerQueue, np, tqp, rx_serverQueueEntry)) {
1540 #ifdef RX_ENABLE_LOCKS
1541         CV_BROADCAST(&np->cv);
1542 #else /* RX_ENABLE_LOCKS */
1543         osi_rxWakeup(np);
1544 #endif /* RX_ENABLE_LOCKS */
1545     }
1546     MUTEX_EXIT(&rx_serverPool_lock);
1547     USERPRI;
1548 }
1549
1550 /* meltdown:
1551  * One thing that seems to happen is that all the server threads get
1552  * tied up on some empty or slow call, and then a whole bunch of calls
1553  * arrive at once, using up the packet pool, so now there are more
1554  * empty calls.  The most critical resources here are server threads
1555  * and the free packet pool.  The "doreclaim" code seems to help in
1556  * general.  I think that eventually we arrive in this state: there
1557  * are lots of pending calls which do have all their packets present,
1558  * so they won't be reclaimed, are multi-packet calls, so they won't
1559  * be scheduled until later, and thus are tying up most of the free
1560  * packet pool for a very long time.
1561  * future options:
1562  * 1.  schedule multi-packet calls if all the packets are present.
1563  * Probably CPU-bound operation, useful to return packets to pool.
1564  * Do what if there is a full window, but the last packet isn't here?
1565  * 3.  preserve one thread which *only* runs "best" calls, otherwise
1566  * it sleeps and waits for that type of call.
1567  * 4.  Don't necessarily reserve a whole window for each thread.  In fact,
1568  * the current dataquota business is badly broken.  The quota isn't adjusted
1569  * to reflect how many packets are presently queued for a running call.
1570  * So, when we schedule a queued call with a full window of packets queued
1571  * up for it, that *should* free up a window full of packets for other 2d-class
1572  * calls to be able to use from the packet pool.  But it doesn't.
1573  *
1574  * NB.  Most of the time, this code doesn't run -- since idle server threads
1575  * sit on the idle server queue and are assigned by "...ReceivePacket" as soon
1576  * as a new call arrives.
1577  */
1578 /* Sleep until a call arrives.  Returns a pointer to the call, ready
1579  * for an rx_Read. */
1580 #ifdef RX_ENABLE_LOCKS
1581 struct rx_call *
1582 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1583 {
1584     struct rx_serverQueueEntry *sq;
1585     register struct rx_call *call = (struct rx_call *)0;
1586     struct rx_service *service = NULL;
1587     SPLVAR;
1588
1589     MUTEX_ENTER(&freeSQEList_lock);
1590
1591     if ((sq = rx_FreeSQEList)) {
1592         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1593         MUTEX_EXIT(&freeSQEList_lock);
1594     } else {                    /* otherwise allocate a new one and return that */
1595         MUTEX_EXIT(&freeSQEList_lock);
1596         sq = (struct rx_serverQueueEntry *)
1597             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1598         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1599         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1600     }
1601
1602     MUTEX_ENTER(&rx_serverPool_lock);
1603     if (cur_service != NULL) {
1604         ReturnToServerPool(cur_service);
1605     }
1606     while (1) {
1607         if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1608             register struct rx_call *tcall, *ncall, *choice2 = NULL;
1609
1610             /* Scan for eligible incoming calls.  A call is not eligible
1611              * if the maximum number of calls for its service type are
1612              * already executing */
1613             /* One thread will process calls FCFS (to prevent starvation),
1614              * while the other threads may run ahead looking for calls which
1615              * have all their input data available immediately.  This helps
1616              * keep threads from blocking, waiting for data from the client. */
1617             for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1618                 service = tcall->conn->service;
1619                 if (!QuotaOK(service)) {
1620                     continue;
1621                 }
1622                 if (tno == rxi_fcfs_thread_num
1623                     || !tcall->queue_item_header.next) {
1624                     /* If we're the fcfs thread , then  we'll just use
1625                      * this call. If we haven't been able to find an optimal
1626                      * choice, and we're at the end of the list, then use a
1627                      * 2d choice if one has been identified.  Otherwise... */
1628                     call = (choice2 ? choice2 : tcall);
1629                     service = call->conn->service;
1630                 } else if (!queue_IsEmpty(&tcall->rq)) {
1631                     struct rx_packet *rp;
1632                     rp = queue_First(&tcall->rq, rx_packet);
1633                     if (rp->header.seq == 1) {
1634                         if (!meltdown_1pkt
1635                             || (rp->header.flags & RX_LAST_PACKET)) {
1636                             call = tcall;
1637                         } else if (rxi_2dchoice && !choice2
1638                                    && !(tcall->flags & RX_CALL_CLEARED)
1639                                    && (tcall->rprev > rxi_HardAckRate)) {
1640                             choice2 = tcall;
1641                         } else
1642                             rxi_md2cnt++;
1643                     }
1644                 }
1645                 if (call) {
1646                     break;
1647                 } else {
1648                     ReturnToServerPool(service);
1649                 }
1650             }
1651         }
1652
1653         if (call) {
1654             queue_Remove(call);
1655             MUTEX_EXIT(&rx_serverPool_lock);
1656             MUTEX_ENTER(&call->lock);
1657
1658             if (call->flags & RX_CALL_WAIT_PROC) {
1659                 call->flags &= ~RX_CALL_WAIT_PROC;
1660                 MUTEX_ENTER(&rx_stats_mutex);
1661                 rx_nWaiting--;
1662                 MUTEX_EXIT(&rx_stats_mutex);
1663             }
1664
1665             if (call->state != RX_STATE_PRECALL || call->error) {
1666                 MUTEX_EXIT(&call->lock);
1667                 MUTEX_ENTER(&rx_serverPool_lock);
1668                 ReturnToServerPool(service);
1669                 call = NULL;
1670                 continue;
1671             }
1672
1673             if (queue_IsEmpty(&call->rq)
1674                 || queue_First(&call->rq, rx_packet)->header.seq != 1)
1675                 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1676
1677             CLEAR_CALL_QUEUE_LOCK(call);
1678             break;
1679         } else {
1680             /* If there are no eligible incoming calls, add this process
1681              * to the idle server queue, to wait for one */
1682             sq->newcall = 0;
1683             sq->tno = tno;
1684             if (socketp) {
1685                 *socketp = OSI_NULLSOCKET;
1686             }
1687             sq->socketp = socketp;
1688             queue_Append(&rx_idleServerQueue, sq);
1689 #ifndef AFS_AIX41_ENV
1690             rx_waitForPacket = sq;
1691 #else
1692             rx_waitingForPacket = sq;
1693 #endif /* AFS_AIX41_ENV */
1694             do {
1695                 CV_WAIT(&sq->cv, &rx_serverPool_lock);
1696 #ifdef  KERNEL
1697                 if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1698                     MUTEX_EXIT(&rx_serverPool_lock);
1699                     return (struct rx_call *)0;
1700                 }
1701 #endif
1702             } while (!(call = sq->newcall)
1703                      && !(socketp && *socketp != OSI_NULLSOCKET));
1704             MUTEX_EXIT(&rx_serverPool_lock);
1705             if (call) {
1706                 MUTEX_ENTER(&call->lock);
1707             }
1708             break;
1709         }
1710     }
1711
1712     MUTEX_ENTER(&freeSQEList_lock);
1713     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1714     rx_FreeSQEList = sq;
1715     MUTEX_EXIT(&freeSQEList_lock);
1716
1717     if (call) {
1718         clock_GetTime(&call->startTime);
1719         call->state = RX_STATE_ACTIVE;
1720         call->mode = RX_MODE_RECEIVING;
1721 #ifdef RX_KERNEL_TRACE
1722         if (ICL_SETACTIVE(afs_iclSetp)) {
1723             int glockOwner = ISAFS_GLOCK();
1724             if (!glockOwner)
1725                 AFS_GLOCK();
1726             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1727                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1728                        call);
1729             if (!glockOwner)
1730                 AFS_GUNLOCK();
1731         }
1732 #endif
1733
1734         rxi_calltrace(RX_CALL_START, call);
1735         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1736              call->conn->service->servicePort, call->conn->service->serviceId,
1737              call));
1738
1739         CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1740         MUTEX_EXIT(&call->lock);
1741     } else {
1742         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1743     }
1744
1745     return call;
1746 }
1747 #else /* RX_ENABLE_LOCKS */
1748 struct rx_call *
1749 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1750 {
1751     struct rx_serverQueueEntry *sq;
1752     register struct rx_call *call = (struct rx_call *)0, *choice2;
1753     struct rx_service *service = NULL;
1754     SPLVAR;
1755
1756     NETPRI;
1757     MUTEX_ENTER(&freeSQEList_lock);
1758
1759     if ((sq = rx_FreeSQEList)) {
1760         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1761         MUTEX_EXIT(&freeSQEList_lock);
1762     } else {                    /* otherwise allocate a new one and return that */
1763         MUTEX_EXIT(&freeSQEList_lock);
1764         sq = (struct rx_serverQueueEntry *)
1765             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1766         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1767         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1768     }
1769     MUTEX_ENTER(&sq->lock);
1770
1771     if (cur_service != NULL) {
1772         cur_service->nRequestsRunning--;
1773         if (cur_service->nRequestsRunning < cur_service->minProcs)
1774             rxi_minDeficit++;
1775         rxi_availProcs++;
1776     }
1777     if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1778         register struct rx_call *tcall, *ncall;
1779         /* Scan for eligible incoming calls.  A call is not eligible
1780          * if the maximum number of calls for its service type are
1781          * already executing */
1782         /* One thread will process calls FCFS (to prevent starvation),
1783          * while the other threads may run ahead looking for calls which
1784          * have all their input data available immediately.  This helps
1785          * keep threads from blocking, waiting for data from the client. */
1786         choice2 = (struct rx_call *)0;
1787         for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1788             service = tcall->conn->service;
1789             if (QuotaOK(service)) {
1790                 if (tno == rxi_fcfs_thread_num
1791                     || !tcall->queue_item_header.next) {
1792                     /* If we're the fcfs thread, then  we'll just use
1793                      * this call. If we haven't been able to find an optimal
1794                      * choice, and we're at the end of the list, then use a
1795                      * 2d choice if one has been identified.  Otherwise... */
1796                     call = (choice2 ? choice2 : tcall);
1797                     service = call->conn->service;
1798                 } else if (!queue_IsEmpty(&tcall->rq)) {
1799                     struct rx_packet *rp;
1800                     rp = queue_First(&tcall->rq, rx_packet);
1801                     if (rp->header.seq == 1
1802                         && (!meltdown_1pkt
1803                             || (rp->header.flags & RX_LAST_PACKET))) {
1804                         call = tcall;
1805                     } else if (rxi_2dchoice && !choice2
1806                                && !(tcall->flags & RX_CALL_CLEARED)
1807                                && (tcall->rprev > rxi_HardAckRate)) {
1808                         choice2 = tcall;
1809                     } else
1810                         rxi_md2cnt++;
1811                 }
1812             }
1813             if (call)
1814                 break;
1815         }
1816     }
1817
1818     if (call) {
1819         queue_Remove(call);
1820         /* we can't schedule a call if there's no data!!! */
1821         /* send an ack if there's no data, if we're missing the
1822          * first packet, or we're missing something between first
1823          * and last -- there's a "hole" in the incoming data. */
1824         if (queue_IsEmpty(&call->rq)
1825             || queue_First(&call->rq, rx_packet)->header.seq != 1
1826             || call->rprev != queue_Last(&call->rq, rx_packet)->header.seq)
1827             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1828
1829         call->flags &= (~RX_CALL_WAIT_PROC);
1830         service->nRequestsRunning++;
1831         /* just started call in minProcs pool, need fewer to maintain
1832          * guarantee */
1833         if (service->nRequestsRunning <= service->minProcs)
1834             rxi_minDeficit--;
1835         rxi_availProcs--;
1836         rx_nWaiting--;
1837         /* MUTEX_EXIT(&call->lock); */
1838     } else {
1839         /* If there are no eligible incoming calls, add this process
1840          * to the idle server queue, to wait for one */
1841         sq->newcall = 0;
1842         if (socketp) {
1843             *socketp = OSI_NULLSOCKET;
1844         }
1845         sq->socketp = socketp;
1846         queue_Append(&rx_idleServerQueue, sq);
1847         do {
1848             osi_rxSleep(sq);
1849 #ifdef  KERNEL
1850             if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1851                 USERPRI;
1852                 rxi_Free(sq, sizeof(struct rx_serverQueueEntry));
1853                 return (struct rx_call *)0;
1854             }
1855 #endif
1856         } while (!(call = sq->newcall)
1857                  && !(socketp && *socketp != OSI_NULLSOCKET));
1858     }
1859     MUTEX_EXIT(&sq->lock);
1860
1861     MUTEX_ENTER(&freeSQEList_lock);
1862     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1863     rx_FreeSQEList = sq;
1864     MUTEX_EXIT(&freeSQEList_lock);
1865
1866     if (call) {
1867         clock_GetTime(&call->startTime);
1868         call->state = RX_STATE_ACTIVE;
1869         call->mode = RX_MODE_RECEIVING;
1870 #ifdef RX_KERNEL_TRACE
1871         if (ICL_SETACTIVE(afs_iclSetp)) {
1872             int glockOwner = ISAFS_GLOCK();
1873             if (!glockOwner)
1874                 AFS_GLOCK();
1875             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1876                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1877                        call);
1878             if (!glockOwner)
1879                 AFS_GUNLOCK();
1880         }
1881 #endif
1882
1883         rxi_calltrace(RX_CALL_START, call);
1884         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1885              call->conn->service->servicePort, call->conn->service->serviceId,
1886              call));
1887     } else {
1888         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1889     }
1890
1891     USERPRI;
1892
1893     return call;
1894 }
1895 #endif /* RX_ENABLE_LOCKS */
1896
1897
1898
1899 /* Establish a procedure to be called when a packet arrives for a
1900  * call.  This routine will be called at most once after each call,
1901  * and will also be called if there is an error condition on the or
1902  * the call is complete.  Used by multi rx to build a selection
1903  * function which determines which of several calls is likely to be a
1904  * good one to read from.
1905  * NOTE: the way this is currently implemented it is probably only a
1906  * good idea to (1) use it immediately after a newcall (clients only)
1907  * and (2) only use it once.  Other uses currently void your warranty
1908  */
1909 void
1910 rx_SetArrivalProc(register struct rx_call *call,
1911                   register void (*proc) (register struct rx_call * call,
1912                                         register void * mh,
1913                                         register int index),
1914                   register void * handle, register int arg)
1915 {
1916     call->arrivalProc = proc;
1917     call->arrivalProcHandle = handle;
1918     call->arrivalProcArg = arg;
1919 }
1920
1921 /* Call is finished (possibly prematurely).  Return rc to the peer, if
1922  * appropriate, and return the final error code from the conversation
1923  * to the caller */
1924
1925 afs_int32
1926 rx_EndCall(register struct rx_call *call, afs_int32 rc)
1927 {
1928     register struct rx_connection *conn = call->conn;
1929     register struct rx_service *service;
1930     afs_int32 error;
1931     SPLVAR;
1932
1933
1934
1935     dpf(("rx_EndCall(call %x rc %d error %d abortCode %d)\n", call, rc, call->error, call->abortCode));
1936
1937     NETPRI;
1938     MUTEX_ENTER(&call->lock);
1939
1940     if (rc == 0 && call->error == 0) {
1941         call->abortCode = 0;
1942         call->abortCount = 0;
1943     }
1944
1945     call->arrivalProc = (void (*)())0;
1946     if (rc && call->error == 0) {
1947         rxi_CallError(call, rc);
1948         /* Send an abort message to the peer if this error code has
1949          * only just been set.  If it was set previously, assume the
1950          * peer has already been sent the error code or will request it
1951          */
1952         rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1953     }
1954     if (conn->type == RX_SERVER_CONNECTION) {
1955         /* Make sure reply or at least dummy reply is sent */
1956         if (call->mode == RX_MODE_RECEIVING) {
1957             rxi_WriteProc(call, 0, 0);
1958         }
1959         if (call->mode == RX_MODE_SENDING) {
1960             rxi_FlushWrite(call);
1961         }
1962         service = conn->service;
1963         rxi_calltrace(RX_CALL_END, call);
1964         /* Call goes to hold state until reply packets are acknowledged */
1965         if (call->tfirst + call->nSoftAcked < call->tnext) {
1966             call->state = RX_STATE_HOLD;
1967         } else {
1968             call->state = RX_STATE_DALLY;
1969             rxi_ClearTransmitQueue(call, 0);
1970             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
1971             rxevent_Cancel(call->keepAliveEvent, call,
1972                            RX_CALL_REFCOUNT_ALIVE);
1973         }
1974     } else {                    /* Client connection */
1975         char dummy;
1976         /* Make sure server receives input packets, in the case where
1977          * no reply arguments are expected */
1978         if ((call->mode == RX_MODE_SENDING)
1979             || (call->mode == RX_MODE_RECEIVING && call->rnext == 1)) {
1980             (void)rxi_ReadProc(call, &dummy, 1);
1981         }
1982
1983         /* If we had an outstanding delayed ack, be nice to the server
1984          * and force-send it now.
1985          */
1986         if (call->delayedAckEvent) {
1987             rxevent_Cancel(call->delayedAckEvent, call,
1988                            RX_CALL_REFCOUNT_DELAY);
1989             call->delayedAckEvent = NULL;
1990             rxi_SendDelayedAck(NULL, call, NULL);
1991         }
1992
1993         /* We need to release the call lock since it's lower than the
1994          * conn_call_lock and we don't want to hold the conn_call_lock
1995          * over the rx_ReadProc call. The conn_call_lock needs to be held
1996          * here for the case where rx_NewCall is perusing the calls on
1997          * the connection structure. We don't want to signal until
1998          * rx_NewCall is in a stable state. Otherwise, rx_NewCall may
1999          * have checked this call, found it active and by the time it
2000          * goes to sleep, will have missed the signal.
2001          *
2002          * Do not clear the RX_CONN_MAKECALL_WAITING flag as long as
2003          * there are threads waiting to use the conn object.
2004          */
2005         MUTEX_EXIT(&call->lock);
2006         MUTEX_ENTER(&conn->conn_call_lock);
2007         MUTEX_ENTER(&call->lock);
2008         MUTEX_ENTER(&conn->conn_data_lock);
2009         conn->flags |= RX_CONN_BUSY;
2010         if (conn->flags & RX_CONN_MAKECALL_WAITING) {
2011             if (conn->makeCallWaiters == 0)
2012                 conn->flags &= (~RX_CONN_MAKECALL_WAITING);
2013             MUTEX_EXIT(&conn->conn_data_lock);
2014 #ifdef  RX_ENABLE_LOCKS
2015             CV_BROADCAST(&conn->conn_call_cv);
2016 #else
2017             osi_rxWakeup(conn);
2018 #endif
2019         }
2020 #ifdef RX_ENABLE_LOCKS
2021         else {
2022             MUTEX_EXIT(&conn->conn_data_lock);
2023         }
2024 #endif /* RX_ENABLE_LOCKS */
2025         call->state = RX_STATE_DALLY;
2026     }
2027     error = call->error;
2028
2029     /* currentPacket, nLeft, and NFree must be zeroed here, because
2030      * ResetCall cannot: ResetCall may be called at splnet(), in the
2031      * kernel version, and may interrupt the macros rx_Read or
2032      * rx_Write, which run at normal priority for efficiency. */
2033     if (call->currentPacket) {
2034         queue_Prepend(&call->iovq, call->currentPacket);
2035         call->currentPacket = (struct rx_packet *)0;
2036     }
2037
2038     call->nLeft = call->nFree = call->curlen = 0;
2039
2040     /* Free any packets from the last call to ReadvProc/WritevProc */
2041     rxi_FreePackets(0, &call->iovq);
2042
2043     CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
2044     MUTEX_EXIT(&call->lock);
2045     if (conn->type == RX_CLIENT_CONNECTION) {
2046         MUTEX_EXIT(&conn->conn_call_lock);
2047         conn->flags &= ~RX_CONN_BUSY;
2048     }
2049     USERPRI;
2050     /*
2051      * Map errors to the local host's errno.h format.
2052      */
2053     error = ntoh_syserr_conv(error);
2054     return error;
2055 }
2056
2057 #if !defined(KERNEL)
2058
2059 /* Call this routine when shutting down a server or client (especially
2060  * clients).  This will allow Rx to gracefully garbage collect server
2061  * connections, and reduce the number of retries that a server might
2062  * make to a dead client.
2063  * This is not quite right, since some calls may still be ongoing and
2064  * we can't lock them to destroy them. */
2065 void
2066 rx_Finalize(void)
2067 {
2068     register struct rx_connection **conn_ptr, **conn_end;
2069
2070     INIT_PTHREAD_LOCKS;
2071     LOCK_RX_INIT;
2072     if (rxinit_status == 1) {
2073         UNLOCK_RX_INIT;
2074         return;                 /* Already shutdown. */
2075     }
2076     rxi_DeleteCachedConnections();
2077     if (rx_connHashTable) {
2078         MUTEX_ENTER(&rx_connHashTable_lock);
2079         for (conn_ptr = &rx_connHashTable[0], conn_end =
2080              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
2081              conn_ptr++) {
2082             struct rx_connection *conn, *next;
2083             for (conn = *conn_ptr; conn; conn = next) {
2084                 next = conn->next;
2085                 if (conn->type == RX_CLIENT_CONNECTION) {
2086                     /* MUTEX_ENTER(&conn->conn_data_lock); when used in kernel */
2087                     conn->refCount++;
2088                     /* MUTEX_EXIT(&conn->conn_data_lock); when used in kernel */
2089 #ifdef RX_ENABLE_LOCKS
2090                     rxi_DestroyConnectionNoLock(conn);
2091 #else /* RX_ENABLE_LOCKS */
2092                     rxi_DestroyConnection(conn);
2093 #endif /* RX_ENABLE_LOCKS */
2094                 }
2095             }
2096         }
2097 #ifdef RX_ENABLE_LOCKS
2098         while (rx_connCleanup_list) {
2099             struct rx_connection *conn;
2100             conn = rx_connCleanup_list;
2101             rx_connCleanup_list = rx_connCleanup_list->next;
2102             MUTEX_EXIT(&rx_connHashTable_lock);
2103             rxi_CleanupConnection(conn);
2104             MUTEX_ENTER(&rx_connHashTable_lock);
2105         }
2106         MUTEX_EXIT(&rx_connHashTable_lock);
2107 #endif /* RX_ENABLE_LOCKS */
2108     }
2109     rxi_flushtrace();
2110
2111 #ifdef AFS_NT40_ENV
2112     afs_winsockCleanup();
2113 #endif
2114
2115     rxinit_status = 1;
2116     UNLOCK_RX_INIT;
2117 }
2118 #endif
2119
2120 /* if we wakeup packet waiter too often, can get in loop with two
2121     AllocSendPackets each waking each other up (from ReclaimPacket calls) */
2122 void
2123 rxi_PacketsUnWait(void)
2124 {
2125     if (!rx_waitingForPackets) {
2126         return;
2127     }
2128 #ifdef KERNEL
2129     if (rxi_OverQuota(RX_PACKET_CLASS_SEND)) {
2130         return;                 /* still over quota */
2131     }
2132 #endif /* KERNEL */
2133     rx_waitingForPackets = 0;
2134 #ifdef  RX_ENABLE_LOCKS
2135     CV_BROADCAST(&rx_waitingForPackets_cv);
2136 #else
2137     osi_rxWakeup(&rx_waitingForPackets);
2138 #endif
2139     return;
2140 }
2141
2142
2143 /* ------------------Internal interfaces------------------------- */
2144
2145 /* Return this process's service structure for the
2146  * specified socket and service */
2147 struct rx_service *
2148 rxi_FindService(register osi_socket socket, register u_short serviceId)
2149 {
2150     register struct rx_service **sp;
2151     for (sp = &rx_services[0]; *sp; sp++) {
2152         if ((*sp)->serviceId == serviceId && (*sp)->socket == socket)
2153             return *sp;
2154     }
2155     return 0;
2156 }
2157
2158 /* Allocate a call structure, for the indicated channel of the
2159  * supplied connection.  The mode and state of the call must be set by
2160  * the caller. Returns the call with mutex locked. */
2161 struct rx_call *
2162 rxi_NewCall(register struct rx_connection *conn, register int channel)
2163 {
2164     register struct rx_call *call;
2165 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2166     register struct rx_call *cp;        /* Call pointer temp */
2167     register struct rx_call *nxp;       /* Next call pointer, for queue_Scan */
2168 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2169
2170     dpf(("rxi_NewCall(conn %x, channel %d)\n", conn, channel));
2171
2172     /* Grab an existing call structure, or allocate a new one.
2173      * Existing call structures are assumed to have been left reset by
2174      * rxi_FreeCall */
2175     MUTEX_ENTER(&rx_freeCallQueue_lock);
2176
2177 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2178     /*
2179      * EXCEPT that the TQ might not yet be cleared out.
2180      * Skip over those with in-use TQs.
2181      */
2182     call = NULL;
2183     for (queue_Scan(&rx_freeCallQueue, cp, nxp, rx_call)) {
2184         if (!(cp->flags & RX_CALL_TQ_BUSY)) {
2185             call = cp;
2186             break;
2187         }
2188     }
2189     if (call) {
2190 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2191     if (queue_IsNotEmpty(&rx_freeCallQueue)) {
2192         call = queue_First(&rx_freeCallQueue, rx_call);
2193 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2194         queue_Remove(call);
2195         rx_MutexDecrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2196         MUTEX_EXIT(&rx_freeCallQueue_lock);
2197         MUTEX_ENTER(&call->lock);
2198         CLEAR_CALL_QUEUE_LOCK(call);
2199 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2200         /* Now, if TQ wasn't cleared earlier, do it now. */
2201         if (call->flags & RX_CALL_TQ_CLEARME) {
2202             rxi_ClearTransmitQueue(call, 0);
2203             queue_Init(&call->tq);
2204         }
2205 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2206         /* Bind the call to its connection structure */
2207         call->conn = conn;
2208         rxi_ResetCall(call, 1);
2209     } else {
2210         call = (struct rx_call *)rxi_Alloc(sizeof(struct rx_call));
2211
2212         MUTEX_EXIT(&rx_freeCallQueue_lock);
2213         MUTEX_INIT(&call->lock, "call lock", MUTEX_DEFAULT, NULL);
2214         MUTEX_ENTER(&call->lock);
2215         CV_INIT(&call->cv_twind, "call twind", CV_DEFAULT, 0);
2216         CV_INIT(&call->cv_rq, "call rq", CV_DEFAULT, 0);
2217         CV_INIT(&call->cv_tq, "call tq", CV_DEFAULT, 0);
2218
2219         rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2220         /* Initialize once-only items */
2221         queue_Init(&call->tq);
2222         queue_Init(&call->rq);
2223         queue_Init(&call->iovq);
2224         /* Bind the call to its connection structure (prereq for reset) */
2225         call->conn = conn;
2226         rxi_ResetCall(call, 1);
2227     }
2228     call->channel = channel;
2229     call->callNumber = &conn->callNumber[channel];
2230     call->rwind = conn->rwind[channel];
2231     call->twind = conn->twind[channel];
2232     /* Note that the next expected call number is retained (in
2233      * conn->callNumber[i]), even if we reallocate the call structure
2234      */
2235     conn->call[channel] = call;
2236     /* if the channel's never been used (== 0), we should start at 1, otherwise
2237      * the call number is valid from the last time this channel was used */
2238     if (*call->callNumber == 0)
2239         *call->callNumber = 1;
2240
2241     return call;
2242 }
2243
2244 /* A call has been inactive long enough that so we can throw away
2245  * state, including the call structure, which is placed on the call
2246  * free list.
2247  * Call is locked upon entry.
2248  * haveCTLock set if called from rxi_ReapConnections
2249  */
2250 #ifdef RX_ENABLE_LOCKS
2251 void
2252 rxi_FreeCall(register struct rx_call *call, int haveCTLock)
2253 #else /* RX_ENABLE_LOCKS */
2254 void
2255 rxi_FreeCall(register struct rx_call *call)
2256 #endif                          /* RX_ENABLE_LOCKS */
2257 {
2258     register int channel = call->channel;
2259     register struct rx_connection *conn = call->conn;
2260
2261
2262     if (call->state == RX_STATE_DALLY || call->state == RX_STATE_HOLD)
2263         (*call->callNumber)++;
2264     rxi_ResetCall(call, 0);
2265     call->conn->call[channel] = (struct rx_call *)0;
2266
2267     MUTEX_ENTER(&rx_freeCallQueue_lock);
2268     SET_CALL_QUEUE_LOCK(call, &rx_freeCallQueue_lock);
2269 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2270     /* A call may be free even though its transmit queue is still in use.
2271      * Since we search the call list from head to tail, put busy calls at
2272      * the head of the list, and idle calls at the tail.
2273      */
2274     if (call->flags & RX_CALL_TQ_BUSY)
2275         queue_Prepend(&rx_freeCallQueue, call);
2276     else
2277         queue_Append(&rx_freeCallQueue, call);
2278 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2279     queue_Append(&rx_freeCallQueue, call);
2280 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2281     rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2282     MUTEX_EXIT(&rx_freeCallQueue_lock);
2283
2284     /* Destroy the connection if it was previously slated for
2285      * destruction, i.e. the Rx client code previously called
2286      * rx_DestroyConnection (client connections), or
2287      * rxi_ReapConnections called the same routine (server
2288      * connections).  Only do this, however, if there are no
2289      * outstanding calls. Note that for fine grain locking, there appears
2290      * to be a deadlock in that rxi_FreeCall has a call locked and
2291      * DestroyConnectionNoLock locks each call in the conn. But note a
2292      * few lines up where we have removed this call from the conn.
2293      * If someone else destroys a connection, they either have no
2294      * call lock held or are going through this section of code.
2295      */
2296     if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) {
2297         MUTEX_ENTER(&conn->conn_data_lock);
2298         conn->refCount++;
2299         MUTEX_EXIT(&conn->conn_data_lock);
2300 #ifdef RX_ENABLE_LOCKS
2301         if (haveCTLock)
2302             rxi_DestroyConnectionNoLock(conn);
2303         else
2304             rxi_DestroyConnection(conn);
2305 #else /* RX_ENABLE_LOCKS */
2306         rxi_DestroyConnection(conn);
2307 #endif /* RX_ENABLE_LOCKS */
2308     }
2309 }
2310
2311 afs_int32 rxi_Alloccnt = 0, rxi_Allocsize = 0;
2312 char *
2313 rxi_Alloc(register size_t size)
2314 {
2315     register char *p;
2316
2317     rx_MutexAdd1Increment2(rxi_Allocsize, (afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2318     p = (char *)osi_Alloc(size);
2319
2320     if (!p)
2321         osi_Panic("rxi_Alloc error");
2322     memset(p, 0, size);
2323     return p;
2324 }
2325
2326 void
2327 rxi_Free(void *addr, register size_t size)
2328 {
2329     rx_MutexAdd1Decrement2(rxi_Allocsize, -(afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2330     osi_Free(addr, size);
2331 }
2332
2333 void
2334 rxi_SetPeerMtu(register afs_uint32 host, register afs_uint32 port, int mtu)
2335 {
2336     struct rx_peer **peer_ptr, **peer_end;
2337     int hashIndex;
2338
2339     MUTEX_ENTER(&rx_peerHashTable_lock);
2340     if (port == 0) {
2341        for (peer_ptr = &rx_peerHashTable[0], peer_end =
2342                 &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
2343             peer_ptr++) {
2344            struct rx_peer *peer, *next;
2345            for (peer = *peer_ptr; peer; peer = next) {
2346                next = peer->next;
2347                if (host == peer->host) {
2348                    MUTEX_ENTER(&peer->peer_lock);
2349                    peer->ifMTU=MIN(mtu, peer->ifMTU);
2350                    peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2351                    MUTEX_EXIT(&peer->peer_lock);
2352                }
2353            }
2354        }
2355     } else {
2356        struct rx_peer *peer, *next;
2357        hashIndex = PEER_HASH(host, port);
2358        for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
2359            if ((peer->host == host) && (peer->port == port)) {
2360                MUTEX_ENTER(&peer->peer_lock);
2361                peer->ifMTU=MIN(mtu, peer->ifMTU);
2362                peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2363                MUTEX_EXIT(&peer->peer_lock);
2364            }
2365        }
2366     }
2367     MUTEX_EXIT(&rx_peerHashTable_lock);
2368 }
2369
2370 /* Find the peer process represented by the supplied (host,port)
2371  * combination.  If there is no appropriate active peer structure, a
2372  * new one will be allocated and initialized
2373  * The origPeer, if set, is a pointer to a peer structure on which the
2374  * refcount will be be decremented. This is used to replace the peer
2375  * structure hanging off a connection structure */
2376 struct rx_peer *
2377 rxi_FindPeer(register afs_uint32 host, register u_short port,
2378              struct rx_peer *origPeer, int create)
2379 {
2380     register struct rx_peer *pp;
2381     int hashIndex;
2382     hashIndex = PEER_HASH(host, port);
2383     MUTEX_ENTER(&rx_peerHashTable_lock);
2384     for (pp = rx_peerHashTable[hashIndex]; pp; pp = pp->next) {
2385         if ((pp->host == host) && (pp->port == port))
2386             break;
2387     }
2388     if (!pp) {
2389         if (create) {
2390             pp = rxi_AllocPeer();       /* This bzero's *pp */
2391             pp->host = host;    /* set here or in InitPeerParams is zero */
2392             pp->port = port;
2393             MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0);
2394             queue_Init(&pp->congestionQueue);
2395             queue_Init(&pp->rpcStats);
2396             pp->next = rx_peerHashTable[hashIndex];
2397             rx_peerHashTable[hashIndex] = pp;
2398             rxi_InitPeerParams(pp);
2399             rx_MutexIncrement(rx_stats.nPeerStructs, rx_stats_mutex);
2400         }
2401     }
2402     if (pp && create) {
2403         pp->refCount++;
2404     }
2405     if (origPeer)
2406         origPeer->refCount--;
2407     MUTEX_EXIT(&rx_peerHashTable_lock);
2408     return pp;
2409 }
2410
2411
2412 /* Find the connection at (host, port) started at epoch, and with the
2413  * given connection id.  Creates the server connection if necessary.
2414  * The type specifies whether a client connection or a server
2415  * connection is desired.  In both cases, (host, port) specify the
 * peer's (host, port) pair.  Client connections are not made
2417  * automatically by this routine.  The parameter socket gives the
2418  * socket descriptor on which the packet was received.  This is used,
2419  * in the case of server connections, to check that *new* connections
2420  * come via a valid (port, serviceId).  Finally, the securityIndex
2421  * parameter must match the existing index for the connection.  If a
2422  * server connection is created, it will be created using the supplied
2423  * index, if the index is valid for this service */
2424 struct rx_connection *
2425 rxi_FindConnection(osi_socket socket, register afs_int32 host,
2426                    register u_short port, u_short serviceId, afs_uint32 cid,
2427                    afs_uint32 epoch, int type, u_int securityIndex)
2428 {
2429     int hashindex, flag, i;
2430     register struct rx_connection *conn;
2431     hashindex = CONN_HASH(host, port, cid, epoch, type);
2432     MUTEX_ENTER(&rx_connHashTable_lock);
2433     rxLastConn ? (conn = rxLastConn, flag = 0) : (conn =
2434                                                   rx_connHashTable[hashindex],
2435                                                   flag = 1);
2436     for (; conn;) {
2437         if ((conn->type == type) && ((cid & RX_CIDMASK) == conn->cid)
2438             && (epoch == conn->epoch)) {
2439             register struct rx_peer *pp = conn->peer;
2440             if (securityIndex != conn->securityIndex) {
2441                 /* this isn't supposed to happen, but someone could forge a packet
2442                  * like this, and there seems to be some CM bug that makes this
2443                  * happen from time to time -- in which case, the fileserver
2444                  * asserts. */
2445                 MUTEX_EXIT(&rx_connHashTable_lock);
2446                 return (struct rx_connection *)0;
2447             }
2448             if (pp->host == host && pp->port == port)
2449                 break;
2450             if (type == RX_CLIENT_CONNECTION && pp->port == port)
2451                 break;
2452             /* So what happens when it's a callback connection? */
2453             if (                /*type == RX_CLIENT_CONNECTION && */
2454                    (conn->epoch & 0x80000000))
2455                 break;
2456         }
2457         if (!flag) {
2458             /* the connection rxLastConn that was used the last time is not the
2459              ** one we are looking for now. Hence, start searching in the hash */
2460             flag = 1;
2461             conn = rx_connHashTable[hashindex];
2462         } else
2463             conn = conn->next;
2464     }
2465     if (!conn) {
2466         struct rx_service *service;
2467         if (type == RX_CLIENT_CONNECTION) {
2468             MUTEX_EXIT(&rx_connHashTable_lock);
2469             return (struct rx_connection *)0;
2470         }
2471         service = rxi_FindService(socket, serviceId);
2472         if (!service || (securityIndex >= service->nSecurityObjects)
2473             || (service->securityObjects[securityIndex] == 0)) {
2474             MUTEX_EXIT(&rx_connHashTable_lock);
2475             return (struct rx_connection *)0;
2476         }
2477         conn = rxi_AllocConnection();   /* This bzero's the connection */
2478         MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
2479         MUTEX_INIT(&conn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
2480         CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
2481         conn->next = rx_connHashTable[hashindex];
2482         rx_connHashTable[hashindex] = conn;
2483         conn->peer = rxi_FindPeer(host, port, 0, 1);
2484         conn->type = RX_SERVER_CONNECTION;
2485         conn->lastSendTime = clock_Sec();       /* don't GC immediately */
2486         conn->epoch = epoch;
2487         conn->cid = cid & RX_CIDMASK;
2488         /* conn->serial = conn->lastSerial = 0; */
2489         /* conn->timeout = 0; */
2490         conn->ackRate = RX_FAST_ACK_RATE;
2491         conn->service = service;
2492         conn->serviceId = serviceId;
2493         conn->securityIndex = securityIndex;
2494         conn->securityObject = service->securityObjects[securityIndex];
2495         conn->nSpecific = 0;
2496         conn->specific = NULL;
2497         rx_SetConnDeadTime(conn, service->connDeadTime);
2498         rx_SetConnIdleDeadTime(conn, service->idleDeadTime);
2499         rx_SetServerConnIdleDeadErr(conn, service->idleDeadErr);
2500         for (i = 0; i < RX_MAXCALLS; i++) {
2501             conn->twind[i] = rx_initSendWindow;
2502             conn->rwind[i] = rx_initReceiveWindow;
2503         }
2504         /* Notify security object of the new connection */
2505         RXS_NewConnection(conn->securityObject, conn);
2506         /* XXXX Connection timeout? */
2507         if (service->newConnProc)
2508             (*service->newConnProc) (conn);
2509         rx_MutexIncrement(rx_stats.nServerConns, rx_stats_mutex);
2510     }
2511
2512     MUTEX_ENTER(&conn->conn_data_lock);
2513     conn->refCount++;
2514     MUTEX_EXIT(&conn->conn_data_lock);
2515
2516     rxLastConn = conn;          /* store this connection as the last conn used */
2517     MUTEX_EXIT(&rx_connHashTable_lock);
2518     return conn;
2519 }
2520
/* Two packet tracing hooks are available for testing and monitoring Rx.
 * One is called just after every packet is received, the other just before
 * every packet is sent.  Received packets have already had their headers
 * decoded; packets about to be sent have not yet had their headers encoded.
 * Each hook takes two parameters: a pointer to the packet and a sockaddr
 * containing the network address, and may modify both.  A non-zero return
 * value indicates that the packet should be dropped.  A null hook is
 * simply not called. */

int (*rx_justReceived) () = NULL;       /* receive-side tracer */
int (*rx_almostSent) () = NULL;         /* send-side tracer */
2531
/* A packet has been received off the interface.  Np is the packet, socket is
 * the socket number it was received from (useful in determining which service
 * this packet corresponds to), and (host, port) reflect the host,port of the
 * sender.  This call returns the packet to the caller if it is finished with
 * it, rather than de-allocating it, just as a small performance hack.
 *
 * tnop and newcallp are not examined here; they are handed straight through
 * to rxi_ReceiveDataPacket for DATA packets.
 *
 * The return value is either np itself or whatever packet pointer one of the
 * helper routines (version/debug/challenge/response/abort/data/ack handlers)
 * hands back.
 *
 * Reference counting: rxi_FindConnection returns the connection with its
 * refCount already incremented, so every return path after a successful
 * lookup decrements conn->refCount before returning. */

struct rx_packet *
rxi_ReceivePacket(register struct rx_packet *np, osi_socket socket,
                  afs_uint32 host, u_short port, int *tnop,
                  struct rx_call **newcallp)
{
    register struct rx_call *call;
    register struct rx_connection *conn;
    int channel;
    afs_uint32 currentCallNumber;
    int type;
    int skew;
#ifdef RXDEBUG
    char *packetType;
#endif
    struct rx_packet *tnp;

#ifdef RXDEBUG
/* We don't print out the packet until now because (1) the time may not be
 * accurate enough until now in the lwp implementation (rx_Listener only gets
 * the time after the packet is read) and (2) from a protocol point of view,
 * this is the first time the packet has been seen */
    packetType = (np->header.type > 0 && np->header.type < RX_N_PACKET_TYPES)
        ? rx_packetTypes[np->header.type - 1] : "*UNKNOWN*";
    dpf(("R %d %s: %x.%d.%d.%d.%d.%d.%d flags %d, packet %x",
         np->header.serial, packetType, ntohl(host), ntohs(port), np->header.serviceId,
         np->header.epoch, np->header.cid, np->header.callNumber,
         np->header.seq, np->header.flags, np));
#endif

    /* VERSION and DEBUG packets are handled without any connection lookup. */
    if (np->header.type == RX_PACKET_TYPE_VERSION) {
        return rxi_ReceiveVersionPacket(np, socket, host, port, 1);
    }

    if (np->header.type == RX_PACKET_TYPE_DEBUG) {
        return rxi_ReceiveDebugPacket(np, socket, host, port, 1);
    }
#ifdef RXDEBUG
    /* If an input tracer function is defined, call it with the packet and
     * network address.  Note this function may modify its arguments. */
    if (rx_justReceived) {
        struct sockaddr_in addr;
        int drop;
        addr.sin_family = AF_INET;
        addr.sin_port = port;
        addr.sin_addr.s_addr = host;
#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
        addr.sin_len = sizeof(addr);
#endif /* STRUCT_SOCKADDR_HAS_SA_LEN */
        drop = (*rx_justReceived) (np, &addr);
        /* drop packet if return value is non-zero */
        if (drop)
            return np;
        port = addr.sin_port;   /* in case fcn changed addr */
        host = addr.sin_addr.s_addr;
    }
#endif

    /* If packet was not sent by the client, then *we* must be the client */
    type = ((np->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
        ? RX_CLIENT_CONNECTION : RX_SERVER_CONNECTION;

    /* Find the connection (or fabricate one, if we're the server & if
     * necessary) associated with this packet */
    conn =
        rxi_FindConnection(socket, host, port, np->header.serviceId,
                           np->header.cid, np->header.epoch, type,
                           np->header.securityIndex);

    if (!conn) {
        /* If no connection found or fabricated, just ignore the packet.
         * (An argument could be made for sending an abort packet for
         * the conn) */
        return np;
    }

    /* Track the highest serial number seen on this connection. */
    MUTEX_ENTER(&conn->conn_data_lock);
    if (conn->maxSerial < np->header.serial)
        conn->maxSerial = np->header.serial;
    MUTEX_EXIT(&conn->conn_data_lock);

    /* If the connection is in an error state, send an abort packet and ignore
     * the incoming packet */
    if (conn->error) {
        /* Don't respond to an abort packet--we don't want loops! */
        MUTEX_ENTER(&conn->conn_data_lock);
        if (np->header.type != RX_PACKET_TYPE_ABORT)
            np = rxi_SendConnectionAbort(conn, np, 1, 0);
        conn->refCount--;
        MUTEX_EXIT(&conn->conn_data_lock);
        return np;
    }

    /* Check for connection-only requests (i.e. not call specific).
     * Every case of the switch below returns, so nothing past this block
     * runs with callNumber == 0. */
    if (np->header.callNumber == 0) {
        switch (np->header.type) {
        case RX_PACKET_TYPE_ABORT: {
            /* What if the supplied error is zero? */
            afs_int32 errcode = ntohl(rx_GetInt32(np, 0));
            dpf(("rxi_ReceivePacket ABORT rx_GetInt32 = %d", errcode));
            rxi_ConnectionError(conn, errcode);
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }
        case RX_PACKET_TYPE_CHALLENGE:
            tnp = rxi_ReceiveChallengePacket(conn, np, 1);
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return tnp;
        case RX_PACKET_TYPE_RESPONSE:
            tnp = rxi_ReceiveResponsePacket(conn, np, 1);
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return tnp;
        case RX_PACKET_TYPE_PARAMS:
        case RX_PACKET_TYPE_PARAMS + 1:
        case RX_PACKET_TYPE_PARAMS + 2:
            /* ignore these packet types for now */
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;


        default:
            /* Should not reach here, unless the peer is broken: send an
             * abort packet */
            rxi_ConnectionError(conn, RX_PROTOCOL_ERROR);
            MUTEX_ENTER(&conn->conn_data_lock);
            tnp = rxi_SendConnectionAbort(conn, np, 1, 0);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return tnp;
        }
    }

    /* The low bits of the cid select the call channel on the connection. */
    channel = np->header.cid & RX_CHANNELMASK;
    call = conn->call[channel];
#ifdef  RX_ENABLE_LOCKS
    if (call)
        MUTEX_ENTER(&call->lock);
    /* Test to see if call struct is still attached to conn. */
    if (call != conn->call[channel]) {
        if (call)
            MUTEX_EXIT(&call->lock);
        if (type == RX_SERVER_CONNECTION) {
            call = conn->call[channel];
            /* If we started with no call attached and there is one now,
             * another thread is also running this routine and has gotten
             * the connection channel. We should drop this packet in the tests
             * below. If there was a call on this connection and it's now
             * gone, then we'll be making a new call below.
             * If there was previously a call and it's now different then
             * the old call was freed and another thread running this routine
             * has created a call on this channel. One of these two threads
             * has a packet for the old call and the code below handles those
             * cases.
             */
            if (call)
                MUTEX_ENTER(&call->lock);
        } else {
            /* This packet can't be for this call. If the new call address is
             * 0 then no call is running on this channel. If there is a call
             * then, since this is a client connection we're getting data for
             * it must be for the previous call.
             */
            rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }
    }
#endif
    currentCallNumber = conn->callNumber[channel];

    if (type == RX_SERVER_CONNECTION) { /* We're the server */
        /* A call number older than the channel's current one is stale. */
        if (np->header.callNumber < currentCallNumber) {
            rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
#ifdef  RX_ENABLE_LOCKS
            if (call)
                MUTEX_EXIT(&call->lock);
#endif
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }
        if (!call) {
            /* No call on this channel yet: create one.
             * NOTE(review): the code below releases call->lock without ever
             * acquiring it here, so rxi_NewCall presumably returns with the
             * call locked -- confirm against rxi_NewCall. */
            MUTEX_ENTER(&conn->conn_call_lock);
            call = rxi_NewCall(conn, channel);
            MUTEX_EXIT(&conn->conn_call_lock);
            *call->callNumber = np->header.callNumber;
            /* NOTE(review): all callNumber == 0 packets returned in the
             * connection-only switch above, so this trace looks
             * unreachable -- confirm. */
            if (np->header.callNumber == 0)
                dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));

            call->state = RX_STATE_PRECALL;
            clock_GetTime(&call->queueTime);
            hzero(call->bytesSent);
            hzero(call->bytesRcvd);
            /*
             * If the number of queued calls exceeds the overload
             * threshold then abort this call.
             */
            if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
                struct rx_packet *tp;

                rxi_CallError(call, rx_BusyError);
                tp = rxi_SendCallAbort(call, np, 1, 0);
                MUTEX_EXIT(&call->lock);
                MUTEX_ENTER(&conn->conn_data_lock);
                conn->refCount--;
                MUTEX_EXIT(&conn->conn_data_lock);
                rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
                return tp;
            }
            rxi_KeepAliveOn(call);
        } else if (np->header.callNumber != currentCallNumber) {
            /* The packet carries a newer call number than the call currently
             * occupying the channel (older numbers were rejected above). */
            /* Wait until the transmit queue is idle before deciding
             * whether to reset the current call. Chances are that the
             * call will be in either DALLY or HOLD state once the TQ_BUSY
             * flag is cleared.
             */
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
            while ((call->state == RX_STATE_ACTIVE)
                   && (call->flags & RX_CALL_TQ_BUSY)) {
                call->flags |= RX_CALL_TQ_WAIT;
                call->tqWaiters++;
#ifdef RX_ENABLE_LOCKS
                osirx_AssertMine(&call->lock, "rxi_Start lock3");
                CV_WAIT(&call->cv_tq, &call->lock);
#else /* RX_ENABLE_LOCKS */
                osi_rxSleep(&call->tq);
#endif /* RX_ENABLE_LOCKS */
                call->tqWaiters--;
                if (call->tqWaiters == 0)
                    call->flags &= ~RX_CALL_TQ_WAIT;
            }
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
            /* If the new call cannot be taken right now send a busy and set
             * the error condition in this call, so that it terminates as
             * quickly as possible */
            if (call->state == RX_STATE_ACTIVE) {
                struct rx_packet *tp;

                rxi_CallError(call, RX_CALL_DEAD);
                tp = rxi_SendSpecial(call, conn, np, RX_PACKET_TYPE_BUSY,
                                     NULL, 0, 1);
                MUTEX_EXIT(&call->lock);
                MUTEX_ENTER(&conn->conn_data_lock);
                conn->refCount--;
                MUTEX_EXIT(&conn->conn_data_lock);
                return tp;
            }
            /* Recycle the existing call structure for the new call; from
             * here on the steps mirror the new-call branch above. */
            rxi_ResetCall(call, 0);
            *call->callNumber = np->header.callNumber;
            /* NOTE(review): appears unreachable for the same reason as the
             * identical trace in the new-call branch -- confirm. */
            if (np->header.callNumber == 0)
                dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));

            call->state = RX_STATE_PRECALL;
            clock_GetTime(&call->queueTime);
            hzero(call->bytesSent);
            hzero(call->bytesRcvd);
            /*
             * If the number of queued calls exceeds the overload
             * threshold then abort this call.
             */
            if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
                struct rx_packet *tp;

                rxi_CallError(call, rx_BusyError);
                tp = rxi_SendCallAbort(call, np, 1, 0);
                MUTEX_EXIT(&call->lock);
                MUTEX_ENTER(&conn->conn_data_lock);
                conn->refCount--;
                MUTEX_EXIT(&conn->conn_data_lock);
                rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
                return tp;
            }
            rxi_KeepAliveOn(call);
        } else {
            /* Continuing call; do nothing here. */
        }
    } else {                    /* we're the client */
        /* Ignore all incoming acknowledgements for calls in DALLY state */
        if (call && (call->state == RX_STATE_DALLY)
            && (np->header.type == RX_PACKET_TYPE_ACK)) {
            rx_MutexIncrement(rx_stats.ignorePacketDally, rx_stats_mutex);
#ifdef  RX_ENABLE_LOCKS
            if (call) {
                MUTEX_EXIT(&call->lock);
            }
#endif
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }

        /* Ignore anything that's not relevant to the current call.  If there
         * isn't a current call, then no packet is relevant. */
        if (!call || (np->header.callNumber != currentCallNumber)) {
            rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
#ifdef  RX_ENABLE_LOCKS
            if (call) {
                MUTEX_EXIT(&call->lock);
            }
#endif
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }
        /* If the service security object index stamped in the packet does not
         * match the connection's security index, ignore the packet */
        if (np->header.securityIndex != conn->securityIndex) {
#ifdef  RX_ENABLE_LOCKS
            MUTEX_EXIT(&call->lock);
#endif
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;
        }

        /* If we're receiving the response, then all transmit packets are
         * implicitly acknowledged.  Get rid of them. */
        if (np->header.type == RX_PACKET_TYPE_DATA) {
#ifdef  AFS_GLOBAL_RXLOCK_KERNEL
            /* XXX Hack. Because we must release the global rx lock when
             * sending packets (osi_NetSend) we drop all acks while we're
             * traversing the tq in rxi_Start sending packets out because
             * packets may move to the freePacketQueue as result of being here!
             * So we drop these packets until we're safely out of the
             * traversing. Really ugly!
             * For fine grain RX locking, we set the acked field in the
             * packets and let rxi_Start remove them from the transmit queue.
             */
            if (call->flags & RX_CALL_TQ_BUSY) {
#ifdef  RX_ENABLE_LOCKS
                rxi_SetAcksInTransmitQueue(call);
#else
                /* This path drops the reference without taking
                 * conn_data_lock or releasing call->lock, unlike the other
                 * returns.  NOTE(review): presumably harmless because the
                 * mutex macros do nothing without RX_ENABLE_LOCKS --
                 * confirm. */
                conn->refCount--;
                return np;      /* xmitting; drop packet */
#endif
            } else {
                rxi_ClearTransmitQueue(call, 0);
            }
#else /* AFS_GLOBAL_RXLOCK_KERNEL */
            rxi_ClearTransmitQueue(call, 0);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
        } else {
            if (np->header.type == RX_PACKET_TYPE_ACK) {
                /* now check to see if this is an ack packet acknowledging that the
                 * server actually *lost* some hard-acked data.  If this happens we
                 * ignore this packet, as it may indicate that the server restarted in
                 * the middle of a call.  It is also possible that this is an old ack
                 * packet.  We don't abort the connection in this case, because this
                 * *might* just be an old ack packet.  The right way to detect a server
                 * restart in the midst of a call is to notice that the server epoch
                 * changed, btw.  */
                /* XXX I'm not sure this is exactly right, since tfirst **IS**
                 * XXX unacknowledged.  I think that this is off-by-one, but
                 * XXX I don't dare change it just yet, since it will
                 * XXX interact badly with the server-restart detection
                 * XXX code in receiveackpacket.  */
                if (ntohl(rx_GetInt32(np, FIRSTACKOFFSET)) < call->tfirst) {
                    rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
                    MUTEX_EXIT(&call->lock);
                    MUTEX_ENTER(&conn->conn_data_lock);
                    conn->refCount--;
                    MUTEX_EXIT(&conn->conn_data_lock);
                    return np;
                }
            }
        }                       /* else not a data packet */
    }

    /* From here on a call is attached and the packet has passed the
     * per-role filters above. */
    osirx_AssertMine(&call->lock, "rxi_ReceivePacket middle");
    /* Set remote user defined status from packet */
    call->remoteStatus = np->header.userStatus;

    /* Note the gap between the expected next packet and the actual
     * packet that arrived, when the new packet has a smaller serial number
     * than expected.  Rioses frequently reorder packets all by themselves,
     * so this will be quite important with very large window sizes.
     * Skew is checked against 0 here to avoid any dependence on the type of
     * inPacketSkew (which may be unsigned).  In C, -1 > (unsigned) 0 is always
     * true!
     * The inPacketSkew should be a smoothed running value, not just a maximum.  MTUXXX
     * see CalculateRoundTripTime for an example of how to keep smoothed values.
     * I think using a beta of 1/8 is probably appropriate.  93.04.21
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    skew = conn->lastSerial - np->header.serial;
    conn->lastSerial = np->header.serial;
    MUTEX_EXIT(&conn->conn_data_lock);
    if (skew > 0) {
        register struct rx_peer *peer;
        peer = conn->peer;
        /* Only ever raise the peer's recorded skew (a running maximum). */
        if (skew > peer->inPacketSkew) {
            dpf(("*** In skew changed from %d to %d\n", peer->inPacketSkew,
                 skew));
            peer->inPacketSkew = skew;
        }
    }

    /* Now do packet type-specific processing */
    switch (np->header.type) {
    case RX_PACKET_TYPE_DATA:
        np = rxi_ReceiveDataPacket(call, np, 1, socket, host, port, tnop,
                                   newcallp);
        break;
    case RX_PACKET_TYPE_ACK:
        /* Respond immediately to ack packets requesting acknowledgement
         * (ping packets) */
        if (np->header.flags & RX_REQUEST_ACK) {
            if (call->error)
                (void)rxi_SendCallAbort(call, 0, 1, 0);
            else
                (void)rxi_SendAck(call, 0, np->header.serial,
                                  RX_ACK_PING_RESPONSE, 1);
        }
        np = rxi_ReceiveAckPacket(call, np, 1);
        break;
    case RX_PACKET_TYPE_ABORT: {
        /* An abort packet: reset the call, passing the error up to the user. */
        /* What if error is zero? */
        /* What if the error is -1? the application will treat it as a timeout. */
        afs_int32 errdata = ntohl(*(afs_int32 *) rx_DataOf(np));
        dpf(("rxi_ReceivePacket ABORT rx_DataOf = %d", errdata));
        rxi_CallError(call, errdata);
        MUTEX_EXIT(&call->lock);
        MUTEX_ENTER(&conn->conn_data_lock);
        conn->refCount--;
        MUTEX_EXIT(&conn->conn_data_lock);
        return np;              /* returns early: lastReceiveTime is not updated for aborts */
    }
    case RX_PACKET_TYPE_BUSY:
        /* XXXX */
        break;
    case RX_PACKET_TYPE_ACKALL:
        /* All packets acknowledged, so we can drop all packets previously
         * readied for sending */
#ifdef  AFS_GLOBAL_RXLOCK_KERNEL
        /* XXX Hack. Because we can't release the global rx lock when
         * sending packets (osi_NetSend) we drop all ack pkts while we're
         * traversing the tq in rxi_Start sending packets out because
         * packets may move to the freePacketQueue as result of being
         * here! So we drop these packets until we're safely out of the
         * traversing. Really ugly!
         * For fine grain RX locking, we set the acked field in the packets
         * and let rxi_Start remove the packets from the transmit queue.
         */
        if (call->flags & RX_CALL_TQ_BUSY) {
#ifdef  RX_ENABLE_LOCKS
            rxi_SetAcksInTransmitQueue(call);
            break;
#else /* RX_ENABLE_LOCKS */
            MUTEX_EXIT(&call->lock);
            MUTEX_ENTER(&conn->conn_data_lock);
            conn->refCount--;
            MUTEX_EXIT(&conn->conn_data_lock);
            return np;          /* xmitting; drop packet */
#endif /* RX_ENABLE_LOCKS */
        }
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
        rxi_ClearTransmitQueue(call, 0);
        break;
    default:
        /* Should not reach here, unless the peer is broken: send an abort
         * packet */
        rxi_CallError(call, RX_PROTOCOL_ERROR);
        np = rxi_SendCallAbort(call, np, 1, 0);
        break;
    };
    /* Note when this last legitimate packet was received, for keep-alive
     * processing.  Note, we delay getting the time until now in the hope that
     * the packet will be delivered to the user before any get time is required
     * (if not, then the time won't actually be re-evaluated here). */
    call->lastReceiveTime = clock_Sec();
    MUTEX_EXIT(&call->lock);
    /* Drop the reference taken by rxi_FindConnection. */
    MUTEX_ENTER(&conn->conn_data_lock);
    conn->refCount--;
    MUTEX_EXIT(&conn->conn_data_lock);
    return np;
}
3028
3029 /* return true if this is an "interesting" connection from the point of view
3030     of someone trying to debug the system */
3031 int
3032 rxi_IsConnInteresting(struct rx_connection *aconn)
3033 {
3034     register int i;
3035     register struct rx_call *tcall;
3036
3037     if (aconn->flags & (RX_CONN_MAKECALL_WAITING | RX_CONN_DESTROY_ME))
3038         return 1;
3039     for (i = 0; i < RX_MAXCALLS; i++) {
3040         tcall = aconn->call[i];
3041         if (tcall) {
3042             if ((tcall->state == RX_STATE_PRECALL)
3043                 || (tcall->state == RX_STATE_ACTIVE))
3044                 return 1;
3045             if ((tcall->mode == RX_MODE_SENDING)
3046                 || (tcall->mode == RX_MODE_RECEIVING))
3047                 return 1;
3048         }
3049     }
3050     return 0;
3051 }
3052
#ifdef KERNEL
/* Decide whether an arriving packet should be dropped on the floor.
 *
 * If this is one of the last few free packets AND it wouldn't be used by
 * the receiving call to immediately satisfy a read request, drop it, since
 * accepting it might prevent a lock-holding thread from making progress in
 * its reading.  Also, if a call has been cleared while in the precall
 * state, ignore all subsequent packets (other than sequence number 1)
 * until the call is assigned to a thread.
 *
 * Returns 1 to drop the packet, 0 to accept it.  The whole decision is
 * made while holding rx_stats_mutex. */

static int
TooLow(struct rx_packet *ap, struct rx_call *acall)
{
    int drop;

    MUTEX_ENTER(&rx_stats_mutex);
    if (ap->header.seq != 1 && (acall->flags & RX_CALL_CLEARED)
        && acall->state == RX_STATE_PRECALL) {
        /* Cleared precall: only packet 1 is let through. */
        drop = 1;
    } else if (rx_nFreePackets >= rxi_dataQuota + 2) {
        /* Plenty of free packets left. */
        drop = 0;
    } else if (ap->header.seq < acall->rnext + rx_initSendWindow
               && (acall->flags & RX_CALL_READER_WAIT)) {
        /* Packets are scarce, but a reader is waiting and this packet is
         * near enough to the next one it expects. */
        drop = 0;
    } else {
        drop = 1;
    }
    MUTEX_EXIT(&rx_stats_mutex);
    return drop;
}
#endif /* KERNEL */
3077
3078 static void
3079 rxi_CheckReachEvent(struct rxevent *event, struct rx_connection *conn,
3080                     struct rx_call *acall)
3081 {
3082     struct rx_call *call = acall;
3083     struct clock when, now;
3084     int i, waiting;
3085
3086     MUTEX_ENTER(&conn->conn_data_lock);
3087     conn->checkReachEvent = NULL;
3088     waiting = conn->flags & RX_CONN_ATTACHWAIT;
3089     if (event)
3090         conn->refCount--;
3091     MUTEX_EXIT(&conn->conn_data_lock);
3092
3093     if (waiting) {
3094         if (!call) {
3095             MUTEX_ENTER(&conn->conn_call_lock);
3096             MUTEX_ENTER(&conn->conn_data_lock);
3097             for (i = 0; i < RX_MAXCALLS; i++) {
3098                 struct rx_call *tc = conn->call[i];
3099                 if (tc && tc->state == RX_STATE_PRECALL) {
3100                     call = tc;
3101                     break;
3102                 }
3103             }
3104             if (!call)
3105                 /* Indicate that rxi_CheckReachEvent is no longer running by
3106                  * clearing the flag.  Must be atomic under conn_data_lock to
3107                  * avoid a new call slipping by: rxi_CheckConnReach holds
3108                  * conn_data_lock while checking RX_CONN_ATTACHWAIT.
3109                  */
3110                 conn->flags &= ~RX_CONN_ATTACHWAIT;
3111             MUTEX_EXIT(&conn->conn_data_lock);
3112             MUTEX_EXIT(&conn->conn_call_lock);
3113         }
3114
3115         if (call) {
3116             if (call != acall)
3117                 MUTEX_ENTER(&call->lock);
3118             rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
3119             if (call != acall)
3120                 MUTEX_EXIT(&call->lock);
3121
3122             clock_GetTime(&now);
3123             when = now;
3124             when.sec += RX_CHECKREACH_TIMEOUT;
3125             MUTEX_ENTER(&conn->conn_data_lock);
3126             if (!conn->checkReachEvent) {
3127                 conn->refCount++;
3128                 conn->checkReachEvent =
3129                     rxevent_PostNow(&when, &now, rxi_CheckReachEvent, conn,
3130                                     NULL);
3131             }
3132             MUTEX_EXIT(&conn->conn_data_lock);
3133         }
3134     }
3135 }
3136
3137 static int
3138 rxi_CheckConnReach(struct rx_connection *conn, struct rx_call *call)
3139 {
3140     struct rx_service *service = conn->service;
3141     struct rx_peer *peer = conn->peer;
3142     afs_uint32 now, lastReach;
3143
3144     if (service->checkReach == 0)
3145         return 0;
3146
3147     now = clock_Sec();
3148     MUTEX_ENTER(&peer->peer_lock);
3149     lastReach = peer->lastReachTime;
3150     MUTEX_EXIT(&peer->peer_lock);
3151     if (now - lastReach < RX_CHECKREACH_TTL)
3152         return 0;
3153
3154     MUTEX_ENTER(&conn->conn_data_lock);
3155     if (conn->flags & RX_CONN_ATTACHWAIT) {
3156         MUTEX_EXIT(&conn->conn_data_lock);
3157         return 1;
3158     }
3159     conn->flags |= RX_CONN_ATTACHWAIT;
3160     MUTEX_EXIT(&conn->conn_data_lock);
3161     if (!conn->checkReachEvent)
3162         rxi_CheckReachEvent(NULL, conn, call);
3163
3164     return 1;
3165 }
3166
3167 /* try to attach call, if authentication is complete */
3168 static void
3169 TryAttach(register struct rx_call *acall, register osi_socket socket,
3170           register int *tnop, register struct rx_call **newcallp,
3171           int reachOverride)
3172 {
3173     struct rx_connection *conn = acall->conn;
3174
3175     if (conn->type == RX_SERVER_CONNECTION
3176         && acall->state == RX_STATE_PRECALL) {
3177         /* Don't attach until we have any req'd. authentication. */
3178         if (RXS_CheckAuthentication(conn->securityObject, conn) == 0) {
3179             if (reachOverride || rxi_CheckConnReach(conn, acall) == 0)
3180                 rxi_AttachServerProc(acall, socket, tnop, newcallp);
3181             /* Note:  this does not necessarily succeed; there
3182              * may not any proc available
3183              */
3184         } else {
3185             rxi_ChallengeOn(acall->conn);
3186         }
3187     }
3188 }
3189
3190 /* A data packet has been received off the interface.  This packet is
3191  * appropriate to the call (the call is in the right state, etc.).  This
3192  * routine can return a packet to the caller, for re-use.
      *
      * np may be the first packet of a jumbogram; the remaining packets are
      * split off one at a time with rxi_SplitJumboPacket and handled by the
      * same loop.  A packet is either queued on call->rq or answered at once
      * with a duplicate / exceeds-window ack.  After the loop a waiting
      * reader is woken if in-sequence data was queued, and an ack is either
      * sent immediately or scheduled through call->delayedAckEvent.
      *
      * Returns the packet that is left over for the caller (the input packet
      * if it was not queued, or the one handed back by rxi_SendAck /
      * rxi_SendCallAbort); NULL when there is none, and also when the call
      * is found in error after rxi_FillReadVec.
      */
3193
3194 struct rx_packet *
3195 rxi_ReceiveDataPacket(register struct rx_call *call,
3196                       register struct rx_packet *np, int istack,
3197                       osi_socket socket, afs_uint32 host, u_short port,
3198                       int *tnop, struct rx_call **newcallp)
3199 {
3200     int ackNeeded = 0;          /* 0 means no, otherwise ack_reason */
3201     int newPackets = 0;         /* set once an in-sequence packet is queued */
3202     int didHardAck = 0;         /* result of rxi_FillReadVec, when called */
3203     int haveLast = 0;           /* set when a queued packet has RX_LAST_PACKET */
3204     afs_uint32 seq, serial, flags;
3205     int isFirst;                /* non-zero only on the first loop iteration */
3206     struct rx_packet *tnp;      /* rest of the current jumbogram, or NULL */
3207     struct clock when, now;
3208     rx_MutexIncrement(rx_stats.dataPacketsRead, rx_stats_mutex);
3209
3210 #ifdef KERNEL
3211     /* If there are no packet buffers, drop this new packet, unless we can find
3212      * packet buffers from inactive calls */
3213     if (!call->error
3214         && (rxi_OverQuota(RX_PACKET_CLASS_RECEIVE) || TooLow(np, call))) {
3215         MUTEX_ENTER(&rx_freePktQ_lock);
3216         rxi_NeedMorePackets = TRUE;
3217         MUTEX_EXIT(&rx_freePktQ_lock);
3218         rx_MutexIncrement(rx_stats.noPacketBuffersOnRead, rx_stats_mutex);
3219         call->rprev = np->header.serial;
3220         rxi_calltrace(RX_TRACE_DROP, call);
3221         dpf(("packet %x dropped on receipt - quota problems", np));
3222         if (rxi_doreclaim)
3223             rxi_ClearReceiveQueue(call);
3224         clock_GetTime(&now);
3225         when = now;
3226         clock_Add(&when, &rx_softAckDelay);
             /* Only (re)post the delayed ack when none is pending or the
              * pending one is due later than the time just computed. */
3227         if (!call->delayedAckEvent
3228             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3229             rxevent_Cancel(call->delayedAckEvent, call,
3230                            RX_CALL_REFCOUNT_DELAY);
3231             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3232             call->delayedAckEvent =
3233                 rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
3234         }
3235         /* we've damaged this call already, might as well do it in. */
3236         return np;
3237     }
3238 #endif /* KERNEL */
3239
3240     /*
3241      * New in AFS 3.5, if the RX_JUMBO_PACKET flag is set then this
3242      * packet is one of several packets transmitted as a single
3243      * datagram. Do not send any soft or hard acks until all packets
3244      * in a jumbogram have been processed. Send negative acks right away.
3245      */
3246     for (isFirst = 1, tnp = NULL; isFirst || tnp; isFirst = 0) {
3247         /* tnp is non-null when there are more packets in the
3248          * current jumbo gram */
3249         if (tnp) {
3250             if (np)
3251                 rxi_FreePacket(np);
3252             np = tnp;
3253         }
3254
3255         seq = np->header.seq;
3256         serial = np->header.serial;
3257         flags = np->header.flags;
3258
3259         /* If the call is in an error state, send an abort message */
3260         if (call->error)
3261             return rxi_SendCallAbort(call, np, istack, 0);
3262
3263         /* The RX_JUMBO_PACKET is set in all but the last packet in each
3264          * AFS 3.5 jumbogram. */
3265         if (flags & RX_JUMBO_PACKET) {
3266             tnp = rxi_SplitJumboPacket(np, host, port, isFirst);
3267         } else {
3268             tnp = NULL;
3269         }
3270
             /* A non-zero spare header field marks the connection as using
              * packet checksums. */
3271         if (np->header.spare != 0) {
3272             MUTEX_ENTER(&call->conn->conn_data_lock);
3273             call->conn->flags |= RX_CONN_USING_PACKET_CKSUM;
3274             MUTEX_EXIT(&call->conn->conn_data_lock);
3275         }
3276
3277         /* The usual case is that this is the expected next packet */
3278         if (seq == call->rnext) {
3279
3280             /* Check to make sure it is not a duplicate of one already queued */
3281             if (queue_IsNotEmpty(&call->rq)
3282                 && queue_First(&call->rq, rx_packet)->header.seq == seq) {
3283                 rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3284                 dpf(("packet %x dropped on receipt - duplicate", np));
3285                 rxevent_Cancel(call->delayedAckEvent, call,
3286                                RX_CALL_REFCOUNT_DELAY);
3287                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3288                 ackNeeded = 0;
3289                 call->rprev = seq;
3290                 continue;
3291             }
3292
3293             /* It's the next packet. Stick it on the receive queue
3294              * for this call. Set newPackets to make sure we wake
3295              * the reader once all packets have been processed */
3296             queue_Prepend(&call->rq, np);
3297             call->nSoftAcks++;
3298             np = NULL;          /* We can't use this anymore */
3299             newPackets = 1;
3300
3301             /* If an ack is requested then set a flag to make sure we
3302              * send an acknowledgement for this packet */
3303             if (flags & RX_REQUEST_ACK) {
3304                 ackNeeded = RX_ACK_REQUESTED;
3305             }
3306
3307             /* Keep track of whether we have received the last packet */
3308             if (flags & RX_LAST_PACKET) {
3309                 call->flags |= RX_CALL_HAVE_LAST;
3310                 haveLast = 1;
3311             }
3312
3313             /* Check whether we have all of the packets for this call */
3314             if (call->flags & RX_CALL_HAVE_LAST) {
3315                 afs_uint32 tseq;        /* temporary sequence number */
3316                 struct rx_packet *tp;   /* Temporary packet pointer */
3317                 struct rx_packet *nxp;  /* Next pointer, for queue_Scan */
3318
                     /* Walk the queue while sequence numbers stay
                      * consecutive from seq; reaching the RX_LAST_PACKET
                      * packet without a gap means receive is complete. */
3319                 for (tseq = seq, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3320                     if (tseq != tp->header.seq)
3321                         break;
3322                     if (tp->header.flags & RX_LAST_PACKET) {
3323                         call->flags |= RX_CALL_RECEIVE_DONE;
3324                         break;
3325                     }
3326                     tseq++;
3327                 }
3328             }
3329
3330             /* Provide asynchronous notification for those who want it
3331              * (e.g. multi rx) */
3332             if (call->arrivalProc) {
3333                 (*call->arrivalProc) (call, call->arrivalProcHandle,
3334                                       call->arrivalProcArg);
3335                 call->arrivalProc = (void (*)())0;
3336             }
3337
3338             /* Update last packet received */
3339             call->rprev = seq;
3340
3341             /* If there is no server process serving this call, grab
3342              * one, if available. We only need to do this once. If a
3343              * server thread is available, this thread becomes a server
3344              * thread and the server thread becomes a listener thread. */
3345             if (isFirst) {
3346                 TryAttach(call, socket, tnop, newcallp, 0);
3347             }
3348         }
3349         /* This is not the expected next packet. */
3350         else {
3351             /* Determine whether this is a new or old packet, and if it's
3352              * a new one, whether it fits into the current receive window.
3353              * Also figure out whether the packet was delivered in sequence.
3354              * We use the prev variable to determine whether the new packet
3355              * is the successor of its immediate predecessor in the
3356              * receive queue, and the missing flag to determine whether
3357              * any of this packet's predecessors are missing.  */
3358
3359             afs_uint32 prev;    /* "Previous packet" sequence number */
3360             struct rx_packet *tp;       /* Temporary packet pointer */
3361             struct rx_packet *nxp;      /* Next pointer, for queue_Scan */
3362             int missing;        /* Are any predecessors missing? */
3363
3364             /* If the new packet's sequence number has been sent to the
3365              * application already, then this is a duplicate */
3366             if (seq < call->rnext) {
3367                 rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3368                 rxevent_Cancel(call->delayedAckEvent, call,
3369                                RX_CALL_REFCOUNT_DELAY);
3370                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3371                 ackNeeded = 0;
3372                 call->rprev = seq;
3373                 continue;
3374             }
3375
3376             /* If the sequence number is greater than what can be
3377              * accommodated by the current window, then send a negative
3378              * acknowledge and drop the packet */
3379             if ((call->rnext + call->rwind) <= seq) {
3380                 rxevent_Cancel(call->delayedAckEvent, call,
3381                                RX_CALL_REFCOUNT_DELAY);
3382                 np = rxi_SendAck(call, np, serial, RX_ACK_EXCEEDS_WINDOW,
3383                                  istack);
3384                 ackNeeded = 0;
3385                 call->rprev = seq;
3386                 continue;
3387             }
3388
3389             /* Look for the packet in the queue of old received packets */
3390             for (prev = call->rnext - 1, missing =
3391                  0, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3392                 /*Check for duplicate packet */
3393                 if (seq == tp->header.seq) {
3394                     rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
3395                     rxevent_Cancel(call->delayedAckEvent, call,
3396                                    RX_CALL_REFCOUNT_DELAY);
3397                     np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE,
3398                                      istack);
3399                     ackNeeded = 0;
3400                     call->rprev = seq;
3401                     goto nextloop;
3402                 }
3403                 /* If we find a higher sequence packet, break out and
3404                  * insert the new packet here. */
3405                 if (seq < tp->header.seq)
3406                     break;
3407                 /* Check for missing packet */
3408                 if (tp->header.seq != prev + 1) {
3409                     missing = 1;
3410                 }
3411
3412                 prev = tp->header.seq;
3413             }
3414
3415             /* Keep track of whether we have received the last packet. */
3416             if (flags & RX_LAST_PACKET) {
3417                 call->flags |= RX_CALL_HAVE_LAST;
3418             }
3419
3420             /* It's within the window: add it to the receive queue.
3421              * tp is left by the previous loop either pointing at the
3422              * packet before which to insert the new packet, or at the
3423              * queue head if the queue is empty or the packet should be
3424              * appended. */
3425             queue_InsertBefore(tp, np);
3426             call->nSoftAcks++;
3427             np = NULL;
3428
3429             /* Check whether we have all of the packets for this call */
3430             if ((call->flags & RX_CALL_HAVE_LAST)
3431                 && !(call->flags & RX_CALL_RECEIVE_DONE)) {
3432                 afs_uint32 tseq;        /* temporary sequence number */
3433
3434                 for (tseq =
3435                      call->rnext, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3436                     if (tseq != tp->header.seq)
3437                         break;
3438                     if (tp->header.flags & RX_LAST_PACKET) {
3439                         call->flags |= RX_CALL_RECEIVE_DONE;
3440                         break;
3441                     }
3442                     tseq++;
3443                 }
3444             }
3445
3446             /* We need to send an ack if the packet is out of sequence,
3447              * or if an ack was requested by the peer. */
3448             if (seq != prev + 1 || missing) {
3449                 ackNeeded = RX_ACK_OUT_OF_SEQUENCE;
3450             } else if (flags & RX_REQUEST_ACK) {
3451                 ackNeeded = RX_ACK_REQUESTED;
3452             }
3453
3454             /* Acknowledge the last packet for each call */
3455             if (flags & RX_LAST_PACKET) {
3456                 haveLast = 1;
3457             }
3458
3459             call->rprev = seq;
3460         }
3461       nextloop:;               /* goto target of the duplicate check in the queue scan */
3462     }
3463
3464     if (newPackets) {
3465         /*
3466          * If the receiver is waiting for an iovec, fill the iovec
3467          * using the data from the receive queue */
3468         if (call->flags & RX_CALL_IOVEC_WAIT) {
3469             didHardAck = rxi_FillReadVec(call, serial);
3470             /* the call may have been aborted */
3471             if (call->error) {
3472                 return NULL;
3473             }
3474             if (didHardAck) {
3475                 ackNeeded = 0;
3476             }
3477         }
3478
3479         /* Wakeup the reader if any */
3480         if ((call->flags & RX_CALL_READER_WAIT)
3481             && (!(call->flags & RX_CALL_IOVEC_WAIT) || !(call->iovNBytes)
3482                 || (call->iovNext >= call->iovMax)
3483                 || (call->flags & RX_CALL_RECEIVE_DONE))) {
3484             call->flags &= ~RX_CALL_READER_WAIT;
3485 #ifdef  RX_ENABLE_LOCKS
3486             CV_BROADCAST(&call->cv_rq);
3487 #else
3488             osi_rxWakeup(&call->rq);
3489 #endif
3490         }
3491     }
3492
3493     /*
3494      * Send an ack when requested by the peer, or once every
3495      * rxi_SoftAckRate packets until the last packet has been
3496      * received. Always send a soft ack for the last packet in
3497      * the server's reply. */
3498     if (ackNeeded) {
3499         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3500         np = rxi_SendAck(call, np, serial, ackNeeded, istack);
3501     } else if (call->nSoftAcks > (u_short) rxi_SoftAckRate) {
3502         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3503         np = rxi_SendAck(call, np, serial, RX_ACK_IDLE, istack);
3504     } else if (call->nSoftAcks) {
3505         clock_GetTime(&now);
3506         when = now;
             /* Pick the delay: rx_lastAckDelay when a last packet was queued
              * and the most recently processed packet lacks
              * RX_CLIENT_INITIATED, rx_softAckDelay otherwise. */
3507         if (haveLast && !(flags & RX_CLIENT_INITIATED)) {
3508             clock_Add(&when, &rx_lastAckDelay);
3509         } else {
3510             clock_Add(&when, &rx_softAckDelay);
3511         }
             /* Only (re)post the delayed ack when none is pending or the
              * pending one is due later than the time just computed. */
3512         if (!call->delayedAckEvent
3513             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3514             rxevent_Cancel(call->delayedAckEvent, call,
3515                            RX_CALL_REFCOUNT_DELAY);
3516             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3517             call->delayedAckEvent =
3518                 rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
3519         }
3520     } else if (call->flags & RX_CALL_RECEIVE_DONE) {
3521         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3522     }
3523
3524     return np;
3525 }
3526
3527 #ifdef  ADAPT_WINDOW
     /* Forward declaration: rxi_ComputeRate is called further down, and only
      * from code that is likewise compiled under ADAPT_WINDOW. */
3528 static void rxi_ComputeRate();
3529 #endif
3530
3531 static void
3532 rxi_UpdatePeerReach(struct rx_connection *conn, struct rx_call *acall)
3533 {
3534     struct rx_peer *peer = conn->peer;
3535
3536     MUTEX_ENTER(&peer->peer_lock);
3537     peer->lastReachTime = clock_Sec();
3538     MUTEX_EXIT(&peer->peer_lock);
3539
3540     MUTEX_ENTER(&conn->conn_data_lock);
3541     if (conn->flags & RX_CONN_ATTACHWAIT) {
3542         int i;
3543
3544         conn->flags &= ~RX_CONN_ATTACHWAIT;
3545         MUTEX_EXIT(&conn->conn_data_lock);
3546
3547         for (i = 0; i < RX_MAXCALLS; i++) {
3548             struct rx_call *call = conn->call[i];
3549             if (call) {
3550                 if (call != acall)
3551                     MUTEX_ENTER(&call->lock);
3552                 /* tnop can be null if newcallp is null */
3553                 TryAttach(call, (osi_socket) - 1, NULL, NULL, 1);
3554                 if (call != acall)
3555                     MUTEX_EXIT(&call->lock);
3556             }
3557         }
3558     } else
3559         MUTEX_EXIT(&conn->conn_data_lock);
3560 }
3561
3562 static const char *
3563 rx_ack_reason(int reason)
3564 {
3565     switch (reason) {
3566     case RX_ACK_REQUESTED:
3567         return "requested";
3568     case RX_ACK_DUPLICATE:
3569         return "duplicate";
3570     case RX_ACK_OUT_OF_SEQUENCE:
3571         return "sequence";
3572     case RX_ACK_EXCEEDS_WINDOW:
3573         return "window";
3574     case RX_ACK_NOSPACE:
3575         return "nospace";
3576     case RX_ACK_PING:
3577         return "ping";
3578     case RX_ACK_PING_RESPONSE:
3579         return "response";
3580     case RX_ACK_DELAY:
3581         return "delay";
3582     case RX_ACK_IDLE:
3583         return "idle";
3584     default:
3585         return "unknown!!";
3586     }
3587 }
3588
3589
3590 /* rxi_ComputePeerNetStats
3591  *
3592  * Called exclusively by rxi_ReceiveAckPacket to compute network link
3593  * estimates (like RTT and throughput) based on ack packets.  Caller
3594  * must ensure that the packet in question is the right one (i.e.
3595  * serial number matches).
3596  */
3597 static void
3598 rxi_ComputePeerNetStats(struct rx_call *call, struct rx_packet *p,
3599                         struct rx_ackPacket *ap, struct rx_packet *np)
3600 {
3601     struct rx_peer *peer = call->conn->peer;
3602
3603     /* Use RTT if not delayed by client. */
3604     if (ap->reason != RX_ACK_DELAY)
3605         rxi_ComputeRoundTripTime(p, &p->timeSent, peer);
3606 #ifdef ADAPT_WINDOW
3607     rxi_ComputeRate(peer, call, p, np, ap->reason);
3608 #endif
3609 }
3610
3611 /* The real smarts of the whole thing.  */
3612 struct rx_packet *
3613 rxi_ReceiveAckPacket(register struct rx_call *call, struct rx_packet *np,
3614                      int istack)
3615 {
3616     struct rx_ackPacket *ap;
3617     int nAcks;
3618     register struct rx_packet *tp;
3619     register struct rx_packet *nxp;     /* Next packet pointer for queue_Scan */
3620     register struct rx_connection *conn = call->conn;
3621     struct rx_peer *peer = conn->peer;
3622     afs_uint32 first;
3623     afs_uint32 serial;
3624     /* because there are CM's that are bogus, sending weird values for this. */
3625     afs_uint32 skew = 0;
3626     int nbytes;
3627     int missing;
3628     int acked;
3629     int nNacked = 0;
3630     int newAckCount = 0;
3631     u_short maxMTU = 0;         /* Set if peer supports AFS 3.4a jumbo datagrams */
3632     int maxDgramPackets = 0;    /* Set if peer supports AFS 3.5 jumbo datagrams */
3633
3634     rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
3635     ap = (struct rx_ackPacket *)rx_DataOf(np);
3636     nbytes = rx_Contiguous(np) - (int)((ap->acks) - (u_char *) ap);
3637     if (nbytes < 0)
3638         return np;              /* truncated ack packet */
3639
3640     /* depends on ack packet struct */
3641     nAcks = MIN((unsigned)nbytes, (unsigned)ap->nAcks);
3642     first = ntohl(ap->firstPacket);
3643     serial = ntohl(ap->serial);
3644     /* temporarily disabled -- needs to degrade over time
3645      * skew = ntohs(ap->maxSkew); */
3646
3647     /* Ignore ack packets received out of order */
3648     if (first < call->tfirst) {
3649         return np;
3650     }
3651
3652     if (np->header.flags & RX_SLOW_START_OK) {
3653         call->flags |= RX_CALL_SLOW_START_OK;
3654     }
3655
3656     if (ap->reason == RX_ACK_PING_RESPONSE)
3657         rxi_UpdatePeerReach(conn, call);
3658
3659 #ifdef RXDEBUG
3660 #ifdef AFS_NT40_ENV
3661     if (rxdebug_active) {
3662         char msg[512];
3663         size_t len;
3664
3665         len = _snprintf(msg, sizeof(msg),
3666                         "tid[%d] RACK: reason %s serial %u previous %u seq %u skew %d first %u acks %u space %u ",
3667                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
3668                          ntohl(ap->serial), ntohl(ap->previousPacket),
3669                          (unsigned int)np->header.seq, (unsigned int)skew,
3670                          ntohl(ap->firstPacket), ap->nAcks, ntohs(ap->bufferSpace) );
3671         if (nAcks) {
3672             int offset;
3673
3674             for (offset = 0; offset < nAcks && len < sizeof(msg); offset++)
3675                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
3676         }
3677         msg[len++]='\n';
3678         msg[len] = '\0';
3679         OutputDebugString(msg);
3680     }
3681 #else /* AFS_NT40_ENV */
3682     if (rx_Log) {
3683         fprintf(rx_Log,
3684                 "RACK: reason %x previous %u seq %u serial %u skew %d first %u",
3685                 ap->reason, ntohl(ap->previousPacket),
3686                 (unsigned int)np->header.seq, (unsigned int)serial,
3687                 (unsigned int)skew, ntohl(ap->firstPacket));
3688         if (nAcks) {
3689             int offset;
3690             for (offset = 0; offset < nAcks; offset++)
3691                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
3692                      rx_Log);
3693         }
3694         putc('\n', rx_Log);
3695     }
3696 #endif /* AFS_NT40_ENV */
3697 #endif
3698
3699     /* Update the outgoing packet skew value to the latest value of
3700      * the peer's incoming packet skew value.  The ack packet, of
3701      * course, could arrive out of order, but that won't affect things
3702      * much */
3703     MUTEX_ENTER(&peer->peer_lock);
3704     peer->outPacketSkew = skew;
3705
3706     /* Check for packets that no longer need to be transmitted, and
3707      * discard them.  This only applies to packets positively
3708      * acknowledged as having been sent to the peer's upper level.
3709      * All other packets must be retained.  So only packets with
3710      * sequence numbers < ap->firstPacket are candidates. */
3711     for (queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3712         if (tp->header.seq >= first)
3713             break;
3714         call->tfirst = tp->header.seq + 1;
3715         if (serial
3716             && (tp->header.serial == serial || tp->firstSerial == serial))
3717             rxi_ComputePeerNetStats(call, tp, ap, np);
3718         if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3719             newAckCount++;
3720         }
3721 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3722         /* XXX Hack. Because we have to release the global rx lock when sending
3723          * packets (osi_NetSend) we drop all acks while we're traversing the tq
3724          * in rxi_Start sending packets out because packets may move to the
3725          * freePacketQueue as result of being here! So we drop these packets until
3726          * we're safely out of the traversing. Really ugly!
3727          * To make it even uglier, if we're using fine grain locking, we can
3728          * set the ack bits in the packets and have rxi_Start remove the packets
3729          * when it's done transmitting.
3730          */
3731         if (call->flags & RX_CALL_TQ_BUSY) {
3732 #ifdef RX_ENABLE_LOCKS
3733             tp->flags |= RX_PKTFLAG_ACKED;
3734             call->flags |= RX_CALL_TQ_SOME_ACKED;
3735 #else /* RX_ENABLE_LOCKS */
3736             break;
3737 #endif /* RX_ENABLE_LOCKS */
3738         } else
3739 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3740         {
3741             queue_Remove(tp);
3742             rxi_FreePacket(tp); /* rxi_FreePacket mustn't wake up anyone, preemptively. */
3743         }
3744     }
3745
3746 #ifdef ADAPT_WINDOW
3747     /* Give rate detector a chance to respond to ping requests */
3748     if (ap->reason == RX_ACK_PING_RESPONSE) {
3749         rxi_ComputeRate(peer, call, 0, np, ap->reason);
3750     }
3751 #endif
3752
3753     /* N.B. we don't turn off any timers here.  They'll go away by themselves, anyway */
3754
3755     /* Now go through explicit acks/nacks and record the results in
3756      * the waiting packets.  These are packets that can't be released
3757      * yet, even with a positive acknowledge.  This positive
3758      * acknowledge only means the packet has been received by the
3759      * peer, not that it will be retained long enough to be sent to
3760      * the peer's upper level.  In addition, reset the transmit timers
3761      * of any missing packets (those packets that must be missing
3762      * because this packet was out of sequence) */
3763
3764     call->nSoftAcked = 0;
3765     for (missing = 0, queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3766         /* Update round trip time if the ack was stimulated on receipt
3767          * of this packet */
3768 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
3769 #ifdef RX_ENABLE_LOCKS
3770         if (tp->header.seq >= first)
3771 #endif /* RX_ENABLE_LOCKS */
3772 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3773             if (serial
3774                 && (tp->header.serial == serial || tp->firstSerial == serial))
3775                 rxi_ComputePeerNetStats(call, tp, ap, np);
3776
3777         /* Set the acknowledge flag per packet based on the
3778          * information in the ack packet. An acknowlegded packet can
3779          * be downgraded when the server has discarded a packet it
3780          * soacked previously, or when an ack packet is received
3781          * out of sequence. */
3782         if (tp->header.seq < first) {
3783             /* Implicit ack information */
3784             if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3785                 newAckCount++;
3786             }
3787             tp->flags |= RX_PKTFLAG_ACKED;
3788         } else if (tp->header.seq < first + nAcks) {
3789             /* Explicit ack information:  set it in the packet appropriately */
3790             if (ap->acks[tp->header.seq - first] == RX_ACK_TYPE_ACK) {
3791                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3792                     newAckCount++;
3793                     tp->flags |= RX_PKTFLAG_ACKED;
3794                 }
3795                 if (missing) {
3796                     nNacked++;
3797                 } else {
3798                     call->nSoftAcked++;
3799                 }
3800             } else /* RX_ACK_TYPE_NACK */ {
3801                 tp->flags &= ~RX_PKTFLAG_ACKED;
3802                 missing = 1;
3803             }
3804         } else {
3805             tp->flags &= ~RX_PKTFLAG_ACKED;
3806             missing = 1;
3807         }
3808
3809         /* If packet isn't yet acked, and it has been transmitted at least
3810          * once, reset retransmit time using latest timeout
3811          * ie, this should readjust the retransmit timer for all outstanding
3812          * packets...  So we don't just retransmit when we should know better*/
3813
3814         if (!(tp->flags & RX_PKTFLAG_ACKED) && !clock_IsZero(&tp->retryTime)) {
3815             tp->retryTime = tp->timeSent;
3816             clock_Add(&tp->retryTime, &peer->timeout);
3817             /* shift by eight because one quarter-sec ~ 256 milliseconds */
3818             clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8);
3819         }
3820     }
3821
3822     /* If the window has been extended by this acknowledge packet,
3823      * then wakeup a sender waiting in alloc for window space, or try
3824      * sending packets now, if he's been sitting on packets due to
3825      * lack of window space */
3826     if (call->tnext < (call->tfirst + call->twind)) {
3827 #ifdef  RX_ENABLE_LOCKS
3828         CV_SIGNAL(&call->cv_twind);
3829 #else
3830         if (call->flags & RX_CALL_WAIT_WINDOW_ALLOC) {
3831             call->flags &= ~RX_CALL_WAIT_WINDOW_ALLOC;
3832             osi_rxWakeup(&call->twind);
3833         }
3834 #endif
3835         if (call->flags & RX_CALL_WAIT_WINDOW_SEND) {
3836             call->flags &= ~RX_CALL_WAIT_WINDOW_SEND;
3837         }
3838     }
3839
3840     /* if the ack packet has a receivelen field hanging off it,
3841      * update our state */
3842     if (np->length >= rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32)) {
3843         afs_uint32 tSize;
3844
3845         /* If the ack packet has a "recommended" size that is less than
3846          * what I am using now, reduce my size to match */
3847         rx_packetread(np, rx_AckDataSize(ap->nAcks) + sizeof(afs_int32),
3848                       (int)sizeof(afs_int32), &tSize);
3849         tSize = (afs_uint32) ntohl(tSize);
3850         peer->natMTU = rxi_AdjustIfMTU(MIN(tSize, peer->ifMTU));
3851
3852         /* Get the maximum packet size to send to this peer */
3853         rx_packetread(np, rx_AckDataSize(ap->nAcks), (int)sizeof(afs_int32),
3854                       &tSize);
3855         tSize = (afs_uint32) ntohl(tSize);
3856         tSize = (afs_uint32) MIN(tSize, rx_MyMaxSendSize);
3857         tSize = rxi_AdjustMaxMTU(peer->natMTU, tSize);
3858
3859         /* sanity check - peer might have restarted with different params.
3860          * If peer says "send less", dammit, send less...  Peer should never
3861          * be unable to accept packets of the size that prior AFS versions would
3862          * send without asking.  */
3863         if (peer->maxMTU != tSize) {
3864             if (peer->maxMTU > tSize) /* possible cong., maxMTU decreased */
3865                 peer->congestSeq++;
3866             peer->maxMTU = tSize;
3867             peer->MTU = MIN(tSize, peer->MTU);
3868             call->MTU = MIN(call->MTU, tSize);
3869         }
3870
3871         if (np->length == rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32)) {
3872             /* AFS 3.4a */
3873             rx_packetread(np,
3874                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3875                           (int)sizeof(afs_int32), &tSize);
3876             tSize = (afs_uint32) ntohl(tSize);  /* peer's receive window, if it's */
3877             if (tSize < call->twind) {  /* smaller than our send */
3878                 call->twind = tSize;    /* window, we must send less... */
3879                 call->ssthresh = MIN(call->twind, call->ssthresh);
3880                 call->conn->twind[call->channel] = call->twind;
3881             }
3882
3883             /* Only send jumbograms to 3.4a fileservers. 3.3a RX gets the
3884              * network MTU confused with the loopback MTU. Calculate the
3885              * maximum MTU here for use in the slow start code below.
3886              */
3887             maxMTU = peer->maxMTU;
3888             /* Did peer restart with older RX version? */
3889             if (peer->maxDgramPackets > 1) {
3890                 peer->maxDgramPackets = 1;
3891             }
3892         } else if (np->length >=
3893                    rx_AckDataSize(ap->nAcks) + 4 * sizeof(afs_int32)) {
3894             /* AFS 3.5 */
3895             rx_packetread(np,
3896                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3897                           sizeof(afs_int32), &tSize);
3898             tSize = (afs_uint32) ntohl(tSize);
3899             /*
3900              * As of AFS 3.5 we set the send window to match the receive window.
3901              */
3902             if (tSize < call->twind) {
3903                 call->twind = tSize;
3904                 call->conn->twind[call->channel] = call->twind;
3905                 call->ssthresh = MIN(call->twind, call->ssthresh);
3906             } else if (tSize > call->twind) {
3907                 call->twind = tSize;
3908                 call->conn->twind[call->channel] = call->twind;
3909             }
3910
3911             /*
3912              * As of AFS 3.5, a jumbogram is more than one fixed size
3913              * packet transmitted in a single UDP datagram. If the remote
3914              * MTU is smaller than our local MTU then never send a datagram
3915              * larger than the natural MTU.
3916              */
3917             rx_packetread(np,
3918                           rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32),
3919                           sizeof(afs_int32), &tSize);
3920             maxDgramPackets = (afs_uint32) ntohl(tSize);
3921             maxDgramPackets = MIN(maxDgramPackets, rxi_nDgramPackets);
3922             maxDgramPackets =
3923                 MIN(maxDgramPackets, (int)(peer->ifDgramPackets));
3924             maxDgramPackets = MIN(maxDgramPackets, tSize);
3925             if (maxDgramPackets > 1) {
3926                 peer->maxDgramPackets = maxDgramPackets;
3927                 call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
3928             } else {
3929                 peer->maxDgramPackets = 1;
3930                 call->MTU = peer->natMTU;
3931             }
3932         } else if (peer->maxDgramPackets > 1) {
3933             /* Restarted with lower version of RX */
3934             peer->maxDgramPackets = 1;
3935         }
3936     } else if (peer->maxDgramPackets > 1
3937                || peer->maxMTU != OLD_MAX_PACKET_SIZE) {
3938         /* Restarted with lower version of RX */
3939         peer->maxMTU = OLD_MAX_PACKET_SIZE;
3940         peer->natMTU = OLD_MAX_PACKET_SIZE;
3941         peer->MTU = OLD_MAX_PACKET_SIZE;
3942         peer->maxDgramPackets = 1;
3943         peer->nDgramPackets = 1;
3944         peer->congestSeq++;
3945         call->MTU = OLD_MAX_PACKET_SIZE;
3946     }
3947
3948     if (nNacked) {
3949         /*
3950          * Calculate how many datagrams were successfully received after
3951          * the first missing packet and adjust the negative ack counter
3952          * accordingly.
3953          */
3954         call->nAcks = 0;
3955         call->nNacks++;
3956         nNacked = (nNacked + call->nDgramPackets - 1) / call->nDgramPackets;
3957         if (call->nNacks < nNacked) {
3958             call->nNacks = nNacked;
3959         }
3960     } else {
3961         call->nAcks += newAckCount;
3962         call->nNacks = 0;
3963     }
3964
3965     if (call->flags & RX_CALL_FAST_RECOVER) {
3966         if (nNacked) {
3967             call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3968         } else {
3969             call->flags &= ~RX_CALL_FAST_RECOVER;
3970             call->cwind = call->nextCwind;
3971             call->nextCwind = 0;
3972             call->nAcks = 0;
3973         }
3974         call->nCwindAcks = 0;
3975     } else if (nNacked && call->nNacks >= (u_short) rx_nackThreshold) {
3976         /* Three negative acks in a row trigger congestion recovery */
3977 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3978         MUTEX_EXIT(&peer->peer_lock);
3979         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
3980             /* someone else is waiting to start recovery */
3981             return np;
3982         }
3983         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
3984         rxi_WaitforTQBusy(call);
3985         MUTEX_ENTER(&peer->peer_lock);
3986 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3987         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
3988         call->flags |= RX_CALL_FAST_RECOVER;
3989         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
3990         call->cwind =
3991             MIN((int)(call->ssthresh + rx_nackThreshold), rx_maxSendWindow);
3992         call->nDgramPackets = MAX(2, (int)call->nDgramPackets) >> 1;
3993         call->nextCwind = call->ssthresh;
3994         call->nAcks = 0;
3995         call->nNacks = 0;
3996         peer->MTU = call->MTU;
3997         peer->cwind = call->nextCwind;
3998         peer->nDgramPackets = call->nDgramPackets;
3999         peer->congestSeq++;
4000         call->congestSeq = peer->congestSeq;
4001         /* Reset the resend times on the packets that were nacked
4002          * so we will retransmit as soon as the window permits*/
4003         for (acked = 0, queue_ScanBackwards(&call->tq, tp, nxp, rx_packet)) {
4004             if (acked) {
4005                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
4006                     clock_Zero(&tp->retryTime);
4007                 }
4008             } else if (tp->flags & RX_PKTFLAG_ACKED) {
4009                 acked = 1;
4010             }
4011         }
4012     } else {
4013         /* If cwind is smaller than ssthresh, then increase
4014          * the window one packet for each ack we receive (exponential
4015          * growth).
4016          * If cwind is greater than or equal to ssthresh then increase
4017          * the congestion window by one packet for each cwind acks we
4018          * receive (linear growth).  */
4019         if (call->cwind < call->ssthresh) {
4020             call->cwind =
4021                 MIN((int)call->ssthresh, (int)(call->cwind + newAckCount));
4022             call->nCwindAcks = 0;
4023         } else {
4024             call->nCwindAcks += newAckCount;
4025             if (call->nCwindAcks >= call->cwind) {
4026                 call->nCwindAcks = 0;
4027                 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
4028             }
4029         }
4030         /*
4031          * If we have received several acknowledgements in a row then
4032          * it is time to increase the size of our datagrams
4033          */
4034         if ((int)call->nAcks > rx_nDgramThreshold) {
4035             if (peer->maxDgramPackets > 1) {
4036                 if (call->nDgramPackets < peer->maxDgramPackets) {
4037                     call->nDgramPackets++;
4038                 }
4039                 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4040             } else if (call->MTU < peer->maxMTU) {
4041                 call->MTU += peer->natMTU;
4042                 call->MTU = MIN(call->MTU, peer->maxMTU);
4043             }
4044             call->nAcks = 0;
4045         }
4046     }
4047
4048     MUTEX_EXIT(&peer->peer_lock);       /* rxi_Start will lock peer. */
4049
4050     /* Servers need to hold the call until all response packets have
4051      * been acknowledged. Soft acks are good enough since clients
4052      * are not allowed to clear their receive queues. */
4053     if (call->state == RX_STATE_HOLD
4054         && call->tfirst + call->nSoftAcked >= call->tnext) {
4055         call->state = RX_STATE_DALLY;
4056         rxi_ClearTransmitQueue(call, 0);
4057     } else if (!queue_IsEmpty(&call->tq)) {
4058         rxi_Start(0, call, 0, istack);
4059     }
4060     return np;
4061 }
4062
4063 /* Received a response to a challenge packet */
4064 struct rx_packet *
4065 rxi_ReceiveResponsePacket(register struct rx_connection *conn,
4066                           register struct rx_packet *np, int istack)
4067 {
4068     int error;
4069
4070     /* Ignore the packet if we're the client */
4071     if (conn->type == RX_CLIENT_CONNECTION)
4072         return np;
4073
4074     /* If already authenticated, ignore the packet (it's probably a retry) */
4075     if (RXS_CheckAuthentication(conn->securityObject, conn) == 0)
4076         return np;
4077
4078     /* Otherwise, have the security object evaluate the response packet */
4079     error = RXS_CheckResponse(conn->securityObject, conn, np);
4080     if (error) {
4081         /* If the response is invalid, reset the connection, sending
4082          * an abort to the peer */
4083 #ifndef KERNEL
4084         rxi_Delay(1);
4085 #endif
4086         rxi_ConnectionError(conn, error);
4087         MUTEX_ENTER(&conn->conn_data_lock);
4088         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4089         MUTEX_EXIT(&conn->conn_data_lock);
4090         return np;
4091     } else {
4092         /* If the response is valid, any calls waiting to attach
4093          * servers can now do so */
4094         int i;
4095
4096         for (i = 0; i < RX_MAXCALLS; i++) {
4097             struct rx_call *call = conn->call[i];
4098             if (call) {
4099                 MUTEX_ENTER(&call->lock);
4100                 if (call->state == RX_STATE_PRECALL)
4101                     rxi_AttachServerProc(call, (osi_socket) - 1, NULL, NULL);
4102                 /* tnop can be null if newcallp is null */
4103                 MUTEX_EXIT(&call->lock);
4104             }
4105         }
4106
4107         /* Update the peer reachability information, just in case
4108          * some calls went into attach-wait while we were waiting
4109          * for authentication..
4110          */
4111         rxi_UpdatePeerReach(conn, NULL);
4112     }
4113     return np;
4114 }
4115
4116 /* A client has received an authentication challenge: the security
4117  * object is asked to cough up a respectable response packet to send
4118  * back to the server.  The server is responsible for retrying the
4119  * challenge if it fails to get a response. */
4120
4121 struct rx_packet *
4122 rxi_ReceiveChallengePacket(register struct rx_connection *conn,
4123                            register struct rx_packet *np, int istack)
4124 {
4125     int error;
4126
4127     /* Ignore the challenge if we're the server */
4128     if (conn->type == RX_SERVER_CONNECTION)
4129         return np;
4130
4131     /* Ignore the challenge if the connection is otherwise idle; someone's
4132      * trying to use us as an oracle. */
4133     if (!rxi_HasActiveCalls(conn))
4134         return np;
4135
4136     /* Send the security object the challenge packet.  It is expected to fill
4137      * in the response. */
4138     error = RXS_GetResponse(conn->securityObject, conn, np);
4139
4140     /* If the security object is unable to return a valid response, reset the
4141      * connection and send an abort to the peer.  Otherwise send the response
4142      * packet to the peer connection. */
4143     if (error) {
4144         rxi_ConnectionError(conn, error);
4145         MUTEX_ENTER(&conn->conn_data_lock);
4146         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4147         MUTEX_EXIT(&conn->conn_data_lock);
4148     } else {
4149         np = rxi_SendSpecial((struct rx_call *)0, conn, np,
4150                              RX_PACKET_TYPE_RESPONSE, NULL, -1, istack);
4151     }
4152     return np;
4153 }
4154
4155
4156 /* Find an available server process to service the current request in
4157  * the given call structure.  If one isn't available, queue up this
4158  * call so it eventually gets one */
4159 void
4160 rxi_AttachServerProc(register struct rx_call *call,
4161                      register osi_socket socket, register int *tnop,
4162                      register struct rx_call **newcallp)
4163 {
4164     register struct rx_serverQueueEntry *sq;
4165     register struct rx_service *service = call->conn->service;
4166     register int haveQuota = 0;
4167
4168     /* May already be attached */
4169     if (call->state == RX_STATE_ACTIVE)
4170         return;
4171
4172     MUTEX_ENTER(&rx_serverPool_lock);
4173
4174     haveQuota = QuotaOK(service);
4175     if ((!haveQuota) || queue_IsEmpty(&rx_idleServerQueue)) {
4176         /* If there are no processes available to service this call,
4177          * put the call on the incoming call queue (unless it's
4178          * already on the queue).
4179          */
4180 #ifdef RX_ENABLE_LOCKS
4181         if (haveQuota)
4182             ReturnToServerPool(service);
4183 #endif /* RX_ENABLE_LOCKS */
4184
4185         if (!(call->flags & RX_CALL_WAIT_PROC)) {
4186             call->flags |= RX_CALL_WAIT_PROC;
4187             MUTEX_ENTER(&rx_stats_mutex);
4188             rx_nWaiting++;
4189             rx_nWaited++;
4190             MUTEX_EXIT(&rx_stats_mutex);
4191             rxi_calltrace(RX_CALL_ARRIVAL, call);
4192             SET_CALL_QUEUE_LOCK(call, &rx_serverPool_lock);
4193             queue_Append(&rx_incomingCallQueue, call);
4194         }
4195     } else {
4196         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
4197
4198         /* If hot threads are enabled, and both newcallp and sq->socketp
4199          * are non-null, then this thread will process the call, and the
4200          * idle server thread will start listening on this threads socket.
4201          */
4202         queue_Remove(sq);
4203         if (rx_enable_hot_thread && newcallp && sq->socketp) {
4204             *newcallp = call;
4205             *tnop = sq->tno;
4206             *sq->socketp = socket;
4207             clock_GetTime(&call->startTime);
4208             CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
4209         } else {
4210             sq->newcall = call;
4211         }
4212         if (call->flags & RX_CALL_WAIT_PROC) {
4213             /* Conservative:  I don't think this should happen */
4214             call->flags &= ~RX_CALL_WAIT_PROC;
4215             if (queue_IsOnQueue(call)) {
4216                 queue_Remove(call);
4217                 MUTEX_ENTER(&rx_stats_mutex);
4218                 rx_nWaiting--;
4219                 MUTEX_EXIT(&rx_stats_mutex);
4220             }
4221         }
4222         call->state = RX_STATE_ACTIVE;
4223         call->mode = RX_MODE_RECEIVING;
4224 #ifdef RX_KERNEL_TRACE
4225         {
4226             int glockOwner = ISAFS_GLOCK();
4227             if (!glockOwner)
4228                 AFS_GLOCK();
4229             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
4230                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
4231                        call);
4232             if (!glockOwner)
4233                 AFS_GUNLOCK();
4234         }
4235 #endif
4236         if (call->flags & RX_CALL_CLEARED) {
4237             /* send an ack now to start the packet flow up again */
4238             call->flags &= ~RX_CALL_CLEARED;
4239             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4240         }
4241 #ifdef  RX_ENABLE_LOCKS
4242         CV_SIGNAL(&sq->cv);
4243 #else
4244         service->nRequestsRunning++;
4245         if (service->nRequestsRunning <= service->minProcs)
4246             rxi_minDeficit--;
4247         rxi_availProcs--;
4248         osi_rxWakeup(sq);
4249 #endif
4250     }
4251     MUTEX_EXIT(&rx_serverPool_lock);
4252 }
4253
4254 /* Delay the sending of an acknowledge event for a short while, while
4255  * a new call is being prepared (in the case of a client) or a reply
4256  * is being prepared (in the case of a server).  Rather than sending
4257  * an ack packet, an ACKALL packet is sent. */
4258 void
4259 rxi_AckAll(struct rxevent *event, register struct rx_call *call, char *dummy)
4260 {
4261 #ifdef RX_ENABLE_LOCKS
4262     if (event) {
4263         MUTEX_ENTER(&call->lock);
4264         call->delayedAckEvent = NULL;
4265         CALL_RELE(call, RX_CALL_REFCOUNT_ACKALL);
4266     }
4267     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4268                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4269     if (event)
4270         MUTEX_EXIT(&call->lock);
4271 #else /* RX_ENABLE_LOCKS */
4272     if (event)
4273         call->delayedAckEvent = NULL;
4274     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4275                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4276 #endif /* RX_ENABLE_LOCKS */
4277 }
4278
4279 void
4280 rxi_SendDelayedAck(struct rxevent *event, register struct rx_call *call,
4281                    char *dummy)
4282 {
4283 #ifdef RX_ENABLE_LOCKS
4284     if (event) {
4285         MUTEX_ENTER(&call->lock);
4286         if (event == call->delayedAckEvent)
4287             call->delayedAckEvent = NULL;
4288         CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
4289     }
4290     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4291     if (event)
4292         MUTEX_EXIT(&call->lock);
4293 #else /* RX_ENABLE_LOCKS */
4294     if (event)
4295         call->delayedAckEvent = NULL;
4296     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4297 #endif /* RX_ENABLE_LOCKS */
4298 }
4299
4300
4301 #ifdef RX_ENABLE_LOCKS
4302 /* Set ack in all packets in transmit queue. rxi_Start will deal with
4303  * clearing them out.
4304  */
4305 static void
4306 rxi_SetAcksInTransmitQueue(register struct rx_call *call)
4307 {
4308     register struct rx_packet *p, *tp;
4309     int someAcked = 0;
4310
4311     for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4312         p->flags |= RX_PKTFLAG_ACKED;
4313         someAcked = 1;
4314     }
4315     if (someAcked) {
4316         call->flags |= RX_CALL_TQ_CLEARME;
4317         call->flags |= RX_CALL_TQ_SOME_ACKED;
4318     }
4319
4320     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4321     rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
4322     call->tfirst = call->tnext;
4323     call->nSoftAcked = 0;
4324
4325     if (call->flags & RX_CALL_FAST_RECOVER) {
4326         call->flags &= ~RX_CALL_FAST_RECOVER;
4327         call->cwind = call->nextCwind;
4328         call->nextCwind = 0;
4329     }
4330
4331     CV_SIGNAL(&call->cv_twind);
4332 }
4333 #endif /* RX_ENABLE_LOCKS */
4334
4335 /* Clear out the transmit queue for the current call (all packets have
4336  * been received by peer) */
4337 void
4338 rxi_ClearTransmitQueue(register struct rx_call *call, register int force)
4339 {
4340 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4341     register struct rx_packet *p, *tp;
4342
4343     if (!force && (call->flags & RX_CALL_TQ_BUSY)) {
4344         int someAcked = 0;
4345         for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4346             p->flags |= RX_PKTFLAG_ACKED;
4347             someAcked = 1;
4348         }
4349         if (someAcked) {
4350             call->flags |= RX_CALL_TQ_CLEARME;
4351             call->flags |= RX_CALL_TQ_SOME_ACKED;
4352         }
4353     } else {
4354 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4355         rxi_FreePackets(0, &call->tq);
4356 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4357         call->flags &= ~RX_CALL_TQ_CLEARME;
4358     }
4359 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4360
4361     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4362     rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
4363     call->tfirst = call->tnext; /* implicitly acknowledge all data already sent */
4364     call->nSoftAcked = 0;
4365
4366     if (call->flags & RX_CALL_FAST_RECOVER) {
4367         call->flags &= ~RX_CALL_FAST_RECOVER;
4368         call->cwind = call->nextCwind;
4369     }
4370 #ifdef  RX_ENABLE_LOCKS
4371     CV_SIGNAL(&call->cv_twind);
4372 #else
4373     osi_rxWakeup(&call->twind);
4374 #endif
4375 }
4376
4377 void
4378 rxi_ClearReceiveQueue(register struct rx_call *call)
4379 {
4380     if (queue_IsNotEmpty(&call->rq)) {
4381         rx_packetReclaims += rxi_FreePackets(0, &call->rq);
4382         call->flags &= ~(RX_CALL_RECEIVE_DONE | RX_CALL_HAVE_LAST);
4383     }
4384     if (call->state == RX_STATE_PRECALL) {
4385         call->flags |= RX_CALL_CLEARED;
4386     }
4387 }
4388
4389 /* Send an abort packet for the specified call */
4390 struct rx_packet *
4391 rxi_SendCallAbort(register struct rx_call *call, struct rx_packet *packet,
4392                   int istack, int force)
4393 {
4394     afs_int32 error;
4395     struct clock when, now;
4396
4397     if (!call->error)
4398         return packet;
4399
4400     /* Clients should never delay abort messages */
4401     if (rx_IsClientConn(call->conn))
4402         force = 1;
4403
4404     if (call->abortCode != call->error) {
4405         call->abortCode = call->error;
4406         call->abortCount = 0;
4407     }
4408
4409     if (force || rxi_callAbortThreshhold == 0
4410         || call->abortCount < rxi_callAbortThreshhold) {
4411         if (call->delayedAbortEvent) {
4412             rxevent_Cancel(call->delayedAbortEvent, call,
4413                            RX_CALL_REFCOUNT_ABORT);
4414         }
4415         error = htonl(call->error);
4416         call->abortCount++;
4417         packet =
4418             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
4419                             (char *)&error, sizeof(error), istack);
4420     } else if (!call->delayedAbortEvent) {
4421         clock_GetTime(&now);
4422         when = now;
4423         clock_Addmsec(&when, rxi_callAbortDelay);
4424         CALL_HOLD(call, RX_CALL_REFCOUNT_ABORT);
4425         call->delayedAbortEvent =
4426             rxevent_PostNow(&when, &now, rxi_SendDelayedCallAbort, call, 0);
4427     }
4428     return packet;
4429 }
4430
4431 /* Send an abort packet for the specified connection.  Packet is an
4432  * optional pointer to a packet that can be used to send the abort.
4433  * Once the number of abort messages reaches the threshhold, an
4434  * event is scheduled to send the abort. Setting the force flag
4435  * overrides sending delayed abort messages.
4436  *
4437  * NOTE: Called with conn_data_lock held. conn_data_lock is dropped
4438  *       to send the abort packet.
4439  */
4440 struct rx_packet *
4441 rxi_SendConnectionAbort(register struct rx_connection *conn,
4442                         struct rx_packet *packet, int istack, int force)
4443 {
4444     afs_int32 error;
4445     struct clock when, now;
4446
4447     if (!conn->error)
4448         return packet;
4449
4450     /* Clients should never delay abort messages */
4451     if (rx_IsClientConn(conn))
4452         force = 1;
4453
4454     if (force || rxi_connAbortThreshhold == 0
4455         || conn->abortCount < rxi_connAbortThreshhold) {
4456         if (conn->delayedAbortEvent) {
4457             rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
4458         }
4459         error = htonl(conn->error);
4460         conn->abortCount++;
4461         MUTEX_EXIT(&conn->conn_data_lock);
4462         packet =
4463             rxi_SendSpecial((struct rx_call *)0, conn, packet,
4464                             RX_PACKET_TYPE_ABORT, (char *)&error,
4465                             sizeof(error), istack);
4466         MUTEX_ENTER(&conn->conn_data_lock);
4467     } else if (!conn->delayedAbortEvent) {
4468         clock_GetTime(&now);
4469         when = now;
4470         clock_Addmsec(&when, rxi_connAbortDelay);
4471         conn->delayedAbortEvent =
4472             rxevent_PostNow(&when, &now, rxi_SendDelayedConnAbort, conn, 0);
4473     }
4474     return packet;
4475 }
4476
4477 /* Associate an error all of the calls owned by a connection.  Called
4478  * with error non-zero.  This is only for really fatal things, like
4479  * bad authentication responses.  The connection itself is set in
4480  * error at this point, so that future packets received will be
4481  * rejected. */
4482 void
4483 rxi_ConnectionError(register struct rx_connection *conn,
4484                     register afs_int32 error)
4485 {
4486     if (error) {
4487         register int i;
4488
4489         dpf(("rxi_ConnectionError conn %x error %d", conn, error));
4490
4491         MUTEX_ENTER(&conn->conn_data_lock);
4492         if (conn->challengeEvent)
4493             rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
4494         if (conn->checkReachEvent) {
4495             rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
4496             conn->checkReachEvent = 0;
4497             conn->flags &= ~RX_CONN_ATTACHWAIT;
4498             conn->refCount--;
4499         }
4500         MUTEX_EXIT(&conn->conn_data_lock);
4501         for (i = 0; i < RX_MAXCALLS; i++) {
4502             struct rx_call *call = conn->call[i];
4503             if (call) {
4504                 MUTEX_ENTER(&call->lock);
4505                 rxi_CallError(call, error);
4506                 MUTEX_EXIT(&call->lock);
4507             }
4508         }
4509         conn->error = error;
4510         rx_MutexIncrement(rx_stats.fatalErrors, rx_stats_mutex);
4511     }
4512 }
4513
4514 void
4515 rxi_CallError(register struct rx_call *call, afs_int32 error)
4516 {
4517     dpf(("rxi_CallError call %x error %d call->error %d", call, error, call->error));
4518     if (call->error)
4519         error = call->error;
4520
4521 #ifdef RX_GLOBAL_RXLOCK_KERNEL
4522     if (!((call->flags & RX_CALL_TQ_BUSY) || (call->tqWaiters > 0))) {
4523         rxi_ResetCall(call, 0);
4524     }
4525 #else
4526     rxi_ResetCall(call, 0);
4527 #endif
4528     call->error = error;
4529     call->mode = RX_MODE_ERROR;
4530 }
4531
/* Reset various fields in a call structure, and wake up waiting
 * processes.  Some fields aren't changed: state & mode are not
 * touched (these must be set by the caller), and bufptr, nLeft, and
 * nFree are not reset, since these fields are manipulated by
 * unprotected macros, and may only be reset by non-interrupting code.
 *
 * call     the call to reset; call->conn and call->conn->peer are
 *          dereferenced, so they must be valid
 * newcall  non-zero: the abort bookkeeping (abortCode, abortCount) is
 *          cleared; zero: the call's congestion state is folded back
 *          into the peer when the congestion sequence numbers match
 *
 * Besides clearing counters and sequence numbers this routine fires a
 * pending arrivalProc, flushes a pending delayed abort, empties the
 * receive queue (and the transmit queue unless it is busy), removes the
 * call from whatever queue it is on, and cancels the keep-alive and
 * delayed-ack events.
 */
#ifdef ADAPT_WINDOW
/* this code requires that call->conn be set properly as a pre-condition. */
#endif /* ADAPT_WINDOW */

void
rxi_ResetCall(register struct rx_call *call, register int newcall)
{
    register int flags;
    register struct rx_peer *peer;
    struct rx_packet *packet;

    dpf(("rxi_ResetCall(call %x, newcall %d)\n", call, newcall));

    /* Notify anyone who is waiting for asynchronous packet arrival;
     * the callback is one-shot and is cleared after being invoked */
    if (call->arrivalProc) {
        (*call->arrivalProc) (call, call->arrivalProcHandle,
                              call->arrivalProcArg);
        call->arrivalProc = (void (*)())0;
    }

    /* A delayed abort is still pending: cancel the event and send the
     * abort now (force = 1) using a freshly allocated special packet.
     * If no packet can be allocated the abort is simply not sent. */
    if (call->delayedAbortEvent) {
        rxevent_Cancel(call->delayedAbortEvent, call, RX_CALL_REFCOUNT_ABORT);
        packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
        if (packet) {
            rxi_SendCallAbort(call, packet, 0, 1);
            rxi_FreePacket(packet);
        }
    }

    /*
     * Update the peer with the congestion information in this call
     * so other calls on this connection can pick up where this call
     * left off. If the congestion sequence numbers don't match then
     * another call experienced a retransmission.
     */
    peer = call->conn->peer;
    MUTEX_ENTER(&peer->peer_lock);
    if (!newcall) {
        if (call->congestSeq == peer->congestSeq) {
            peer->cwind = MAX(peer->cwind, call->cwind);
            peer->MTU = MAX(peer->MTU, call->MTU);
            peer->nDgramPackets =
                MAX(peer->nDgramPackets, call->nDgramPackets);
        }
    } else {
        call->abortCode = 0;
        call->abortCount = 0;
    }
    /* Re-seed the call's congestion parameters from the peer */
    if (peer->maxDgramPackets > 1) {
        call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
    } else {
        call->MTU = peer->MTU;
    }
    call->cwind = MIN((int)peer->cwind, (int)peer->nDgramPackets);
    call->ssthresh = rx_maxSendWindow;
    call->nDgramPackets = peer->nDgramPackets;
    call->congestSeq = peer->congestSeq;
    MUTEX_EXIT(&peer->peer_lock);

    /* Snapshot the flags: call->flags is overwritten below, but the
     * wake-up decisions further down depend on the old values */
    flags = call->flags;
    rxi_ClearReceiveQueue(call);
#ifdef  AFS_GLOBAL_RXLOCK_KERNEL
    if (flags & RX_CALL_TQ_BUSY) {
        /* The transmit queue is in use: leave it alone, request a
         * clean-up through TQ_CLEARME and keep only the BUSY and WAIT
         * bits of the old flags */
        call->flags = RX_CALL_TQ_CLEARME | RX_CALL_TQ_BUSY;
        call->flags |= (flags & RX_CALL_TQ_WAIT);
    } else
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    {
        rxi_ClearTransmitQueue(call, 0);
        queue_Init(&call->tq);
        if (call->tqWaiters || (flags & RX_CALL_TQ_WAIT)) {
            dpf(("rcall %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
        }
        call->flags = 0;
        /* Issue one wake-up per recorded transmit-queue waiter and
         * bring the waiter count down to zero */
        while (call->tqWaiters) {
#ifdef RX_ENABLE_LOCKS
            CV_BROADCAST(&call->cv_tq);
#else /* RX_ENABLE_LOCKS */
            osi_rxWakeup(&call->tq);
#endif /* RX_ENABLE_LOCKS */
            call->tqWaiters--;
        }
    }
    queue_Init(&call->rq);
    call->error = 0;
    /* Window sizes are reloaded from the per-channel values kept on
     * the connection */
    call->twind = call->conn->twind[call->channel];
    call->rwind = call->conn->rwind[call->channel];
    call->nSoftAcked = 0;
    call->nextCwind = 0;
    call->nAcks = 0;
    call->nNacks = 0;
    call->nCwindAcks = 0;
    call->nSoftAcks = 0;
    call->nHardAcks = 0;

    /* Sequence numbers start over at 1 */
    call->tfirst = call->rnext = call->tnext = 1;
    call->rprev = 0;
    call->lastAcked = 0;
    call->localStatus = call->remoteStatus = 0;

    /* Wake up whoever was blocked on the call, judged by the flags
     * snapshot taken above */
    if (flags & RX_CALL_READER_WAIT) {
#ifdef  RX_ENABLE_LOCKS
        CV_BROADCAST(&call->cv_rq);
#else
        osi_rxWakeup(&call->rq);
#endif
    }
    if (flags & RX_CALL_WAIT_PACKETS) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_PacketsUnWait();    /* XXX */
        MUTEX_EXIT(&rx_freePktQ_lock);
    }
#ifdef  RX_ENABLE_LOCKS
    CV_SIGNAL(&call->cv_twind);
#else
    if (flags & RX_CALL_WAIT_WINDOW_ALLOC)
        osi_rxWakeup(&call->twind);
#endif

#ifdef RX_ENABLE_LOCKS
    /* The following ensures that we don't mess with any queue while some
     * other thread might also be doing so. The call_queue_lock field
     * is only modified under the call lock. If the call is in the process
     * of being removed from a queue, the call is not locked until
     * the queue lock is dropped and only then is the call_queue_lock field
     * zero'd out. So it's safe to lock the queue if call_queue_lock is set.
     * Note that any other routine which removes a call from a queue has to
     * obtain the queue lock before examining the queue and removing the call.
     */
    if (call->call_queue_lock) {
        MUTEX_ENTER(call->call_queue_lock);
        if (queue_IsOnQueue(call)) {
            queue_Remove(call);
            /* A call that was waiting for a server process no longer
             * counts towards rx_nWaiting */
            if (flags & RX_CALL_WAIT_PROC) {
                MUTEX_ENTER(&rx_stats_mutex);
                rx_nWaiting--;
                MUTEX_EXIT(&rx_stats_mutex);
            }
        }
        MUTEX_EXIT(call->call_queue_lock);
        CLEAR_CALL_QUEUE_LOCK(call);
    }
#else /* RX_ENABLE_LOCKS */
    if (queue_IsOnQueue(call)) {
        queue_Remove(call);
        if (flags & RX_CALL_WAIT_PROC)
            rx_nWaiting--;
    }
#endif /* RX_ENABLE_LOCKS */

    /* Finally stop the keep-alive machinery and any pending delayed ack */
    rxi_KeepAliveOff(call);
    rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
}
4691
4692 /* Send an acknowledge for the indicated packet (seq,serial) of the
4693  * indicated call, for the indicated reason (reason).  This
4694  * acknowledge will specifically acknowledge receiving the packet, and
4695  * will also specify which other packets for this call have been
4696  * received.  This routine returns the packet that was used to the
4697  * caller.  The caller is responsible for freeing it or re-using it.
4698  * This acknowledgement also returns the highest sequence number
4699  * actually read out by the higher level to the sender; the sender
4700  * promises to keep around packets that have not been read by the
4701  * higher level yet (unless, of course, the sender decides to abort
4702  * the call altogether).  Any of p, seq, serial, pflags, or reason may
4703  * be set to zero without ill effect.  That is, if they are zero, they
4704  * will not convey any information.
4705  * NOW there is a trailer field, after the ack where it will safely be
4706  * ignored by mundanes, which indicates the maximum size packet this
4707  * host can swallow.  */
4708 /*
4709     register struct rx_packet *optionalPacket;  use to send ack (or null)
4710     int seq;                     Sequence number of the packet we are acking
4711     int serial;                  Serial number of the packet
4712     int pflags;                  Flags field from packet header
4713     int reason;                  Reason an acknowledge was prompted
4714 */
4715
4716 struct rx_packet *
4717 rxi_SendAck(register struct rx_call *call,
4718             register struct rx_packet *optionalPacket, int serial, int reason,
4719             int istack)
4720 {
4721     struct rx_ackPacket *ap;
4722     register struct rx_packet *rqp;
4723     register struct rx_packet *nxp;     /* For queue_Scan */
4724     register struct rx_packet *p;
4725     u_char offset;
4726     afs_int32 templ;
4727 #ifdef RX_ENABLE_TSFPQ
4728     struct rx_ts_info_t * rx_ts_info;
4729 #endif
4730
4731     /*
4732      * Open the receive window once a thread starts reading packets
4733      */
4734     if (call->rnext > 1) {
4735         call->conn->rwind[call->channel] = call->rwind = rx_maxReceiveWindow;
4736     }
4737
4738     call->nHardAcks = 0;
4739     call->nSoftAcks = 0;
4740     if (call->rnext > call->lastAcked)
4741         call->lastAcked = call->rnext;
4742     p = optionalPacket;
4743
4744     if (p) {
4745         rx_computelen(p, p->length);    /* reset length, you never know */
4746     } /* where that's been...         */
4747 #ifdef RX_ENABLE_TSFPQ
4748     else {
4749         RX_TS_INFO_GET(rx_ts_info);
4750         if ((p = rx_ts_info->local_special_packet)) {
4751             rx_computelen(p, p->length);
4752         } else if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4753             rx_ts_info->local_special_packet = p;
4754         } else { /* We won't send the ack, but don't panic. */
4755             return optionalPacket;
4756         }
4757     }
4758 #else
4759     else if (!(p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4760         /* We won't send the ack, but don't panic. */
4761         return optionalPacket;
4762     }
4763 #endif
4764
4765     templ =
4766         rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) -
4767         rx_GetDataSize(p);
4768     if (templ > 0) {
4769         if (rxi_AllocDataBuf(p, templ, RX_PACKET_CLASS_SPECIAL) > 0) {
4770 #ifndef RX_ENABLE_TSFPQ
4771             if (!optionalPacket)
4772                 rxi_FreePacket(p);
4773 #endif
4774             return optionalPacket;
4775         }
4776         templ = rx_AckDataSize(call->rwind) + 2 * sizeof(afs_int32);
4777         if (rx_Contiguous(p) < templ) {
4778 #ifndef RX_ENABLE_TSFPQ
4779             if (!optionalPacket)
4780                 rxi_FreePacket(p);
4781 #endif
4782             return optionalPacket;
4783         }
4784     }
4785
4786
4787     /* MTUXXX failing to send an ack is very serious.  We should */
4788     /* try as hard as possible to send even a partial ack; it's */
4789     /* better than nothing. */
4790     ap = (struct rx_ackPacket *)rx_DataOf(p);
4791     ap->bufferSpace = htonl(0); /* Something should go here, sometime */
4792     ap->reason = reason;
4793
4794     /* The skew computation used to be bogus, I think it's better now. */
4795     /* We should start paying attention to skew.    XXX  */
4796     ap->serial = htonl(serial);
4797     ap->maxSkew = 0;            /* used to be peer->inPacketSkew */
4798
4799     ap->firstPacket = htonl(call->rnext);       /* First packet not yet forwarded to reader */
4800     ap->previousPacket = htonl(call->rprev);    /* Previous packet received */
4801
4802     /* No fear of running out of ack packet here because there can only be at most
4803      * one window full of unacknowledged packets.  The window size must be constrained
4804      * to be less than the maximum ack size, of course.  Also, an ack should always
4805      * fit into a single packet -- it should not ever be fragmented.  */
4806     for (offset = 0, queue_Scan(&call->rq, rqp, nxp, rx_packet)) {
4807         if (!rqp || !call->rq.next
4808             || (rqp->header.seq > (call->rnext + call->rwind))) {
4809 #ifndef RX_ENABLE_TSFPQ
4810             if (!optionalPacket)
4811                 rxi_FreePacket(p);
4812 #endif
4813             rxi_CallError(call, RX_CALL_DEAD);
4814             return optionalPacket;
4815         }
4816
4817         while (rqp->header.seq > call->rnext + offset)
4818             ap->acks[offset++] = RX_ACK_TYPE_NACK;
4819         ap->acks[offset++] = RX_ACK_TYPE_ACK;
4820
4821         if ((offset > (u_char) rx_maxReceiveWindow) || (offset > call->rwind)) {
4822 #ifndef RX_ENABLE_TSFPQ
4823             if (!optionalPacket)
4824                 rxi_FreePacket(p);
4825 #endif
4826             rxi_CallError(call, RX_CALL_DEAD);
4827             return optionalPacket;
4828         }
4829     }
4830
4831     ap->nAcks = offset;
4832     p->length = rx_AckDataSize(offset) + 4 * sizeof(afs_int32);
4833
4834     /* these are new for AFS 3.3 */
4835     templ = rxi_AdjustMaxMTU(call->conn->peer->ifMTU, rx_maxReceiveSize);
4836     templ = htonl(templ);
4837     rx_packetwrite(p, rx_AckDataSize(offset), sizeof(afs_int32), &templ);
4838     templ = htonl(call->conn->peer->ifMTU);
4839     rx_packetwrite(p, rx_AckDataSize(offset) + sizeof(afs_int32),
4840                    sizeof(afs_int32), &templ);
4841
4842     /* new for AFS 3.4 */
4843     templ = htonl(call->rwind);
4844     rx_packetwrite(p, rx_AckDataSize(offset) + 2 * sizeof(afs_int32),
4845                    sizeof(afs_int32), &templ);
4846
4847     /* new for AFS 3.5 */
4848     templ = htonl(call->conn->peer->ifDgramPackets);
4849     rx_packetwrite(p, rx_AckDataSize(offset) + 3 * sizeof(afs_int32),
4850                    sizeof(afs_int32), &templ);
4851
4852     p->header.serviceId = call->conn->serviceId;
4853     p->header.cid = (call->conn->cid | call->channel);
4854     p->header.callNumber = *call->callNumber;
4855     p->header.seq = 0;
4856     p->header.securityIndex = call->conn->securityIndex;
4857     p->header.epoch = call->conn->epoch;
4858     p->header.type = RX_PACKET_TYPE_ACK;
4859     p->header.flags = RX_SLOW_START_OK;
4860     if (reason == RX_ACK_PING) {
4861         p->header.flags |= RX_REQUEST_ACK;
4862 #ifdef ADAPT_WINDOW
4863         clock_GetTime(&call->pingRequestTime);
4864 #endif
4865     }
4866     if (call->conn->type == RX_CLIENT_CONNECTION)
4867         p->header.flags |= RX_CLIENT_INITIATED;
4868
4869 #ifdef RXDEBUG
4870 #ifdef AFS_NT40_ENV
4871     if (rxdebug_active) {
4872         char msg[512];
4873         size_t len;
4874
4875         len = _snprintf(msg, sizeof(msg),
4876                         "tid[%d] SACK: reason %s serial %u previous %u seq %u first %u acks %u space %u ",
4877                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
4878                          ntohl(ap->serial), ntohl(ap->previousPacket),
4879                          (unsigned int)p->header.seq, ntohl(ap->firstPacket),
4880                          ap->nAcks, ntohs(ap->bufferSpace) );
4881         if (ap->nAcks) {
4882             int offset;
4883
4884             for (offset = 0; offset < ap->nAcks && len < sizeof(msg); offset++)
4885                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
4886         }
4887         msg[len++]='\n';
4888         msg[len] = '\0';
4889         OutputDebugString(msg);
4890     }
4891 #else /* AFS_NT40_ENV */
4892     if (rx_Log) {
4893         fprintf(rx_Log, "SACK: reason %x previous %u seq %u first %u ",
4894                 ap->reason, ntohl(ap->previousPacket),
4895                 (unsigned int)p->header.seq, ntohl(ap->firstPacket));
4896         if (ap->nAcks) {
4897             for (offset = 0; offset < ap->nAcks; offset++)
4898                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
4899                      rx_Log);
4900         }
4901         putc('\n', rx_Log);
4902     }
4903 #endif /* AFS_NT40_ENV */
4904 #endif
4905     {
4906         register int i, nbytes = p->length;
4907
4908         for (i = 1; i < p->niovecs; i++) {      /* vec 0 is ALWAYS header */
4909             if (nbytes <= p->wirevec[i].iov_len) {
4910                 register int savelen, saven;
4911
4912                 savelen = p->wirevec[i].iov_len;
4913                 saven = p->niovecs;
4914                 p->wirevec[i].iov_len = nbytes;
4915                 p->niovecs = i + 1;
4916                 rxi_Send(call, p, istack);
4917                 p->wirevec[i].iov_len = savelen;
4918                 p->niovecs = saven;
4919                 break;
4920             } else
4921                 nbytes -= p->wirevec[i].iov_len;
4922         }
4923     }
4924     rx_MutexIncrement(rx_stats.ackPacketsSent, rx_stats_mutex);
4925 #ifndef RX_ENABLE_TSFPQ
4926     if (!optionalPacket)
4927         rxi_FreePacket(p);
4928 #endif
4929     return optionalPacket;      /* Return packet for re-use by caller */
4930 }
4931
4932 /* Send all of the packets in the list in single datagram */
4933 static void
4934 rxi_SendList(struct rx_call *call, struct rx_packet **list, int len,
4935              int istack, int moreFlag, struct clock *now,
4936              struct clock *retryTime, int resending)
4937 {
4938     int i;
4939     int requestAck = 0;
4940     int lastPacket = 0;
4941     struct rx_connection *conn = call->conn;
4942     struct rx_peer *peer = conn->peer;
4943
4944     MUTEX_ENTER(&peer->peer_lock);
4945     peer->nSent += len;
4946     if (resending)
4947         peer->reSends += len;
4948     rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4949     MUTEX_EXIT(&peer->peer_lock);
4950
4951     if (list[len - 1]->header.flags & RX_LAST_PACKET) {
4952         lastPacket = 1;
4953     }
4954
4955     /* Set the packet flags and schedule the resend events */
4956     /* Only request an ack for the last packet in the list */
4957     for (i = 0; i < len; i++) {
4958         list[i]->retryTime = *retryTime;
4959         if (list[i]->header.serial) {
4960             /* Exponentially backoff retry times */
4961             if (list[i]->backoff < MAXBACKOFF) {
4962                 /* so it can't stay == 0 */
4963                 list[i]->backoff = (list[i]->backoff << 1) + 1;
4964             } else
4965                 list[i]->backoff++;
4966             clock_Addmsec(&(list[i]->retryTime),
4967                           ((afs_uint32) list[i]->backoff) << 8);
4968         }
4969
4970         /* Wait a little extra for the ack on the last packet */
4971         if (lastPacket && !(list[i]->header.flags & RX_CLIENT_INITIATED)) {
4972             clock_Addmsec(&(list[i]->retryTime), 400);
4973         }
4974
4975         /* Record the time sent */
4976         list[i]->timeSent = *now;
4977
4978         /* Ask for an ack on retransmitted packets,  on every other packet
4979          * if the peer doesn't support slow start. Ask for an ack on every
4980          * packet until the congestion window reaches the ack rate. */
4981         if (list[i]->header.serial) {
4982             requestAck = 1;
4983             rx_MutexIncrement(rx_stats.dataPacketsReSent, rx_stats_mutex);
4984         } else {
4985             /* improved RTO calculation- not Karn */
4986             list[i]->firstSent = *now;
4987             if (!lastPacket && (call->cwind <= (u_short) (conn->ackRate + 1)
4988                                 || (!(call->flags & RX_CALL_SLOW_START_OK)
4989                                     && (list[i]->header.seq & 1)))) {
4990                 requestAck = 1;
4991             }
4992         }
4993
4994         MUTEX_ENTER(&peer->peer_lock);
4995         peer->nSent++;
4996         if (resending)
4997             peer->reSends++;
4998         rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4999         MUTEX_EXIT(&peer->peer_lock);
5000
5001         /* Tag this packet as not being the last in this group,
5002          * for the receiver's benefit */
5003         if (i < len - 1 || moreFlag) {
5004             list[i]->header.flags |= RX_MORE_PACKETS;
5005         }
5006
5007         /* Install the new retransmit time for the packet, and
5008          * record the time sent */
5009         list[i]->timeSent = *now;
5010     }
5011
5012     if (requestAck) {
5013         list[len - 1]->header.flags |= RX_REQUEST_ACK;
5014     }
5015
5016     /* Since we're about to send a data packet to the peer, it's
5017      * safe to nuke any scheduled end-of-packets ack */
5018     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5019
5020     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5021     MUTEX_EXIT(&call->lock);
5022     if (len > 1) {
5023         rxi_SendPacketList(call, conn, list, len, istack);
5024     } else {
5025         rxi_SendPacket(call, conn, list[0], istack);
5026     }
5027     MUTEX_ENTER(&call->lock);
5028     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5029
5030     /* Update last send time for this call (for keep-alive
5031      * processing), and for the connection (so that we can discover
5032      * idle connections) */
5033     call->lastSendData = conn->lastSendTime = call->lastSendTime = clock_Sec();
5034 }
5035
5036 /* When sending packets we need to follow these rules:
5037  * 1. Never send more than maxDgramPackets in a jumbogram.
5038  * 2. Never send a packet with more than two iovecs in a jumbogram.
5039  * 3. Never send a retransmitted packet in a jumbogram.
5040  * 4. Never send more than cwind/4 packets in a jumbogram
5041  * We always keep the last list we should have sent so we
5042  * can set the RX_MORE_PACKETS flags correctly.
5043  */
5044 static void
5045 rxi_SendXmitList(struct rx_call *call, struct rx_packet **list, int len,
5046                  int istack, struct clock *now, struct clock *retryTime,
5047                  int resending)
5048 {
5049     int i, cnt, lastCnt = 0;
5050     struct rx_packet **listP, **lastP = 0;
5051     struct rx_peer *peer = call->conn->peer;
5052     int morePackets = 0;
5053
5054     for (cnt = 0, listP = &list[0], i = 0; i < len; i++) {
5055         /* Does the current packet force us to flush the current list? */
5056         if (cnt > 0
5057             && (list[i]->header.serial || (list[i]->flags & RX_PKTFLAG_ACKED)
5058                 || list[i]->length > RX_JUMBOBUFFERSIZE)) {
5059             if (lastCnt > 0) {
5060                 rxi_SendList(call, lastP, lastCnt, istack, 1, now, retryTime,
5061                              resending);
5062                 /* If the call enters an error state stop sending, or if
5063                  * we entered congestion recovery mode, stop sending */
5064                 if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5065                     return;
5066             }
5067             lastP = listP;
5068             lastCnt = cnt;
5069             listP = &list[i];
5070             cnt = 0;
5071         }
5072         /* Add the current packet to the list if it hasn't been acked.
5073          * Otherwise adjust the list pointer to skip the current packet.  */
5074         if (!(list[i]->flags & RX_PKTFLAG_ACKED)) {
5075             cnt++;
5076             /* Do we need to flush the list? */
5077             if (cnt >= (int)peer->maxDgramPackets
5078                 || cnt >= (int)call->nDgramPackets || cnt >= (int)call->cwind
5079                 || list[i]->header.serial
5080                 || list[i]->length != RX_JUMBOBUFFERSIZE) {
5081                 if (lastCnt > 0) {
5082                     rxi_SendList(call, lastP, lastCnt, istack, 1, now,
5083                                  retryTime, resending);
5084                     /* If the call enters an error state stop sending, or if
5085                      * we entered congestion recovery mode, stop sending */
5086                     if (call->error
5087                         || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5088                         return;
5089                 }
5090                 lastP = listP;
5091                 lastCnt = cnt;
5092                 listP = &list[i + 1];
5093                 cnt = 0;
5094             }
5095         } else {
5096             if (cnt != 0) {
5097                 osi_Panic("rxi_SendList error");
5098             }
5099             listP = &list[i + 1];
5100         }
5101     }
5102
5103     /* Send the whole list when the call is in receive mode, when
5104      * the call is in eof mode, when we are in fast recovery mode,
5105      * and when we have the last packet */
5106     if ((list[len - 1]->header.flags & RX_LAST_PACKET)
5107         || call->mode == RX_MODE_RECEIVING || call->mode == RX_MODE_EOF
5108         || (call->flags & RX_CALL_FAST_RECOVER)) {
5109         /* Check for the case where the current list contains
5110          * an acked packet. Since we always send retransmissions
5111          * in a separate packet, we only need to check the first
5112          * packet in the list */
5113         if (cnt > 0 && !(listP[0]->flags & RX_PKTFLAG_ACKED)) {
5114             morePackets = 1;
5115         }
5116         if (lastCnt > 0) {
5117             rxi_SendList(call, lastP, lastCnt, istack, morePackets, now,
5118                          retryTime, resending);
5119             /* If the call enters an error state stop sending, or if
5120              * we entered congestion recovery mode, stop sending */
5121             if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5122                 return;
5123         }
5124         if (morePackets) {
5125             rxi_SendList(call, listP, cnt, istack, 0, now, retryTime,
5126                          resending);
5127         }
5128     } else if (lastCnt > 0) {
5129         rxi_SendList(call, lastP, lastCnt, istack, 0, now, retryTime,
5130                      resending);
5131     }
5132 }
5133
#if defined(RX_ENABLE_LOCKS)
/* Entry point for callers that do not hold the call lock: acquire
 * call->lock, run rxi_Start with the very same arguments, and release
 * the lock again once it returns. */
void
rxi_StartUnlocked(struct rxevent *event, register struct rx_call *call,
                  void *arg1, int istack)
{
    MUTEX_ENTER(&call->lock);
    rxi_Start(event, call, arg1, istack);
    MUTEX_EXIT(&call->lock);
}
#endif /* RX_ENABLE_LOCKS */
5145
5146 /* This routine is called when new packets are readied for
5147  * transmission and when retransmission may be necessary, or when the
5148  * transmission window or burst count are favourable.  This should be
5149  * better optimized for new packets, the usual case, now that we've
5150  * got rid of queues of send packets. XXXXXXXXXXX */
5151 void
5152 rxi_Start(struct rxevent *event, register struct rx_call *call,
5153           void *arg1, int istack)
5154 {
5155     struct rx_packet *p;
5156     register struct rx_packet *nxp;     /* Next pointer for queue_Scan */
5157     struct rx_peer *peer = call->conn->peer;
5158     struct clock now, usenow, retryTime;
5159     int haveEvent;
5160     int nXmitPackets;
5161     int maxXmitPackets;
5162     struct rx_packet **xmitList;
5163     int resending = 0;
5164
5165     /* If rxi_Start is being called as a result of a resend event,
5166      * then make sure that the event pointer is removed from the call
5167      * structure, since there is no longer a per-call retransmission
5168      * event pending. */
5169     if (event && event == call->resendEvent) {
5170         CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
5171         call->resendEvent = NULL;
5172         resending = 1;
5173         if (queue_IsEmpty(&call->tq)) {
5174             /* Nothing to do */
5175             return;
5176         }
5177         /* Timeouts trigger congestion recovery */
5178 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5179         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5180             /* someone else is waiting to start recovery */
5181             return;
5182         }
5183         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
5184         rxi_WaitforTQBusy(call);
5185 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5186         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
5187         call->flags |= RX_CALL_FAST_RECOVER;
5188         if (peer->maxDgramPackets > 1) {
5189             call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
5190         } else {
5191             call->MTU = MIN(peer->natMTU, peer->maxMTU);
5192         }
5193         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
5194         call->nDgramPackets = 1;
5195         call->cwind = 1;
5196         call->nextCwind = 1;
5197         call->nAcks = 0;
5198         call->nNacks = 0;
5199         MUTEX_ENTER(&peer->peer_lock);
5200         peer->MTU = call->MTU;
5201         peer->cwind = call->cwind;
5202         peer->nDgramPackets = 1;
5203         peer->congestSeq++;
5204         call->congestSeq = peer->congestSeq;
5205         MUTEX_EXIT(&peer->peer_lock);
5206         /* Clear retry times on packets. Otherwise, it's possible for
5207          * some packets in the queue to force resends at rates faster
5208          * than recovery rates.
5209          */
5210         for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5211             if (!(p->flags & RX_PKTFLAG_ACKED)) {
5212                 clock_Zero(&p->retryTime);
5213             }
5214         }
5215     }
5216     if (call->error) {
5217 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5218         rx_MutexIncrement(rx_tq_debug.rxi_start_in_error, rx_stats_mutex);
5219 #endif
5220         return;
5221     }
5222
5223     if (queue_IsNotEmpty(&call->tq)) {  /* If we have anything to send */
5224         /* Get clock to compute the re-transmit time for any packets
5225          * in this burst.  Note, if we back off, it's reasonable to
5226          * back off all of the packets in the same manner, even if
5227          * some of them have been retransmitted more times than more
5228          * recent additions.
5229          * Do a dance to avoid blocking after setting now. */
5230         clock_Zero(&retryTime);
5231         MUTEX_ENTER(&peer->peer_lock);
5232         clock_Add(&retryTime, &peer->timeout);
5233         MUTEX_EXIT(&peer->peer_lock);
5234         clock_GetTime(&now);
5235         clock_Add(&retryTime, &now);
5236         usenow = now;
5237         /* Send (or resend) any packets that need it, subject to
5238          * window restrictions and congestion burst control
5239          * restrictions.  Ask for an ack on the last packet sent in
5240          * this burst.  For now, we're relying upon the window being
5241          * considerably bigger than the largest number of packets that
5242          * are typically sent at once by one initial call to
5243          * rxi_Start.  This is probably bogus (perhaps we should ask
5244          * for an ack when we're half way through the current
5245          * window?).  Also, for non file transfer applications, this
5246          * may end up asking for an ack for every packet.  Bogus. XXXX
5247          */
5248         /*
5249          * But check whether we're here recursively, and let the other guy
5250          * do the work.
5251          */
5252 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5253         if (!(call->flags & RX_CALL_TQ_BUSY)) {
5254             call->flags |= RX_CALL_TQ_BUSY;
5255             do {
5256 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5257             restart:
5258 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5259                 call->flags &= ~RX_CALL_NEED_START;
5260 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5261                 nXmitPackets = 0;
5262                 maxXmitPackets = MIN(call->twind, call->cwind);
5263                 xmitList = (struct rx_packet **)
5264                     osi_Alloc(maxXmitPackets * sizeof(struct rx_packet *));
5265                 if (xmitList == NULL)
5266                     osi_Panic("rxi_Start, failed to allocate xmit list");
5267                 for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5268                     if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5269                         /* We shouldn't be sending packets if a thread is waiting
5270                          * to initiate congestion recovery */
5271                         break;
5272                     }
5273                     if ((nXmitPackets)
5274                         && (call->flags & RX_CALL_FAST_RECOVER)) {
5275                         /* Only send one packet during fast recovery */
5276                         break;
5277                     }
5278                     if ((p->flags & RX_PKTFLAG_FREE)
5279                         || (!queue_IsEnd(&call->tq, nxp)
5280                             && (nxp->flags & RX_PKTFLAG_FREE))
5281                         || (p == (struct rx_packet *)&rx_freePacketQueue)
5282                         || (nxp == (struct rx_packet *)&rx_freePacketQueue)) {
5283                         osi_Panic("rxi_Start: xmit queue clobbered");
5284                     }
5285                     if (p->flags & RX_PKTFLAG_ACKED) {
5286                         /* Since we may block, don't trust this */
5287                         usenow.sec = usenow.usec = 0;
5288                         rx_MutexIncrement(rx_stats.ignoreAckedPacket, rx_stats_mutex);
5289                         continue;       /* Ignore this packet if it has been acknowledged */
5290                     }
5291
5292                     /* Turn off all flags except these ones, which are the same
5293                      * on each transmission */
5294                     p->header.flags &= RX_PRESET_FLAGS;
5295
5296                     if (p->header.seq >=
5297                         call->tfirst + MIN((int)call->twind,
5298                                            (int)(call->nSoftAcked +
5299                                                  call->cwind))) {
5300                         call->flags |= RX_CALL_WAIT_WINDOW_SEND;        /* Wait for transmit window */
5301                         /* Note: if we're waiting for more window space, we can
5302                          * still send retransmits; hence we don't return here, but
5303                          * break out to schedule a retransmit event */
5304                         dpf(("call %d waiting for window",
5305                              *(call->callNumber)));
5306                         break;
5307                     }
5308
5309                     /* Transmit the packet if it needs to be sent. */
5310                     if (!clock_Lt(&now, &p->retryTime)) {
5311                         if (nXmitPackets == maxXmitPackets) {
5312                             rxi_SendXmitList(call, xmitList, nXmitPackets,
5313                                              istack, &now, &retryTime,
5314                                              resending);
5315                             osi_Free(xmitList, maxXmitPackets *
5316                                      sizeof(struct rx_packet *));
5317                             goto restart;
5318                         }
5319                         xmitList[nXmitPackets++] = p;
5320                     }
5321                 }
5322
5323                 /* xmitList now hold pointers to all of the packets that are
5324                  * ready to send. Now we loop to send the packets */
5325                 if (nXmitPackets > 0) {
5326                     rxi_SendXmitList(call, xmitList, nXmitPackets, istack,
5327                                      &now, &retryTime, resending);
5328                 }
5329                 osi_Free(xmitList,
5330                          maxXmitPackets * sizeof(struct rx_packet *));
5331
5332 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5333                 /*
5334                  * TQ references no longer protected by this flag; they must remain
5335                  * protected by the global lock.
5336                  */
5337                 if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5338                     call->flags &= ~RX_CALL_TQ_BUSY;
5339                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5340                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5341 #ifdef RX_ENABLE_LOCKS
5342                         osirx_AssertMine(&call->lock, "rxi_Start start");
5343                         CV_BROADCAST(&call->cv_tq);
5344 #else /* RX_ENABLE_LOCKS */
5345                         osi_rxWakeup(&call->tq);
5346 #endif /* RX_ENABLE_LOCKS */
5347                     }
5348                     return;
5349                 }
5350                 if (call->error) {
5351                     /* We went into the error state while sending packets. Now is
5352                      * the time to reset the call. This will also inform the using
5353                      * process that the call is in an error state.
5354                      */
5355                     rx_MutexIncrement(rx_tq_debug.rxi_start_aborted, rx_stats_mutex);
5356                     call->flags &= ~RX_CALL_TQ_BUSY;
5357                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5358                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5359 #ifdef RX_ENABLE_LOCKS
5360                         osirx_AssertMine(&call->lock, "rxi_Start middle");
5361                         CV_BROADCAST(&call->cv_tq);
5362 #else /* RX_ENABLE_LOCKS */
5363                         osi_rxWakeup(&call->tq);
5364 #endif /* RX_ENABLE_LOCKS */
5365                     }
5366                     rxi_CallError(call, call->error);
5367                     return;
5368                 }
5369 #ifdef RX_ENABLE_LOCKS
5370                 if (call->flags & RX_CALL_TQ_SOME_ACKED) {
5371                     register int missing;
5372                     call->flags &= ~RX_CALL_TQ_SOME_ACKED;
5373                     /* Some packets have received acks. If they all have, we can clear
5374                      * the transmit queue.
5375                      */
5376                     for (missing =
5377                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5378                         if (p->header.seq < call->tfirst
5379                             && (p->flags & RX_PKTFLAG_ACKED)) {
5380                             queue_Remove(p);
5381                             rxi_FreePacket(p);
5382                         } else
5383                             missing = 1;
5384                     }
5385                     if (!missing)
5386                         call->flags |= RX_CALL_TQ_CLEARME;
5387                 }
5388 #endif /* RX_ENABLE_LOCKS */
5389                 /* Don't bother doing retransmits if the TQ is cleared. */
5390                 if (call->flags & RX_CALL_TQ_CLEARME) {
5391                     rxi_ClearTransmitQueue(call, 1);
5392                 } else
5393 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5394                 {
5395
5396                     /* Always post a resend event, if there is anything in the
5397                      * queue, and resend is possible.  There should be at least
5398                      * one unacknowledged packet in the queue ... otherwise none
5399                      * of these packets should be on the queue in the first place.
5400                      */
5401                     if (call->resendEvent) {
5402                         /* Cancel the existing event and post a new one */
5403                         rxevent_Cancel(call->resendEvent, call,
5404                                        RX_CALL_REFCOUNT_RESEND);
5405                     }
5406
5407                     /* The retry time is the retry time on the first unacknowledged
5408                      * packet inside the current window */
5409                     for (haveEvent =
5410                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5411                         /* Don't set timers for packets outside the window */
5412                         if (p->header.seq >= call->tfirst + call->twind) {
5413                             break;
5414                         }
5415
5416                         if (!(p->flags & RX_PKTFLAG_ACKED)
5417                             && !clock_IsZero(&p->retryTime)) {
5418                             haveEvent = 1;
5419                             retryTime = p->retryTime;
5420                             break;
5421                         }
5422                     }
5423
5424                     /* Post a new event to re-run rxi_Start when retries may be needed */
5425                     if (haveEvent && !(call->flags & RX_CALL_NEED_START)) {
5426 #ifdef RX_ENABLE_LOCKS
5427                         CALL_HOLD(call, RX_CALL_REFCOUNT_RESEND);
5428                         call->resendEvent =
5429                             rxevent_PostNow2(&retryTime, &usenow,
5430                                              rxi_StartUnlocked,
5431                                              (void *)call, 0, istack);
5432 #else /* RX_ENABLE_LOCKS */
5433                         call->resendEvent =
5434                             rxevent_PostNow2(&retryTime, &usenow, rxi_Start,
5435                                              (void *)call, 0, istack);
5436 #endif /* RX_ENABLE_LOCKS */
5437                     }
5438                 }
5439 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5440             } while (call->flags & RX_CALL_NEED_START);
5441             /*
5442              * TQ references no longer protected by this flag; they must remain
5443              * protected by the global lock.
5444              */
5445             call->flags &= ~RX_CALL_TQ_BUSY;
5446             if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5447                 dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5448 #ifdef RX_ENABLE_LOCKS
5449                 osirx_AssertMine(&call->lock, "rxi_Start end");
5450                 CV_BROADCAST(&call->cv_tq);
5451 #else /* RX_ENABLE_LOCKS */
5452                 osi_rxWakeup(&call->tq);
5453 #endif /* RX_ENABLE_LOCKS */
5454             }
5455         } else {
5456             call->flags |= RX_CALL_NEED_START;
5457         }
5458 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5459     } else {
5460         if (call->resendEvent) {
5461             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5462         }
5463     }
5464 }
5465
5466 /* Also adjusts the keep alive parameters for the call, to reflect
5467  * that we have just sent a packet (so keep alives aren't sent
5468  * immediately) */
5469 void
5470 rxi_Send(register struct rx_call *call, register struct rx_packet *p,
5471          int istack)
5472 {
5473     register struct rx_connection *conn = call->conn;
5474
5475     /* Stamp each packet with the user supplied status */
5476     p->header.userStatus = call->localStatus;
5477
5478     /* Allow the security object controlling this call's security to
5479      * make any last-minute changes to the packet */
5480     RXS_SendPacket(conn->securityObject, call, p);
5481
5482     /* Since we're about to send SOME sort of packet to the peer, it's
5483      * safe to nuke any scheduled end-of-packets ack */
5484     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5485
5486     /* Actually send the packet, filling in more connection-specific fields */
5487     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5488     MUTEX_EXIT(&call->lock);
5489     rxi_SendPacket(call, conn, p, istack);
5490     MUTEX_ENTER(&call->lock);
5491     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5492
5493     /* Update last send time for this call (for keep-alive
5494      * processing), and for the connection (so that we can discover
5495      * idle connections) */
5496     conn->lastSendTime = call->lastSendTime = clock_Sec();
5497     /* Don't count keepalives here, so idleness can be tracked. */
5498     if (p->header.type != RX_PACKET_TYPE_ACK)
5499         call->lastSendData = call->lastSendTime;
5500 }
5501
5502
5503 /* Check if a call needs to be destroyed.  Called by keep-alive code to ensure
5504  * that things are fine.  Also called periodically to guarantee that nothing
5505  * falls through the cracks (e.g. (error + dally) connections have keepalive
5506  * turned off.  Returns 0 if conn is well, -1 otherwise.  If otherwise, call
5507  *  may be freed!
5508  * haveCTLock Set if calling from rxi_ReapConnections
5509  */
5510 #ifdef RX_ENABLE_LOCKS
5511 int
5512 rxi_CheckCall(register struct rx_call *call, int haveCTLock)
5513 #else /* RX_ENABLE_LOCKS */
5514 int
5515 rxi_CheckCall(register struct rx_call *call)
5516 #endif                          /* RX_ENABLE_LOCKS */
5517 {
5518     register struct rx_connection *conn = call->conn;
5519     afs_uint32 now;
5520     afs_uint32 deadTime;
5521
5522 #ifdef RX_GLOBAL_RXLOCK_KERNEL
5523     if (call->flags & RX_CALL_TQ_BUSY) {
5524         /* Call is active and will be reset by rxi_Start if it's
5525          * in an error state.
5526          */
5527         return 0;
5528     }
5529 #endif
5530     /* dead time + RTT + 8*MDEV, rounded up to next second. */
5531     deadTime =
5532         (((afs_uint32) conn->secondsUntilDead << 10) +
5533          ((afs_uint32) conn->peer->rtt >> 3) +
5534          ((afs_uint32) conn->peer->rtt_dev << 1) + 1023) >> 10;
5535     now = clock_Sec();
5536     /* These are computed to the second (+- 1 second).  But that's
5537      * good enough for these values, which should be a significant
5538      * number of seconds. */
5539     if (now > (call->lastReceiveTime + deadTime)) {
5540         if (call->state == RX_STATE_ACTIVE) {
5541 #ifdef ADAPT_PMTU
5542 #if defined(KERNEL) && defined(AFS_SUN57_ENV)
5543             ire_t *ire;
5544 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5545             netstack_t *ns =  netstack_find_by_stackid(GLOBAL_NETSTACKID);
5546             ip_stack_t *ipst = ns->netstack_ip;
5547 #endif
5548             ire = ire_cache_lookup(call->conn->peer->host
5549 #if defined(AFS_SUN510_ENV) && defined(ALL_ZONES)
5550                                    , ALL_ZONES
5551 #if defined(AFS_SUN510_ENV) && (defined(ICL_3_ARG) || defined(GLOBAL_NETSTACKID))
5552                                    , NULL
5553 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5554                                    , ipst
5555 #endif
5556 #endif
5557 #endif
5558                 );
5559
5560             if (ire && ire->ire_max_frag > 0)
5561                 rxi_SetPeerMtu(call->conn->peer->host, 0, ire->ire_max_frag);
5562 #if defined(GLOBAL_NETSTACKID)
5563             netstack_rele(ns);
5564 #endif
5565 #endif
5566 #endif /* ADAPT_PMTU */
5567             rxi_CallError(call, RX_CALL_DEAD);
5568             return -1;
5569         } else {
5570 #ifdef RX_ENABLE_LOCKS
5571             /* Cancel pending events */
5572             rxevent_Cancel(call->delayedAckEvent, call,
5573                            RX_CALL_REFCOUNT_DELAY);
5574             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5575             rxevent_Cancel(call->keepAliveEvent, call,
5576                            RX_CALL_REFCOUNT_ALIVE);
5577             if (call->refCount == 0) {
5578                 rxi_FreeCall(call, haveCTLock);
5579                 return -2;
5580             }
5581             return -1;
5582 #else /* RX_ENABLE_LOCKS */
5583             rxi_FreeCall(call);
5584             return -2;
5585 #endif /* RX_ENABLE_LOCKS */
5586         }
5587         /* Non-active calls are destroyed if they are not responding
5588          * to pings; active calls are simply flagged in error, so the
5589          * attached process can die reasonably gracefully. */
5590     }
5591     /* see if we have a non-activity timeout */
5592     if (call->startWait && conn->idleDeadTime
5593         && ((call->startWait + conn->idleDeadTime) < now)) {
5594         if (call->state == RX_STATE_ACTIVE) {
5595             rxi_CallError(call, RX_CALL_TIMEOUT);
5596             return -1;
5597         }
5598     }
5599     if (call->lastSendData && conn->idleDeadTime && (conn->idleDeadErr != 0)
5600         && ((call->lastSendData + conn->idleDeadTime) < now)) {
5601         if (call->state == RX_STATE_ACTIVE) {
5602             rxi_CallError(call, conn->idleDeadErr);
5603             return -1;
5604         }
5605     }
5606     /* see if we have a hard timeout */
5607     if (conn->hardDeadTime
5608         && (now > (conn->hardDeadTime + call->startTime.sec))) {
5609         if (call->state == RX_STATE_ACTIVE)
5610             rxi_CallError(call, RX_CALL_TIMEOUT);
5611         return -1;
5612     }
5613     return 0;
5614 }
5615
5616
5617 /* When a call is in progress, this routine is called occasionally to
5618  * make sure that some traffic has arrived (or been sent to) the peer.
5619  * If nothing has arrived in a reasonable amount of time, the call is
5620  * declared dead; if nothing has been sent for a while, we send a
5621  * keep-alive packet (if we're actually trying to keep the call alive)
5622  */
5623 void
5624 rxi_KeepAliveEvent(struct rxevent *event, register struct rx_call *call,
5625                    char *dummy)
5626 {
5627     struct rx_connection *conn;
5628     afs_uint32 now;
5629
5630     MUTEX_ENTER(&call->lock);
5631     CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
5632     if (event == call->keepAliveEvent)
5633         call->keepAliveEvent = NULL;
5634     now = clock_Sec();
5635
5636 #ifdef RX_ENABLE_LOCKS
5637     if (rxi_CheckCall(call, 0)) {
5638         MUTEX_EXIT(&call->lock);
5639         return;
5640     }
5641 #else /* RX_ENABLE_LOCKS */
5642     if (rxi_CheckCall(call))
5643         return;
5644 #endif /* RX_ENABLE_LOCKS */
5645
5646     /* Don't try to keep alive dallying calls */
5647     if (call->state == RX_STATE_DALLY) {
5648         MUTEX_EXIT(&call->lock);
5649         return;
5650     }
5651
5652     conn = call->conn;
5653     if ((now - call->lastSendTime) > conn->secondsUntilPing) {
5654         /* Don't try to send keepalives if there is unacknowledged data */
5655         /* the rexmit code should be good enough, this little hack
5656          * doesn't quite work XXX */
5657         (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
5658     }
5659     rxi_ScheduleKeepAliveEvent(call);
5660     MUTEX_EXIT(&call->lock);
5661 }
5662
5663
5664 void
5665 rxi_ScheduleKeepAliveEvent(register struct rx_call *call)
5666 {
5667     if (!call->keepAliveEvent) {
5668         struct clock when, now;
5669         clock_GetTime(&now);
5670         when = now;
5671         when.sec += call->conn->secondsUntilPing;
5672         CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE);
5673         call->keepAliveEvent =
5674             rxevent_PostNow(&when, &now, rxi_KeepAliveEvent, call, 0);
5675     }
5676 }
5677
5678 /* N.B. rxi_KeepAliveOff:  is defined earlier as a macro */
5679 void
5680 rxi_KeepAliveOn(register struct rx_call *call)
5681 {
5682     /* Pretend last packet received was received now--i.e. if another
5683      * packet isn't received within the keep alive time, then the call
5684      * will die; Initialize last send time to the current time--even
5685      * if a packet hasn't been sent yet.  This will guarantee that a
5686      * keep-alive is sent within the ping time */
5687     call->lastReceiveTime = call->lastSendTime = clock_Sec();
5688     rxi_ScheduleKeepAliveEvent(call);
5689 }
5690
5691 /* This routine is called to send connection abort messages
5692  * that have been delayed to throttle looping clients. */
5693 void
5694 rxi_SendDelayedConnAbort(struct rxevent *event,
5695                          register struct rx_connection *conn, char *dummy)
5696 {
5697     afs_int32 error;
5698     struct rx_packet *packet;
5699
5700     MUTEX_ENTER(&conn->conn_data_lock);
5701     conn->delayedAbortEvent = NULL;
5702     error = htonl(conn->error);
5703     conn->abortCount++;
5704     MUTEX_EXIT(&conn->conn_data_lock);
5705     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5706     if (packet) {
5707         packet =
5708             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5709                             RX_PACKET_TYPE_ABORT, (char *)&error,
5710                             sizeof(error), 0);
5711         rxi_FreePacket(packet);
5712     }
5713 }
5714
5715 /* This routine is called to send call abort messages
5716  * that have been delayed to throttle looping clients. */
5717 void
5718 rxi_SendDelayedCallAbort(struct rxevent *event, register struct rx_call *call,
5719                          char *dummy)
5720 {
5721     afs_int32 error;
5722     struct rx_packet *packet;
5723
5724     MUTEX_ENTER(&call->lock);
5725     call->delayedAbortEvent = NULL;
5726     error = htonl(call->error);
5727     call->abortCount++;
5728     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5729     if (packet) {
5730         packet =
5731             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
5732                             (char *)&error, sizeof(error), 0);
5733         rxi_FreePacket(packet);
5734     }
5735     CALL_RELE(call, RX_CALL_REFCOUNT_ABORT);
5736     MUTEX_EXIT(&call->lock);
5737 }
5738
5739 /* This routine is called periodically (every RX_AUTH_REQUEST_TIMEOUT
5740  * seconds) to ask the client to authenticate itself.  The routine
5741  * issues a challenge to the client, which is obtained from the
5742  * security object associated with the connection */
5743 void
5744 rxi_ChallengeEvent(struct rxevent *event, register struct rx_connection *conn,
5745                    void *arg1, int tries)
5746 {
5747     conn->challengeEvent = NULL;
5748     if (RXS_CheckAuthentication(conn->securityObject, conn) != 0) {
5749         register struct rx_packet *packet;
5750         struct clock when, now;
5751
5752         if (tries <= 0) {
5753             /* We've failed to authenticate for too long.
5754              * Reset any calls waiting for authentication;
5755              * they are all in RX_STATE_PRECALL.
5756              */
5757             int i;
5758
5759             MUTEX_ENTER(&conn->conn_call_lock);
5760             for (i = 0; i < RX_MAXCALLS; i++) {
5761                 struct rx_call *call = conn->call[i];
5762                 if (call) {
5763                     MUTEX_ENTER(&call->lock);
5764                     if (call->state == RX_STATE_PRECALL) {
5765                         rxi_CallError(call, RX_CALL_DEAD);
5766                         rxi_SendCallAbort(call, NULL, 0, 0);
5767                     }
5768                     MUTEX_EXIT(&call->lock);
5769                 }
5770             }
5771             MUTEX_EXIT(&conn->conn_call_lock);
5772             return;
5773         }
5774
5775         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5776         if (packet) {
5777             /* If there's no packet available, do this later. */
5778             RXS_GetChallenge(conn->securityObject, conn, packet);
5779             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5780                             RX_PACKET_TYPE_CHALLENGE, NULL, -1, 0);
5781             rxi_FreePacket(packet);
5782         }
5783         clock_GetTime(&now);
5784         when = now;
5785         when.sec += RX_CHALLENGE_TIMEOUT;
5786         conn->challengeEvent =
5787             rxevent_PostNow2(&when, &now, rxi_ChallengeEvent, conn, 0,
5788                          (tries - 1));
5789     }
5790 }
5791
5792 /* Call this routine to start requesting the client to authenticate
5793  * itself.  This will continue until authentication is established,
5794  * the call times out, or an invalid response is returned.  The
5795  * security object associated with the connection is asked to create
5796  * the challenge at this time.  N.B.  rxi_ChallengeOff is a macro,
5797  * defined earlier. */
5798 void
5799 rxi_ChallengeOn(register struct rx_connection *conn)
5800 {
5801     if (!conn->challengeEvent) {
5802         RXS_CreateChallenge(conn->securityObject, conn);
5803         rxi_ChallengeEvent(NULL, conn, 0, RX_CHALLENGE_MAXTRIES);
5804     };
5805 }
5806
5807
5808 /* Compute round trip time of the packet provided, in *rttp.
5809  */
5810
5811 /* rxi_ComputeRoundTripTime is called with peer locked. */
5812 /* sentp and/or peer may be null */
5813 void
5814 rxi_ComputeRoundTripTime(register struct rx_packet *p,
5815                          register struct clock *sentp,
5816                          register struct rx_peer *peer)
5817 {
5818     struct clock thisRtt, *rttp = &thisRtt;
5819
5820     register int rtt_timeout;
5821
5822     clock_GetTime(rttp);
5823
5824     if (clock_Lt(rttp, sentp)) {
5825         clock_Zero(rttp);
5826         return;                 /* somebody set the clock back, don't count this time. */
5827     }
5828     clock_Sub(rttp, sentp);
5829     MUTEX_ENTER(&rx_stats_mutex);
5830     if (clock_Lt(rttp, &rx_stats.minRtt))
5831         rx_stats.minRtt = *rttp;
5832     if (clock_Gt(rttp, &rx_stats.maxRtt)) {
5833         if (rttp->sec > 60) {
5834             MUTEX_EXIT(&rx_stats_mutex);
5835             return;             /* somebody set the clock ahead */
5836         }
5837         rx_stats.maxRtt = *rttp;
5838     }
5839     clock_Add(&rx_stats.totalRtt, rttp);
5840     rx_stats.nRttSamples++;
5841     MUTEX_EXIT(&rx_stats_mutex);
5842
5843     /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */
5844
5845     /* Apply VanJacobson round-trip estimations */
5846     if (peer->rtt) {
5847         register int delta;
5848
5849         /*
5850          * srtt (peer->rtt) is in units of one-eighth-milliseconds.
5851          * srtt is stored as fixed point with 3 bits after the binary
5852          * point (i.e., scaled by 8). The following magic is
5853          * equivalent to the smoothing algorithm in rfc793 with an
5854          * alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed point).
5855          * srtt*8 = srtt*8 + rtt - srtt
5856          * srtt = srtt + rtt/8 - srtt/8
5857          */
5858
5859         delta = MSEC(rttp) - (peer->rtt >> 3);
5860         peer->rtt += delta;
5861
5862         /*
5863          * We accumulate a smoothed rtt variance (actually, a smoothed
5864          * mean difference), then set the retransmit timer to smoothed
5865          * rtt + 4 times the smoothed variance (was 2x in van's original
5866          * paper, but 4x works better for me, and apparently for him as
5867          * well).
5868          * rttvar is stored as
5869          * fixed point with 2 bits after the binary point (scaled by
5870          * 4).  The following is equivalent to rfc793 smoothing with
5871          * an alpha of .75 (rttvar = rttvar*3/4 + |delta| / 4).  This
5872          * replaces rfc793's wired-in beta.
5873          * dev*4 = dev*4 + (|actual - expected| - dev)
5874          */
5875
5876         if (delta < 0)
5877             delta = -delta;
5878
5879         delta -= (peer->rtt_dev >> 2);
5880         peer->rtt_dev += delta;
5881     } else {
5882         /* I don't have a stored RTT so I start with this value.  Since I'm
5883          * probably just starting a call, and will be pushing more data down
5884          * this, I expect congestion to increase rapidly.  So I fudge a
5885          * little, and I set deviance to half the rtt.  In practice,
5886          * deviance tends to approach something a little less than
5887          * half the smoothed rtt. */
5888         peer->rtt = (MSEC(rttp) << 3) + 8;
5889         peer->rtt_dev = peer->rtt >> 2; /* rtt/2: they're scaled differently */
5890     }
5891     /* the timeout is RTT + 4*MDEV + 0.35 sec   This is because one end or
5892      * the other of these connections is usually in a user process, and can
5893      * be switched and/or swapped out.  So on fast, reliable networks, the
5894      * timeout would otherwise be too short.
5895      */
5896     rtt_timeout = (peer->rtt >> 3) + peer->rtt_dev + 350;
5897     clock_Zero(&(peer->timeout));
5898     clock_Addmsec(&(peer->timeout), rtt_timeout);
5899
5900     dpf(("rxi_ComputeRoundTripTime(rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%0.3d sec)\n", MSEC(rttp), peer->rtt >> 3, peer->rtt_dev >> 2, (peer->timeout.sec), (peer->timeout.usec)));
5901 }
5902
5903
5904 /* Find all server connections that have not been active for a long time, and
5905  * toss them */
5906 void
5907 rxi_ReapConnections(void)
5908 {
5909     struct clock now, when;
5910     clock_GetTime(&now);
5911
5912     /* Find server connection structures that haven't been used for
5913      * greater than rx_idleConnectionTime */
5914     {
5915         struct rx_connection **conn_ptr, **conn_end;
5916         int i, havecalls = 0;
5917         MUTEX_ENTER(&rx_connHashTable_lock);
5918         for (conn_ptr = &rx_connHashTable[0], conn_end =
5919              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
5920              conn_ptr++) {
5921             struct rx_connection *conn, *next;
5922             struct rx_call *call;
5923             int result;
5924
5925           rereap:
5926             for (conn = *conn_ptr; conn; conn = next) {
5927                 /* XXX -- Shouldn't the connection be locked? */
5928                 next = conn->next;
5929                 havecalls = 0;
5930                 for (i = 0; i < RX_MAXCALLS; i++) {
5931                     call = conn->call[i];
5932                     if (call) {
5933                         havecalls = 1;
5934                         MUTEX_ENTER(&call->lock);
5935 #ifdef RX_ENABLE_LOCKS
5936                         result = rxi_CheckCall(call, 1);
5937 #else /* RX_ENABLE_LOCKS */
5938                         result = rxi_CheckCall(call);
5939 #endif /* RX_ENABLE_LOCKS */
5940                         MUTEX_EXIT(&call->lock);
5941                         if (result == -2) {
5942                             /* If CheckCall freed the call, it might
5943                              * have destroyed  the connection as well,
5944                              * which screws up the linked lists.
5945                              */
5946                             goto rereap;
5947                         }
5948                     }
5949                 }
5950                 if (conn->type == RX_SERVER_CONNECTION) {
5951                     /* This only actually destroys the connection if
5952                      * there are no outstanding calls */
5953                     MUTEX_ENTER(&conn->conn_data_lock);
5954                     if (!havecalls && !conn->refCount
5955                         && ((conn->lastSendTime + rx_idleConnectionTime) <
5956                             now.sec)) {
5957                         conn->refCount++;       /* it will be decr in rx_DestroyConn */
5958                         MUTEX_EXIT(&conn->conn_data_lock);
5959 #ifdef RX_ENABLE_LOCKS
5960                         rxi_DestroyConnectionNoLock(conn);
5961 #else /* RX_ENABLE_LOCKS */
5962                         rxi_DestroyConnection(conn);
5963 #endif /* RX_ENABLE_LOCKS */
5964                     }
5965 #ifdef RX_ENABLE_LOCKS
5966                     else {
5967                         MUTEX_EXIT(&conn->conn_data_lock);
5968                     }
5969 #endif /* RX_ENABLE_LOCKS */
5970                 }
5971             }
5972         }
5973 #ifdef RX_ENABLE_LOCKS
5974         while (rx_connCleanup_list) {
5975             struct rx_connection *conn;
5976             conn = rx_connCleanup_list;
5977             rx_connCleanup_list = rx_connCleanup_list->next;
5978             MUTEX_EXIT(&rx_connHashTable_lock);
5979             rxi_CleanupConnection(conn);
5980             MUTEX_ENTER(&rx_connHashTable_lock);
5981         }
5982         MUTEX_EXIT(&rx_connHashTable_lock);
5983 #endif /* RX_ENABLE_LOCKS */
5984     }
5985
5986     /* Find any peer structures that haven't been used (haven't had an
5987      * associated connection) for greater than rx_idlePeerTime */
5988     {
5989         struct rx_peer **peer_ptr, **peer_end;
5990         int code;
5991         MUTEX_ENTER(&rx_rpc_stats);
5992         MUTEX_ENTER(&rx_peerHashTable_lock);
5993         for (peer_ptr = &rx_peerHashTable[0], peer_end =
5994              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
5995              peer_ptr++) {
5996             struct rx_peer *peer, *next, *prev;
5997             for (prev = peer = *peer_ptr; peer; peer = next) {
5998                 next = peer->next;
5999                 code = MUTEX_TRYENTER(&peer->peer_lock);
6000                 if ((code) && (peer->refCount == 0)
6001                     && ((peer->idleWhen + rx_idlePeerTime) < now.sec)) {
6002                     rx_interface_stat_p rpc_stat, nrpc_stat;
6003                     size_t space;
6004                     MUTEX_EXIT(&peer->peer_lock);
6005                     MUTEX_DESTROY(&peer->peer_lock);
6006                     for (queue_Scan
6007                          (&peer->rpcStats, rpc_stat, nrpc_stat,
6008                           rx_interface_stat)) {
6009                         unsigned int num_funcs;
6010                         if (!rpc_stat)
6011                             break;
6012                         queue_Remove(&rpc_stat->queue_header);
6013                         queue_Remove(&rpc_stat->all_peers);
6014                         num_funcs = rpc_stat->stats[0].func_total;
6015                         space =
6016                             sizeof(rx_interface_stat_t) +
6017                             rpc_stat->stats[0].func_total *
6018                             sizeof(rx_function_entry_v1_t);
6019
6020                         rxi_Free(rpc_stat, space);
6021                         rxi_rpc_peer_stat_cnt -= num_funcs;
6022                     }
6023                     rxi_FreePeer(peer);
6024                     rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
6025                     if (peer == *peer_ptr) {
6026                         *peer_ptr = next;
6027                         prev = next;
6028                     } else
6029                         prev->next = next;
6030                 } else {
6031                     if (code) {
6032                         MUTEX_EXIT(&peer->peer_lock);
6033                     }
6034                     prev = peer;
6035                 }
6036             }
6037         }
6038         MUTEX_EXIT(&rx_peerHashTable_lock);
6039         MUTEX_EXIT(&rx_rpc_stats);
6040     }
6041
6042     /* THIS HACK IS A TEMPORARY HACK.  The idea is that the race condition in
6043      * rxi_AllocSendPacket, if it hits, will be handled at the next conn
6044      * GC, just below.  Really, we shouldn't have to keep moving packets from
6045      * one place to another, but instead ought to always know if we can
6046      * afford to hold onto a packet in its particular use.  */
6047     MUTEX_ENTER(&rx_freePktQ_lock);
6048     if (rx_waitingForPackets) {
6049         rx_waitingForPackets = 0;
6050 #ifdef  RX_ENABLE_LOCKS
6051         CV_BROADCAST(&rx_waitingForPackets_cv);
6052 #else
6053         osi_rxWakeup(&rx_waitingForPackets);
6054 #endif
6055     }
6056     MUTEX_EXIT(&rx_freePktQ_lock);
6057
6058     when = now;
6059     when.sec += RX_REAP_TIME;   /* Check every RX_REAP_TIME seconds */
6060     rxevent_Post(&when, rxi_ReapConnections, 0, 0);
6061 }
6062
6063
/* rxs_Release - Function wrapper around the RXS_Close macro.  It isn't
 * strictly necessary but, since the macro name from rx.h is sort of
 * strange, this reads better.  Call it with a security object before the
 * object is discarded.  Each connection using a security object has its
 * own refcount to the object, so it won't actually be freed until the
 * last connection is destroyed.
 *
 * This is the only rxs module call.  A hold could also be written but no
 * one needs it.
 *
 * Returns whatever RXS_Close yields for aobj.
 */

int
rxs_Release(struct rx_securityClass *aobj)
{
    int code;

    code = RXS_Close(aobj);
    return code;
}
6078
6079 #ifdef ADAPT_WINDOW
/* Packet-size constants, all expressed in terms of the combined
 * Rx header + IP/UDP header size (RXRATE_PKT_OH). */
#define RXRATE_PKT_OH           (RX_HEADER_SIZE + RX_IPUDP_SIZE)
/* RXRATE_PKT_OH plus a whole struct rx_ackPacket */
#define RXRATE_SMALL_PKT        (RXRATE_PKT_OH + sizeof(struct rx_ackPacket))
/* RXRATE_PKT_OH plus half a struct rx_ackPacket */
#define RXRATE_AVG_SMALL_PKT    (RXRATE_PKT_OH + sizeof(struct rx_ackPacket) / 2)
/* RXRATE_SMALL_PKT plus 256 bytes */
#define RXRATE_LARGE_PKT        (RXRATE_SMALL_PKT + 256)
6084
6085 /* Adjust our estimate of the transmission rate to this peer, given
6086  * that the packet p was just acked. We can adjust peer->timeout and
6087  * call->twind. Pragmatically, this is called
6088  * only with packets of maximal length.
6089  * Called with peer and call locked.
6090  */
6091
6092 static void
6093 rxi_ComputeRate(register struct rx_peer *peer, register struct rx_call *call,
6094                 struct rx_packet *p, struct rx_packet *ackp, u_char ackReason)
6095 {
6096     afs_int32 xferSize, xferMs;
6097     register afs_int32 minTime;
6098     struct clock newTO;
6099
6100     /* Count down packets */
6101     if (peer->rateFlag > 0)
6102         peer->rateFlag--;
6103     /* Do nothing until we're enabled */
6104     if (peer->rateFlag != 0)
6105         return;
6106     if (!call->conn)
6107         return;
6108
6109     /* Count only when the ack seems legitimate */
6110     switch (ackReason) {
6111     case RX_ACK_REQUESTED:
6112         xferSize =
6113             p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize;
6114         xferMs = peer->rtt;
6115         break;
6116
6117     case RX_ACK_PING_RESPONSE:
6118         if (p)                  /* want the response to ping-request, not data send */
6119             return;
6120         clock_GetTime(&newTO);
6121         if (clock_Gt(&newTO, &call->pingRequestTime)) {
6122             clock_Sub(&newTO, &call->pingRequestTime);
6123             xferMs = (newTO.sec * 1000) + (newTO.usec / 1000);
6124         } else {
6125             return;
6126         }
6127         xferSize = rx_AckDataSize(rx_Window) + RX_HEADER_SIZE;
6128         break;
6129
6130     default:
6131         return;
6132     }
6133
6134     dpf(("CONG peer %lx/%u: sample (%s) size %ld, %ld ms (to %lu.%06lu, rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), (ackReason == RX_ACK_REQUESTED ? "dataack" : "pingack"), xferSize, xferMs, peer->timeout.sec, peer->timeout.usec, peer->smRtt, peer->ifMTU));
6135
6136     /* Track only packets that are big enough. */
6137     if ((p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize) <
6138         peer->ifMTU)
6139         return;
6140
6141     /* absorb RTT data (in milliseconds) for these big packets */
6142     if (peer->smRtt == 0) {
6143         peer->smRtt = xferMs;
6144     } else {
6145         peer->smRtt = ((peer->smRtt * 15) + xferMs + 4) >> 4;
6146         if (!peer->smRtt)
6147             peer->smRtt = 1;
6148     }
6149
6150     if (peer->countDown) {
6151         peer->countDown--;
6152         return;
6153     }
6154     peer->countDown = 10;       /* recalculate only every so often */
6155
6156     /* In practice, we can measure only the RTT for full packets,
6157      * because of the way Rx acks the data that it receives.  (If it's
6158      * smaller than a full packet, it often gets implicitly acked
6159      * either by the call response (from a server) or by the next call
6160      * (from a client), and either case confuses transmission times
6161      * with processing times.)  Therefore, replace the above
6162      * more-sophisticated processing with a simpler version, where the
6163      * smoothed RTT is kept for full-size packets, and the time to
6164      * transmit a windowful of full-size packets is simply RTT *
6165      * windowSize. Again, we take two steps:
6166      - ensure the timeout is large enough for a single packet's RTT;
6167      - ensure that the window is small enough to fit in the desired timeout.*/
6168
6169     /* First, the timeout check. */
6170     minTime = peer->smRtt;
6171     /* Get a reasonable estimate for a timeout period */
6172     minTime += minTime;
6173     newTO.sec = minTime / 1000;
6174     newTO.usec = (minTime - (newTO.sec * 1000)) * 1000;
6175
6176     /* Increase the timeout period so that we can always do at least
6177      * one packet exchange */
6178     if (clock_Gt(&newTO, &peer->timeout)) {
6179
6180         dpf(("CONG peer %lx/%u: timeout %lu.%06lu ==> %lu.%06lu (rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), peer->timeout.sec, peer->timeout.usec, newTO.sec, newTO.usec, peer->smRtt, peer->packetSize));
6181
6182         peer->timeout = newTO;
6183     }
6184
6185     /* Now, get an estimate for the transmit window size. */
6186     minTime = peer->timeout.sec * 1000 + (peer->timeout.usec / 1000);
6187     /* Now, convert to the number of full packets that could fit in a
6188      * reasonable fraction of that interval */
6189     minTime /= (peer->smRtt << 1);
6190     xferSize = minTime;         /* (make a copy) */
6191
6192     /* Now clamp the size to reasonable bounds. */
6193     if (minTime <= 1)
6194         minTime = 1;
6195     else if (minTime > rx_Window)
6196         minTime = rx_Window;
6197 /*    if (minTime != peer->maxWindow) {
6198       dpf(("CONG peer %lx/%u: windowsize %lu ==> %lu (to %lu.%06lu, rtt %u, ps %u)",
6199              ntohl(peer->host), ntohs(peer->port), peer->maxWindow, minTime,
6200              peer->timeout.sec, peer->timeout.usec, peer->smRtt,
6201              peer->packetSize));
6202       peer->maxWindow = minTime;
6203         elide... call->twind = minTime;
6204     }
6205 */
6206
6207     /* Cut back on the peer timeout if it had earlier grown unreasonably.
6208      * Discern this by calculating the timeout necessary for rx_Window
6209      * packets. */
6210     if ((xferSize > rx_Window) && (peer->timeout.sec >= 3)) {
6211         /* calculate estimate for transmission interval in milliseconds */
6212         minTime = rx_Window * peer->smRtt;
6213         if (minTime < 1000) {
6214             dpf(("CONG peer %lx/%u: cut TO %lu.%06lu by 0.5 (rtt %u, ps %u)",
6215                  ntohl(peer->host), ntohs(peer->port), peer->timeout.sec,
6216                  peer->timeout.usec, peer->smRtt, peer->packetSize));
6217
6218             newTO.sec = 0;      /* cut back on timeout by half a second */
6219             newTO.usec = 500000;
6220             clock_Sub(&peer->timeout, &newTO);
6221         }
6222     }
6223
6224     return;
6225 }                               /* end of rxi_ComputeRate */
6226 #endif /* ADAPT_WINDOW */
6227
6228
6229 #ifdef RXDEBUG
/*
 * rxi_DebugInit - establish the initial state of Rx debug tracing.
 *
 * On Windows builds (AFS_NT40_ENV) tracing starts out disabled and is
 * enabled only when the "TraceOption" value stored under
 * HKEY_LOCAL_MACHINE\AFSREG_CLT_SVC_PARAM_SUBKEY can be read and has the
 * TRACE_OPTION_DEBUGLOG bit set.  If the key cannot be opened or the value
 * cannot be read, tracing stays disabled.
 *
 * On every other build this routine does nothing.
 */
void
rxi_DebugInit(void)
{
#ifdef AFS_NT40_ENV
#define TRACE_OPTION_DEBUGLOG 4
    HKEY clientParams;
    DWORD traceBits;
    DWORD valueSize = sizeof(traceBits);
    long status;

    /* Default: tracing off unless the registry says otherwise. */
    rxdebug_active = 0;

    status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
                          0, KEY_QUERY_VALUE, &clientParams);
    if (status == ERROR_SUCCESS) {
        status = RegQueryValueEx(clientParams, "TraceOption", NULL, NULL,
                                 (BYTE *) &traceBits, &valueSize);
        if (status == ERROR_SUCCESS
            && (traceBits & TRACE_OPTION_DEBUGLOG) != 0)
            rxdebug_active = 1;
        RegCloseKey(clientParams);
    }
#endif /* AFS_NT40_ENV */
}
6256
6257 #ifdef AFS_NT40_ENV
6258 void
6259 rx_DebugOnOff(int on)
6260 {
6261     rxdebug_active = on;
6262 }
6263 #endif /* AFS_NT40_ENV */
6264
6265
6266 /* Don't call this debugging routine directly; use dpf */
6267 void
6268 rxi_DebugPrint(char *format, int a1, int a2, int a3, int a4, int a5, int a6,
6269                int a7, int a8, int a9, int a10, int a11, int a12, int a13,
6270                int a14, int a15)
6271 {
6272 #ifdef AFS_NT40_ENV
6273     char msg[512];
6274     char tformat[256];
6275     size_t len;
6276
6277     len = _snprintf(tformat, sizeof(tformat), "tid[%d] %s", GetCurrentThreadId(), format);
6278
6279     if (len > 0) {
6280         len = _snprintf(msg, sizeof(msg)-2,
6281                         tformat, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
6282                         a11, a12, a13, a14, a15);
6283         if (len > 0) {
6284             if (msg[len-1] != '\n') {
6285                 msg[len] = '\n';
6286                 msg[len+1] = '\0';
6287             }
6288             OutputDebugString(msg);
6289         }
6290     }
6291 #else
6292     struct clock now;
6293     clock_GetTime(&now);
6294     fprintf(rx_Log, " %u.%.3u:", (unsigned int)now.sec,
6295             (unsigned int)now.usec / 1000);
6296     fprintf(rx_Log, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
6297             a13, a14, a15);
6298     putc('\n', rx_Log);
6299 #endif
6300 }
6301
6302 /*
6303  * This function is used to process the rx_stats structure that is local
6304  * to a process as well as an rx_stats structure received from a remote
6305  * process (via rxdebug).  Therefore, it needs to do minimal version
6306  * checking.
6307  */
6308 void
6309 rx_PrintTheseStats(FILE * file, struct rx_stats *s, int size,
6310                    afs_int32 freePackets, char version)
6311 {
6312     int i;
6313
6314     if (size != sizeof(struct rx_stats)) {
6315         fprintf(file,
6316                 "Unexpected size of stats structure: was %d, expected %d\n",
6317                 size, sizeof(struct rx_stats));
6318     }
6319
6320     fprintf(file, "rx stats: free packets %d, allocs %d, ", (int)freePackets,
6321             s->packetRequests);
6322
6323     if (version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6324         fprintf(file, "alloc-failures(rcv %d/%d,send %d/%d,ack %d)\n",
6325                 s->receivePktAllocFailures, s->receiveCbufPktAllocFailures,
6326                 s->sendPktAllocFailures, s->sendCbufPktAllocFailures,
6327                 s->specialPktAllocFailures);
6328     } else {
6329         fprintf(file, "alloc-failures(rcv %d,send %d,ack %d)\n",
6330                 s->receivePktAllocFailures, s->sendPktAllocFailures,
6331                 s->specialPktAllocFailures);
6332     }
6333
6334     fprintf(file,
6335             "   greedy %d, " "bogusReads %d (last from host %x), "
6336             "noPackets %d, " "noBuffers %d, " "selects %d, "
6337             "sendSelects %d\n", s->socketGreedy, s->bogusPacketOnRead,
6338             s->bogusHost, s->noPacketOnRead, s->noPacketBuffersOnRead,
6339             s->selects, s->sendSelects);
6340
6341     fprintf(file, "   packets read: ");
6342     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6343         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsRead[i]);
6344     }
6345     fprintf(file, "\n");
6346
6347     fprintf(file,
6348             "   other read counters: data %d, " "ack %d, " "dup %d "
6349             "spurious %d " "dally %d\n", s->dataPacketsRead,
6350             s->ackPacketsRead, s->dupPacketsRead, s->spuriousPacketsRead,
6351             s->ignorePacketDally);
6352
6353     fprintf(file, "   packets sent: ");
6354     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6355         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsSent[i]);
6356     }
6357     fprintf(file, "\n");
6358
6359     fprintf(file,
6360             "   other send counters: ack %d, " "data %d (not resends), "
6361             "resends %d, " "pushed %d, " "acked&ignored %d\n",
6362             s->ackPacketsSent, s->dataPacketsSent, s->dataPacketsReSent,
6363             s->dataPacketsPushed, s->ignoreAckedPacket);
6364
6365     fprintf(file,
6366             "   \t(these should be small) sendFailed %d, " "fatalErrors %d\n",
6367             s->netSendFailures, (int)s->fatalErrors);
6368
6369     if (s->nRttSamples) {
6370         fprintf(file, "   Average rtt is %0.3f, with %d samples\n",
6371                 clock_Float(&s->totalRtt) / s->nRttSamples, s->nRttSamples);
6372
6373         fprintf(file, "   Minimum rtt is %0.3f, maximum is %0.3f\n",
6374                 clock_Float(&s->minRtt), clock_Float(&s->maxRtt));
6375     }
6376
6377     fprintf(file,
6378             "   %d server connections, " "%d client connections, "
6379             "%d peer structs, " "%d call structs, " "%d free call structs\n",
6380             s->nServerConns, s->nClientConns, s->nPeerStructs,
6381             s->nCallStructs, s->nFreeCallStructs);
6382
6383 #if     !defined(AFS_PTHREAD_ENV) && !defined(AFS_USE_GETTIMEOFDAY)
6384     fprintf(file, "   %d clock updates\n", clock_nUpdates);
6385 #endif
6386
6387 }
6388
6389 /* for backward compatibility */
6390 void
6391 rx_PrintStats(FILE * file)
6392 {
6393     MUTEX_ENTER(&rx_stats_mutex);
6394     rx_PrintTheseStats(file, &rx_stats, sizeof(rx_stats), rx_nFreePackets,
6395                        RX_DEBUGI_VERSION);
6396     MUTEX_EXIT(&rx_stats_mutex);
6397 }
6398
6399 void
6400 rx_PrintPeerStats(FILE * file, struct rx_peer *peer)
6401 {
6402     fprintf(file, "Peer %x.%d.  " "Burst size %d, " "burst wait %u.%d.\n",
6403             ntohl(peer->host), (int)peer->port, (int)peer->burstSize,
6404             (int)peer->burstWait.sec, (int)peer->burstWait.usec);
6405
6406     fprintf(file,
6407             "   Rtt %d, " "retry time %u.%06d, " "total sent %d, "
6408             "resent %d\n", peer->rtt, (int)peer->timeout.sec,
6409             (int)peer->timeout.usec, peer->nSent, peer->reSends);
6410
6411     fprintf(file,
6412             "   Packet size %d, " "max in packet skew %d, "
6413             "max out packet skew %d\n", peer->ifMTU, (int)peer->inPacketSkew,
6414             (int)peer->outPacketSkew);
6415 }
6416
/*
 * LOCK_RX_DEBUG / UNLOCK_RX_DEBUG guard the static variable "counter".
 *
 * With AFS_PTHREAD_ENV they lock and unlock rx_debug_mutex and assert that
 * the pthread call returned 0; without it they expand to nothing.
 *
 * NOTE(review): the pthread call sits inside assert().  If the assert in
 * effect here is the standard <assert.h> macro, a build with NDEBUG would
 * drop the lock calls entirely -- confirm which assert this file uses.
 */
#if defined(AFS_PTHREAD_ENV)
#define LOCK_RX_DEBUG   assert(pthread_mutex_lock(&rx_debug_mutex) == 0)
#define UNLOCK_RX_DEBUG assert(pthread_mutex_unlock(&rx_debug_mutex) == 0)
#else /* !AFS_PTHREAD_ENV */
#define LOCK_RX_DEBUG
#define UNLOCK_RX_DEBUG
#endif /* AFS_PTHREAD_ENV */
6429
6430 static int
6431 MakeDebugCall(osi_socket socket, afs_uint32 remoteAddr, afs_uint16 remotePort,
6432               u_char type, void *inputData, size_t inputLength,
6433               void *outputData, size_t outputLength)
6434 {
6435     static afs_int32 counter = 100;
6436     time_t waitTime, waitCount, startTime;
6437     struct rx_header theader;
6438     char tbuffer[1500];
6439     register afs_int32 code;
6440     struct timeval tv_now, tv_wake, tv_delta;
6441     struct sockaddr_in taddr, faddr;
6442     int faddrLen;
6443     fd_set imask;
6444     register char *tp;
6445
6446     startTime = time(0);
6447     waitTime = 1;
6448     waitCount = 5;
6449     LOCK_RX_DEBUG;
6450     counter++;
6451     UNLOCK_RX_DEBUG;
6452     tp = &tbuffer[sizeof(struct rx_header)];
6453     taddr.sin_family = AF_INET;
6454     taddr.sin_port = remotePort;
6455     taddr.sin_addr.s_addr = remoteAddr;
6456 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
6457     taddr.sin_len = sizeof(struct sockaddr_in);
6458 #endif
6459     while (1) {
6460         memset(&theader, 0, sizeof(theader));
6461         theader.epoch = htonl(999);
6462         theader.cid = 0;
6463         theader.callNumber = htonl(counter);
6464         theader.seq = 0;
6465         theader.serial = 0;
6466         theader.type = type;
6467         theader.flags = RX_CLIENT_INITIATED | RX_LAST_PACKET;
6468         theader.serviceId = 0;
6469
6470         memcpy(tbuffer, &theader, sizeof(theader));
6471         memcpy(tp, inputData, inputLength);
6472         code =
6473             sendto(socket, tbuffer, inputLength + sizeof(struct rx_header), 0,
6474                    (struct sockaddr *)&taddr, sizeof(struct sockaddr_in));
6475
6476         /* see if there's a packet available */
6477         gettimeofday(&tv_wake,0);
6478         tv_wake.tv_sec += waitTime;
6479         for (;;) {
6480             FD_ZERO(&imask);
6481             FD_SET(socket, &imask);
6482             tv_delta.tv_sec = tv_wake.tv_sec;
6483             tv_delta.tv_usec = tv_wake.tv_usec;
6484             gettimeofday(&tv_now, 0);
6485
6486             if (tv_delta.tv_usec < tv_now.tv_usec) {
6487                 /* borrow */
6488                 tv_delta.tv_usec += 1000000;
6489                 tv_delta.tv_sec--;
6490             }
6491             tv_delta.tv_usec -= tv_now.tv_usec;
6492
6493             if (tv_delta.tv_sec < tv_now.tv_sec) {
6494                 /* time expired */
6495                 break;
6496             }
6497             tv_delta.tv_sec -= tv_now.tv_sec;
6498
6499             code = select(socket + 1, &imask, 0, 0, &tv_delta);
6500             if (code == 1 && FD_ISSET(socket, &imask)) {
6501                 /* now receive a packet */
6502                 faddrLen = sizeof(struct sockaddr_in);
6503                 code =
6504                     recvfrom(socket, tbuffer, sizeof(tbuffer), 0,
6505                              (struct sockaddr *)&faddr, &faddrLen);
6506
6507                 if (code > 0) {
6508                     memcpy(&theader, tbuffer, sizeof(struct rx_header));
6509                     if (counter == ntohl(theader.callNumber))
6510                         goto success;
6511                     continue;
6512                 }
6513             }
6514             break;
6515         }
6516
6517         /* see if we've timed out */
6518         if (!--waitCount) {
6519             return -1;
6520         }
6521         waitTime <<= 1;
6522     }
6523
6524  success:
6525     code -= sizeof(struct rx_header);
6526     if (code > outputLength)
6527         code = outputLength;
6528     memcpy(outputData, tp, code);
6529     return code;
6530 }
6531
6532 afs_int32
6533 rx_GetServerDebug(osi_socket socket, afs_uint32 remoteAddr,
6534                   afs_uint16 remotePort, struct rx_debugStats * stat,
6535                   afs_uint32 * supportedValues)
6536 {
6537     struct rx_debugIn in;
6538     afs_int32 rc = 0;
6539
6540     *supportedValues = 0;
6541     in.type = htonl(RX_DEBUGI_GETSTATS);
6542     in.index = 0;
6543
6544     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6545                        &in, sizeof(in), stat, sizeof(*stat));
6546
6547     /*
6548      * If the call was successful, fixup the version and indicate
6549      * what contents of the stat structure are valid.
6550      * Also do net to host conversion of fields here.
6551      */
6552
6553     if (rc >= 0) {
6554         if (stat->version >= RX_DEBUGI_VERSION_W_SECSTATS) {
6555             *supportedValues |= RX_SERVER_DEBUG_SEC_STATS;
6556         }
6557         if (stat->version >= RX_DEBUGI_VERSION_W_GETALLCONN) {
6558             *supportedValues |= RX_SERVER_DEBUG_ALL_CONN;
6559         }
6560         if (stat->version >= RX_DEBUGI_VERSION_W_RXSTATS) {
6561             *supportedValues |= RX_SERVER_DEBUG_RX_STATS;
6562         }
6563         if (stat->version >= RX_DEBUGI_VERSION_W_WAITERS) {
6564             *supportedValues |= RX_SERVER_DEBUG_WAITER_CNT;
6565         }
6566         if (stat->version >= RX_DEBUGI_VERSION_W_IDLETHREADS) {
6567             *supportedValues |= RX_SERVER_DEBUG_IDLE_THREADS;
6568         }
6569         if (stat->version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6570             *supportedValues |= RX_SERVER_DEBUG_NEW_PACKETS;
6571         }
6572         if (stat->version >= RX_DEBUGI_VERSION_W_GETPEER) {
6573             *supportedValues |= RX_SERVER_DEBUG_ALL_PEER;
6574         }
6575         if (stat->version >= RX_DEBUGI_VERSION_W_WAITED) {
6576             *supportedValues |= RX_SERVER_DEBUG_WAITED_CNT;
6577         }
6578
6579         stat->nFreePackets = ntohl(stat->nFreePackets);
6580         stat->packetReclaims = ntohl(stat->packetReclaims);
6581         stat->callsExecuted = ntohl(stat->callsExecuted);
6582         stat->nWaiting = ntohl(stat->nWaiting);
6583         stat->idleThreads = ntohl(stat->idleThreads);
6584     }
6585
6586     return rc;
6587 }
6588
6589 afs_int32
6590 rx_GetServerStats(osi_socket socket, afs_uint32 remoteAddr,
6591                   afs_uint16 remotePort, struct rx_stats * stat,
6592                   afs_uint32 * supportedValues)
6593 {
6594     struct rx_debugIn in;
6595     afs_int32 *lp = (afs_int32 *) stat;
6596     int i;
6597     afs_int32 rc = 0;
6598
6599     /*
6600      * supportedValues is currently unused, but added to allow future
6601      * versioning of this function.
6602      */
6603
6604     *supportedValues = 0;
6605     in.type = htonl(RX_DEBUGI_RXSTATS);
6606     in.index = 0;
6607     memset(stat, 0, sizeof(*stat));
6608
6609     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6610                        &in, sizeof(in), stat, sizeof(*stat));
6611
6612     if (rc >= 0) {
6613
6614         /*
6615          * Do net to host conversion here
6616          */
6617
6618         for (i = 0; i < sizeof(*stat) / sizeof(afs_int32); i++, lp++) {
6619             *lp = ntohl(*lp);
6620         }
6621     }
6622
6623     return rc;
6624 }
6625
6626 afs_int32
6627 rx_GetServerVersion(osi_socket socket, afs_uint32 remoteAddr,
6628                     afs_uint16 remotePort, size_t version_length,
6629                     char *version)
6630 {
6631     char a[1] = { 0 };
6632     return MakeDebugCall(socket, remoteAddr, remotePort,
6633                          RX_PACKET_TYPE_VERSION, a, 1, version,
6634                          version_length);
6635 }
6636
6637 afs_int32
6638 rx_GetServerConnections(osi_socket socket, afs_uint32 remoteAddr,
6639                         afs_uint16 remotePort, afs_int32 * nextConnection,
6640                         int allConnections, afs_uint32 debugSupportedValues,
6641                         struct rx_debugConn * conn,
6642                         afs_uint32 * supportedValues)
6643 {
6644     struct rx_debugIn in;
6645     afs_int32 rc = 0;
6646     int i;
6647
6648     /*
6649      * supportedValues is currently unused, but added to allow future
6650      * versioning of this function.
6651      */
6652
6653     *supportedValues = 0;
6654     if (allConnections) {
6655         in.type = htonl(RX_DEBUGI_GETALLCONN);
6656     } else {
6657         in.type = htonl(RX_DEBUGI_GETCONN);
6658     }
6659     in.index = htonl(*nextConnection);
6660     memset(conn, 0, sizeof(*conn));
6661
6662     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6663                        &in, sizeof(in), conn, sizeof(*conn));
6664
6665     if (rc >= 0) {
6666         *nextConnection += 1;
6667
6668         /*
6669          * Convert old connection format to new structure.
6670          */
6671
6672         if (debugSupportedValues & RX_SERVER_DEBUG_OLD_CONN) {
6673             struct rx_debugConn_vL *vL = (struct rx_debugConn_vL *)conn;
6674 #define MOVEvL(a) (conn->a = vL->a)
6675
6676             /* any old or unrecognized version... */
6677             for (i = 0; i < RX_MAXCALLS; i++) {
6678                 MOVEvL(callState[i]);
6679                 MOVEvL(callMode[i]);
6680                 MOVEvL(callFlags[i]);
6681                 MOVEvL(callOther[i]);
6682             }
6683             if (debugSupportedValues & RX_SERVER_DEBUG_SEC_STATS) {
6684                 MOVEvL(secStats.type);
6685                 MOVEvL(secStats.level);
6686                 MOVEvL(secStats.flags);
6687                 MOVEvL(secStats.expires);
6688                 MOVEvL(secStats.packetsReceived);
6689                 MOVEvL(secStats.packetsSent);
6690                 MOVEvL(secStats.bytesReceived);
6691                 MOVEvL(secStats.bytesSent);
6692             }
6693         }
6694
6695         /*
6696          * Do net to host conversion here
6697          * NOTE:
6698          *    I don't convert host or port since we are most likely
6699          *    going to want these in NBO.
6700          */
6701         conn->cid = ntohl(conn->cid);
6702         conn->serial = ntohl(conn->serial);
6703         for (i = 0; i < RX_MAXCALLS; i++) {
6704             conn->callNumber[i] = ntohl(conn->callNumber[i]);
6705         }
6706         conn->error = ntohl(conn->error);
6707         conn->secStats.flags = ntohl(conn->secStats.flags);
6708         conn->secStats.expires = ntohl(conn->secStats.expires);
6709         conn->secStats.packetsReceived =
6710             ntohl(conn->secStats.packetsReceived);
6711         conn->secStats.packetsSent = ntohl(conn->secStats.packetsSent);
6712         conn->secStats.bytesReceived = ntohl(conn->secStats.bytesReceived);
6713         conn->secStats.bytesSent = ntohl(conn->secStats.bytesSent);
6714         conn->epoch = ntohl(conn->epoch);
6715         conn->natMTU = ntohl(conn->natMTU);
6716     }
6717
6718     return rc;
6719 }
6720
6721 afs_int32
6722 rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr,
6723                   afs_uint16 remotePort, afs_int32 * nextPeer,
6724                   afs_uint32 debugSupportedValues, struct rx_debugPeer * peer,
6725                   afs_uint32 * supportedValues)
6726 {
6727     struct rx_debugIn in;
6728     afs_int32 rc = 0;
6729
6730     /*
6731      * supportedValues is currently unused, but added to allow future
6732      * versioning of this function.
6733      */
6734
6735     *supportedValues = 0;
6736     in.type = htonl(RX_DEBUGI_GETPEER);
6737     in.index = htonl(*nextPeer);
6738     memset(peer, 0, sizeof(*peer));
6739
6740     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6741                        &in, sizeof(in), peer, sizeof(*peer));
6742
6743     if (rc >= 0) {
6744         *nextPeer += 1;
6745
6746         /*
6747          * Do net to host conversion here
6748          * NOTE:
6749          *    I don't convert host or port since we are most likely
6750          *    going to want these in NBO.
6751          */
6752         peer->ifMTU = ntohs(peer->ifMTU);
6753         peer->idleWhen = ntohl(peer->idleWhen);
6754         peer->refCount = ntohs(peer->refCount);
6755         peer->burstWait.sec = ntohl(peer->burstWait.sec);
6756         peer->burstWait.usec = ntohl(peer->burstWait.usec);
6757         peer->rtt = ntohl(peer->rtt);
6758         peer->rtt_dev = ntohl(peer->rtt_dev);
6759         peer->timeout.sec = ntohl(peer->timeout.sec);
6760         peer->timeout.usec = ntohl(peer->timeout.usec);
6761         peer->nSent = ntohl(peer->nSent);
6762         peer->reSends = ntohl(peer->reSends);
6763         peer->inPacketSkew = ntohl(peer->inPacketSkew);
6764         peer->outPacketSkew = ntohl(peer->outPacketSkew);
6765         peer->rateFlag = ntohl(peer->rateFlag);
6766         peer->natMTU = ntohs(peer->natMTU);
6767         peer->maxMTU = ntohs(peer->maxMTU);
6768         peer->maxDgramPackets = ntohs(peer->maxDgramPackets);
6769         peer->ifDgramPackets = ntohs(peer->ifDgramPackets);
6770         peer->MTU = ntohs(peer->MTU);
6771         peer->cwind = ntohs(peer->cwind);
6772         peer->nDgramPackets = ntohs(peer->nDgramPackets);
6773         peer->congestSeq = ntohs(peer->congestSeq);
6774         peer->bytesSent.high = ntohl(peer->bytesSent.high);
6775         peer->bytesSent.low = ntohl(peer->bytesSent.low);
6776         peer->bytesReceived.high = ntohl(peer->bytesReceived.high);
6777         peer->bytesReceived.low = ntohl(peer->bytesReceived.low);
6778     }
6779
6780     return rc;
6781 }
6782 #endif /* RXDEBUG */
6783
6784 void
6785 shutdown_rx(void)
6786 {
6787     struct rx_serverQueueEntry *np;
6788     register int i, j;
6789 #ifndef KERNEL
6790     register struct rx_call *call;
6791     register struct rx_serverQueueEntry *sq;
6792 #endif /* KERNEL */
6793
6794     LOCK_RX_INIT;
6795     if (rxinit_status == 1) {
6796         UNLOCK_RX_INIT;
6797         return;                 /* Already shutdown. */
6798     }
6799 #ifndef KERNEL
6800     rx_port = 0;
6801 #ifndef AFS_PTHREAD_ENV
6802     FD_ZERO(&rx_selectMask);
6803 #endif /* AFS_PTHREAD_ENV */
6804     rxi_dataQuota = RX_MAX_QUOTA;
6805 #ifndef AFS_PTHREAD_ENV
6806     rxi_StopListener();
6807 #endif /* AFS_PTHREAD_ENV */
6808     shutdown_rxevent();
6809     rx_SetEpoch(0);
6810 #ifndef AFS_PTHREAD_ENV
6811 #ifndef AFS_USE_GETTIMEOFDAY
6812     clock_UnInit();
6813 #endif /* AFS_USE_GETTIMEOFDAY */
6814 #endif /* AFS_PTHREAD_ENV */
6815
6816     while (!queue_IsEmpty(&rx_freeCallQueue)) {
6817         call = queue_First(&rx_freeCallQueue, rx_call);
6818         queue_Remove(call);
6819         rxi_Free(call, sizeof(struct rx_call));
6820     }
6821
6822     while (!queue_IsEmpty(&rx_idleServerQueue)) {
6823         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
6824         queue_Remove(sq);
6825     }
6826 #endif /* KERNEL */
6827
6828     {
6829         struct rx_peer **peer_ptr, **peer_end;
6830         for (peer_ptr = &rx_peerHashTable[0], peer_end =
6831              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
6832              peer_ptr++) {
6833             struct rx_peer *peer, *next;
6834             for (peer = *peer_ptr; peer; peer = next) {
6835                 rx_interface_stat_p rpc_stat, nrpc_stat;
6836                 size_t space;
6837                 for (queue_Scan
6838                      (&peer->rpcStats, rpc_stat, nrpc_stat,
6839                       rx_interface_stat)) {
6840                     unsigned int num_funcs;
6841                     if (!rpc_stat)
6842                         break;
6843                     queue_Remove(&rpc_stat->queue_header);
6844                     queue_Remove(&rpc_stat->all_peers);
6845                     num_funcs = rpc_stat->stats[0].func_total;
6846                     space =
6847                         sizeof(rx_interface_stat_t) +
6848                         rpc_stat->stats[0].func_total *
6849                         sizeof(rx_function_entry_v1_t);
6850
6851                     rxi_Free(rpc_stat, space);
6852                     MUTEX_ENTER(&rx_rpc_stats);
6853                     rxi_rpc_peer_stat_cnt -= num_funcs;
6854                     MUTEX_EXIT(&rx_rpc_stats);
6855                 }
6856                 next = peer->next;
6857                 rxi_FreePeer(peer);
6858                 rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
6859             }
6860         }
6861     }
6862     for (i = 0; i < RX_MAX_SERVICES; i++) {
6863         if (rx_services[i])
6864             rxi_Free(rx_services[i], sizeof(*rx_services[i]));
6865     }
6866     for (i = 0; i < rx_hashTableSize; i++) {
6867         register struct rx_connection *tc, *ntc;
6868         MUTEX_ENTER(&rx_connHashTable_lock);
6869         for (tc = rx_connHashTable[i]; tc; tc = ntc) {
6870             ntc = tc->next;
6871             for (j = 0; j < RX_MAXCALLS; j++) {
6872                 if (tc->call[j]) {
6873                     rxi_Free(tc->call[j], sizeof(*tc->call[j]));
6874                 }
6875             }
6876             rxi_Free(tc, sizeof(*tc));
6877         }
6878         MUTEX_EXIT(&rx_connHashTable_lock);
6879     }
6880
6881     MUTEX_ENTER(&freeSQEList_lock);
6882
6883     while ((np = rx_FreeSQEList)) {
6884         rx_FreeSQEList = *(struct rx_serverQueueEntry **)np;
6885         MUTEX_DESTROY(&np->lock);
6886         rxi_Free(np, sizeof(*np));
6887     }
6888
6889     MUTEX_EXIT(&freeSQEList_lock);
6890     MUTEX_DESTROY(&freeSQEList_lock);
6891     MUTEX_DESTROY(&rx_freeCallQueue_lock);
6892     MUTEX_DESTROY(&rx_connHashTable_lock);
6893     MUTEX_DESTROY(&rx_peerHashTable_lock);
6894     MUTEX_DESTROY(&rx_serverPool_lock);
6895
6896     osi_Free(rx_connHashTable,
6897              rx_hashTableSize * sizeof(struct rx_connection *));
6898     osi_Free(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6899
6900     UNPIN(rx_connHashTable,
6901           rx_hashTableSize * sizeof(struct rx_connection *));
6902     UNPIN(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6903
6904     rxi_FreeAllPackets();
6905
6906     MUTEX_ENTER(&rx_stats_mutex);
6907     rxi_dataQuota = RX_MAX_QUOTA;
6908     rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0;
6909     MUTEX_EXIT(&rx_stats_mutex);
6910
6911     rxinit_status = 1;
6912     UNLOCK_RX_INIT;
6913 }
6914
6915 #ifdef RX_ENABLE_LOCKS
6916 void
6917 osirx_AssertMine(afs_kmutex_t * lockaddr, char *msg)
6918 {
6919     if (!MUTEX_ISMINE(lockaddr))
6920         osi_Panic("Lock not held: %s", msg);
6921 }
6922 #endif /* RX_ENABLE_LOCKS */
6923
6924 #ifndef KERNEL
6925
6926 /*
6927  * Routines to implement connection specific data.
6928  */
6929
6930 int
6931 rx_KeyCreate(rx_destructor_t rtn)
6932 {
6933     int key;
6934     MUTEX_ENTER(&rxi_keyCreate_lock);
6935     key = rxi_keyCreate_counter++;
6936     rxi_keyCreate_destructor = (rx_destructor_t *)
6937         realloc((void *)rxi_keyCreate_destructor,
6938                 (key + 1) * sizeof(rx_destructor_t));
6939     rxi_keyCreate_destructor[key] = rtn;
6940     MUTEX_EXIT(&rxi_keyCreate_lock);
6941     return key;
6942 }
6943
6944 void
6945 rx_SetSpecific(struct rx_connection *conn, int key, void *ptr)
6946 {
6947     int i;
6948     MUTEX_ENTER(&conn->conn_data_lock);
6949     if (!conn->specific) {
6950         conn->specific = (void **)malloc((key + 1) * sizeof(void *));
6951         for (i = 0; i < key; i++)
6952             conn->specific[i] = NULL;
6953         conn->nSpecific = key + 1;
6954         conn->specific[key] = ptr;
6955     } else if (key >= conn->nSpecific) {
6956         conn->specific = (void **)
6957             realloc(conn->specific, (key + 1) * sizeof(void *));
6958         for (i = conn->nSpecific; i < key; i++)
6959             conn->specific[i] = NULL;
6960         conn->nSpecific = key + 1;
6961         conn->specific[key] = ptr;
6962     } else {
6963         if (conn->specific[key] && rxi_keyCreate_destructor[key])
6964             (*rxi_keyCreate_destructor[key]) (conn->specific[key]);
6965         conn->specific[key] = ptr;
6966     }
6967     MUTEX_EXIT(&conn->conn_data_lock);
6968 }
6969
6970 void *
6971 rx_GetSpecific(struct rx_connection *conn, int key)
6972 {
6973     void *ptr;
6974     MUTEX_ENTER(&conn->conn_data_lock);
6975     if (key >= conn->nSpecific)
6976         ptr = NULL;
6977     else
6978         ptr = conn->specific[key];
6979     MUTEX_EXIT(&conn->conn_data_lock);
6980     return ptr;
6981 }
6982
6983 #endif /* !KERNEL */
6984
6985 /*
6986  * processStats is a queue used to store the statistics for the local
6987  * process.  Its contents are similar to the contents of the rpcStats
6988  * queue on a rx_peer structure, but the actual data stored within
6989  * this queue contains totals across the lifetime of the process (assuming
6990  * the stats have not been reset) - unlike the per peer structures
6991  * which can come and go based upon the peer lifetime.
6992  */
6993
6994 static struct rx_queue processStats = { &processStats, &processStats };
6995
6996 /*
6997  * peerStats is a queue used to store the statistics for all peer structs.
6998  * Its contents are the union of all the peer rpcStats queues.
6999  */
7000
7001 static struct rx_queue peerStats = { &peerStats, &peerStats };
7002
7003 /*
7004  * rxi_monitor_processStats is used to turn process wide stat collection
7005  * on and off
7006  */
7007
7008 static int rxi_monitor_processStats = 0;
7009
7010 /*
7011  * rxi_monitor_peerStats is used to turn per peer stat collection on and off
7012  */
7013
7014 static int rxi_monitor_peerStats = 0;
7015
7016 /*
7017  * rxi_AddRpcStat - given all of the information for a particular rpc
7018  * call, create (if needed) and update the stat totals for the rpc.
7019  *
7020  * PARAMETERS
7021  *
7022  * IN stats - the queue of stats that will be updated with the new value
7023  *
7024  * IN rxInterface - a unique number that identifies the rpc interface
7025  *
7026  * IN currentFunc - the index of the function being invoked
7027  *
7028  * IN totalFunc - the total number of functions in this interface
7029  *
7030  * IN queueTime - the amount of time this function waited for a thread
7031  *
7032  * IN execTime - the amount of time this function invocation took to execute
7033  *
7034  * IN bytesSent - the number bytes sent by this invocation
7035  *
7036  * IN bytesRcvd - the number bytes received by this invocation
7037  *
7038  * IN isServer - if true, this invocation was made to a server
7039  *
7040  * IN remoteHost - the ip address of the remote host
7041  *
7042  * IN remotePort - the port of the remote host
7043  *
7044  * IN addToPeerList - if != 0, add newly created stat to the global peer list
7045  *
7046  * INOUT counter - if a new stats structure is allocated, the counter will
7047  * be updated with the new number of allocated stat structures
7048  *
7049  * RETURN CODES
7050  *
7051  * Returns void.
7052  */
7053
7054 static int
7055 rxi_AddRpcStat(struct rx_queue *stats, afs_uint32 rxInterface,
7056                afs_uint32 currentFunc, afs_uint32 totalFunc,
7057                struct clock *queueTime, struct clock *execTime,
7058                afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd, int isServer,
7059                afs_uint32 remoteHost, afs_uint32 remotePort,
7060                int addToPeerList, unsigned int *counter)
7061 {
7062     int rc = 0;
7063     rx_interface_stat_p rpc_stat, nrpc_stat;
7064
7065     /*
7066      * See if there's already a structure for this interface
7067      */
7068
7069     for (queue_Scan(stats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7070         if ((rpc_stat->stats[0].interfaceId == rxInterface)
7071             && (rpc_stat->stats[0].remote_is_server == isServer))
7072             break;
7073     }
7074
7075     /*
7076      * Didn't find a match so allocate a new structure and add it to the
7077      * queue.
7078      */
7079
7080     if (queue_IsEnd(stats, rpc_stat) || (rpc_stat == NULL)
7081         || (rpc_stat->stats[0].interfaceId != rxInterface)
7082         || (rpc_stat->stats[0].remote_is_server != isServer)) {
7083         int i;
7084         size_t space;
7085
7086         space =
7087             sizeof(rx_interface_stat_t) +
7088             totalFunc * sizeof(rx_function_entry_v1_t);
7089
7090         rpc_stat = (rx_interface_stat_p) rxi_Alloc(space);
7091         if (rpc_stat == NULL) {
7092             rc = 1;
7093             goto fail;
7094         }
7095         *counter += totalFunc;
7096         for (i = 0; i < totalFunc; i++) {
7097             rpc_stat->stats[i].remote_peer = remoteHost;
7098             rpc_stat->stats[i].remote_port = remotePort;
7099             rpc_stat->stats[i].remote_is_server = isServer;
7100             rpc_stat->stats[i].interfaceId = rxInterface;
7101             rpc_stat->stats[i].func_total = totalFunc;
7102             rpc_stat->stats[i].func_index = i;
7103             hzero(rpc_stat->stats[i].invocations);
7104             hzero(rpc_stat->stats[i].bytes_sent);
7105             hzero(rpc_stat->stats[i].bytes_rcvd);
7106             rpc_stat->stats[i].queue_time_sum.sec = 0;
7107             rpc_stat->stats[i].queue_time_sum.usec = 0;
7108             rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7109             rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7110             rpc_stat->stats[i].queue_time_min.sec = 9999999;
7111             rpc_stat->stats[i].queue_time_min.usec = 9999999;
7112             rpc_stat->stats[i].queue_time_max.sec = 0;
7113             rpc_stat->stats[i].queue_time_max.usec = 0;
7114             rpc_stat->stats[i].execution_time_sum.sec = 0;
7115             rpc_stat->stats[i].execution_time_sum.usec = 0;
7116             rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7117             rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7118             rpc_stat->stats[i].execution_time_min.sec = 9999999;
7119             rpc_stat->stats[i].execution_time_min.usec = 9999999;
7120             rpc_stat->stats[i].execution_time_max.sec = 0;
7121             rpc_stat->stats[i].execution_time_max.usec = 0;
7122         }
7123         queue_Prepend(stats, rpc_stat);
7124         if (addToPeerList) {
7125             queue_Prepend(&peerStats, &rpc_stat->all_peers);
7126         }
7127     }
7128
7129     /*
7130      * Increment the stats for this function
7131      */
7132
7133     hadd32(rpc_stat->stats[currentFunc].invocations, 1);
7134     hadd(rpc_stat->stats[currentFunc].bytes_sent, *bytesSent);
7135     hadd(rpc_stat->stats[currentFunc].bytes_rcvd, *bytesRcvd);
7136     clock_Add(&rpc_stat->stats[currentFunc].queue_time_sum, queueTime);
7137     clock_AddSq(&rpc_stat->stats[currentFunc].queue_time_sum_sqr, queueTime);
7138     if (clock_Lt(queueTime, &rpc_stat->stats[currentFunc].queue_time_min)) {
7139         rpc_stat->stats[currentFunc].queue_time_min = *queueTime;
7140     }
7141     if (clock_Gt(queueTime, &rpc_stat->stats[currentFunc].queue_time_max)) {
7142         rpc_stat->stats[currentFunc].queue_time_max = *queueTime;
7143     }
7144     clock_Add(&rpc_stat->stats[currentFunc].execution_time_sum, execTime);
7145     clock_AddSq(&rpc_stat->stats[currentFunc].execution_time_sum_sqr,
7146                 execTime);
7147     if (clock_Lt(execTime, &rpc_stat->stats[currentFunc].execution_time_min)) {
7148         rpc_stat->stats[currentFunc].execution_time_min = *execTime;
7149     }
7150     if (clock_Gt(execTime, &rpc_stat->stats[currentFunc].execution_time_max)) {
7151         rpc_stat->stats[currentFunc].execution_time_max = *execTime;
7152     }
7153
7154   fail:
7155     return rc;
7156 }
7157
7158 /*
7159  * rx_IncrementTimeAndCount - increment the times and count for a particular
7160  * rpc function.
7161  *
7162  * PARAMETERS
7163  *
7164  * IN peer - the peer who invoked the rpc
7165  *
7166  * IN rxInterface - a unique number that identifies the rpc interface
7167  *
7168  * IN currentFunc - the index of the function being invoked
7169  *
7170  * IN totalFunc - the total number of functions in this interface
7171  *
7172  * IN queueTime - the amount of time this function waited for a thread
7173  *
7174  * IN execTime - the amount of time this function invocation took to execute
7175  *
7176  * IN bytesSent - the number bytes sent by this invocation
7177  *
7178  * IN bytesRcvd - the number bytes received by this invocation
7179  *
7180  * IN isServer - if true, this invocation was made to a server
7181  *
7182  * RETURN CODES
7183  *
7184  * Returns void.
7185  */
7186
7187 void
7188 rx_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
7189                          afs_uint32 currentFunc, afs_uint32 totalFunc,
7190                          struct clock *queueTime, struct clock *execTime,
7191                          afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd,
7192                          int isServer)
7193 {
7194
7195     if (!(rxi_monitor_peerStats || rxi_monitor_processStats))
7196         return;
7197
7198     MUTEX_ENTER(&rx_rpc_stats);
7199     MUTEX_ENTER(&peer->peer_lock);
7200
7201     if (rxi_monitor_peerStats) {
7202         rxi_AddRpcStat(&peer->rpcStats, rxInterface, currentFunc, totalFunc,
7203                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7204                        peer->host, peer->port, 1, &rxi_rpc_peer_stat_cnt);
7205     }
7206
7207     if (rxi_monitor_processStats) {
7208         rxi_AddRpcStat(&processStats, rxInterface, currentFunc, totalFunc,
7209                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7210                        0xffffffff, 0xffffffff, 0, &rxi_rpc_process_stat_cnt);
7211     }
7212
7213     MUTEX_EXIT(&peer->peer_lock);
7214     MUTEX_EXIT(&rx_rpc_stats);
7215
7216 }
7217
7218 /*
7219  * rx_MarshallProcessRPCStats - marshall an array of rpc statistics
7220  *
7221  * PARAMETERS
7222  *
7223  * IN callerVersion - the rpc stat version of the caller.
7224  *
7225  * IN count - the number of entries to marshall.
7226  *
7227  * IN stats - pointer to stats to be marshalled.
7228  *
7229  * OUT ptr - Where to store the marshalled data.
7230  *
7231  * RETURN CODES
7232  *
7233  * Returns void.
7234  */
7235 void
7236 rx_MarshallProcessRPCStats(afs_uint32 callerVersion, int count,
7237                            rx_function_entry_v1_t * stats, afs_uint32 ** ptrP)
7238 {
7239     int i;
7240     afs_uint32 *ptr;
7241
7242     /*
7243      * We only support the first version
7244      */
7245     for (ptr = *ptrP, i = 0; i < count; i++, stats++) {
7246         *(ptr++) = stats->remote_peer;
7247         *(ptr++) = stats->remote_port;
7248         *(ptr++) = stats->remote_is_server;
7249         *(ptr++) = stats->interfaceId;
7250         *(ptr++) = stats->func_total;
7251         *(ptr++) = stats->func_index;
7252         *(ptr++) = hgethi(stats->invocations);
7253         *(ptr++) = hgetlo(stats->invocations);
7254         *(ptr++) = hgethi(stats->bytes_sent);
7255         *(ptr++) = hgetlo(stats->bytes_sent);
7256         *(ptr++) = hgethi(stats->bytes_rcvd);
7257         *(ptr++) = hgetlo(stats->bytes_rcvd);
7258         *(ptr++) = stats->queue_time_sum.sec;
7259         *(ptr++) = stats->queue_time_sum.usec;
7260         *(ptr++) = stats->queue_time_sum_sqr.sec;
7261         *(ptr++) = stats->queue_time_sum_sqr.usec;
7262         *(ptr++) = stats->queue_time_min.sec;
7263         *(ptr++) = stats->queue_time_min.usec;
7264         *(ptr++) = stats->queue_time_max.sec;
7265         *(ptr++) = stats->queue_time_max.usec;
7266         *(ptr++) = stats->execution_time_sum.sec;
7267         *(ptr++) = stats->execution_time_sum.usec;
7268         *(ptr++) = stats->execution_time_sum_sqr.sec;
7269         *(ptr++) = stats->execution_time_sum_sqr.usec;
7270         *(ptr++) = stats->execution_time_min.sec;
7271         *(ptr++) = stats->execution_time_min.usec;
7272         *(ptr++) = stats->execution_time_max.sec;
7273         *(ptr++) = stats->execution_time_max.usec;
7274     }
7275     *ptrP = ptr;
7276 }
7277
7278 /*
7279  * rx_RetrieveProcessRPCStats - retrieve all of the rpc statistics for
7280  * this process
7281  *
7282  * PARAMETERS
7283  *
7284  * IN callerVersion - the rpc stat version of the caller
7285  *
7286  * OUT myVersion - the rpc stat version of this function
7287  *
7288  * OUT clock_sec - local time seconds
7289  *
7290  * OUT clock_usec - local time microseconds
7291  *
7292  * OUT allocSize - the number of bytes allocated to contain stats
7293  *
7294  * OUT statCount - the number stats retrieved from this process.
7295  *
7296  * OUT stats - the actual stats retrieved from this process.
7297  *
7298  * RETURN CODES
7299  *
7300  * Returns void.  If successful, stats will != NULL.
7301  */
7302
7303 int
7304 rx_RetrieveProcessRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7305                            afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7306                            size_t * allocSize, afs_uint32 * statCount,
7307                            afs_uint32 ** stats)
7308 {
7309     size_t space = 0;
7310     afs_uint32 *ptr;
7311     struct clock now;
7312     int rc = 0;
7313
7314     *stats = 0;
7315     *allocSize = 0;
7316     *statCount = 0;
7317     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7318
7319     /*
7320      * Check to see if stats are enabled
7321      */
7322
7323     MUTEX_ENTER(&rx_rpc_stats);
7324     if (!rxi_monitor_processStats) {
7325         MUTEX_EXIT(&rx_rpc_stats);
7326         return rc;
7327     }
7328
7329     clock_GetTime(&now);
7330     *clock_sec = now.sec;
7331     *clock_usec = now.usec;
7332
7333     /*
7334      * Allocate the space based upon the caller version
7335      *
7336      * If the client is at an older version than we are,
7337      * we return the statistic data in the older data format, but
7338      * we still return our version number so the client knows we
7339      * are maintaining more data than it can retrieve.
7340      */
7341
7342     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7343         space = rxi_rpc_process_stat_cnt * sizeof(rx_function_entry_v1_t);
7344         *statCount = rxi_rpc_process_stat_cnt;
7345     } else {
7346         /*
7347          * This can't happen yet, but in the future version changes
7348          * can be handled by adding additional code here
7349          */
7350     }
7351
7352     if (space > (size_t) 0) {
7353         *allocSize = space;
7354         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7355
7356         if (ptr != NULL) {
7357             rx_interface_stat_p rpc_stat, nrpc_stat;
7358
7359
7360             for (queue_Scan
7361                  (&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7362                 /*
7363                  * Copy the data based upon the caller version
7364                  */
7365                 rx_MarshallProcessRPCStats(callerVersion,
7366                                            rpc_stat->stats[0].func_total,
7367                                            rpc_stat->stats, &ptr);
7368             }
7369         } else {
7370             rc = ENOMEM;
7371         }
7372     }
7373     MUTEX_EXIT(&rx_rpc_stats);
7374     return rc;
7375 }
7376
7377 /*
7378  * rx_RetrievePeerRPCStats - retrieve all of the rpc statistics for the peers
7379  *
7380  * PARAMETERS
7381  *
7382  * IN callerVersion - the rpc stat version of the caller
7383  *
7384  * OUT myVersion - the rpc stat version of this function
7385  *
7386  * OUT clock_sec - local time seconds
7387  *
7388  * OUT clock_usec - local time microseconds
7389  *
7390  * OUT allocSize - the number of bytes allocated to contain stats
7391  *
7392  * OUT statCount - the number of stats retrieved from the individual
7393  * peer structures.
7394  *
7395  * OUT stats - the actual stats retrieved from the individual peer structures.
7396  *
7397  * RETURN CODES
7398  *
7399  * Returns void.  If successful, stats will != NULL.
7400  */
7401
7402 int
7403 rx_RetrievePeerRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7404                         afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7405                         size_t * allocSize, afs_uint32 * statCount,
7406                         afs_uint32 ** stats)
7407 {
7408     size_t space = 0;
7409     afs_uint32 *ptr;
7410     struct clock now;
7411     int rc = 0;
7412
7413     *stats = 0;
7414     *statCount = 0;
7415     *allocSize = 0;
7416     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7417
7418     /*
7419      * Check to see if stats are enabled
7420      */
7421
7422     MUTEX_ENTER(&rx_rpc_stats);
7423     if (!rxi_monitor_peerStats) {
7424         MUTEX_EXIT(&rx_rpc_stats);
7425         return rc;
7426     }
7427
7428     clock_GetTime(&now);
7429     *clock_sec = now.sec;
7430     *clock_usec = now.usec;
7431
7432     /*
7433      * Allocate the space based upon the caller version
7434      *
7435      * If the client is at an older version than we are,
7436      * we return the statistic data in the older data format, but
7437      * we still return our version number so the client knows we
7438      * are maintaining more data than it can retrieve.
7439      */
7440
7441     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7442         space = rxi_rpc_peer_stat_cnt * sizeof(rx_function_entry_v1_t);
7443         *statCount = rxi_rpc_peer_stat_cnt;
7444     } else {
7445         /*
7446          * This can't happen yet, but in the future version changes
7447          * can be handled by adding additional code here
7448          */
7449     }
7450
7451     if (space > (size_t) 0) {
7452         *allocSize = space;
7453         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7454
7455         if (ptr != NULL) {
7456             rx_interface_stat_p rpc_stat, nrpc_stat;
7457             char *fix_offset;
7458
7459             for (queue_Scan
7460                  (&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7461                 /*
7462                  * We have to fix the offset of rpc_stat since we are
7463                  * keeping this structure on two rx_queues.  The rx_queue
7464                  * package assumes that the rx_queue member is the first
7465                  * member of the structure.  That is, rx_queue assumes that
7466                  * any one item is only on one queue at a time.  We are
7467                  * breaking that assumption and so we have to do a little
7468                  * math to fix our pointers.
7469                  */
7470
7471                 fix_offset = (char *)rpc_stat;
7472                 fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7473                 rpc_stat = (rx_interface_stat_p) fix_offset;
7474
7475                 /*
7476                  * Copy the data based upon the caller version
7477                  */
7478                 rx_MarshallProcessRPCStats(callerVersion,
7479                                            rpc_stat->stats[0].func_total,
7480                                            rpc_stat->stats, &ptr);
7481             }
7482         } else {
7483             rc = ENOMEM;
7484         }
7485     }
7486     MUTEX_EXIT(&rx_rpc_stats);
7487     return rc;
7488 }
7489
7490 /*
7491  * rx_FreeRPCStats - free memory allocated by
7492  *                   rx_RetrieveProcessRPCStats and rx_RetrievePeerRPCStats
7493  *
7494  * PARAMETERS
7495  *
7496  * IN stats - stats previously returned by rx_RetrieveProcessRPCStats or
7497  * rx_RetrievePeerRPCStats
7498  *
7499  * IN allocSize - the number of bytes in stats.
7500  *
7501  * RETURN CODES
7502  *
7503  * Returns void.
7504  */
7505
7506 void
7507 rx_FreeRPCStats(afs_uint32 * stats, size_t allocSize)
7508 {
7509     rxi_Free(stats, allocSize);
7510 }
7511
7512 /*
7513  * rx_queryProcessRPCStats - see if process rpc stat collection is
7514  * currently enabled.
7515  *
7516  * PARAMETERS
7517  *
7518  * RETURN CODES
7519  *
7520  * Returns 0 if stats are not enabled != 0 otherwise
7521  */
7522
7523 int
7524 rx_queryProcessRPCStats(void)
7525 {
7526     int rc;
7527     MUTEX_ENTER(&rx_rpc_stats);
7528     rc = rxi_monitor_processStats;
7529     MUTEX_EXIT(&rx_rpc_stats);
7530     return rc;
7531 }
7532
7533 /*
7534  * rx_queryPeerRPCStats - see if peer stat collection is currently enabled.
7535  *
7536  * PARAMETERS
7537  *
7538  * RETURN CODES
7539  *
7540  * Returns 0 if stats are not enabled != 0 otherwise
7541  */
7542
7543 int
7544 rx_queryPeerRPCStats(void)
7545 {
7546     int rc;
7547     MUTEX_ENTER(&rx_rpc_stats);
7548     rc = rxi_monitor_peerStats;
7549     MUTEX_EXIT(&rx_rpc_stats);
7550     return rc;
7551 }
7552
7553 /*
7554  * rx_enableProcessRPCStats - begin rpc stat collection for entire process
7555  *
7556  * PARAMETERS
7557  *
7558  * RETURN CODES
7559  *
7560  * Returns void.
7561  */
7562
7563 void
7564 rx_enableProcessRPCStats(void)
7565 {
7566     MUTEX_ENTER(&rx_rpc_stats);
7567     rx_enable_stats = 1;
7568     rxi_monitor_processStats = 1;
7569     MUTEX_EXIT(&rx_rpc_stats);
7570 }
7571
7572 /*
7573  * rx_enablePeerRPCStats - begin rpc stat collection per peer structure
7574  *
7575  * PARAMETERS
7576  *
7577  * RETURN CODES
7578  *
7579  * Returns void.
7580  */
7581
7582 void
7583 rx_enablePeerRPCStats(void)
7584 {
7585     MUTEX_ENTER(&rx_rpc_stats);
7586     rx_enable_stats = 1;
7587     rxi_monitor_peerStats = 1;
7588     MUTEX_EXIT(&rx_rpc_stats);
7589 }
7590
7591 /*
7592  * rx_disableProcessRPCStats - stop rpc stat collection for entire process
7593  *
7594  * PARAMETERS
7595  *
7596  * RETURN CODES
7597  *
7598  * Returns void.
7599  */
7600
7601 void
7602 rx_disableProcessRPCStats(void)
7603 {
7604     rx_interface_stat_p rpc_stat, nrpc_stat;
7605     size_t space;
7606
7607     MUTEX_ENTER(&rx_rpc_stats);
7608
7609     /*
7610      * Turn off process statistics and if peer stats is also off, turn
7611      * off everything
7612      */
7613
7614     rxi_monitor_processStats = 0;
7615     if (rxi_monitor_peerStats == 0) {
7616         rx_enable_stats = 0;
7617     }
7618
7619     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7620         unsigned int num_funcs = 0;
7621         if (!rpc_stat)
7622             break;
7623         queue_Remove(rpc_stat);
7624         num_funcs = rpc_stat->stats[0].func_total;
7625         space =
7626             sizeof(rx_interface_stat_t) +
7627             rpc_stat->stats[0].func_total * sizeof(rx_function_entry_v1_t);
7628
7629         rxi_Free(rpc_stat, space);
7630         rxi_rpc_process_stat_cnt -= num_funcs;
7631     }
7632     MUTEX_EXIT(&rx_rpc_stats);
7633 }
7634
7635 /*
7636  * rx_disablePeerRPCStats - stop rpc stat collection for peers
7637  *
7638  * PARAMETERS
7639  *
7640  * RETURN CODES
7641  *
7642  * Returns void.
7643  */
7644
7645 void
7646 rx_disablePeerRPCStats(void)
7647 {
7648     struct rx_peer **peer_ptr, **peer_end;
7649     int code;
7650
7651     MUTEX_ENTER(&rx_rpc_stats);
7652
7653     /*
7654      * Turn off peer statistics and if process stats is also off, turn
7655      * off everything
7656      */
7657
7658     rxi_monitor_peerStats = 0;
7659     if (rxi_monitor_processStats == 0) {
7660         rx_enable_stats = 0;
7661     }
7662
7663     MUTEX_ENTER(&rx_peerHashTable_lock);
7664     for (peer_ptr = &rx_peerHashTable[0], peer_end =
7665          &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
7666          peer_ptr++) {
7667         struct rx_peer *peer, *next, *prev;
7668         for (prev = peer = *peer_ptr; peer; peer = next) {
7669             next = peer->next;
7670             code = MUTEX_TRYENTER(&peer->peer_lock);
7671             if (code) {
7672                 rx_interface_stat_p rpc_stat, nrpc_stat;
7673                 size_t space;
7674                 for (queue_Scan
7675                      (&peer->rpcStats, rpc_stat, nrpc_stat,
7676                       rx_interface_stat)) {
7677                     unsigned int num_funcs = 0;
7678                     if (!rpc_stat)
7679                         break;
7680                     queue_Remove(&rpc_stat->queue_header);
7681                     queue_Remove(&rpc_stat->all_peers);
7682                     num_funcs = rpc_stat->stats[0].func_total;
7683                     space =
7684                         sizeof(rx_interface_stat_t) +
7685                         rpc_stat->stats[0].func_total *
7686                         sizeof(rx_function_entry_v1_t);
7687
7688                     rxi_Free(rpc_stat, space);
7689                     rxi_rpc_peer_stat_cnt -= num_funcs;
7690                 }
7691                 MUTEX_EXIT(&peer->peer_lock);
7692                 if (prev == *peer_ptr) {
7693                     *peer_ptr = next;
7694                     prev = next;
7695                 } else
7696                     prev->next = next;
7697             } else {
7698                 prev = peer;
7699             }
7700         }
7701     }
7702     MUTEX_EXIT(&rx_peerHashTable_lock);
7703     MUTEX_EXIT(&rx_rpc_stats);
7704 }
7705
7706 /*
7707  * rx_clearProcessRPCStats - clear the contents of the rpc stats according
7708  * to clearFlag
7709  *
7710  * PARAMETERS
7711  *
7712  * IN clearFlag - flag indicating which stats to clear
7713  *
7714  * RETURN CODES
7715  *
7716  * Returns void.
7717  */
7718
7719 void
7720 rx_clearProcessRPCStats(afs_uint32 clearFlag)
7721 {
7722     rx_interface_stat_p rpc_stat, nrpc_stat;
7723
7724     MUTEX_ENTER(&rx_rpc_stats);
7725
7726     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7727         unsigned int num_funcs = 0, i;
7728         num_funcs = rpc_stat->stats[0].func_total;
7729         for (i = 0; i < num_funcs; i++) {
7730             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7731                 hzero(rpc_stat->stats[i].invocations);
7732             }
7733             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7734                 hzero(rpc_stat->stats[i].bytes_sent);
7735             }
7736             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7737                 hzero(rpc_stat->stats[i].bytes_rcvd);
7738             }
7739             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7740                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7741                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7742             }
7743             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7744                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7745                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7746             }
7747             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7748                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7749                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7750             }
7751             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7752                 rpc_stat->stats[i].queue_time_max.sec = 0;
7753                 rpc_stat->stats[i].queue_time_max.usec = 0;
7754             }
7755             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7756                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7757                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7758             }
7759             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7760                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7761                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7762             }
7763             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7764                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7765                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7766             }
7767             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7768                 rpc_stat->stats[i].execution_time_max.sec = 0;
7769                 rpc_stat->stats[i].execution_time_max.usec = 0;
7770             }
7771         }
7772     }
7773
7774     MUTEX_EXIT(&rx_rpc_stats);
7775 }
7776
7777 /*
7778  * rx_clearPeerRPCStats - clear the contents of the rpc stats according
7779  * to clearFlag
7780  *
7781  * PARAMETERS
7782  *
7783  * IN clearFlag - flag indicating which stats to clear
7784  *
7785  * RETURN CODES
7786  *
7787  * Returns void.
7788  */
7789
7790 void
7791 rx_clearPeerRPCStats(afs_uint32 clearFlag)
7792 {
7793     rx_interface_stat_p rpc_stat, nrpc_stat;
7794
7795     MUTEX_ENTER(&rx_rpc_stats);
7796
7797     for (queue_Scan(&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7798         unsigned int num_funcs = 0, i;
7799         char *fix_offset;
7800         /*
7801          * We have to fix the offset of rpc_stat since we are
7802          * keeping this structure on two rx_queues.  The rx_queue
7803          * package assumes that the rx_queue member is the first
7804          * member of the structure.  That is, rx_queue assumes that
7805          * any one item is only on one queue at a time.  We are
7806          * breaking that assumption and so we have to do a little
7807          * math to fix our pointers.
7808          */
7809
7810         fix_offset = (char *)rpc_stat;
7811         fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7812         rpc_stat = (rx_interface_stat_p) fix_offset;
7813
7814         num_funcs = rpc_stat->stats[0].func_total;
7815         for (i = 0; i < num_funcs; i++) {
7816             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7817                 hzero(rpc_stat->stats[i].invocations);
7818             }
7819             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7820                 hzero(rpc_stat->stats[i].bytes_sent);
7821             }
7822             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7823                 hzero(rpc_stat->stats[i].bytes_rcvd);
7824             }
7825             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7826                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7827                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7828             }
7829             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7830                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7831                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7832             }
7833             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7834                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7835                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7836             }
7837             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7838                 rpc_stat->stats[i].queue_time_max.sec = 0;
7839                 rpc_stat->stats[i].queue_time_max.usec = 0;
7840             }
7841             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7842                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7843                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7844             }
7845             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7846                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7847                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7848             }
7849             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7850                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7851                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7852             }
7853             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7854                 rpc_stat->stats[i].execution_time_max.sec = 0;
7855                 rpc_stat->stats[i].execution_time_max.usec = 0;
7856             }
7857         }
7858     }
7859
7860     MUTEX_EXIT(&rx_rpc_stats);
7861 }
7862
/*
 * rxi_rxstat_userok holds the routine consulted to decide whether a
 * caller is authorized to enable/disable/clear RX statistics; the routine
 * returns 1 for an authorized caller.  It stays NULL until one is
 * installed with rx_SetRxStatUserOk.
 */
static int (*rxi_rxstat_userok) (struct rx_call * call) = NULL;
7868
7869 void
7870 rx_SetRxStatUserOk(int (*proc) (struct rx_call * call))
7871 {
7872     rxi_rxstat_userok = proc;
7873 }
7874
7875 int
7876 rx_RxStatUserOk(struct rx_call *call)
7877 {
7878     if (!rxi_rxstat_userok)
7879         return 0;
7880     return rxi_rxstat_userok(call);
7881 }
7882
7883 #ifdef AFS_NT40_ENV
7884 /*
7885  * DllMain() -- Entry-point function called by the DllMainCRTStartup()
7886  *     function in the MSVC runtime DLL (msvcrt.dll).
7887  *
7888  *     Note: the system serializes calls to this function.
7889  */
7890 BOOL WINAPI
7891 DllMain(HINSTANCE dllInstHandle,        /* instance handle for this DLL module */
7892         DWORD reason,                   /* reason function is being called */
7893         LPVOID reserved)                /* reserved for future use */
7894 {
7895     switch (reason) {
7896     case DLL_PROCESS_ATTACH:
7897         /* library is being attached to a process */
7898         INIT_PTHREAD_LOCKS;
7899         return TRUE;
7900
7901     case DLL_PROCESS_DETACH:
7902         return TRUE;
7903
7904     default:
7905         return FALSE;
7906     }
7907 }
7908 #endif
7909