src/rx/rx.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* RX:  Extended Remote Procedure Call */
  11
  12 #include <afsconfig.h>
  13 #ifdef  KERNEL
  14 #include "afs/param.h"
  15 #else
  16 #include <afs/param.h>
  17 #endif
  18
  19 RCSID
  20     ("$Header$");
  21
  22 #ifdef KERNEL
  23 #include "afs/sysincludes.h"
  24 #include "afsincludes.h"
  25 #ifndef UKERNEL
  26 #include "h/types.h"
  27 #include "h/time.h"
  28 #include "h/stat.h"
  29 #ifdef  AFS_OSF_ENV
  30 #include <net/net_globals.h>
  31 #endif /* AFS_OSF_ENV */
  32 #ifdef AFS_LINUX20_ENV
  33 #include "h/socket.h"
  34 #endif
  35 #include "netinet/in.h"
  36 #ifdef AFS_SUN57_ENV
  37 #include "inet/common.h"
  38 #include "inet/ip.h"
  39 #include "inet/ip_ire.h"
  40 #endif
  41 #include "afs/afs_args.h"
  42 #include "afs/afs_osi.h"
  43 #ifdef RX_KERNEL_TRACE
  44 #include "rx_kcommon.h"
  45 #endif
  46 #if     (defined(AFS_AUX_ENV) || defined(AFS_AIX_ENV))
  47 #include "h/systm.h"
  48 #endif
  49 #ifdef RXDEBUG
  50 #undef RXDEBUG                  /* turn off debugging */
  51 #endif /* RXDEBUG */
  52 #if defined(AFS_SGI_ENV)
  53 #include "sys/debug.h"
  54 #endif
  55 #include "afsint.h"
  56 #ifdef  AFS_OSF_ENV
  57 #undef kmem_alloc
  58 #undef kmem_free
  59 #undef mem_alloc
  60 #undef mem_free
  61 #undef register
  62 #endif /* AFS_OSF_ENV */
  63 #else /* !UKERNEL */
  64 #include "afs/sysincludes.h"
  65 #include "afsincludes.h"
  66 #endif /* !UKERNEL */
  67 #include "afs/lock.h"
  68 #include "rx_kmutex.h"
  69 #include "rx_kernel.h"
  70 #include "rx_clock.h"
  71 #include "rx_queue.h"
  72 #include "rx.h"
  73 #include "rx_globals.h"
  74 #include "rx_trace.h"
  75 #define AFSOP_STOP_RXCALLBACK   210     /* Stop CALLBACK process */
  76 #define AFSOP_STOP_AFS          211     /* Stop AFS process */
  77 #define AFSOP_STOP_BKG          212     /* Stop BKG process */
  78 #include "afsint.h"
  79 extern afs_int32 afs_termState;
  80 #ifdef AFS_AIX41_ENV
  81 #include "sys/lockl.h"
  82 #include "sys/lock_def.h"
  83 #endif /* AFS_AIX41_ENV */
  84 # include "rxgen_consts.h"
  85 #else /* KERNEL */
  86 # include <sys/types.h>
  87 # include <string.h>
  88 # include <errno.h>
  89 #ifdef AFS_NT40_ENV
  90 # include <stdlib.h>
  91 # include <fcntl.h>
  92 # include <afs/afsutil.h>
  93 # include <WINNT\afsreg.h>
  94 #else
  95 # include <sys/socket.h>
  96 # include <sys/file.h>
  97 # include <netdb.h>
  98 # include <sys/stat.h>
  99 # include <netinet/in.h>
 100 # include <sys/time.h>
 101 #endif
 102 # include "rx.h"
 103 # include "rx_user.h"
 104 # include "rx_clock.h"
 105 # include "rx_queue.h"
 106 # include "rx_globals.h"
 107 # include "rx_trace.h"
 108 # include <afs/rxgen_consts.h>
 109 #endif /* KERNEL */
 110
 111 int (*registerProgram) () = 0;
 112 int (*swapNameProgram) () = 0;
 113
 114 /* Local static routines */
 115 static void rxi_DestroyConnectionNoLock(register struct rx_connection *conn);
 116 #ifdef RX_ENABLE_LOCKS
 117 static void rxi_SetAcksInTransmitQueue(register struct rx_call *call);
 118 #endif
 119
 120 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
 121 struct rx_tq_debug {
 122     afs_int32 rxi_start_aborted;        /* rxi_start awoke after rxi_Send in error. */
 123     afs_int32 rxi_start_in_error;
 124 } rx_tq_debug;
 125 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
 126
 127 /*
 128  * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
 129  * currently allocated within rx.  This number is used to allocate the
 130  * memory required to return the statistics when queried.
 131  */
 132
 133 static unsigned int rxi_rpc_peer_stat_cnt;
 134
 135 /*
 136  * rxi_rpc_process_stat_cnt counts the total number of local process stat
 137  * structures currently allocated within rx.  The number is used to allocate
 138  * the memory required to return the statistics when queried.
 139  */
 140
 141 static unsigned int rxi_rpc_process_stat_cnt;
 142
 143 #if !defined(offsetof)
 144 #include <stddef.h>             /* for definition of offsetof() */
 145 #endif
 146
 147 #ifdef AFS_PTHREAD_ENV
 148 #include <assert.h>
 149
 150 /*
 151  * Use procedural initialization of mutexes/condition variables
 152  * to ease NT porting
 153  */
 154
 155 extern pthread_mutex_t rx_stats_mutex;
 156 extern pthread_mutex_t des_init_mutex;
 157 extern pthread_mutex_t des_random_mutex;
 158 extern pthread_mutex_t rx_clock_mutex;
 159 extern pthread_mutex_t rxi_connCacheMutex;
 160 extern pthread_mutex_t rx_event_mutex;
 161 extern pthread_mutex_t osi_malloc_mutex;
 162 extern pthread_mutex_t event_handler_mutex;
 163 extern pthread_mutex_t listener_mutex;
 164 extern pthread_mutex_t rx_if_init_mutex;
 165 extern pthread_mutex_t rx_if_mutex;
 166 extern pthread_mutex_t rxkad_client_uid_mutex;
 167 extern pthread_mutex_t rxkad_random_mutex;
 168
 169 extern pthread_cond_t rx_event_handler_cond;
 170 extern pthread_cond_t rx_listener_cond;
 171
 172 static pthread_mutex_t epoch_mutex;
 173 static pthread_mutex_t rx_init_mutex;
 174 static pthread_mutex_t rx_debug_mutex;
 175
 176 static void
 177 rxi_InitPthread(void)
 178 {
 179     assert(pthread_mutex_init(&rx_clock_mutex, (const pthread_mutexattr_t *)0)
 180            == 0);
 181     assert(pthread_mutex_init(&rx_stats_mutex, (const pthread_mutexattr_t *)0)
 182            == 0);
 183     assert(pthread_mutex_init
 184            (&rxi_connCacheMutex, (const pthread_mutexattr_t *)0) == 0);
 185     assert(pthread_mutex_init(&rx_init_mutex, (const pthread_mutexattr_t *)0)
 186            == 0);
 187     assert(pthread_mutex_init(&epoch_mutex, (const pthread_mutexattr_t *)0) ==
 188            0);
 189     assert(pthread_mutex_init(&rx_event_mutex, (const pthread_mutexattr_t *)0)
 190            == 0);
 191     assert(pthread_mutex_init(&des_init_mutex, (const pthread_mutexattr_t *)0)
 192            == 0);
 193     assert(pthread_mutex_init
 194            (&des_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 195     assert(pthread_mutex_init
 196            (&osi_malloc_mutex, (const pthread_mutexattr_t *)0) == 0);
 197     assert(pthread_mutex_init
 198            (&event_handler_mutex, (const pthread_mutexattr_t *)0) == 0);
 199     assert(pthread_mutex_init(&listener_mutex, (const pthread_mutexattr_t *)0)
 200            == 0);
 201     assert(pthread_mutex_init
 202            (&rx_if_init_mutex, (const pthread_mutexattr_t *)0) == 0);
 203     assert(pthread_mutex_init(&rx_if_mutex, (const pthread_mutexattr_t *)0) ==
 204            0);
 205     assert(pthread_mutex_init
 206            (&rxkad_client_uid_mutex, (const pthread_mutexattr_t *)0) == 0);
 207     assert(pthread_mutex_init
 208            (&rxkad_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 209     assert(pthread_mutex_init(&rx_debug_mutex, (const pthread_mutexattr_t *)0)
 210            == 0);
 211
 212     assert(pthread_cond_init
 213            (&rx_event_handler_cond, (const pthread_condattr_t *)0) == 0);
 214     assert(pthread_cond_init(&rx_listener_cond, (const pthread_condattr_t *)0)
 215            == 0);
 216     assert(pthread_key_create(&rx_thread_id_key, NULL) == 0);
 217     assert(pthread_key_create(&rx_ts_info_key, NULL) == 0);
 218
 219     rxkad_global_stats_init();
 220 }
 221
 222 pthread_once_t rx_once_init = PTHREAD_ONCE_INIT;
 223 #define INIT_PTHREAD_LOCKS \
 224 assert(pthread_once(&rx_once_init, rxi_InitPthread)==0)
 225 /*
 226  * The rx_stats_mutex mutex protects the following global variables:
 227  * rxi_dataQuota
 228  * rxi_minDeficit
 229  * rxi_availProcs
 230  * rxi_totalMin
 231  * rxi_lowConnRefCount
 232  * rxi_lowPeerRefCount
 233  * rxi_nCalls
 234  * rxi_Alloccnt
 235  * rxi_Allocsize
 236  * rx_nFreePackets
 237  * rx_tq_debug
 238  * rx_stats
 239  */
 240 #else
 241 #define INIT_PTHREAD_LOCKS
 242 #endif
 243
 244
 245 /* Variables for handling the minProcs implementation.  availProcs gives the
 246  * number of threads available in the pool at this moment (not counting dudes
 247  * executing right now).  totalMin gives the total number of procs required
 248  * for handling all minProcs requests.  minDeficit is a dynamic variable
 249  * tracking the # of procs required to satisfy all of the remaining minProcs
 250  * demands.
 251  * For fine grain locking to work, the quota check and the reservation of
 252  * a server thread has to come while rxi_availProcs and rxi_minDeficit
 253  * are locked. To this end, the code has been modified under #ifdef
 254  * RX_ENABLE_LOCKS so that quota checks and reservation occur at the
 255  * same time. A new function, ReturnToServerPool() returns the allocation.
 256  *
 257  * A call can be on several queue's (but only one at a time). When
 258  * rxi_ResetCall wants to remove the call from a queue, it has to ensure
 259  * that no one else is touching the queue. To this end, we store the address
 260  * of the queue lock in the call structure (under the call lock) when we
 261  * put the call on a queue, and we clear the call_queue_lock when the
 262  * call is removed from a queue (once the call lock has been obtained).
 263  * This allows rxi_ResetCall to safely synchronize with others wishing
 264  * to manipulate the queue.
 265  */
 266
 267 #ifdef RX_ENABLE_LOCKS
 268 static afs_kmutex_t rx_rpc_stats;
 269 void rxi_StartUnlocked();
 270 #endif
 271
 272 /* We keep a "last conn pointer" in rxi_FindConnection. The odds are
 273 ** pretty good that the next packet coming in is from the same connection
 274 ** as the last packet, since we send multiple packets in a transmit window.
 275 */
 276 struct rx_connection *rxLastConn = 0;
 277
 278 #ifdef RX_ENABLE_LOCKS
 279 /* The locking hierarchy for rx fine grain locking is composed of these
 280  * tiers:
 281  *
 282  * rx_connHashTable_lock - synchronizes conn creation, rx_connHashTable access
 283  * conn_call_lock - used to synchronize rx_EndCall and rx_NewCall
 284  * call->lock - locks call data fields.
 285  * These are independent of each other:
 286  *      rx_freeCallQueue_lock
 287  *      rxi_keyCreate_lock
 288  * rx_serverPool_lock
 289  * freeSQEList_lock
 290  *
 291  * serverQueueEntry->lock
 292  * rx_rpc_stats
 293  * rx_peerHashTable_lock - locked under rx_connHashTable_lock
 294  * peer->lock - locks peer data fields.
 295  * conn_data_lock - that more than one thread is not updating a conn data
 296  *                  field at the same time.
 297  * rx_freePktQ_lock
 298  *
 299  * lowest level:
 300  *      multi_handle->lock
 301  *      rxevent_lock
 302  *      rx_stats_mutex
 303  *
 304  * Do we need a lock to protect the peer field in the conn structure?
 305  *      conn->peer was previously a constant for all intents and so has no
 306  *      lock protecting this field. The multihomed client delta introduced
 307  *      a RX code change : change the peer field in the connection structure
 308  *      to that remote interface from which the last packet for this
 309  *      connection was sent out. This may become an issue if further changes
 310  *      are made.
 311  */
 312 #define SET_CALL_QUEUE_LOCK(C, L) (C)->call_queue_lock = (L)
 313 #define CLEAR_CALL_QUEUE_LOCK(C) (C)->call_queue_lock = NULL
 314 #ifdef RX_LOCKS_DB
 315 /* rxdb_fileID is used to identify the lock location, along with line#. */
 316 static int rxdb_fileID = RXDB_FILE_RX;
 317 #endif /* RX_LOCKS_DB */
 318 #else /* RX_ENABLE_LOCKS */
 319 #define SET_CALL_QUEUE_LOCK(C, L)
 320 #define CLEAR_CALL_QUEUE_LOCK(C)
 321 #endif /* RX_ENABLE_LOCKS */
 322 struct rx_serverQueueEntry *rx_waitForPacket = 0;
 323 struct rx_serverQueueEntry *rx_waitingForPacket = 0;
 324
 325 /* ------------Exported Interfaces------------- */
 326
 327 /* This function allows rxkad to set the epoch to a suitably random number
 328  * which rx_NewConnection will use in the future.  The principal purpose is to
 329  * get rxnull connections to use the same epoch as the rxkad connections do, at
 330  * least once the first rxkad connection is established.  This is important now
 331  * that the host/port addresses aren't used in FindConnection: the uniqueness
 332  * of epoch/cid matters and the start time won't do. */
 333
 334 #ifdef AFS_PTHREAD_ENV
 335 /*
 336  * This mutex protects the following global variables:
 337  * rx_epoch
 338  */
 339
 340 #define LOCK_EPOCH assert(pthread_mutex_lock(&epoch_mutex)==0)
 341 #define UNLOCK_EPOCH assert(pthread_mutex_unlock(&epoch_mutex)==0)
 342 #else
 343 #define LOCK_EPOCH
 344 #define UNLOCK_EPOCH
 345 #endif /* AFS_PTHREAD_ENV */
 346
 347 void
 348 rx_SetEpoch(afs_uint32 epoch)
 349 {
 350     LOCK_EPOCH;                 /* takes epoch_mutex under AFS_PTHREAD_ENV; expands to nothing otherwise */
 351     rx_epoch = epoch;           /* replace the global epoch with the caller's value */
 352     UNLOCK_EPOCH;
 353 }
 354
 355 /* Initialize rx.  A port number may be mentioned, in which case this
 356  * becomes the default port number for any service installed later.
 357  * If 0 is provided for the port number, a random port will be chosen
 358  * by the kernel.  Whether this will ever overlap anything in
 359  * /etc/services is anybody's guess...  Returns 0 on success,
 360  * RX_ADDRINUSE if no socket could be obtained, or -1 on other errors. */
 361 #ifndef AFS_NT40_ENV
 362 static
 363 #endif
 364 int rxinit_status = 1;
 365 #ifdef AFS_PTHREAD_ENV
 366 /*
 367  * This mutex protects the following global variables:
 368  * rxinit_status
 369  */
 370
 371 #define LOCK_RX_INIT assert(pthread_mutex_lock(&rx_init_mutex)==0)
 372 #define UNLOCK_RX_INIT assert(pthread_mutex_unlock(&rx_init_mutex)==0)
 373 #else
 374 #define LOCK_RX_INIT
 375 #define UNLOCK_RX_INIT
 376 #endif
 377
 378 int
 379 rx_InitHost(u_int host, u_int port)
 380 {
 381 #ifdef KERNEL
 382     osi_timeval_t tv;
 383 #else /* KERNEL */
 384     struct timeval tv;
 385 #endif /* KERNEL */
 386     char *htable, *ptable;
 387     int tmp_status;
 388
 389 #if defined(AFS_DJGPP_ENV) && !defined(DEBUG)
 390     __djgpp_set_quiet_socket(1);
 391 #endif
 392
 393     SPLVAR;
 394
 395     INIT_PTHREAD_LOCKS;
 396     LOCK_RX_INIT;
 397     if (rxinit_status == 0) {
 398         tmp_status = rxinit_status;
 399         UNLOCK_RX_INIT;
 400         return tmp_status;      /* Already started; return previous error code. */
 401     }
 402 #ifdef RXDEBUG
 403     rxi_DebugInit();
 404 #endif
 405 #ifdef AFS_NT40_ENV
 406     if (afs_winsockInit() < 0)
 407         return -1;
 408 #endif
 409
 410 #ifndef KERNEL
 411     /*
 412      * Initialize anything necessary to provide a non-premptive threading
 413      * environment.
 414      */
 415     rxi_InitializeThreadSupport();
 416 #endif
 417
 418     /* Allocate and initialize a socket for client and perhaps server
 419      * connections. */
 420
 421     rx_socket = rxi_GetHostUDPSocket(host, (u_short) port);
 422     if (rx_socket == OSI_NULLSOCKET) {
 423         UNLOCK_RX_INIT;
 424         return RX_ADDRINUSE;
 425     }
 426 #ifdef  RX_ENABLE_LOCKS
 427 #ifdef RX_LOCKS_DB
 428     rxdb_init();
 429 #endif /* RX_LOCKS_DB */
 430     MUTEX_INIT(&rx_stats_mutex, "rx_stats_mutex", MUTEX_DEFAULT, 0);
 431     MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
 432     MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
 433     MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
 434     MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
 435                0);
 436     CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
 437             0);
 438     MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
 439                0);
 440     MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
 441                0);
 442     MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
 443 #ifndef KERNEL
 444     MUTEX_INIT(&rxi_keyCreate_lock, "rxi_keyCreate_lock", MUTEX_DEFAULT, 0);
 445 #endif /* !KERNEL */
 446 #if defined(KERNEL) && defined(AFS_HPUX110_ENV)
 447     if (!uniprocessor)
 448         rx_sleepLock = alloc_spinlock(LAST_HELD_ORDER - 10, "rx_sleepLock");
 449 #endif /* KERNEL && AFS_HPUX110_ENV */
 450 #endif /* RX_ENABLE_LOCKS */
 451
 452     rxi_nCalls = 0;
 453     rx_connDeadTime = 12;
 454     rx_tranquil = 0;            /* reset flag */
 455     memset((char *)&rx_stats, 0, sizeof(struct rx_stats));
 456     htable = (char *)
 457         osi_Alloc(rx_hashTableSize * sizeof(struct rx_connection *));
 458     PIN(htable, rx_hashTableSize * sizeof(struct rx_connection *));     /* XXXXX */
 459     memset(htable, 0, rx_hashTableSize * sizeof(struct rx_connection *));
 460     ptable = (char *)osi_Alloc(rx_hashTableSize * sizeof(struct rx_peer *));
 461     PIN(ptable, rx_hashTableSize * sizeof(struct rx_peer *));   /* XXXXX */
 462     memset(ptable, 0, rx_hashTableSize * sizeof(struct rx_peer *));
 463
 464     /* Malloc up a bunch of packets & buffers */
 465     rx_nFreePackets = 0;
 466     queue_Init(&rx_freePacketQueue);
 467     rxi_NeedMorePackets = FALSE;
 468 #ifdef RX_ENABLE_TSFPQ
 469     rx_nPackets = 0;    /* in TSFPQ version, rx_nPackets is managed by rxi_MorePackets* */
 470     rxi_MorePacketsTSFPQ(rx_extraPackets + RX_MAX_QUOTA + 2, RX_TS_FPQ_FLUSH_GLOBAL, 0);
 471 #else /* RX_ENABLE_TSFPQ */
 472     rx_nPackets = rx_extraPackets + RX_MAX_QUOTA + 2;   /* fudge */
 473     rxi_MorePackets(rx_nPackets);
 474 #endif /* RX_ENABLE_TSFPQ */
 475     rx_CheckPackets();
 476
 477     NETPRI;
 478
 479     clock_Init();
 480
 481 #if defined(AFS_NT40_ENV) && !defined(AFS_PTHREAD_ENV)
 482     tv.tv_sec = clock_now.sec;
 483     tv.tv_usec = clock_now.usec;
 484     srand((unsigned int)tv.tv_usec);
 485 #else
 486     osi_GetTime(&tv);
 487 #endif
 488     if (port) {
 489         rx_port = port;
 490     } else {
 491 #if defined(KERNEL) && !defined(UKERNEL)
 492         /* Really, this should never happen in a real kernel */
 493         rx_port = 0;
 494 #else
 495         struct sockaddr_in addr;
 496         int addrlen = sizeof(addr);
 497         if (getsockname((int)rx_socket, (struct sockaddr *)&addr, &addrlen)) {
 498             rx_Finalize();
 499             return -1;
 500         }
 501         rx_port = addr.sin_port;
 502 #endif
 503     }
 504     rx_stats.minRtt.sec = 9999999;
 505 #ifdef  KERNEL
 506     rx_SetEpoch(tv.tv_sec | 0x80000000);
 507 #else
 508     rx_SetEpoch(tv.tv_sec);     /* Start time of this package, rxkad
 509                                  * will provide a randomer value. */
 510 #endif
 511     MUTEX_ENTER(&rx_stats_mutex);
 512     rxi_dataQuota += rx_extraQuota;     /* + extra pkts caller asked to rsrv */
 513     MUTEX_EXIT(&rx_stats_mutex);
 514     /* *Slightly* random start time for the cid.  This is just to help
 515      * out with the hashing function at the peer */
 516     rx_nextCid = ((tv.tv_sec ^ tv.tv_usec) << RX_CIDSHIFT);
 517     rx_connHashTable = (struct rx_connection **)htable;
 518     rx_peerHashTable = (struct rx_peer **)ptable;
 519
 520     rx_lastAckDelay.sec = 0;
 521     rx_lastAckDelay.usec = 400000;      /* 400 milliseconds */
 522     rx_hardAckDelay.sec = 0;
 523     rx_hardAckDelay.usec = 100000;      /* 100 milliseconds */
 524     rx_softAckDelay.sec = 0;
 525     rx_softAckDelay.usec = 100000;      /* 100 milliseconds */
 526
 527     rxevent_Init(20, rxi_ReScheduleEvents);
 528
 529     /* Initialize various global queues */
 530     queue_Init(&rx_idleServerQueue);
 531     queue_Init(&rx_incomingCallQueue);
 532     queue_Init(&rx_freeCallQueue);
 533
 534 #if defined(AFS_NT40_ENV) && !defined(KERNEL)
 535     /* Initialize our list of usable IP addresses. */
 536     rx_GetIFInfo();
 537 #endif
 538
 539     /* Start listener process (exact function is dependent on the
 540      * implementation environment--kernel or user space) */
 541     rxi_StartListener();
 542
 543     USERPRI;
 544     tmp_status = rxinit_status = 0;
 545     UNLOCK_RX_INIT;
 546     return tmp_status;
 547 }
 548
 549 int
 550 rx_Init(u_int port)
 551 {
 552     return rx_InitHost(htonl(INADDR_ANY), port);
 553 }
 554
 555 /* called with unincremented nRequestsRunning to see if it is OK to start
 556  * a new thread in this service.  Could be "no" for two reasons: over the
 557  * max quota, or would prevent others from reaching their min quota.
 558  */
 559 #ifdef RX_ENABLE_LOCKS
 560 /* This verion of QuotaOK reserves quota if it's ok while the
 561  * rx_serverPool_lock is held.  Return quota using ReturnToServerPool().
 562  */
 563 static int
 564 QuotaOK(register struct rx_service *aservice)
 565 {
 566     /* check if over max quota */
 567     if (aservice->nRequestsRunning >= aservice->maxProcs) {
 568         return 0;
 569     }
 570
 571     /* under min quota, we're OK */
 572     /* otherwise, can use only if there are enough to allow everyone
 573      * to go to their min quota after this guy starts.
 574      */
 575     MUTEX_ENTER(&rx_stats_mutex);
 576     if ((aservice->nRequestsRunning < aservice->minProcs)
 577         || (rxi_availProcs > rxi_minDeficit)) {
 578         aservice->nRequestsRunning++;
 579         /* just started call in minProcs pool, need fewer to maintain
 580          * guarantee */
 581         if (aservice->nRequestsRunning <= aservice->minProcs)
 582             rxi_minDeficit--;
 583         rxi_availProcs--;
 584         MUTEX_EXIT(&rx_stats_mutex);
 585         return 1;
 586     }
 587     MUTEX_EXIT(&rx_stats_mutex);
 588
 589     return 0;
 590 }
 591
 592 static void
 593 ReturnToServerPool(register struct rx_service *aservice)
 594 {
 595     aservice->nRequestsRunning--;
 596     MUTEX_ENTER(&rx_stats_mutex);
 597     if (aservice->nRequestsRunning < aservice->minProcs)
 598         rxi_minDeficit++;
 599     rxi_availProcs++;
 600     MUTEX_EXIT(&rx_stats_mutex);
 601 }
 602
 603 #else /* RX_ENABLE_LOCKS */
 604 static int
 605 QuotaOK(register struct rx_service *aservice)
 606 {
 607     int rc = 0;
 608     /* under min quota, we're OK */
 609     if (aservice->nRequestsRunning < aservice->minProcs)
 610         return 1;
 611
 612     /* check if over max quota */
 613     if (aservice->nRequestsRunning >= aservice->maxProcs)
 614         return 0;
 615
 616     /* otherwise, can use only if there are enough to allow everyone
 617      * to go to their min quota after this guy starts.
 618      */
 619     if (rxi_availProcs > rxi_minDeficit)
 620         rc = 1;
 621     return rc;
 622 }
 623 #endif /* RX_ENABLE_LOCKS */
 624
 625 #ifndef KERNEL
 626 /* Called by rx_StartServer to start up lwp's to service calls.
 627    NExistingProcs gives the number of procs already existing, and which
 628    therefore needn't be created. */
 629 void
 630 rxi_StartServerProcs(int nExistingProcs)
 631 {
 632     register struct rx_service *service;
 633     register int i;
 634     int maxdiff = 0;
 635     int nProcs = 0;
 636
 637     /* For each service, reserve N processes, where N is the "minimum"
 638      * number of processes that MUST be able to execute a request in parallel,
 639      * at any time, for that process.  Also compute the maximum difference
 640      * between any service's maximum number of processes that can run
 641      * (i.e. the maximum number that ever will be run, and a guarantee
 642      * that this number will run if other services aren't running), and its
 643      * minimum number.  The result is the extra number of processes that
 644      * we need in order to provide the latter guarantee */
 645     for (i = 0; i < RX_MAX_SERVICES; i++) {
 646         int diff;
 647         service = rx_services[i];
 648         if (service == (struct rx_service *)0)
 649             break;
 650         nProcs += service->minProcs;
 651         diff = service->maxProcs - service->minProcs;
 652         if (diff > maxdiff)
 653             maxdiff = diff;
 654     }
 655     nProcs += maxdiff;          /* Extra processes needed to allow max number requested to run in any given service, under good conditions */
 656     nProcs -= nExistingProcs;   /* Subtract the number of procs that were previously created for use as server procs */
 657     for (i = 0; i < nProcs; i++) {
 658         rxi_StartServerProc(rx_ServerProc, rx_stackSize);
 659     }
 660 }
 661 #endif /* KERNEL */
 662
 663 #ifdef AFS_NT40_ENV
 664 /* This routine is only required on Windows */
 665 void
 666 rx_StartClientThread(void)
 667 {
 668 #ifdef AFS_PTHREAD_ENV
 669     pthread_t pid;
 670     pid = pthread_self();
 671 #endif /* AFS_PTHREAD_ENV */
 672 }
 673 #endif /* AFS_NT40_ENV */
 674
 675 /* This routine must be called if any services are exported.  If the
 676  * donateMe flag is set, the calling process is donated to the server
 677  * process pool */
 678 void
 679 rx_StartServer(int donateMe)
 680 {
 681     register struct rx_service *service;
 682     register int i;
 683     SPLVAR;
 684     clock_NewTime();
 685
 686     NETPRI;
 687     /* Start server processes, if necessary (exact function is dependent
 688      * on the implementation environment--kernel or user space).  DonateMe
 689      * will be 1 if there is 1 pre-existing proc, i.e. this one.  In this
 690      * case, one less new proc will be created rx_StartServerProcs.
 691      */
 692     rxi_StartServerProcs(donateMe);
 693
 694     /* count up the # of threads in minProcs, and add set the min deficit to
 695      * be that value, too.
 696      */
 697     for (i = 0; i < RX_MAX_SERVICES; i++) {
 698         service = rx_services[i];
 699         if (service == (struct rx_service *)0)
 700             break;
 701         MUTEX_ENTER(&rx_stats_mutex);
 702         rxi_totalMin += service->minProcs;
 703         /* below works even if a thread is running, since minDeficit would
 704          * still have been decremented and later re-incremented.
 705          */
 706         rxi_minDeficit += service->minProcs;
 707         MUTEX_EXIT(&rx_stats_mutex);
 708     }
 709
 710     /* Turn on reaping of idle server connections */
 711     rxi_ReapConnections();
 712
 713     USERPRI;
 714
 715     if (donateMe) {
 716 #ifndef AFS_NT40_ENV
 717 #ifndef KERNEL
 718         char name[32];
 719         static int nProcs;
 720 #ifdef AFS_PTHREAD_ENV
 721         pid_t pid;
 722         pid = (pid_t) pthread_self();
 723 #else /* AFS_PTHREAD_ENV */
 724         PROCESS pid;
 725         LWP_CurrentProcess(&pid);
 726 #endif /* AFS_PTHREAD_ENV */
 727
 728         sprintf(name, "srv_%d", ++nProcs);
 729         if (registerProgram)
 730             (*registerProgram) (pid, name);
 731 #endif /* KERNEL */
 732 #endif /* AFS_NT40_ENV */
 733         rx_ServerProc(NULL);    /* Never returns */
 734     }
 735 #ifdef RX_ENABLE_TSFPQ
 736     /* no use leaving packets around in this thread's local queue if
 737      * it isn't getting donated to the server thread pool.
 738      */
 739     rxi_FlushLocalPacketsTSFPQ();
 740 #endif /* RX_ENABLE_TSFPQ */
 741     return;
 742 }
 743
 744 /* Create a new client connection to the specified service, using the
 745  * specified security object to implement the security model for this
 746  * connection. */
 747 struct rx_connection *
 748 rx_NewConnection(register afs_uint32 shost, u_short sport, u_short sservice,
 749                  register struct rx_securityClass *securityObject,
 750                  int serviceSecurityIndex)
 751 {
 752     int hashindex, i;
 753     afs_int32 cid;
 754     register struct rx_connection *conn;
 755
 756     SPLVAR;
 757
 758     clock_NewTime();
 759     dpf(("rx_NewConnection(host %x, port %u, service %u, securityObject %x, serviceSecurityIndex %d)\n", ntohl(shost), ntohs(sport), sservice, securityObject, serviceSecurityIndex));
 760
 761     /* Vasilsi said: "NETPRI protects Cid and Alloc", but can this be true in
 762      * the case of kmem_alloc? */
 763     conn = rxi_AllocConnection();
 764 #ifdef  RX_ENABLE_LOCKS
 765     MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
 766     MUTEX_INIT(&conn->conn_data_lock, "conn call lock", MUTEX_DEFAULT, 0);
 767     CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
 768 #endif
 769     NETPRI;
 770     MUTEX_ENTER(&rx_connHashTable_lock);
 771     cid = (rx_nextCid += RX_MAXCALLS);
 772     conn->type = RX_CLIENT_CONNECTION;
 773     conn->cid = cid;
 774     conn->epoch = rx_epoch;
 775     conn->peer = rxi_FindPeer(shost, sport, 0, 1);
 776     conn->serviceId = sservice;
 777     conn->securityObject = securityObject;
 778     conn->securityData = (void *) 0;
 779     conn->securityIndex = serviceSecurityIndex;
 780     rx_SetConnDeadTime(conn, rx_connDeadTime);
 781     conn->ackRate = RX_FAST_ACK_RATE;
 782     conn->nSpecific = 0;
 783     conn->specific = NULL;
 784     conn->challengeEvent = NULL;
 785     conn->delayedAbortEvent = NULL;
 786     conn->abortCount = 0;
 787     conn->error = 0;
 788     for (i = 0; i < RX_MAXCALLS; i++) {
 789         conn->twind[i] = rx_initSendWindow;
 790         conn->rwind[i] = rx_initReceiveWindow;
 791     }
 792
 793     RXS_NewConnection(securityObject, conn);
 794     hashindex =
 795         CONN_HASH(shost, sport, conn->cid, conn->epoch, RX_CLIENT_CONNECTION);
 796
 797     conn->refCount++;           /* no lock required since only this thread knows... */
 798     conn->next = rx_connHashTable[hashindex];
 799     rx_connHashTable[hashindex] = conn;
 800     rx_MutexIncrement(rx_stats.nClientConns, rx_stats_mutex);
 801     MUTEX_EXIT(&rx_connHashTable_lock);
 802     USERPRI;
 803     return conn;
 804 }
 805
 806 void
 807 rx_SetConnDeadTime(register struct rx_connection *conn, register int seconds)
 808 {
 809     /* The idea is to set the dead time to a value that allows several
 810      * keepalives to be dropped without timing out the connection. */
 811     conn->secondsUntilDead = MAX(seconds, 6);
 812     conn->secondsUntilPing = conn->secondsUntilDead / 6;
 813 }
 814
 815 int rxi_lowPeerRefCount = 0;
 816 int rxi_lowConnRefCount = 0;
 817
 818 /*
 819  * Cleanup a connection that was destroyed in rxi_DestroyConnectioNoLock.
 820  * NOTE: must not be called with rx_connHashTable_lock held.
 821  */
 822 void
 823 rxi_CleanupConnection(struct rx_connection *conn)
 824 {
 825     /* Notify the service exporter, if requested, that this connection
 826      * is being destroyed */
 827     if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
 828         (*conn->service->destroyConnProc) (conn);
 829
 830     /* Notify the security module that this connection is being destroyed */
 831     RXS_DestroyConnection(conn->securityObject, conn);
 832
 833     /* If this is the last connection using the rx_peer struct, set its
 834      * idle time to now. rxi_ReapConnections will reap it if it's still
 835      * idle (refCount == 0) after rx_idlePeerTime (60 seconds) have passed.
 836      */
 837     MUTEX_ENTER(&rx_peerHashTable_lock);
 838     if (conn->peer->refCount < 2) {
 839         conn->peer->idleWhen = clock_Sec();
 840         if (conn->peer->refCount < 1) {
 841             conn->peer->refCount = 1;
 842             MUTEX_ENTER(&rx_stats_mutex);
 843             rxi_lowPeerRefCount++;
 844             MUTEX_EXIT(&rx_stats_mutex);
 845         }
 846     }
 847     conn->peer->refCount--;
 848     MUTEX_EXIT(&rx_peerHashTable_lock);
 849
 850     if (conn->type == RX_SERVER_CONNECTION)
 851         rx_MutexDecrement(rx_stats.nServerConns, rx_stats_mutex);
 852     else
 853         rx_MutexDecrement(rx_stats.nClientConns, rx_stats_mutex);
 854 #ifndef KERNEL
 855     if (conn->specific) {
 856         int i;
 857         for (i = 0; i < conn->nSpecific; i++) {
 858             if (conn->specific[i] && rxi_keyCreate_destructor[i])
 859                 (*rxi_keyCreate_destructor[i]) (conn->specific[i]);
 860             conn->specific[i] = NULL;
 861         }
 862         free(conn->specific);
 863     }
 864     conn->specific = NULL;
 865     conn->nSpecific = 0;
 866 #endif /* !KERNEL */
 867
 868     MUTEX_DESTROY(&conn->conn_call_lock);
 869     MUTEX_DESTROY(&conn->conn_data_lock);
 870     CV_DESTROY(&conn->conn_call_cv);
 871
 872     rxi_FreeConnection(conn);
 873 }
 874
 875 /* Destroy the specified connection */
 876 void
 877 rxi_DestroyConnection(register struct rx_connection *conn)
 878 {
 879     MUTEX_ENTER(&rx_connHashTable_lock);
 880     rxi_DestroyConnectionNoLock(conn);
 881     /* conn should be at the head of the cleanup list */
 882     if (conn == rx_connCleanup_list) {
 883         rx_connCleanup_list = rx_connCleanup_list->next;
 884         MUTEX_EXIT(&rx_connHashTable_lock);
 885         rxi_CleanupConnection(conn);
 886     }
 887 #ifdef RX_ENABLE_LOCKS
 888     else {
 889         MUTEX_EXIT(&rx_connHashTable_lock);
 890     }
 891 #endif /* RX_ENABLE_LOCKS */
 892 }
 893
 894 static void
 895 rxi_DestroyConnectionNoLock(register struct rx_connection *conn)
 896 {
 897     register struct rx_connection **conn_ptr;
 898     register int havecalls = 0;
 899     struct rx_packet *packet;
 900     int i;
 901     SPLVAR;
 902
 903     clock_NewTime();
 904
 905     NETPRI;
 906     MUTEX_ENTER(&conn->conn_data_lock);
 907     if (conn->refCount > 0)
 908         conn->refCount--;
 909     else {
 910         MUTEX_ENTER(&rx_stats_mutex);
 911         rxi_lowConnRefCount++;
 912         MUTEX_EXIT(&rx_stats_mutex);
 913     }
 914
 915     if ((conn->refCount > 0) || (conn->flags & RX_CONN_BUSY)) {
 916         /* Busy; wait till the last guy before proceeding */
 917         MUTEX_EXIT(&conn->conn_data_lock);
 918         USERPRI;
 919         return;
 920     }
 921
 922     /* If the client previously called rx_NewCall, but it is still
 923      * waiting, treat this as a running call, and wait to destroy the
 924      * connection later when the call completes. */
 925     if ((conn->type == RX_CLIENT_CONNECTION)
 926         && (conn->flags & RX_CONN_MAKECALL_WAITING)) {
 927         conn->flags |= RX_CONN_DESTROY_ME;
 928         MUTEX_EXIT(&conn->conn_data_lock);
 929         USERPRI;
 930         return;
 931     }
 932     MUTEX_EXIT(&conn->conn_data_lock);
 933
 934     /* Check for extant references to this connection */
 935     for (i = 0; i < RX_MAXCALLS; i++) {
 936         register struct rx_call *call = conn->call[i];
 937         if (call) {
 938             havecalls = 1;
 939             if (conn->type == RX_CLIENT_CONNECTION) {
 940                 MUTEX_ENTER(&call->lock);
 941                 if (call->delayedAckEvent) {
 942                     /* Push the final acknowledgment out now--there
 943                      * won't be a subsequent call to acknowledge the
 944                      * last reply packets */
 945                     rxevent_Cancel(call->delayedAckEvent, call,
 946                                    RX_CALL_REFCOUNT_DELAY);
 947                     if (call->state == RX_STATE_PRECALL
 948                         || call->state == RX_STATE_ACTIVE) {
 949                         rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
 950                     } else {
 951                         rxi_AckAll(NULL, call, 0);
 952                     }
 953                 }
 954                 MUTEX_EXIT(&call->lock);
 955             }
 956         }
 957     }
 958 #ifdef RX_ENABLE_LOCKS
 959     if (!havecalls) {
 960         if (MUTEX_TRYENTER(&conn->conn_data_lock)) {
 961             MUTEX_EXIT(&conn->conn_data_lock);
 962         } else {
 963             /* Someone is accessing a packet right now. */
 964             havecalls = 1;
 965         }
 966     }
 967 #endif /* RX_ENABLE_LOCKS */
 968
 969     if (havecalls) {
 970         /* Don't destroy the connection if there are any call
 971          * structures still in use */
 972         MUTEX_ENTER(&conn->conn_data_lock);
 973         conn->flags |= RX_CONN_DESTROY_ME;
 974         MUTEX_EXIT(&conn->conn_data_lock);
 975         USERPRI;
 976         return;
 977     }
 978
 979     if (conn->delayedAbortEvent) {
 980         rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
 981         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
 982         if (packet) {
 983             MUTEX_ENTER(&conn->conn_data_lock);
 984             rxi_SendConnectionAbort(conn, packet, 0, 1);
 985             MUTEX_EXIT(&conn->conn_data_lock);
 986             rxi_FreePacket(packet);
 987         }
 988     }
 989
 990     /* Remove from connection hash table before proceeding */
 991     conn_ptr =
 992         &rx_connHashTable[CONN_HASH
 993                           (peer->host, peer->port, conn->cid, conn->epoch,
 994                            conn->type)];
 995     for (; *conn_ptr; conn_ptr = &(*conn_ptr)->next) {
 996         if (*conn_ptr == conn) {
 997             *conn_ptr = conn->next;
 998             break;
 999         }
1000     }
1001     /* if the conn that we are destroying was the last connection, then we
1002      * clear rxLastConn as well */
1003     if (rxLastConn == conn)
1004         rxLastConn = 0;
1005
1006     /* Make sure the connection is completely reset before deleting it. */
1007     /* get rid of pending events that could zap us later */
1008     if (conn->challengeEvent)
1009         rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
1010     if (conn->checkReachEvent)
1011         rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
1012
1013     /* Add the connection to the list of destroyed connections that
1014      * need to be cleaned up. This is necessary to avoid deadlocks
1015      * in the routines we call to inform others that this connection is
1016      * being destroyed. */
1017     conn->next = rx_connCleanup_list;
1018     rx_connCleanup_list = conn;
1019 }
1020
1021 /* Externally available version */
1022 void
1023 rx_DestroyConnection(register struct rx_connection *conn)
1024 {
1025     SPLVAR;
1026
1027     NETPRI;
1028     rxi_DestroyConnection(conn);
1029     USERPRI;
1030 }
1031
1032 void
1033 rx_GetConnection(register struct rx_connection *conn)
1034 {
1035     SPLVAR;
1036
1037     NETPRI;
1038     MUTEX_ENTER(&conn->conn_data_lock);
1039     conn->refCount++;
1040     MUTEX_EXIT(&conn->conn_data_lock);
1041     USERPRI;
1042 }
1043
1044 /* Wait for the transmit queue to no longer be busy.
1045  * requires the call->lock to be held */
1046 static void rxi_WaitforTQBusy(struct rx_call *call) {
1047     while (call->flags & RX_CALL_TQ_BUSY) {
1048         call->flags |= RX_CALL_TQ_WAIT;
1049         call->tqWaiters++;
1050 #ifdef RX_ENABLE_LOCKS
1051         osirx_AssertMine(&call->lock, "rxi_WaitforTQ lock");
1052         CV_WAIT(&call->cv_tq, &call->lock);
1053 #else /* RX_ENABLE_LOCKS */
1054         osi_rxSleep(&call->tq);
1055 #endif /* RX_ENABLE_LOCKS */
1056         call->tqWaiters--;
1057         if (call->tqWaiters == 0) {
1058             call->flags &= ~RX_CALL_TQ_WAIT;
1059         }
1060     }
1061 }
1062 /* Start a new rx remote procedure call, on the specified connection.
1063  * If wait is set to 1, wait for a free call channel; otherwise return
1064  * 0.  Maxtime gives the maximum number of seconds this call may take,
1065  * after rx_NewCall returns.  After this time interval, a call to any
1066  * of rx_SendData, rx_ReadData, etc. will fail with RX_CALL_TIMEOUT.
1067  * For fine grain locking, we hold the conn_call_lock in order to
1068  * to ensure that we don't get signalle after we found a call in an active
1069  * state and before we go to sleep.
1070  */
1071 struct rx_call *
1072 rx_NewCall(register struct rx_connection *conn)
1073 {
1074     register int i;
1075     register struct rx_call *call;
1076     struct clock queueTime;
1077     SPLVAR;
1078
1079     clock_NewTime();
1080     dpf(("rx_NewCall(conn %x)\n", conn));
1081
1082     NETPRI;
1083     clock_GetTime(&queueTime);
1084     MUTEX_ENTER(&conn->conn_call_lock);
1085
1086     /*
1087      * Check if there are others waiting for a new call.
1088      * If so, let them go first to avoid starving them.
1089      * This is a fairly simple scheme, and might not be
1090      * a complete solution for large numbers of waiters.
1091      *
1092      * makeCallWaiters keeps track of the number of
1093      * threads waiting to make calls and the
1094      * RX_CONN_MAKECALL_WAITING flag bit is used to
1095      * indicate that there are indeed calls waiting.
1096      * The flag is set when the waiter is incremented.
1097      * It is only cleared in rx_EndCall when
1098      * makeCallWaiters is 0.  This prevents us from
1099      * accidently destroying the connection while it
1100      * is potentially about to be used.
1101      */
1102     MUTEX_ENTER(&conn->conn_data_lock);
1103     if (conn->makeCallWaiters) {
1104         conn->flags |= RX_CONN_MAKECALL_WAITING;
1105         conn->makeCallWaiters++;
1106         MUTEX_EXIT(&conn->conn_data_lock);
1107
1108 #ifdef  RX_ENABLE_LOCKS
1109         CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1110 #else
1111         osi_rxSleep(conn);
1112 #endif
1113         MUTEX_ENTER(&conn->conn_data_lock);
1114         conn->makeCallWaiters--;
1115     }
1116     MUTEX_EXIT(&conn->conn_data_lock);
1117
1118     for (;;) {
1119         for (i = 0; i < RX_MAXCALLS; i++) {
1120             call = conn->call[i];
1121             if (call) {
1122                 MUTEX_ENTER(&call->lock);
1123                 if (call->state == RX_STATE_DALLY) {
1124                     rxi_ResetCall(call, 0);
1125                     (*call->callNumber)++;
1126                     break;
1127                 }
1128                 MUTEX_EXIT(&call->lock);
1129             } else {
1130                 call = rxi_NewCall(conn, i);
1131                 break;
1132             }
1133         }
1134         if (i < RX_MAXCALLS) {
1135             break;
1136         }
1137         MUTEX_ENTER(&conn->conn_data_lock);
1138         conn->flags |= RX_CONN_MAKECALL_WAITING;
1139         conn->makeCallWaiters++;
1140         MUTEX_EXIT(&conn->conn_data_lock);
1141
1142 #ifdef  RX_ENABLE_LOCKS
1143         CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1144 #else
1145         osi_rxSleep(conn);
1146 #endif
1147         MUTEX_ENTER(&conn->conn_data_lock);
1148         conn->makeCallWaiters--;
1149         MUTEX_EXIT(&conn->conn_data_lock);
1150     }
1151     /*
1152      * Wake up anyone else who might be giving us a chance to
1153      * run (see code above that avoids resource starvation).
1154      */
1155 #ifdef  RX_ENABLE_LOCKS
1156     CV_BROADCAST(&conn->conn_call_cv);
1157 #else
1158     osi_rxWakeup(conn);
1159 #endif
1160
1161     CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1162
1163     /* Client is initially in send mode */
1164     call->state = RX_STATE_ACTIVE;
1165     call->error = conn->error;
1166     if (call->error)
1167         call->mode = RX_MODE_ERROR;
1168     else
1169         call->mode = RX_MODE_SENDING;
1170
1171     /* remember start time for call in case we have hard dead time limit */
1172     call->queueTime = queueTime;
1173     clock_GetTime(&call->startTime);
1174     hzero(call->bytesSent);
1175     hzero(call->bytesRcvd);
1176
1177     /* Turn on busy protocol. */
1178     rxi_KeepAliveOn(call);
1179
1180     MUTEX_EXIT(&call->lock);
1181     MUTEX_EXIT(&conn->conn_call_lock);
1182     USERPRI;
1183
1184 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
1185     /* Now, if TQ wasn't cleared earlier, do it now. */
1186     MUTEX_ENTER(&call->lock);
1187     rxi_WaitforTQBusy(call);
1188     if (call->flags & RX_CALL_TQ_CLEARME) {
1189         rxi_ClearTransmitQueue(call, 0);
1190         queue_Init(&call->tq);
1191     }
1192     MUTEX_EXIT(&call->lock);
1193 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
1194
1195     dpf(("rx_NewCall(call %x)\n", call));
1196     return call;
1197 }
1198
1199 int
1200 rxi_HasActiveCalls(register struct rx_connection *aconn)
1201 {
1202     register int i;
1203     register struct rx_call *tcall;
1204     SPLVAR;
1205
1206     NETPRI;
1207     for (i = 0; i < RX_MAXCALLS; i++) {
1208         if ((tcall = aconn->call[i])) {
1209             if ((tcall->state == RX_STATE_ACTIVE)
1210                 || (tcall->state == RX_STATE_PRECALL)) {
1211                 USERPRI;
1212                 return 1;
1213             }
1214         }
1215     }
1216     USERPRI;
1217     return 0;
1218 }
1219
1220 int
1221 rxi_GetCallNumberVector(register struct rx_connection *aconn,
1222                         register afs_int32 * aint32s)
1223 {
1224     register int i;
1225     register struct rx_call *tcall;
1226     SPLVAR;
1227
1228     NETPRI;
1229     for (i = 0; i < RX_MAXCALLS; i++) {
1230         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1231             aint32s[i] = aconn->callNumber[i] + 1;
1232         else
1233             aint32s[i] = aconn->callNumber[i];
1234     }
1235     USERPRI;
1236     return 0;
1237 }
1238
1239 int
1240 rxi_SetCallNumberVector(register struct rx_connection *aconn,
1241                         register afs_int32 * aint32s)
1242 {
1243     register int i;
1244     register struct rx_call *tcall;
1245     SPLVAR;
1246
1247     NETPRI;
1248     for (i = 0; i < RX_MAXCALLS; i++) {
1249         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1250             aconn->callNumber[i] = aint32s[i] - 1;
1251         else
1252             aconn->callNumber[i] = aint32s[i];
1253     }
1254     USERPRI;
1255     return 0;
1256 }
1257
1258 /* Advertise a new service.  A service is named locally by a UDP port
1259  * number plus a 16-bit service id.  Returns (struct rx_service *) 0
1260  * on a failure.
1261  *
1262      char *serviceName;  Name for identification purposes (e.g. the
1263                          service name might be used for probing for
1264                          statistics) */
1265 struct rx_service *
1266 rx_NewServiceHost(afs_uint32 host, u_short port, u_short serviceId,
1267                   char *serviceName, struct rx_securityClass **securityObjects,
1268                   int nSecurityObjects,
1269                   afs_int32(*serviceProc) (struct rx_call * acall))
1270 {
1271     osi_socket socket = OSI_NULLSOCKET;
1272     register struct rx_service *tservice;
1273     register int i;
1274     SPLVAR;
1275
1276     clock_NewTime();
1277
1278     if (serviceId == 0) {
1279         (osi_Msg
1280          "rx_NewService:  service id for service %s is not non-zero.\n",
1281          serviceName);
1282         return 0;
1283     }
1284     if (port == 0) {
1285         if (rx_port == 0) {
1286             (osi_Msg
1287              "rx_NewService: A non-zero port must be specified on this call if a non-zero port was not provided at Rx initialization (service %s).\n",
1288              serviceName);
1289             return 0;
1290         }
1291         port = rx_port;
1292         socket = rx_socket;
1293     }
1294
1295     tservice = rxi_AllocService();
1296     NETPRI;
1297     for (i = 0; i < RX_MAX_SERVICES; i++) {
1298         register struct rx_service *service = rx_services[i];
1299         if (service) {
1300             if (port == service->servicePort && host == service->serviceHost) {
1301                 if (service->serviceId == serviceId) {
1302                     /* The identical service has already been
1303                      * installed; if the caller was intending to
1304                      * change the security classes used by this
1305                      * service, he/she loses. */
1306                     (osi_Msg
1307                      "rx_NewService: tried to install service %s with service id %d, which is already in use for service %s\n",
1308                      serviceName, serviceId, service->serviceName);
1309                     USERPRI;
1310                     rxi_FreeService(tservice);
1311                     return service;
1312                 }
1313                 /* Different service, same port: re-use the socket
1314                  * which is bound to the same port */
1315                 socket = service->socket;
1316             }
1317         } else {
1318             if (socket == OSI_NULLSOCKET) {
1319                 /* If we don't already have a socket (from another
1320                  * service on same port) get a new one */
1321                 socket = rxi_GetHostUDPSocket(host, port);
1322                 if (socket == OSI_NULLSOCKET) {
1323                     USERPRI;
1324                     rxi_FreeService(tservice);
1325                     return 0;
1326                 }
1327             }
1328             service = tservice;
1329             service->socket = socket;
1330             service->serviceHost = host;
1331             service->servicePort = port;
1332             service->serviceId = serviceId;
1333             service->serviceName = serviceName;
1334             service->nSecurityObjects = nSecurityObjects;
1335             service->securityObjects = securityObjects;
1336             service->minProcs = 0;
1337             service->maxProcs = 1;
1338             service->idleDeadTime = 60;
1339             service->idleDeadErr = 0;
1340             service->connDeadTime = rx_connDeadTime;
1341             service->executeRequestProc = serviceProc;
1342             service->checkReach = 0;
1343             rx_services[i] = service;   /* not visible until now */
1344             USERPRI;
1345             return service;
1346         }
1347     }
1348     USERPRI;
1349     rxi_FreeService(tservice);
1350     (osi_Msg "rx_NewService: cannot support > %d services\n",
1351      RX_MAX_SERVICES);
1352     return 0;
1353 }
1354
1355 /* Set configuration options for all of a service's security objects */
1356
1357 afs_int32
1358 rx_SetSecurityConfiguration(struct rx_service *service,
1359                             rx_securityConfigVariables type,
1360                             void *value)
1361 {
1362     int i;
1363     for (i = 0; i<service->nSecurityObjects; i++) {
1364         if (service->securityObjects[i]) {
1365             RXS_SetConfiguration(service->securityObjects[i], NULL, type,
1366                                  value, NULL);
1367         }
1368     }
1369     return 0;
1370 }
1371
1372 struct rx_service *
1373 rx_NewService(u_short port, u_short serviceId, char *serviceName,
1374               struct rx_securityClass **securityObjects, int nSecurityObjects,
1375               afs_int32(*serviceProc) (struct rx_call * acall))
1376 {
1377     return rx_NewServiceHost(htonl(INADDR_ANY), port, serviceId, serviceName, securityObjects, nSecurityObjects, serviceProc);
1378 }
1379
1380 /* Generic request processing loop. This routine should be called
1381  * by the implementation dependent rx_ServerProc. If socketp is
1382  * non-null, it will be set to the file descriptor that this thread
1383  * is now listening on. If socketp is null, this routine will never
1384  * returns. */
1385 void
1386 rxi_ServerProc(int threadID, struct rx_call *newcall, osi_socket * socketp)
1387 {
1388     register struct rx_call *call;
1389     register afs_int32 code;
1390     register struct rx_service *tservice = NULL;
1391
1392     for (;;) {
1393         if (newcall) {
1394             call = newcall;
1395             newcall = NULL;
1396         } else {
1397             call = rx_GetCall(threadID, tservice, socketp);
1398             if (socketp && *socketp != OSI_NULLSOCKET) {
1399                 /* We are now a listener thread */
1400                 return;
1401             }
1402         }
1403
1404         /* if server is restarting( typically smooth shutdown) then do not
1405          * allow any new calls.
1406          */
1407
1408         if (rx_tranquil && (call != NULL)) {
1409             SPLVAR;
1410
1411             NETPRI;
1412             MUTEX_ENTER(&call->lock);
1413
1414             rxi_CallError(call, RX_RESTARTING);
1415             rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1416
1417             MUTEX_EXIT(&call->lock);
1418             USERPRI;
1419         }
1420 #ifdef  KERNEL
1421         if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1422 #ifdef RX_ENABLE_LOCKS
1423             AFS_GLOCK();
1424 #endif /* RX_ENABLE_LOCKS */
1425             afs_termState = AFSOP_STOP_AFS;
1426             afs_osi_Wakeup(&afs_termState);
1427 #ifdef RX_ENABLE_LOCKS
1428             AFS_GUNLOCK();
1429 #endif /* RX_ENABLE_LOCKS */
1430             return;
1431         }
1432 #endif
1433
1434         tservice = call->conn->service;
1435
1436         if (tservice->beforeProc)
1437             (*tservice->beforeProc) (call);
1438
1439         code = call->conn->service->executeRequestProc(call);
1440
1441         if (tservice->afterProc)
1442             (*tservice->afterProc) (call, code);
1443
1444         rx_EndCall(call, code);
1445         MUTEX_ENTER(&rx_stats_mutex);
1446         rxi_nCalls++;
1447         MUTEX_EXIT(&rx_stats_mutex);
1448     }
1449 }
1450
1451
1452 void
1453 rx_WakeupServerProcs(void)
1454 {
1455     struct rx_serverQueueEntry *np, *tqp;
1456     SPLVAR;
1457
1458     NETPRI;
1459     MUTEX_ENTER(&rx_serverPool_lock);
1460
1461 #ifdef RX_ENABLE_LOCKS
1462     if (rx_waitForPacket)
1463         CV_BROADCAST(&rx_waitForPacket->cv);
1464 #else /* RX_ENABLE_LOCKS */
1465     if (rx_waitForPacket)
1466         osi_rxWakeup(rx_waitForPacket);
1467 #endif /* RX_ENABLE_LOCKS */
1468     MUTEX_ENTER(&freeSQEList_lock);
1469     for (np = rx_FreeSQEList; np; np = tqp) {
1470         tqp = *(struct rx_serverQueueEntry **)np;
1471 #ifdef RX_ENABLE_LOCKS
1472         CV_BROADCAST(&np->cv);
1473 #else /* RX_ENABLE_LOCKS */
1474         osi_rxWakeup(np);
1475 #endif /* RX_ENABLE_LOCKS */
1476     }
1477     MUTEX_EXIT(&freeSQEList_lock);
1478     for (queue_Scan(&rx_idleServerQueue, np, tqp, rx_serverQueueEntry)) {
1479 #ifdef RX_ENABLE_LOCKS
1480         CV_BROADCAST(&np->cv);
1481 #else /* RX_ENABLE_LOCKS */
1482         osi_rxWakeup(np);
1483 #endif /* RX_ENABLE_LOCKS */
1484     }
1485     MUTEX_EXIT(&rx_serverPool_lock);
1486     USERPRI;
1487 }
1488
1489 /* meltdown:
1490  * One thing that seems to happen is that all the server threads get
1491  * tied up on some empty or slow call, and then a whole bunch of calls
1492  * arrive at once, using up the packet pool, so now there are more
1493  * empty calls.  The most critical resources here are server threads
1494  * and the free packet pool.  The "doreclaim" code seems to help in
1495  * general.  I think that eventually we arrive in this state: there
1496  * are lots of pending calls which do have all their packets present,
1497  * so they won't be reclaimed, are multi-packet calls, so they won't
1498  * be scheduled until later, and thus are tying up most of the free
1499  * packet pool for a very long time.
1500  * future options:
1501  * 1.  schedule multi-packet calls if all the packets are present.
1502  * Probably CPU-bound operation, useful to return packets to pool.
1503  * Do what if there is a full window, but the last packet isn't here?
1504  * 3.  preserve one thread which *only* runs "best" calls, otherwise
1505  * it sleeps and waits for that type of call.
1506  * 4.  Don't necessarily reserve a whole window for each thread.  In fact,
1507  * the current dataquota business is badly broken.  The quota isn't adjusted
1508  * to reflect how many packets are presently queued for a running call.
1509  * So, when we schedule a queued call with a full window of packets queued
1510  * up for it, that *should* free up a window full of packets for other 2d-class
1511  * calls to be able to use from the packet pool.  But it doesn't.
1512  *
1513  * NB.  Most of the time, this code doesn't run -- since idle server threads
1514  * sit on the idle server queue and are assigned by "...ReceivePacket" as soon
1515  * as a new call arrives.
1516  */
1517 /* Sleep until a call arrives.  Returns a pointer to the call, ready
1518  * for an rx_Read. */
1519 #ifdef RX_ENABLE_LOCKS
/*
 * RX_ENABLE_LOCKS variant of rx_GetCall.
 *
 * tno          thread number; compared against rxi_fcfs_thread_num to
 *              decide whether this thread takes calls strictly in
 *              queue order.
 * cur_service  if non-NULL, passed to ReturnToServerPool before looking
 *              for new work (rxi_ServerProc passes the service of the
 *              call it has just finished).
 * socketp      if non-NULL, *socketp is set to OSI_NULLSOCKET before
 *              this thread sleeps; the sleep also ends when *socketp is
 *              found to be something else.
 *
 * Returns a call in RX_STATE_ACTIVE / RX_MODE_RECEIVING holding an
 * RX_CALL_REFCOUNT_BEGIN reference, with call->lock released.  Returns
 * NULL when the wait ended without a call: because *socketp was set or,
 * in the kernel, because afs_termState is AFSOP_STOP_RXCALLBACK.
 */
struct rx_call *
rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
{
    struct rx_serverQueueEntry *sq;
    register struct rx_call *call = (struct rx_call *)0;
    struct rx_service *service = NULL;
    SPLVAR;

    /* Get a server queue entry to sleep on: recycle one from the free
     * list (entries are chained through their first word) or make one */
    MUTEX_ENTER(&freeSQEList_lock);

    if ((sq = rx_FreeSQEList)) {
        rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
        MUTEX_EXIT(&freeSQEList_lock);
    } else {                    /* otherwise allocate a new one and return that */
        MUTEX_EXIT(&freeSQEList_lock);
        sq = (struct rx_serverQueueEntry *)
            rxi_Alloc(sizeof(struct rx_serverQueueEntry));
        MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
        CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
    }

    MUTEX_ENTER(&rx_serverPool_lock);
    if (cur_service != NULL) {
        ReturnToServerPool(cur_service);
    }
    /* Each pass either claims a queued call, or goes to sleep on the
     * idle server queue.  The loop repeats only when a claimed call
     * turns out to be unusable. */
    while (1) {
        if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
            register struct rx_call *tcall, *ncall, *choice2 = NULL;

            /* Scan for eligible incoming calls.  A call is not eligible
             * if the maximum number of calls for its service type are
             * already executing */
            /* One thread will process calls FCFS (to prevent starvation),
             * while the other threads may run ahead looking for calls which
             * have all their input data available immediately.  This helps
             * keep threads from blocking, waiting for data from the client. */
            for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
                service = tcall->conn->service;
                if (!QuotaOK(service)) {
                    continue;
                }
                if (tno == rxi_fcfs_thread_num
                    || !tcall->queue_item_header.next) {
                    /* If we're the fcfs thread , then  we'll just use
                     * this call. If we haven't been able to find an optimal
                     * choice, and we're at the end of the list, then use a
                     * 2d choice if one has been identified.  Otherwise... */
                    call = (choice2 ? choice2 : tcall);
                    service = call->conn->service;
                } else if (!queue_IsEmpty(&tcall->rq)) {
                    struct rx_packet *rp;
                    rp = queue_First(&tcall->rq, rx_packet);
                    /* Only calls whose first queued packet is sequence
                     * number 1 are considered here */
                    if (rp->header.seq == 1) {
                        if (!meltdown_1pkt
                            || (rp->header.flags & RX_LAST_PACKET)) {
                            call = tcall;
                        } else if (rxi_2dchoice && !choice2
                                   && !(tcall->flags & RX_CALL_CLEARED)
                                   && (tcall->rprev > rxi_HardAckRate)) {
                            /* Remember as a fallback; keep scanning */
                            choice2 = tcall;
                        } else
                            rxi_md2cnt++;
                    }
                }
                if (call) {
                    break;
                } else {
                    /* This call was passed over: hand back the service
                     * that was checked with QuotaOK above */
                    ReturnToServerPool(service);
                }
            }
        }

        if (call) {
            /* Take the call off the incoming queue, then switch from
             * the pool lock to the call's own lock */
            queue_Remove(call);
            MUTEX_EXIT(&rx_serverPool_lock);
            MUTEX_ENTER(&call->lock);

            if (call->flags & RX_CALL_WAIT_PROC) {
                call->flags &= ~RX_CALL_WAIT_PROC;
                MUTEX_ENTER(&rx_stats_mutex);
                rx_nWaiting--;
                MUTEX_EXIT(&rx_stats_mutex);
            }

            /* The call is no longer in PRECALL state, or has an error:
             * give it up, return the service to the pool, and start
             * over with the pool lock held again */
            if (call->state != RX_STATE_PRECALL || call->error) {
                MUTEX_EXIT(&call->lock);
                MUTEX_ENTER(&rx_serverPool_lock);
                ReturnToServerPool(service);
                call = NULL;
                continue;
            }

            /* Send a delay ack unless the receive queue starts with
             * packet number 1 */
            if (queue_IsEmpty(&call->rq)
                || queue_First(&call->rq, rx_packet)->header.seq != 1)
                rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);

            CLEAR_CALL_QUEUE_LOCK(call);
            break;
        } else {
            /* If there are no eligible incoming calls, add this process
             * to the idle server queue, to wait for one */
            sq->newcall = 0;
            sq->tno = tno;
            if (socketp) {
                *socketp = OSI_NULLSOCKET;
            }
            sq->socketp = socketp;
            queue_Append(&rx_idleServerQueue, sq);
#ifndef AFS_AIX41_ENV
            rx_waitForPacket = sq;
#else
            rx_waitingForPacket = sq;
#endif /* AFS_AIX41_ENV */
            /* Sleep until sq->newcall has been filled in or *socketp
             * has been set; rx_serverPool_lock is passed to CV_WAIT */
            do {
                CV_WAIT(&sq->cv, &rx_serverPool_lock);
#ifdef  KERNEL
                /* Shutdown in progress: bail out without a call.  Note
                 * that sq is not put back on the free list on this path. */
                if (afs_termState == AFSOP_STOP_RXCALLBACK) {
                    MUTEX_EXIT(&rx_serverPool_lock);
                    return (struct rx_call *)0;
                }
#endif
            } while (!(call = sq->newcall)
                     && !(socketp && *socketp != OSI_NULLSOCKET));
            MUTEX_EXIT(&rx_serverPool_lock);
            if (call) {
                MUTEX_ENTER(&call->lock);
            }
            break;
        }
    }

    /* Put the queue entry back on the free list for reuse */
    MUTEX_ENTER(&freeSQEList_lock);
    *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
    rx_FreeSQEList = sq;
    MUTEX_EXIT(&freeSQEList_lock);

    if (call) {
        /* call->lock is held here on both paths above */
        clock_GetTime(&call->startTime);
        call->state = RX_STATE_ACTIVE;
        call->mode = RX_MODE_RECEIVING;
#ifdef RX_KERNEL_TRACE
        if (ICL_SETACTIVE(afs_iclSetp)) {
            /* Take the AFS global lock for the trace call only if we
             * do not already own it */
            int glockOwner = ISAFS_GLOCK();
            if (!glockOwner)
                AFS_GLOCK();
            afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
                       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
                       call);
            if (!glockOwner)
                AFS_GUNLOCK();
        }
#endif

        rxi_calltrace(RX_CALL_START, call);
        dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
             call->conn->service->servicePort, call->conn->service->serviceId,
             call));

        CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
        MUTEX_EXIT(&call->lock);
    } else {
        /* No call: the wait above ended because *socketp was set, so
         * socketp is non-NULL here */
        dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
    }

    return call;
}
1686 #else /* RX_ENABLE_LOCKS */
1687 struct rx_call *
1688 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1689 {
1690     struct rx_serverQueueEntry *sq;
1691     register struct rx_call *call = (struct rx_call *)0, *choice2;
1692     struct rx_service *service = NULL;
1693     SPLVAR;
1694
1695     NETPRI;
1696     MUTEX_ENTER(&freeSQEList_lock);
1697
1698     if ((sq = rx_FreeSQEList)) {
1699         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1700         MUTEX_EXIT(&freeSQEList_lock);
1701     } else {                    /* otherwise allocate a new one and return that */
1702         MUTEX_EXIT(&freeSQEList_lock);
1703         sq = (struct rx_serverQueueEntry *)
1704             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1705         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1706         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1707     }
1708     MUTEX_ENTER(&sq->lock);
1709
1710     if (cur_service != NULL) {
1711         cur_service->nRequestsRunning--;
1712         if (cur_service->nRequestsRunning < cur_service->minProcs)
1713             rxi_minDeficit++;
1714         rxi_availProcs++;
1715     }
1716     if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1717         register struct rx_call *tcall, *ncall;
1718         /* Scan for eligible incoming calls.  A call is not eligible
1719          * if the maximum number of calls for its service type are
1720          * already executing */
1721         /* One thread will process calls FCFS (to prevent starvation),
1722          * while the other threads may run ahead looking for calls which
1723          * have all their input data available immediately.  This helps
1724          * keep threads from blocking, waiting for data from the client. */
1725         choice2 = (struct rx_call *)0;
1726         for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1727             service = tcall->conn->service;
1728             if (QuotaOK(service)) {
1729                 if (tno == rxi_fcfs_thread_num
1730                     || !tcall->queue_item_header.next) {
1731                     /* If we're the fcfs thread, then  we'll just use
1732                      * this call. If we haven't been able to find an optimal
1733                      * choice, and we're at the end of the list, then use a
1734                      * 2d choice if one has been identified.  Otherwise... */
1735                     call = (choice2 ? choice2 : tcall);
1736                     service = call->conn->service;
1737                 } else if (!queue_IsEmpty(&tcall->rq)) {
1738                     struct rx_packet *rp;
1739                     rp = queue_First(&tcall->rq, rx_packet);
1740                     if (rp->header.seq == 1
1741                         && (!meltdown_1pkt
1742                             || (rp->header.flags & RX_LAST_PACKET))) {
1743                         call = tcall;
1744                     } else if (rxi_2dchoice && !choice2
1745                                && !(tcall->flags & RX_CALL_CLEARED)
1746                                && (tcall->rprev > rxi_HardAckRate)) {
1747                         choice2 = tcall;
1748                     } else
1749                         rxi_md2cnt++;
1750                 }
1751             }
1752             if (call)
1753                 break;
1754         }
1755     }
1756
1757     if (call) {
1758         queue_Remove(call);
1759         /* we can't schedule a call if there's no data!!! */
1760         /* send an ack if there's no data, if we're missing the
1761          * first packet, or we're missing something between first
1762          * and last -- there's a "hole" in the incoming data. */
1763         if (queue_IsEmpty(&call->rq)
1764             || queue_First(&call->rq, rx_packet)->header.seq != 1
1765             || call->rprev != queue_Last(&call->rq, rx_packet)->header.seq)
1766             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1767
1768         call->flags &= (~RX_CALL_WAIT_PROC);
1769         service->nRequestsRunning++;
1770         /* just started call in minProcs pool, need fewer to maintain
1771          * guarantee */
1772         if (service->nRequestsRunning <= service->minProcs)
1773             rxi_minDeficit--;
1774         rxi_availProcs--;
1775         rx_nWaiting--;
1776         /* MUTEX_EXIT(&call->lock); */
1777     } else {
1778         /* If there are no eligible incoming calls, add this process
1779          * to the idle server queue, to wait for one */
1780         sq->newcall = 0;
1781         if (socketp) {
1782             *socketp = OSI_NULLSOCKET;
1783         }
1784         sq->socketp = socketp;
1785         queue_Append(&rx_idleServerQueue, sq);
1786         do {
1787             osi_rxSleep(sq);
1788 #ifdef  KERNEL
1789             if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1790                 USERPRI;
1791                 rxi_Free(sq, sizeof(struct rx_serverQueueEntry));
1792                 return (struct rx_call *)0;
1793             }
1794 #endif
1795         } while (!(call = sq->newcall)
1796                  && !(socketp && *socketp != OSI_NULLSOCKET));
1797     }
1798     MUTEX_EXIT(&sq->lock);
1799
1800     MUTEX_ENTER(&freeSQEList_lock);
1801     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1802     rx_FreeSQEList = sq;
1803     MUTEX_EXIT(&freeSQEList_lock);
1804
1805     if (call) {
1806         clock_GetTime(&call->startTime);
1807         call->state = RX_STATE_ACTIVE;
1808         call->mode = RX_MODE_RECEIVING;
1809 #ifdef RX_KERNEL_TRACE
1810         if (ICL_SETACTIVE(afs_iclSetp)) {
1811             int glockOwner = ISAFS_GLOCK();
1812             if (!glockOwner)
1813                 AFS_GLOCK();
1814             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1815                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1816                        call);
1817             if (!glockOwner)
1818                 AFS_GUNLOCK();
1819         }
1820 #endif
1821
1822         rxi_calltrace(RX_CALL_START, call);
1823         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1824              call->conn->service->servicePort, call->conn->service->serviceId,
1825              call));
1826     } else {
1827         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1828     }
1829
1830     USERPRI;
1831
1832     return call;
1833 }
1834 #endif /* RX_ENABLE_LOCKS */
1835
1836
1837
1838 /* Establish a procedure to be called when a packet arrives for a
1839  * call.  This routine will be called at most once after each call,
1840  * and will also be called if there is an error condition on the or
1841  * the call is complete.  Used by multi rx to build a selection
1842  * function which determines which of several calls is likely to be a
1843  * good one to read from.
1844  * NOTE: the way this is currently implemented it is probably only a
1845  * good idea to (1) use it immediately after a newcall (clients only)
1846  * and (2) only use it once.  Other uses currently void your warranty
1847  */
1848 void
1849 rx_SetArrivalProc(register struct rx_call *call,
1850                   register void (*proc) (register struct rx_call * call,
1851                                         register void * mh,
1852                                         register int index),
1853                   register void * handle, register int arg)
1854 {
1855     call->arrivalProc = proc;
1856     call->arrivalProcHandle = handle;
1857     call->arrivalProcArg = arg;
1858 }
1859
1860 /* Call is finished (possibly prematurely).  Return rc to the peer, if
1861  * appropriate, and return the final error code from the conversation
1862  * to the caller */
1863
1864 afs_int32
1865 rx_EndCall(register struct rx_call *call, afs_int32 rc)
1866 {
1867     register struct rx_connection *conn = call->conn;
1868     register struct rx_service *service;
1869     afs_int32 error;
1870     SPLVAR;
1871
1872
1873
1874     dpf(("rx_EndCall(call %x rc %d error %d abortCode %d)\n", call, rc, call->error, call->abortCode));
1875
1876     NETPRI;
1877     MUTEX_ENTER(&call->lock);
1878
1879     if (rc == 0 && call->error == 0) {
1880         call->abortCode = 0;
1881         call->abortCount = 0;
1882     }
1883
1884     call->arrivalProc = (void (*)())0;
1885     if (rc && call->error == 0) {
1886         rxi_CallError(call, rc);
1887         /* Send an abort message to the peer if this error code has
1888          * only just been set.  If it was set previously, assume the
1889          * peer has already been sent the error code or will request it
1890          */
1891         rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1892     }
1893     if (conn->type == RX_SERVER_CONNECTION) {
1894         /* Make sure reply or at least dummy reply is sent */
1895         if (call->mode == RX_MODE_RECEIVING) {
1896             rxi_WriteProc(call, 0, 0);
1897         }
1898         if (call->mode == RX_MODE_SENDING) {
1899             rxi_FlushWrite(call);
1900         }
1901         service = conn->service;
1902         rxi_calltrace(RX_CALL_END, call);
1903         /* Call goes to hold state until reply packets are acknowledged */
1904         if (call->tfirst + call->nSoftAcked < call->tnext) {
1905             call->state = RX_STATE_HOLD;
1906         } else {
1907             call->state = RX_STATE_DALLY;
1908             rxi_ClearTransmitQueue(call, 0);
1909             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
1910             rxevent_Cancel(call->keepAliveEvent, call,
1911                            RX_CALL_REFCOUNT_ALIVE);
1912         }
1913     } else {                    /* Client connection */
1914         char dummy;
1915         /* Make sure server receives input packets, in the case where
1916          * no reply arguments are expected */
1917         if ((call->mode == RX_MODE_SENDING)
1918             || (call->mode == RX_MODE_RECEIVING && call->rnext == 1)) {
1919             (void)rxi_ReadProc(call, &dummy, 1);
1920         }
1921
1922         /* If we had an outstanding delayed ack, be nice to the server
1923          * and force-send it now.
1924          */
1925         if (call->delayedAckEvent) {
1926             rxevent_Cancel(call->delayedAckEvent, call,
1927                            RX_CALL_REFCOUNT_DELAY);
1928             call->delayedAckEvent = NULL;
1929             rxi_SendDelayedAck(NULL, call, NULL);
1930         }
1931
1932         /* We need to release the call lock since it's lower than the
1933          * conn_call_lock and we don't want to hold the conn_call_lock
1934          * over the rx_ReadProc call. The conn_call_lock needs to be held
1935          * here for the case where rx_NewCall is perusing the calls on
1936          * the connection structure. We don't want to signal until
1937          * rx_NewCall is in a stable state. Otherwise, rx_NewCall may
1938          * have checked this call, found it active and by the time it
1939          * goes to sleep, will have missed the signal.
1940          *
1941          * Do not clear the RX_CONN_MAKECALL_WAITING flag as long as
1942          * there are threads waiting to use the conn object.
1943          */
1944         MUTEX_EXIT(&call->lock);
1945         MUTEX_ENTER(&conn->conn_call_lock);
1946         MUTEX_ENTER(&call->lock);
1947         MUTEX_ENTER(&conn->conn_data_lock);
1948         conn->flags |= RX_CONN_BUSY;
1949         if (conn->flags & RX_CONN_MAKECALL_WAITING) {
1950             if (conn->makeCallWaiters == 0)
1951                 conn->flags &= (~RX_CONN_MAKECALL_WAITING);
1952             MUTEX_EXIT(&conn->conn_data_lock);
1953 #ifdef  RX_ENABLE_LOCKS
1954             CV_BROADCAST(&conn->conn_call_cv);
1955 #else
1956             osi_rxWakeup(conn);
1957 #endif
1958         }
1959 #ifdef RX_ENABLE_LOCKS
1960         else {
1961             MUTEX_EXIT(&conn->conn_data_lock);
1962         }
1963 #endif /* RX_ENABLE_LOCKS */
1964         call->state = RX_STATE_DALLY;
1965     }
1966     error = call->error;
1967
1968     /* currentPacket, nLeft, and NFree must be zeroed here, because
1969      * ResetCall cannot: ResetCall may be called at splnet(), in the
1970      * kernel version, and may interrupt the macros rx_Read or
1971      * rx_Write, which run at normal priority for efficiency. */
1972     if (call->currentPacket) {
1973         queue_Prepend(&call->iovq, call->currentPacket);
1974         call->currentPacket = (struct rx_packet *)0;
1975     }
1976
1977     call->nLeft = call->nFree = call->curlen = 0;
1978
1979     /* Free any packets from the last call to ReadvProc/WritevProc */
1980     rxi_FreePackets(0, &call->iovq);
1981
1982     CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
1983     MUTEX_EXIT(&call->lock);
1984     if (conn->type == RX_CLIENT_CONNECTION) {
1985         MUTEX_EXIT(&conn->conn_call_lock);
1986         conn->flags &= ~RX_CONN_BUSY;
1987     }
1988     USERPRI;
1989     /*
1990      * Map errors to the local host's errno.h format.
1991      */
1992     error = ntoh_syserr_conv(error);
1993     return error;
1994 }
1995
1996 #if !defined(KERNEL)
1997
1998 /* Call this routine when shutting down a server or client (especially
1999  * clients).  This will allow Rx to gracefully garbage collect server
2000  * connections, and reduce the number of retries that a server might
2001  * make to a dead client.
2002  * This is not quite right, since some calls may still be ongoing and
2003  * we can't lock them to destroy them. */
2004 void
2005 rx_Finalize(void)
2006 {
2007     register struct rx_connection **conn_ptr, **conn_end;
2008
2009     INIT_PTHREAD_LOCKS;
2010     LOCK_RX_INIT;
2011     if (rxinit_status == 1) {
2012         UNLOCK_RX_INIT;
2013         return;                 /* Already shutdown. */
2014     }
2015     rxi_DeleteCachedConnections();
2016     if (rx_connHashTable) {
2017         MUTEX_ENTER(&rx_connHashTable_lock);
2018         for (conn_ptr = &rx_connHashTable[0], conn_end =
2019              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
2020              conn_ptr++) {
2021             struct rx_connection *conn, *next;
2022             for (conn = *conn_ptr; conn; conn = next) {
2023                 next = conn->next;
2024                 if (conn->type == RX_CLIENT_CONNECTION) {
2025                     /* MUTEX_ENTER(&conn->conn_data_lock); when used in kernel */
2026                     conn->refCount++;
2027                     /* MUTEX_EXIT(&conn->conn_data_lock); when used in kernel */
2028 #ifdef RX_ENABLE_LOCKS
2029                     rxi_DestroyConnectionNoLock(conn);
2030 #else /* RX_ENABLE_LOCKS */
2031                     rxi_DestroyConnection(conn);
2032 #endif /* RX_ENABLE_LOCKS */
2033                 }
2034             }
2035         }
2036 #ifdef RX_ENABLE_LOCKS
2037         while (rx_connCleanup_list) {
2038             struct rx_connection *conn;
2039             conn = rx_connCleanup_list;
2040             rx_connCleanup_list = rx_connCleanup_list->next;
2041             MUTEX_EXIT(&rx_connHashTable_lock);
2042             rxi_CleanupConnection(conn);
2043             MUTEX_ENTER(&rx_connHashTable_lock);
2044         }
2045         MUTEX_EXIT(&rx_connHashTable_lock);
2046 #endif /* RX_ENABLE_LOCKS */
2047     }
2048     rxi_flushtrace();
2049
2050 #ifdef AFS_NT40_ENV
2051     afs_winsockCleanup();
2052 #endif
2053
2054     rxinit_status = 1;
2055     UNLOCK_RX_INIT;
2056 }
2057 #endif
2058
2059 /* if we wakeup packet waiter too often, can get in loop with two
2060     AllocSendPackets each waking each other up (from ReclaimPacket calls) */
2061 void
2062 rxi_PacketsUnWait(void)
2063 {
2064     if (!rx_waitingForPackets) {
2065         return;
2066     }
2067 #ifdef KERNEL
2068     if (rxi_OverQuota(RX_PACKET_CLASS_SEND)) {
2069         return;                 /* still over quota */
2070     }
2071 #endif /* KERNEL */
2072     rx_waitingForPackets = 0;
2073 #ifdef  RX_ENABLE_LOCKS
2074     CV_BROADCAST(&rx_waitingForPackets_cv);
2075 #else
2076     osi_rxWakeup(&rx_waitingForPackets);
2077 #endif
2078     return;
2079 }
2080
2081
2082 /* ------------------Internal interfaces------------------------- */
2083
2084 /* Return this process's service structure for the
2085  * specified socket and service */
2086 struct rx_service *
2087 rxi_FindService(register osi_socket socket, register u_short serviceId)
2088 {
2089     register struct rx_service **sp;
2090     for (sp = &rx_services[0]; *sp; sp++) {
2091         if ((*sp)->serviceId == serviceId && (*sp)->socket == socket)
2092             return *sp;
2093     }
2094     return 0;
2095 }
2096
2097 /* Allocate a call structure, for the indicated channel of the
2098  * supplied connection.  The mode and state of the call must be set by
2099  * the caller. Returns the call with mutex locked. */
2100 struct rx_call *
2101 rxi_NewCall(register struct rx_connection *conn, register int channel)
2102 {
2103     register struct rx_call *call;
2104 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2105     register struct rx_call *cp;        /* Call pointer temp */
2106     register struct rx_call *nxp;       /* Next call pointer, for queue_Scan */
2107 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2108
2109     dpf(("rxi_NewCall(conn %x, channel %d)\n", conn, channel));
2110
2111     /* Grab an existing call structure, or allocate a new one.
2112      * Existing call structures are assumed to have been left reset by
2113      * rxi_FreeCall */
2114     MUTEX_ENTER(&rx_freeCallQueue_lock);
2115
2116 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2117     /*
2118      * EXCEPT that the TQ might not yet be cleared out.
2119      * Skip over those with in-use TQs.
2120      */
2121     call = NULL;
2122     for (queue_Scan(&rx_freeCallQueue, cp, nxp, rx_call)) {
2123         if (!(cp->flags & RX_CALL_TQ_BUSY)) {
2124             call = cp;
2125             break;
2126         }
2127     }
2128     if (call) {
2129 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2130     if (queue_IsNotEmpty(&rx_freeCallQueue)) {
2131         call = queue_First(&rx_freeCallQueue, rx_call);
2132 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2133         queue_Remove(call);
2134         rx_MutexDecrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2135         MUTEX_EXIT(&rx_freeCallQueue_lock);
2136         MUTEX_ENTER(&call->lock);
2137         CLEAR_CALL_QUEUE_LOCK(call);
2138 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2139         /* Now, if TQ wasn't cleared earlier, do it now. */
2140         if (call->flags & RX_CALL_TQ_CLEARME) {
2141             rxi_ClearTransmitQueue(call, 0);
2142             queue_Init(&call->tq);
2143         }
2144 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2145         /* Bind the call to its connection structure */
2146         call->conn = conn;
2147         rxi_ResetCall(call, 1);
2148     } else {
2149         call = (struct rx_call *)rxi_Alloc(sizeof(struct rx_call));
2150
2151         MUTEX_EXIT(&rx_freeCallQueue_lock);
2152         MUTEX_INIT(&call->lock, "call lock", MUTEX_DEFAULT, NULL);
2153         MUTEX_ENTER(&call->lock);
2154         CV_INIT(&call->cv_twind, "call twind", CV_DEFAULT, 0);
2155         CV_INIT(&call->cv_rq, "call rq", CV_DEFAULT, 0);
2156         CV_INIT(&call->cv_tq, "call tq", CV_DEFAULT, 0);
2157
2158         rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2159         /* Initialize once-only items */
2160         queue_Init(&call->tq);
2161         queue_Init(&call->rq);
2162         queue_Init(&call->iovq);
2163         /* Bind the call to its connection structure (prereq for reset) */
2164         call->conn = conn;
2165         rxi_ResetCall(call, 1);
2166     }
2167     call->channel = channel;
2168     call->callNumber = &conn->callNumber[channel];
2169     call->rwind = conn->rwind[channel];
2170     call->twind = conn->twind[channel];
2171     /* Note that the next expected call number is retained (in
2172      * conn->callNumber[i]), even if we reallocate the call structure
2173      */
2174     conn->call[channel] = call;
2175     /* if the channel's never been used (== 0), we should start at 1, otherwise
2176      * the call number is valid from the last time this channel was used */
2177     if (*call->callNumber == 0)
2178         *call->callNumber = 1;
2179
2180     return call;
2181 }
2182
2183 /* A call has been inactive long enough that so we can throw away
2184  * state, including the call structure, which is placed on the call
2185  * free list.
2186  * Call is locked upon entry.
2187  * haveCTLock set if called from rxi_ReapConnections
2188  */
2189 #ifdef RX_ENABLE_LOCKS
2190 void
2191 rxi_FreeCall(register struct rx_call *call, int haveCTLock)
2192 #else /* RX_ENABLE_LOCKS */
2193 void
2194 rxi_FreeCall(register struct rx_call *call)
2195 #endif                          /* RX_ENABLE_LOCKS */
2196 {
2197     register int channel = call->channel;
2198     register struct rx_connection *conn = call->conn;
2199
2200
2201     if (call->state == RX_STATE_DALLY || call->state == RX_STATE_HOLD)
2202         (*call->callNumber)++;
2203     rxi_ResetCall(call, 0);
2204     call->conn->call[channel] = (struct rx_call *)0;
2205
2206     MUTEX_ENTER(&rx_freeCallQueue_lock);
2207     SET_CALL_QUEUE_LOCK(call, &rx_freeCallQueue_lock);
2208 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2209     /* A call may be free even though its transmit queue is still in use.
2210      * Since we search the call list from head to tail, put busy calls at
2211      * the head of the list, and idle calls at the tail.
2212      */
2213     if (call->flags & RX_CALL_TQ_BUSY)
2214         queue_Prepend(&rx_freeCallQueue, call);
2215     else
2216         queue_Append(&rx_freeCallQueue, call);
2217 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2218     queue_Append(&rx_freeCallQueue, call);
2219 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2220     rx_MutexIncrement(rx_stats.nFreeCallStructs, rx_stats_mutex);
2221     MUTEX_EXIT(&rx_freeCallQueue_lock);
2222
2223     /* Destroy the connection if it was previously slated for
2224      * destruction, i.e. the Rx client code previously called
2225      * rx_DestroyConnection (client connections), or
2226      * rxi_ReapConnections called the same routine (server
2227      * connections).  Only do this, however, if there are no
2228      * outstanding calls. Note that for fine grain locking, there appears
2229      * to be a deadlock in that rxi_FreeCall has a call locked and
2230      * DestroyConnectionNoLock locks each call in the conn. But note a
2231      * few lines up where we have removed this call from the conn.
2232      * If someone else destroys a connection, they either have no
2233      * call lock held or are going through this section of code.
2234      */
2235     if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) {
2236         MUTEX_ENTER(&conn->conn_data_lock);
2237         conn->refCount++;
2238         MUTEX_EXIT(&conn->conn_data_lock);
2239 #ifdef RX_ENABLE_LOCKS
2240         if (haveCTLock)
2241             rxi_DestroyConnectionNoLock(conn);
2242         else
2243             rxi_DestroyConnection(conn);
2244 #else /* RX_ENABLE_LOCKS */
2245         rxi_DestroyConnection(conn);
2246 #endif /* RX_ENABLE_LOCKS */
2247     }
2248 }
2249
2250 afs_int32 rxi_Alloccnt = 0, rxi_Allocsize = 0;
2251 char *
2252 rxi_Alloc(register size_t size)
2253 {
2254     register char *p;
2255
2256     rx_MutexAdd1Increment2(rxi_Allocsize, (afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2257     p = (char *)osi_Alloc(size);
2258
2259     if (!p)
2260         osi_Panic("rxi_Alloc error");
2261     memset(p, 0, size);
2262     return p;
2263 }
2264
2265 void
2266 rxi_Free(void *addr, register size_t size)
2267 {
2268     rx_MutexAdd1Decrement2(rxi_Allocsize, -(afs_int32)size, rxi_Alloccnt, rx_stats_mutex);
2269     osi_Free(addr, size);
2270 }
2271
2272 void
2273 rxi_SetPeerMtu(register afs_uint32 host, register afs_uint32 port, int mtu)
2274 {
2275     struct rx_peer **peer_ptr, **peer_end;
2276     int hashIndex;
2277
2278     MUTEX_ENTER(&rx_peerHashTable_lock);
2279     if (port == 0) {
2280        for (peer_ptr = &rx_peerHashTable[0], peer_end =
2281                 &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
2282             peer_ptr++) {
2283            struct rx_peer *peer, *next;
2284            for (peer = *peer_ptr; peer; peer = next) {
2285                next = peer->next;
2286                if (host == peer->host) {
2287                    MUTEX_ENTER(&peer->peer_lock);
2288                    peer->ifMTU=MIN(mtu, peer->ifMTU);
2289                    peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2290                    MUTEX_EXIT(&peer->peer_lock);
2291                }
2292            }
2293        }
2294     } else {
2295        struct rx_peer *peer, *next;
2296        hashIndex = PEER_HASH(host, port);
2297        for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
2298            if ((peer->host == host) && (peer->port == port)) {
2299                MUTEX_ENTER(&peer->peer_lock);
2300                peer->ifMTU=MIN(mtu, peer->ifMTU);
2301                peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2302                MUTEX_EXIT(&peer->peer_lock);
2303            }
2304        }
2305     }
2306     MUTEX_EXIT(&rx_peerHashTable_lock);
2307 }
2308
2309 /* Find the peer process represented by the supplied (host,port)
2310  * combination.  If there is no appropriate active peer structure, a
2311  * new one will be allocated and initialized
2312  * The origPeer, if set, is a pointer to a peer structure on which the
2313  * refcount will be be decremented. This is used to replace the peer
2314  * structure hanging off a connection structure */
2315 struct rx_peer *
2316 rxi_FindPeer(register afs_uint32 host, register u_short port,
2317              struct rx_peer *origPeer, int create)
2318 {
2319     register struct rx_peer *pp;
2320     int hashIndex;
2321     hashIndex = PEER_HASH(host, port);
2322     MUTEX_ENTER(&rx_peerHashTable_lock);
2323     for (pp = rx_peerHashTable[hashIndex]; pp; pp = pp->next) {
2324         if ((pp->host == host) && (pp->port == port))
2325             break;
2326     }
2327     if (!pp) {
2328         if (create) {
2329             pp = rxi_AllocPeer();       /* This bzero's *pp */
2330             pp->host = host;    /* set here or in InitPeerParams is zero */
2331             pp->port = port;
2332             MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0);
2333             queue_Init(&pp->congestionQueue);
2334             queue_Init(&pp->rpcStats);
2335             pp->next = rx_peerHashTable[hashIndex];
2336             rx_peerHashTable[hashIndex] = pp;
2337             rxi_InitPeerParams(pp);
2338             rx_MutexIncrement(rx_stats.nPeerStructs, rx_stats_mutex);
2339         }
2340     }
2341     if (pp && create) {
2342         pp->refCount++;
2343     }
2344     if (origPeer)
2345         origPeer->refCount--;
2346     MUTEX_EXIT(&rx_peerHashTable_lock);
2347     return pp;
2348 }
2349
2350
2351 /* Find the connection at (host, port) started at epoch, and with the
2352  * given connection id.  Creates the server connection if necessary.
2353  * The type specifies whether a client connection or a server
2354  * connection is desired.  In both cases, (host, port) specify the
2355  * peer's (host, pair) pair.  Client connections are not made
2356  * automatically by this routine.  The parameter socket gives the
2357  * socket descriptor on which the packet was received.  This is used,
2358  * in the case of server connections, to check that *new* connections
2359  * come via a valid (port, serviceId).  Finally, the securityIndex
2360  * parameter must match the existing index for the connection.  If a
2361  * server connection is created, it will be created using the supplied
2362  * index, if the index is valid for this service */
2363 struct rx_connection *
2364 rxi_FindConnection(osi_socket socket, register afs_int32 host,
2365                    register u_short port, u_short serviceId, afs_uint32 cid,
2366                    afs_uint32 epoch, int type, u_int securityIndex)
2367 {
2368     int hashindex, flag, i;
2369     register struct rx_connection *conn;
2370     hashindex = CONN_HASH(host, port, cid, epoch, type);
2371     MUTEX_ENTER(&rx_connHashTable_lock);
2372     rxLastConn ? (conn = rxLastConn, flag = 0) : (conn =
2373                                                   rx_connHashTable[hashindex],
2374                                                   flag = 1);
2375     for (; conn;) {
2376         if ((conn->type == type) && ((cid & RX_CIDMASK) == conn->cid)
2377             && (epoch == conn->epoch)) {
2378             register struct rx_peer *pp = conn->peer;
2379             if (securityIndex != conn->securityIndex) {
2380                 /* this isn't supposed to happen, but someone could forge a packet
2381                  * like this, and there seems to be some CM bug that makes this
2382                  * happen from time to time -- in which case, the fileserver
2383                  * asserts. */
2384                 MUTEX_EXIT(&rx_connHashTable_lock);
2385                 return (struct rx_connection *)0;
2386             }
2387             if (pp->host == host && pp->port == port)
2388                 break;
2389             if (type == RX_CLIENT_CONNECTION && pp->port == port)
2390                 break;
2391             /* So what happens when it's a callback connection? */
2392             if (                /*type == RX_CLIENT_CONNECTION && */
2393                    (conn->epoch & 0x80000000))
2394                 break;
2395         }
2396         if (!flag) {
2397             /* the connection rxLastConn that was used the last time is not the
2398              ** one we are looking for now. Hence, start searching in the hash */
2399             flag = 1;
2400             conn = rx_connHashTable[hashindex];
2401         } else
2402             conn = conn->next;
2403     }
2404     if (!conn) {
2405         struct rx_service *service;
2406         if (type == RX_CLIENT_CONNECTION) {
2407             MUTEX_EXIT(&rx_connHashTable_lock);
2408             return (struct rx_connection *)0;
2409         }
2410         service = rxi_FindService(socket, serviceId);
2411         if (!service || (securityIndex >= service->nSecurityObjects)
2412             || (service->securityObjects[securityIndex] == 0)) {
2413             MUTEX_EXIT(&rx_connHashTable_lock);
2414             return (struct rx_connection *)0;
2415         }
2416         conn = rxi_AllocConnection();   /* This bzero's the connection */
2417         MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
2418         MUTEX_INIT(&conn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
2419         CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
2420         conn->next = rx_connHashTable[hashindex];
2421         rx_connHashTable[hashindex] = conn;
2422         conn->peer = rxi_FindPeer(host, port, 0, 1);
2423         conn->type = RX_SERVER_CONNECTION;
2424         conn->lastSendTime = clock_Sec();       /* don't GC immediately */
2425         conn->epoch = epoch;
2426         conn->cid = cid & RX_CIDMASK;
2427         /* conn->serial = conn->lastSerial = 0; */
2428         /* conn->timeout = 0; */
2429         conn->ackRate = RX_FAST_ACK_RATE;
2430         conn->service = service;
2431         conn->serviceId = serviceId;
2432         conn->securityIndex = securityIndex;
2433         conn->securityObject = service->securityObjects[securityIndex];
2434         conn->nSpecific = 0;
2435         conn->specific = NULL;
2436         rx_SetConnDeadTime(conn, service->connDeadTime);
2437         rx_SetConnIdleDeadTime(conn, service->idleDeadTime);
2438         rx_SetServerConnIdleDeadErr(conn, service->idleDeadErr);
2439         for (i = 0; i < RX_MAXCALLS; i++) {
2440             conn->twind[i] = rx_initSendWindow;
2441             conn->rwind[i] = rx_initReceiveWindow;
2442         }
2443         /* Notify security object of the new connection */
2444         RXS_NewConnection(conn->securityObject, conn);
2445         /* XXXX Connection timeout? */
2446         if (service->newConnProc)
2447             (*service->newConnProc) (conn);
2448         rx_MutexIncrement(rx_stats.nServerConns, rx_stats_mutex);
2449     }
2450
2451     MUTEX_ENTER(&conn->conn_data_lock);
2452     conn->refCount++;
2453     MUTEX_EXIT(&conn->conn_data_lock);
2454
2455     rxLastConn = conn;          /* store this connection as the last conn used */
2456     MUTEX_EXIT(&rx_connHashTable_lock);
2457     return conn;
2458 }
2459
/* Optional packet tracing hooks for testing and monitoring Rx.
 *
 * rx_justReceived is meant to be called just after a packet has been received
 * (its header has already been decoded); rx_almostSent is meant to be called
 * just before a packet is sent (its header has not been encoded yet).
 *
 * Each hook takes a pointer to the packet and a sockaddr holding the network
 * address, and is allowed to modify both.  A non-zero return value means the
 * packet should be dropped.  A null pointer means no hook is installed.
 *
 * Note: rxi_ReceivePacket only consults rx_justReceived in RXDEBUG builds. */

int (*rx_justReceived) () = NULL;
int (*rx_almostSent) () = NULL;
2470
2471 /* A packet has been received off the interface.  Np is the packet, socket is
2472  * the socket number it was received from (useful in determining which service
2473  * this packet corresponds to), and (host, port) reflect the host,port of the
2474  * sender.  This call returns the packet to the caller if it is finished with
2475  * it, rather than de-allocating it, just as a small performance hack */
2476
2477 struct rx_packet *
2478 rxi_ReceivePacket(register struct rx_packet *np, osi_socket socket,
2479                   afs_uint32 host, u_short port, int *tnop,
2480                   struct rx_call **newcallp)
2481 {
2482     register struct rx_call *call;
2483     register struct rx_connection *conn;
2484     int channel;
2485     afs_uint32 currentCallNumber;
2486     int type;
2487     int skew;
2488 #ifdef RXDEBUG
2489     char *packetType;
2490 #endif
2491     struct rx_packet *tnp;
2492
2493 #ifdef RXDEBUG
2494 /* We don't print out the packet until now because (1) the time may not be
2495  * accurate enough until now in the lwp implementation (rx_Listener only gets
2496  * the time after the packet is read) and (2) from a protocol point of view,
2497  * this is the first time the packet has been seen */
2498     packetType = (np->header.type > 0 && np->header.type < RX_N_PACKET_TYPES)
2499         ? rx_packetTypes[np->header.type - 1] : "*UNKNOWN*";
2500     dpf(("R %d %s: %x.%d.%d.%d.%d.%d.%d flags %d, packet %x",
2501          np->header.serial, packetType, ntohl(host), ntohs(port), np->header.serviceId,
2502          np->header.epoch, np->header.cid, np->header.callNumber,
2503          np->header.seq, np->header.flags, np));
2504 #endif
2505
2506     if (np->header.type == RX_PACKET_TYPE_VERSION) {
2507         return rxi_ReceiveVersionPacket(np, socket, host, port, 1);
2508     }
2509
2510     if (np->header.type == RX_PACKET_TYPE_DEBUG) {
2511         return rxi_ReceiveDebugPacket(np, socket, host, port, 1);
2512     }
2513 #ifdef RXDEBUG
2514     /* If an input tracer function is defined, call it with the packet and
2515      * network address.  Note this function may modify its arguments. */
2516     if (rx_justReceived) {
2517         struct sockaddr_in addr;
2518         int drop;
2519         addr.sin_family = AF_INET;
2520         addr.sin_port = port;
2521         addr.sin_addr.s_addr = host;
2522 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2523         addr.sin_len = sizeof(addr);
2524 #endif /* AFS_OSF_ENV */
2525         drop = (*rx_justReceived) (np, &addr);
2526         /* drop packet if return value is non-zero */
2527         if (drop)
2528             return np;
2529         port = addr.sin_port;   /* in case fcn changed addr */
2530         host = addr.sin_addr.s_addr;
2531     }
2532 #endif
2533
2534     /* If packet was not sent by the client, then *we* must be the client */
2535     type = ((np->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
2536         ? RX_CLIENT_CONNECTION : RX_SERVER_CONNECTION;
2537
2538     /* Find the connection (or fabricate one, if we're the server & if
2539      * necessary) associated with this packet */
2540     conn =
2541         rxi_FindConnection(socket, host, port, np->header.serviceId,
2542                            np->header.cid, np->header.epoch, type,
2543                            np->header.securityIndex);
2544
2545     if (!conn) {
2546         /* If no connection found or fabricated, just ignore the packet.
2547          * (An argument could be made for sending an abort packet for
2548          * the conn) */
2549         return np;
2550     }
2551
2552     MUTEX_ENTER(&conn->conn_data_lock);
2553     if (conn->maxSerial < np->header.serial)
2554         conn->maxSerial = np->header.serial;
2555     MUTEX_EXIT(&conn->conn_data_lock);
2556
2557     /* If the connection is in an error state, send an abort packet and ignore
2558      * the incoming packet */
2559     if (conn->error) {
2560         /* Don't respond to an abort packet--we don't want loops! */
2561         MUTEX_ENTER(&conn->conn_data_lock);
2562         if (np->header.type != RX_PACKET_TYPE_ABORT)
2563             np = rxi_SendConnectionAbort(conn, np, 1, 0);
2564         conn->refCount--;
2565         MUTEX_EXIT(&conn->conn_data_lock);
2566         return np;
2567     }
2568
2569     /* Check for connection-only requests (i.e. not call specific). */
2570     if (np->header.callNumber == 0) {
2571         switch (np->header.type) {
2572         case RX_PACKET_TYPE_ABORT: {
2573             /* What if the supplied error is zero? */
2574             afs_int32 errcode = ntohl(rx_GetInt32(np, 0));
2575             dpf(("rxi_ReceivePacket ABORT rx_GetInt32 = %d", errcode));
2576             rxi_ConnectionError(conn, errcode);
2577             MUTEX_ENTER(&conn->conn_data_lock);
2578             conn->refCount--;
2579             MUTEX_EXIT(&conn->conn_data_lock);
2580             return np;
2581         }
2582         case RX_PACKET_TYPE_CHALLENGE:
2583             tnp = rxi_ReceiveChallengePacket(conn, np, 1);
2584             MUTEX_ENTER(&conn->conn_data_lock);
2585             conn->refCount--;
2586             MUTEX_EXIT(&conn->conn_data_lock);
2587             return tnp;
2588         case RX_PACKET_TYPE_RESPONSE:
2589             tnp = rxi_ReceiveResponsePacket(conn, np, 1);
2590             MUTEX_ENTER(&conn->conn_data_lock);
2591             conn->refCount--;
2592             MUTEX_EXIT(&conn->conn_data_lock);
2593             return tnp;
2594         case RX_PACKET_TYPE_PARAMS:
2595         case RX_PACKET_TYPE_PARAMS + 1:
2596         case RX_PACKET_TYPE_PARAMS + 2:
2597             /* ignore these packet types for now */
2598             MUTEX_ENTER(&conn->conn_data_lock);
2599             conn->refCount--;
2600             MUTEX_EXIT(&conn->conn_data_lock);
2601             return np;
2602
2603
2604         default:
2605             /* Should not reach here, unless the peer is broken: send an
2606              * abort packet */
2607             rxi_ConnectionError(conn, RX_PROTOCOL_ERROR);
2608             MUTEX_ENTER(&conn->conn_data_lock);
2609             tnp = rxi_SendConnectionAbort(conn, np, 1, 0);
2610             conn->refCount--;
2611             MUTEX_EXIT(&conn->conn_data_lock);
2612             return tnp;
2613         }
2614     }
2615
2616     channel = np->header.cid & RX_CHANNELMASK;
2617     call = conn->call[channel];
2618 #ifdef  RX_ENABLE_LOCKS
2619     if (call)
2620         MUTEX_ENTER(&call->lock);
2621     /* Test to see if call struct is still attached to conn. */
2622     if (call != conn->call[channel]) {
2623         if (call)
2624             MUTEX_EXIT(&call->lock);
2625         if (type == RX_SERVER_CONNECTION) {
2626             call = conn->call[channel];
2627             /* If we started with no call attached and there is one now,
2628              * another thread is also running this routine and has gotten
2629              * the connection channel. We should drop this packet in the tests
2630              * below. If there was a call on this connection and it's now
2631              * gone, then we'll be making a new call below.
2632              * If there was previously a call and it's now different then
2633              * the old call was freed and another thread running this routine
2634              * has created a call on this channel. One of these two threads
2635              * has a packet for the old call and the code below handles those
2636              * cases.
2637              */
2638             if (call)
2639                 MUTEX_ENTER(&call->lock);
2640         } else {
2641             /* This packet can't be for this call. If the new call address is
2642              * 0 then no call is running on this channel. If there is a call
2643              * then, since this is a client connection we're getting data for
2644              * it must be for the previous call.
2645              */
2646             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2647             MUTEX_ENTER(&conn->conn_data_lock);
2648             conn->refCount--;
2649             MUTEX_EXIT(&conn->conn_data_lock);
2650             return np;
2651         }
2652     }
2653 #endif
2654     currentCallNumber = conn->callNumber[channel];
2655
2656     if (type == RX_SERVER_CONNECTION) { /* We're the server */
2657         if (np->header.callNumber < currentCallNumber) {
2658             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2659 #ifdef  RX_ENABLE_LOCKS
2660             if (call)
2661                 MUTEX_EXIT(&call->lock);
2662 #endif
2663             MUTEX_ENTER(&conn->conn_data_lock);
2664             conn->refCount--;
2665             MUTEX_EXIT(&conn->conn_data_lock);
2666             return np;
2667         }
2668         if (!call) {
2669             MUTEX_ENTER(&conn->conn_call_lock);
2670             call = rxi_NewCall(conn, channel);
2671             MUTEX_EXIT(&conn->conn_call_lock);
2672             *call->callNumber = np->header.callNumber;
2673             if (np->header.callNumber == 0)
2674                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2675
2676             call->state = RX_STATE_PRECALL;
2677             clock_GetTime(&call->queueTime);
2678             hzero(call->bytesSent);
2679             hzero(call->bytesRcvd);
2680             /*
2681              * If the number of queued calls exceeds the overload
2682              * threshold then abort this call.
2683              */
2684             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2685                 struct rx_packet *tp;
2686
2687                 rxi_CallError(call, rx_BusyError);
2688                 tp = rxi_SendCallAbort(call, np, 1, 0);
2689                 MUTEX_EXIT(&call->lock);
2690                 MUTEX_ENTER(&conn->conn_data_lock);
2691                 conn->refCount--;
2692                 MUTEX_EXIT(&conn->conn_data_lock);
2693                 rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
2694                 return tp;
2695             }
2696             rxi_KeepAliveOn(call);
2697         } else if (np->header.callNumber != currentCallNumber) {
2698             /* Wait until the transmit queue is idle before deciding
2699              * whether to reset the current call. Chances are that the
2700              * call will be in ether DALLY or HOLD state once the TQ_BUSY
2701              * flag is cleared.
2702              */
2703 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2704             while ((call->state == RX_STATE_ACTIVE)
2705                    && (call->flags & RX_CALL_TQ_BUSY)) {
2706                 call->flags |= RX_CALL_TQ_WAIT;
2707                 call->tqWaiters++;
2708 #ifdef RX_ENABLE_LOCKS
2709                 osirx_AssertMine(&call->lock, "rxi_Start lock3");
2710                 CV_WAIT(&call->cv_tq, &call->lock);
2711 #else /* RX_ENABLE_LOCKS */
2712                 osi_rxSleep(&call->tq);
2713 #endif /* RX_ENABLE_LOCKS */
2714                 call->tqWaiters--;
2715                 if (call->tqWaiters == 0)
2716                     call->flags &= ~RX_CALL_TQ_WAIT;
2717             }
2718 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2719             /* If the new call cannot be taken right now send a busy and set
2720              * the error condition in this call, so that it terminates as
2721              * quickly as possible */
2722             if (call->state == RX_STATE_ACTIVE) {
2723                 struct rx_packet *tp;
2724
2725                 rxi_CallError(call, RX_CALL_DEAD);
2726                 tp = rxi_SendSpecial(call, conn, np, RX_PACKET_TYPE_BUSY,
2727                                      NULL, 0, 1);
2728                 MUTEX_EXIT(&call->lock);
2729                 MUTEX_ENTER(&conn->conn_data_lock);
2730                 conn->refCount--;
2731                 MUTEX_EXIT(&conn->conn_data_lock);
2732                 return tp;
2733             }
2734             rxi_ResetCall(call, 0);
2735             *call->callNumber = np->header.callNumber;
2736             if (np->header.callNumber == 0)
2737                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], ntohl(conn->peer->host), ntohs(conn->peer->port), np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2738
2739             call->state = RX_STATE_PRECALL;
2740             clock_GetTime(&call->queueTime);
2741             hzero(call->bytesSent);
2742             hzero(call->bytesRcvd);
2743             /*
2744              * If the number of queued calls exceeds the overload
2745              * threshold then abort this call.
2746              */
2747             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2748                 struct rx_packet *tp;
2749
2750                 rxi_CallError(call, rx_BusyError);
2751                 tp = rxi_SendCallAbort(call, np, 1, 0);
2752                 MUTEX_EXIT(&call->lock);
2753                 MUTEX_ENTER(&conn->conn_data_lock);
2754                 conn->refCount--;
2755                 MUTEX_EXIT(&conn->conn_data_lock);
2756                 rx_MutexIncrement(rx_stats.nBusies, rx_stats_mutex);
2757                 return tp;
2758             }
2759             rxi_KeepAliveOn(call);
2760         } else {
2761             /* Continuing call; do nothing here. */
2762         }
2763     } else {                    /* we're the client */
2764         /* Ignore all incoming acknowledgements for calls in DALLY state */
2765         if (call && (call->state == RX_STATE_DALLY)
2766             && (np->header.type == RX_PACKET_TYPE_ACK)) {
2767             rx_MutexIncrement(rx_stats.ignorePacketDally, rx_stats_mutex);
2768 #ifdef  RX_ENABLE_LOCKS
2769             if (call) {
2770                 MUTEX_EXIT(&call->lock);
2771             }
2772 #endif
2773             MUTEX_ENTER(&conn->conn_data_lock);
2774             conn->refCount--;
2775             MUTEX_EXIT(&conn->conn_data_lock);
2776             return np;
2777         }
2778
2779         /* Ignore anything that's not relevant to the current call.  If there
2780          * isn't a current call, then no packet is relevant. */
2781         if (!call || (np->header.callNumber != currentCallNumber)) {
2782             rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2783 #ifdef  RX_ENABLE_LOCKS
2784             if (call) {
2785                 MUTEX_EXIT(&call->lock);
2786             }
2787 #endif
2788             MUTEX_ENTER(&conn->conn_data_lock);
2789             conn->refCount--;
2790             MUTEX_EXIT(&conn->conn_data_lock);
2791             return np;
2792         }
2793         /* If the service security object index stamped in the packet does not
2794          * match the connection's security index, ignore the packet */
2795         if (np->header.securityIndex != conn->securityIndex) {
2796 #ifdef  RX_ENABLE_LOCKS
2797             MUTEX_EXIT(&call->lock);
2798 #endif
2799             MUTEX_ENTER(&conn->conn_data_lock);
2800             conn->refCount--;
2801             MUTEX_EXIT(&conn->conn_data_lock);
2802             return np;
2803         }
2804
2805         /* If we're receiving the response, then all transmit packets are
2806          * implicitly acknowledged.  Get rid of them. */
2807         if (np->header.type == RX_PACKET_TYPE_DATA) {
2808 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2809             /* XXX Hack. Because we must release the global rx lock when
2810              * sending packets (osi_NetSend) we drop all acks while we're
2811              * traversing the tq in rxi_Start sending packets out because
2812              * packets may move to the freePacketQueue as result of being here!
2813              * So we drop these packets until we're safely out of the
2814              * traversing. Really ugly!
2815              * For fine grain RX locking, we set the acked field in the
2816              * packets and let rxi_Start remove them from the transmit queue.
2817              */
2818             if (call->flags & RX_CALL_TQ_BUSY) {
2819 #ifdef  RX_ENABLE_LOCKS
2820                 rxi_SetAcksInTransmitQueue(call);
2821 #else
2822                 conn->refCount--;
2823                 return np;      /* xmitting; drop packet */
2824 #endif
2825             } else {
2826                 rxi_ClearTransmitQueue(call, 0);
2827             }
2828 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2829             rxi_ClearTransmitQueue(call, 0);
2830 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2831         } else {
2832             if (np->header.type == RX_PACKET_TYPE_ACK) {
2833                 /* now check to see if this is an ack packet acknowledging that the
2834                  * server actually *lost* some hard-acked data.  If this happens we
2835                  * ignore this packet, as it may indicate that the server restarted in
2836                  * the middle of a call.  It is also possible that this is an old ack
2837                  * packet.  We don't abort the connection in this case, because this
2838                  * *might* just be an old ack packet.  The right way to detect a server
2839                  * restart in the midst of a call is to notice that the server epoch
2840                  * changed, btw.  */
2841                 /* XXX I'm not sure this is exactly right, since tfirst **IS**
2842                  * XXX unacknowledged.  I think that this is off-by-one, but
2843                  * XXX I don't dare change it just yet, since it will
2844                  * XXX interact badly with the server-restart detection
2845                  * XXX code in receiveackpacket.  */
2846                 if (ntohl(rx_GetInt32(np, FIRSTACKOFFSET)) < call->tfirst) {
2847                     rx_MutexIncrement(rx_stats.spuriousPacketsRead, rx_stats_mutex);
2848                     MUTEX_EXIT(&call->lock);
2849                     MUTEX_ENTER(&conn->conn_data_lock);
2850                     conn->refCount--;
2851                     MUTEX_EXIT(&conn->conn_data_lock);
2852                     return np;
2853                 }
2854             }
2855         }                       /* else not a data packet */
2856     }
2857
2858     osirx_AssertMine(&call->lock, "rxi_ReceivePacket middle");
2859     /* Set remote user defined status from packet */
2860     call->remoteStatus = np->header.userStatus;
2861
2862     /* Note the gap between the expected next packet and the actual
2863      * packet that arrived, when the new packet has a smaller serial number
2864      * than expected.  Rioses frequently reorder packets all by themselves,
2865      * so this will be quite important with very large window sizes.
2866      * Skew is checked against 0 here to avoid any dependence on the type of
2867      * inPacketSkew (which may be unsigned).  In C, -1 > (unsigned) 0 is always
2868      * true!
2869      * The inPacketSkew should be a smoothed running value, not just a maximum.  MTUXXX
2870      * see CalculateRoundTripTime for an example of how to keep smoothed values.
2871      * I think using a beta of 1/8 is probably appropriate.  93.04.21
2872      */
2873     MUTEX_ENTER(&conn->conn_data_lock);
2874     skew = conn->lastSerial - np->header.serial;
2875     conn->lastSerial = np->header.serial;
2876     MUTEX_EXIT(&conn->conn_data_lock);
2877     if (skew > 0) {
2878         register struct rx_peer *peer;
2879         peer = conn->peer;
2880         if (skew > peer->inPacketSkew) {
2881             dpf(("*** In skew changed from %d to %d\n", peer->inPacketSkew,
2882                  skew));
2883             peer->inPacketSkew = skew;
2884         }
2885     }
2886
2887     /* Now do packet type-specific processing */
2888     switch (np->header.type) {
2889     case RX_PACKET_TYPE_DATA:
2890         np = rxi_ReceiveDataPacket(call, np, 1, socket, host, port, tnop,
2891                                    newcallp);
2892         break;
2893     case RX_PACKET_TYPE_ACK:
2894         /* Respond immediately to ack packets requesting acknowledgement
2895          * (ping packets) */
2896         if (np->header.flags & RX_REQUEST_ACK) {
2897             if (call->error)
2898                 (void)rxi_SendCallAbort(call, 0, 1, 0);
2899             else
2900                 (void)rxi_SendAck(call, 0, np->header.serial,
2901                                   RX_ACK_PING_RESPONSE, 1);
2902         }
2903         np = rxi_ReceiveAckPacket(call, np, 1);
2904         break;
2905     case RX_PACKET_TYPE_ABORT: {
2906         /* An abort packet: reset the call, passing the error up to the user. */
2907         /* What if error is zero? */
2908         /* What if the error is -1? the application will treat it as a timeout. */
2909         afs_int32 errdata = ntohl(*(afs_int32 *) rx_DataOf(np));
2910         dpf(("rxi_ReceivePacket ABORT rx_DataOf = %d", errdata));
2911         rxi_CallError(call, errdata);
2912         MUTEX_EXIT(&call->lock);
2913         MUTEX_ENTER(&conn->conn_data_lock);
2914         conn->refCount--;
2915         MUTEX_EXIT(&conn->conn_data_lock);
2916         return np;              /* xmitting; drop packet */
2917     }
2918     case RX_PACKET_TYPE_BUSY:
2919         /* XXXX */
2920         break;
2921     case RX_PACKET_TYPE_ACKALL:
2922         /* All packets acknowledged, so we can drop all packets previously
2923          * readied for sending */
2924 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2925         /* XXX Hack. We because we can't release the global rx lock when
2926          * sending packets (osi_NetSend) we drop all ack pkts while we're
2927          * traversing the tq in rxi_Start sending packets out because
2928          * packets may move to the freePacketQueue as result of being
2929          * here! So we drop these packets until we're safely out of the
2930          * traversing. Really ugly!
2931          * For fine grain RX locking, we set the acked field in the packets
2932          * and let rxi_Start remove the packets from the transmit queue.
2933          */
2934         if (call->flags & RX_CALL_TQ_BUSY) {
2935 #ifdef  RX_ENABLE_LOCKS
2936             rxi_SetAcksInTransmitQueue(call);
2937             break;
2938 #else /* RX_ENABLE_LOCKS */
2939             MUTEX_EXIT(&call->lock);
2940             MUTEX_ENTER(&conn->conn_data_lock);
2941             conn->refCount--;
2942             MUTEX_EXIT(&conn->conn_data_lock);
2943             return np;          /* xmitting; drop packet */
2944 #endif /* RX_ENABLE_LOCKS */
2945         }
2946 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2947         rxi_ClearTransmitQueue(call, 0);
2948         rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
2949         break;
2950     default:
2951         /* Should not reach here, unless the peer is broken: send an abort
2952          * packet */
2953         rxi_CallError(call, RX_PROTOCOL_ERROR);
2954         np = rxi_SendCallAbort(call, np, 1, 0);
2955         break;
2956     };
2957     /* Note when this last legitimate packet was received, for keep-alive
2958      * processing.  Note, we delay getting the time until now in the hope that
2959      * the packet will be delivered to the user before any get time is required
2960      * (if not, then the time won't actually be re-evaluated here). */
2961     call->lastReceiveTime = clock_Sec();
2962     MUTEX_EXIT(&call->lock);
2963     MUTEX_ENTER(&conn->conn_data_lock);
2964     conn->refCount--;
2965     MUTEX_EXIT(&conn->conn_data_lock);
2966     return np;
2967 }
2968
2969 /* return true if this is an "interesting" connection from the point of view
2970     of someone trying to debug the system */
2971 int
2972 rxi_IsConnInteresting(struct rx_connection *aconn)
2973 {
2974     register int i;
2975     register struct rx_call *tcall;
2976
2977     if (aconn->flags & (RX_CONN_MAKECALL_WAITING | RX_CONN_DESTROY_ME))
2978         return 1;
2979     for (i = 0; i < RX_MAXCALLS; i++) {
2980         tcall = aconn->call[i];
2981         if (tcall) {
2982             if ((tcall->state == RX_STATE_PRECALL)
2983                 || (tcall->state == RX_STATE_ACTIVE))
2984                 return 1;
2985             if ((tcall->mode == RX_MODE_SENDING)
2986                 || (tcall->mode == RX_MODE_RECEIVING))
2987                 return 1;
2988         }
2989     }
2990     return 0;
2991 }
2992
2993 #ifdef KERNEL
2994 /* if this is one of the last few packets AND it wouldn't be used by the
2995    receiving call to immediately satisfy a read request, then drop it on
2996    the floor, since accepting it might prevent a lock-holding thread from
2997    making progress in its reading. If a call has been cleared while in
2998    the precall state then ignore all subsequent packets until the call
2999    is assigned to a thread. */
3000
3001 static int
3002 TooLow(struct rx_packet *ap, struct rx_call *acall)
3003 {
3004     int rc = 0;
3005     MUTEX_ENTER(&rx_stats_mutex);
3006     if (((ap->header.seq != 1) && (acall->flags & RX_CALL_CLEARED)
3007          && (acall->state == RX_STATE_PRECALL))
3008         || ((rx_nFreePackets < rxi_dataQuota + 2)
3009             && !((ap->header.seq < acall->rnext + rx_initSendWindow)
3010                  && (acall->flags & RX_CALL_READER_WAIT)))) {
3011         rc = 1;
3012     }
3013     MUTEX_EXIT(&rx_stats_mutex);
3014     return rc;
3015 }
3016 #endif /* KERNEL */
3017
3018 static void
3019 rxi_CheckReachEvent(struct rxevent *event, struct rx_connection *conn,
3020                     struct rx_call *acall)
3021 {
3022     struct rx_call *call = acall;
3023     struct clock when, now;
3024     int i, waiting;
3025
3026     MUTEX_ENTER(&conn->conn_data_lock);
3027     conn->checkReachEvent = NULL;
3028     waiting = conn->flags & RX_CONN_ATTACHWAIT;
3029     if (event)
3030         conn->refCount--;
3031     MUTEX_EXIT(&conn->conn_data_lock);
3032
3033     if (waiting) {
3034         if (!call) {
3035             MUTEX_ENTER(&conn->conn_call_lock);
3036             MUTEX_ENTER(&conn->conn_data_lock);
3037             for (i = 0; i < RX_MAXCALLS; i++) {
3038                 struct rx_call *tc = conn->call[i];
3039                 if (tc && tc->state == RX_STATE_PRECALL) {
3040                     call = tc;
3041                     break;
3042                 }
3043             }
3044             if (!call)
3045                 /* Indicate that rxi_CheckReachEvent is no longer running by
3046                  * clearing the flag.  Must be atomic under conn_data_lock to
3047                  * avoid a new call slipping by: rxi_CheckConnReach holds
3048                  * conn_data_lock while checking RX_CONN_ATTACHWAIT.
3049                  */
3050                 conn->flags &= ~RX_CONN_ATTACHWAIT;
3051             MUTEX_EXIT(&conn->conn_data_lock);
3052             MUTEX_EXIT(&conn->conn_call_lock);
3053         }
3054
3055         if (call) {
3056             if (call != acall)
3057                 MUTEX_ENTER(&call->lock);
3058             rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
3059             if (call != acall)
3060                 MUTEX_EXIT(&call->lock);
3061
3062             clock_GetTime(&now);
3063             when = now;
3064             when.sec += RX_CHECKREACH_TIMEOUT;
3065             MUTEX_ENTER(&conn->conn_data_lock);
3066             if (!conn->checkReachEvent) {
3067                 conn->refCount++;
3068                 conn->checkReachEvent =
3069                     rxevent_PostNow(&when, &now, rxi_CheckReachEvent, conn,
3070                                     NULL);
3071             }
3072             MUTEX_EXIT(&conn->conn_data_lock);
3073         }
3074     }
3075 }
3076
3077 static int
3078 rxi_CheckConnReach(struct rx_connection *conn, struct rx_call *call)
3079 {
3080     struct rx_service *service = conn->service;
3081     struct rx_peer *peer = conn->peer;
3082     afs_uint32 now, lastReach;
3083
3084     if (service->checkReach == 0)
3085         return 0;
3086
3087     now = clock_Sec();
3088     MUTEX_ENTER(&peer->peer_lock);
3089     lastReach = peer->lastReachTime;
3090     MUTEX_EXIT(&peer->peer_lock);
3091     if (now - lastReach < RX_CHECKREACH_TTL)
3092         return 0;
3093
3094     MUTEX_ENTER(&conn->conn_data_lock);
3095     if (conn->flags & RX_CONN_ATTACHWAIT) {
3096         MUTEX_EXIT(&conn->conn_data_lock);
3097         return 1;
3098     }
3099     conn->flags |= RX_CONN_ATTACHWAIT;
3100     MUTEX_EXIT(&conn->conn_data_lock);
3101     if (!conn->checkReachEvent)
3102         rxi_CheckReachEvent(NULL, conn, call);
3103
3104     return 1;
3105 }
3106
3107 /* try to attach call, if authentication is complete */
3108 static void
3109 TryAttach(register struct rx_call *acall, register osi_socket socket,
3110           register int *tnop, register struct rx_call **newcallp,
3111           int reachOverride)
3112 {
3113     struct rx_connection *conn = acall->conn;
3114
3115     if (conn->type == RX_SERVER_CONNECTION
3116         && acall->state == RX_STATE_PRECALL) {
3117         /* Don't attach until we have any req'd. authentication. */
3118         if (RXS_CheckAuthentication(conn->securityObject, conn) == 0) {
3119             if (reachOverride || rxi_CheckConnReach(conn, acall) == 0)
3120                 rxi_AttachServerProc(acall, socket, tnop, newcallp);
3121             /* Note:  this does not necessarily succeed; there
3122              * may not any proc available
3123              */
3124         } else {
3125             rxi_ChallengeOn(acall->conn);
3126         }
3127     }
3128 }
3129
/* A data packet has been received off the interface.  This packet is
 * appropriate to the call (the call is in the right state, etc.).  This
 * routine can return a packet to the caller, for re-use.
 *
 * Parameters:
 *   call     - the call the packet belongs to
 *   np       - the received packet; may be the first packet of a
 *              jumbogram, in which case the remaining packets are
 *              split off and processed here as well
 *   istack   - passed through to rxi_SendAck / rxi_SendCallAbort
 *   socket, tnop, newcallp
 *            - passed through to TryAttach
 *   host, port
 *            - passed through to rxi_SplitJumboPacket
 *
 * Return value:
 *   - the result of rxi_SendCallAbort if the call is in an error state;
 *   - NULL if the call picked up an error while rxi_FillReadVec ran;
 *   - otherwise the current value of np: NULL when the last packet
 *     processed was placed on the receive queue and no ack was sent
 *     afterwards, or whatever the last rxi_SendAck returned.
 *   In KERNEL builds the packet is returned untouched when it is
 *   dropped for lack of packet buffers.
 */

struct rx_packet *
rxi_ReceiveDataPacket(register struct rx_call *call,
                      register struct rx_packet *np, int istack,
                      osi_socket socket, afs_uint32 host, u_short port,
                      int *tnop, struct rx_call **newcallp)
{
    int ackNeeded = 0;          /* 0 means no, otherwise ack_reason */
    int newPackets = 0;         /* set once an in-sequence packet is queued */
    int didHardAck = 0;         /* result of rxi_FillReadVec, if called */
    int haveLast = 0;           /* a packet with RX_LAST_PACKET was queued */
    afs_uint32 seq, serial, flags;      /* header fields of the current packet */
    int isFirst;                /* true only on the first pass of the loop */
    struct rx_packet *tnp;      /* remainder of the jumbogram, if any */
    struct clock when, now;
    rx_MutexIncrement(rx_stats.dataPacketsRead, rx_stats_mutex);

#ifdef KERNEL
    /* If there are no packet buffers, drop this new packet, unless we can find
     * packet buffers from inactive calls */
    if (!call->error
        && (rxi_OverQuota(RX_PACKET_CLASS_RECEIVE) || TooLow(np, call))) {
        MUTEX_ENTER(&rx_freePktQ_lock);
        rxi_NeedMorePackets = TRUE;
        MUTEX_EXIT(&rx_freePktQ_lock);
        rx_MutexIncrement(rx_stats.noPacketBuffersOnRead, rx_stats_mutex);
        call->rprev = np->header.serial;
        rxi_calltrace(RX_TRACE_DROP, call);
        dpf(("packet %x dropped on receipt - quota problems", np));
        if (rxi_doreclaim)
            rxi_ClearReceiveQueue(call);
        /* Schedule a delayed ack rx_softAckDelay from now, unless one is
         * already pending that fires no later than that. */
        clock_GetTime(&now);
        when = now;
        clock_Add(&when, &rx_softAckDelay);
        if (!call->delayedAckEvent
            || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
            rxevent_Cancel(call->delayedAckEvent, call,
                           RX_CALL_REFCOUNT_DELAY);
            CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
            call->delayedAckEvent =
                rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
        }
        /* we've damaged this call already, might as well do it in. */
        return np;
    }
#endif /* KERNEL */

    /*
     * New in AFS 3.5, if the RX_JUMBO_PACKET flag is set then this
     * packet is one of several packets transmitted as a single
     * datagram. Do not send any soft or hard acks until all packets
     * in a jumbogram have been processed. Send negative acks right away.
     */
    for (isFirst = 1, tnp = NULL; isFirst || tnp; isFirst = 0) {
        /* tnp is non-null when there are more packets in the
         * current jumbo gram */
        if (tnp) {
            /* np is still non-NULL here only if the previous packet was
             * not queued (e.g. it was a duplicate); release it before
             * moving on to the next packet of the jumbogram. */
            if (np)
                rxi_FreePacket(np);
            np = tnp;
        }

        seq = np->header.seq;
        serial = np->header.serial;
        flags = np->header.flags;

        /* If the call is in an error state, send an abort message */
        if (call->error)
            return rxi_SendCallAbort(call, np, istack, 0);

        /* The RX_JUMBO_PACKET is set in all but the last packet in each
         * AFS 3.5 jumbogram. */
        if (flags & RX_JUMBO_PACKET) {
            tnp = rxi_SplitJumboPacket(np, host, port, isFirst);
        } else {
            tnp = NULL;
        }

        /* A non-zero spare header field marks the connection as using
         * packet checksums. */
        if (np->header.spare != 0) {
            MUTEX_ENTER(&call->conn->conn_data_lock);
            call->conn->flags |= RX_CONN_USING_PACKET_CKSUM;
            MUTEX_EXIT(&call->conn->conn_data_lock);
        }

        /* The usual case is that this is the expected next packet */
        if (seq == call->rnext) {

            /* Check to make sure it is not a duplicate of one already queued */
            if (queue_IsNotEmpty(&call->rq)
                && queue_First(&call->rq, rx_packet)->header.seq == seq) {
                rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
                dpf(("packet %x dropped on receipt - duplicate", np));
                rxevent_Cancel(call->delayedAckEvent, call,
                               RX_CALL_REFCOUNT_DELAY);
                np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
                ackNeeded = 0;
                call->rprev = seq;
                continue;
            }

            /* It's the next packet. Stick it on the receive queue
             * for this call. Set newPackets to make sure we wake
             * the reader once all packets have been processed */
            queue_Prepend(&call->rq, np);
            call->nSoftAcks++;
            np = NULL;          /* We can't use this anymore */
            newPackets = 1;

            /* If an ack is requested then set a flag to make sure we
             * send an acknowledgement for this packet */
            if (flags & RX_REQUEST_ACK) {
                ackNeeded = RX_ACK_REQUESTED;
            }

            /* Keep track of whether we have received the last packet */
            if (flags & RX_LAST_PACKET) {
                call->flags |= RX_CALL_HAVE_LAST;
                haveLast = 1;
            }

            /* Check whether we have all of the packets for this call:
             * walk the queue from seq and stop at the first gap; reaching
             * the packet flagged RX_LAST_PACKET without a gap means the
             * receive side is complete. */
            if (call->flags & RX_CALL_HAVE_LAST) {
                afs_uint32 tseq;        /* temporary sequence number */
                struct rx_packet *tp;   /* Temporary packet pointer */
                struct rx_packet *nxp;  /* Next pointer, for queue_Scan */

                for (tseq = seq, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
                    if (tseq != tp->header.seq)
                        break;
                    if (tp->header.flags & RX_LAST_PACKET) {
                        call->flags |= RX_CALL_RECEIVE_DONE;
                        break;
                    }
                    tseq++;
                }
            }

            /* Provide asynchronous notification for those who want it
             * (e.g. multi rx).  The callback is cleared after it is
             * invoked, so it fires at most once per registration. */
            if (call->arrivalProc) {
                (*call->arrivalProc) (call, call->arrivalProcHandle,
                                      call->arrivalProcArg);
                call->arrivalProc = (void (*)())0;
            }

            /* Update last packet received */
            call->rprev = seq;

            /* If there is no server process serving this call, grab
             * one, if available. We only need to do this once. If a
             * server thread is available, this thread becomes a server
             * thread and the server thread becomes a listener thread. */
            if (isFirst) {
                TryAttach(call, socket, tnop, newcallp, 0);
            }
        }
        /* This is not the expected next packet. */
        else {
            /* Determine whether this is a new or old packet, and if it's
             * a new one, whether it fits into the current receive window.
             * Also figure out whether the packet was delivered in sequence.
             * We use the prev variable to determine whether the new packet
             * is the successor of its immediate predecessor in the
             * receive queue, and the missing flag to determine whether
             * any of this packet's predecessors are missing.  */

            afs_uint32 prev;    /* "Previous packet" sequence number */
            struct rx_packet *tp;       /* Temporary packet pointer */
            struct rx_packet *nxp;      /* Next pointer, for queue_Scan */
            int missing;        /* Are any predecessors missing? */

            /* If the new packet's sequence number has been sent to the
             * application already, then this is a duplicate */
            if (seq < call->rnext) {
                rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
                rxevent_Cancel(call->delayedAckEvent, call,
                               RX_CALL_REFCOUNT_DELAY);
                np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
                ackNeeded = 0;
                call->rprev = seq;
                continue;
            }

            /* If the sequence number is greater than what can be
             * accommodated by the current window, then send a negative
             * acknowledge and drop the packet */
            if ((call->rnext + call->rwind) <= seq) {
                rxevent_Cancel(call->delayedAckEvent, call,
                               RX_CALL_REFCOUNT_DELAY);
                np = rxi_SendAck(call, np, serial, RX_ACK_EXCEEDS_WINDOW,
                                 istack);
                ackNeeded = 0;
                call->rprev = seq;
                continue;
            }

            /* Look for the packet in the queue of old received packets */
            for (prev = call->rnext - 1, missing =
                 0, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
                /* Check for duplicate packet */
                if (seq == tp->header.seq) {
                    rx_MutexIncrement(rx_stats.dupPacketsRead, rx_stats_mutex);
                    rxevent_Cancel(call->delayedAckEvent, call,
                                   RX_CALL_REFCOUNT_DELAY);
                    np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE,
                                     istack);
                    ackNeeded = 0;
                    call->rprev = seq;
                    /* We are inside the queue scan loop here, so a plain
                     * `continue' would only advance the scan; jump to the
                     * end of the outer per-packet loop instead. */
                    goto nextloop;
                }
                /* If we find a higher sequence packet, break out and
                 * insert the new packet here. */
                if (seq < tp->header.seq)
                    break;
                /* Check for missing packet */
                if (tp->header.seq != prev + 1) {
                    missing = 1;
                }

                prev = tp->header.seq;
            }

            /* Keep track of whether we have received the last packet. */
            if (flags & RX_LAST_PACKET) {
                call->flags |= RX_CALL_HAVE_LAST;
            }

            /* It's within the window: add it to the receive queue.
             * tp is left by the previous loop either pointing at the
             * packet before which to insert the new packet, or at the
             * queue head if the queue is empty or the packet should be
             * appended. */
            queue_InsertBefore(tp, np);
            call->nSoftAcks++;
            np = NULL;

            /* Check whether we have all of the packets for this call */
            if ((call->flags & RX_CALL_HAVE_LAST)
                && !(call->flags & RX_CALL_RECEIVE_DONE)) {
                afs_uint32 tseq;        /* temporary sequence number */

                for (tseq =
                     call->rnext, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
                    if (tseq != tp->header.seq)
                        break;
                    if (tp->header.flags & RX_LAST_PACKET) {
                        call->flags |= RX_CALL_RECEIVE_DONE;
                        break;
                    }
                    tseq++;
                }
            }

            /* We need to send an ack if the packet is out of sequence,
             * or if an ack was requested by the peer. */
            if (seq != prev + 1 || missing) {
                ackNeeded = RX_ACK_OUT_OF_SEQUENCE;
            } else if (flags & RX_REQUEST_ACK) {
                ackNeeded = RX_ACK_REQUESTED;
            }

            /* Acknowledge the last packet for each call */
            if (flags & RX_LAST_PACKET) {
                haveLast = 1;
            }

            call->rprev = seq;
        }
      nextloop:;
    }

    /* From here on, seq, serial and flags hold the header fields of the
     * last packet handled by the loop above. */
    if (newPackets) {
        /*
         * If the receiver is waiting for an iovec, fill the iovec
         * using the data from the receive queue */
        if (call->flags & RX_CALL_IOVEC_WAIT) {
            didHardAck = rxi_FillReadVec(call, serial);
            /* the call may have been aborted */
            if (call->error) {
                return NULL;
            }
            if (didHardAck) {
                ackNeeded = 0;
            }
        }

        /* Wakeup the reader if any */
        if ((call->flags & RX_CALL_READER_WAIT)
            && (!(call->flags & RX_CALL_IOVEC_WAIT) || !(call->iovNBytes)
                || (call->iovNext >= call->iovMax)
                || (call->flags & RX_CALL_RECEIVE_DONE))) {
            call->flags &= ~RX_CALL_READER_WAIT;
#ifdef  RX_ENABLE_LOCKS
            CV_BROADCAST(&call->cv_rq);
#else
            osi_rxWakeup(&call->rq);
#endif
        }
    }

    /*
     * Send an ack when requested by the peer, or once every
     * rxi_SoftAckRate packets until the last packet has been
     * received. Always send a soft ack for the last packet in
     * the server's reply. */
    if (ackNeeded) {
        rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
        np = rxi_SendAck(call, np, serial, ackNeeded, istack);
    } else if (call->nSoftAcks > (u_short) rxi_SoftAckRate) {
        rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
        np = rxi_SendAck(call, np, serial, RX_ACK_IDLE, istack);
    } else if (call->nSoftAcks) {
        /* Packets are waiting to be acked but no immediate ack is due:
         * arrange a delayed ack.  rx_lastAckDelay is used when the last
         * packet has been seen and the final packet processed was not
         * client-initiated; rx_softAckDelay otherwise. */
        clock_GetTime(&now);
        when = now;
        if (haveLast && !(flags & RX_CLIENT_INITIATED)) {
            clock_Add(&when, &rx_lastAckDelay);
        } else {
            clock_Add(&when, &rx_softAckDelay);
        }
        /* Only (re)post if nothing is pending or the pending event would
         * fire later than the time just computed. */
        if (!call->delayedAckEvent
            || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
            rxevent_Cancel(call->delayedAckEvent, call,
                           RX_CALL_REFCOUNT_DELAY);
            CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
            call->delayedAckEvent =
                rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
        }
    } else if (call->flags & RX_CALL_RECEIVE_DONE) {
        rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
    }

    return np;
}
3466
/* Forward declaration: rxi_ComputeRate is only referenced when
 * ADAPT_WINDOW is defined (see the callers below). */
#ifdef  ADAPT_WINDOW
static void rxi_ComputeRate();
#endif
3470
3471 static void
3472 rxi_UpdatePeerReach(struct rx_connection *conn, struct rx_call *acall)
3473 {
3474     struct rx_peer *peer = conn->peer;
3475
3476     MUTEX_ENTER(&peer->peer_lock);
3477     peer->lastReachTime = clock_Sec();
3478     MUTEX_EXIT(&peer->peer_lock);
3479
3480     MUTEX_ENTER(&conn->conn_data_lock);
3481     if (conn->flags & RX_CONN_ATTACHWAIT) {
3482         int i;
3483
3484         conn->flags &= ~RX_CONN_ATTACHWAIT;
3485         MUTEX_EXIT(&conn->conn_data_lock);
3486
3487         for (i = 0; i < RX_MAXCALLS; i++) {
3488             struct rx_call *call = conn->call[i];
3489             if (call) {
3490                 if (call != acall)
3491                     MUTEX_ENTER(&call->lock);
3492                 /* tnop can be null if newcallp is null */
3493                 TryAttach(call, (osi_socket) - 1, NULL, NULL, 1);
3494                 if (call != acall)
3495                     MUTEX_EXIT(&call->lock);
3496             }
3497         }
3498     } else
3499         MUTEX_EXIT(&conn->conn_data_lock);
3500 }
3501
3502 static const char *
3503 rx_ack_reason(int reason)
3504 {
3505     switch (reason) {
3506     case RX_ACK_REQUESTED:
3507         return "requested";
3508     case RX_ACK_DUPLICATE:
3509         return "duplicate";
3510     case RX_ACK_OUT_OF_SEQUENCE:
3511         return "sequence";
3512     case RX_ACK_EXCEEDS_WINDOW:
3513         return "window";
3514     case RX_ACK_NOSPACE:
3515         return "nospace";
3516     case RX_ACK_PING:
3517         return "ping";
3518     case RX_ACK_PING_RESPONSE:
3519         return "response";
3520     case RX_ACK_DELAY:
3521         return "delay";
3522     case RX_ACK_IDLE:
3523         return "idle";
3524     default:
3525         return "unknown!!";
3526     }
3527 }
3528
3529
3530 /* rxi_ComputePeerNetStats
3531  *
3532  * Called exclusively by rxi_ReceiveAckPacket to compute network link
3533  * estimates (like RTT and throughput) based on ack packets.  Caller
3534  * must ensure that the packet in question is the right one (i.e.
3535  * serial number matches).
3536  */
3537 static void
3538 rxi_ComputePeerNetStats(struct rx_call *call, struct rx_packet *p,
3539                         struct rx_ackPacket *ap, struct rx_packet *np)
3540 {
3541     struct rx_peer *peer = call->conn->peer;
3542
3543     /* Use RTT if not delayed by client. */
3544     if (ap->reason != RX_ACK_DELAY)
3545         rxi_ComputeRoundTripTime(p, &p->timeSent, peer);
3546 #ifdef ADAPT_WINDOW
3547     rxi_ComputeRate(peer, call, p, np, ap->reason);
3548 #endif
3549 }
3550
3551 /* The real smarts of the whole thing.  */
3552 struct rx_packet *
3553 rxi_ReceiveAckPacket(register struct rx_call *call, struct rx_packet *np,
3554                      int istack)
3555 {
3556     struct rx_ackPacket *ap;
3557     int nAcks;
3558     register struct rx_packet *tp;
3559     register struct rx_packet *nxp;     /* Next packet pointer for queue_Scan */
3560     register struct rx_connection *conn = call->conn;
3561     struct rx_peer *peer = conn->peer;
3562     afs_uint32 first;
3563     afs_uint32 serial;
3564     /* because there are CM's that are bogus, sending weird values for this. */
3565     afs_uint32 skew = 0;
3566     int nbytes;
3567     int missing;
3568     int acked;
3569     int nNacked = 0;
3570     int newAckCount = 0;
3571     u_short maxMTU = 0;         /* Set if peer supports AFS 3.4a jumbo datagrams */
3572     int maxDgramPackets = 0;    /* Set if peer supports AFS 3.5 jumbo datagrams */
3573
3574     rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
3575     ap = (struct rx_ackPacket *)rx_DataOf(np);
3576     nbytes = rx_Contiguous(np) - (int)((ap->acks) - (u_char *) ap);
3577     if (nbytes < 0)
3578         return np;              /* truncated ack packet */
3579
3580     /* depends on ack packet struct */
3581     nAcks = MIN((unsigned)nbytes, (unsigned)ap->nAcks);
3582     first = ntohl(ap->firstPacket);
3583     serial = ntohl(ap->serial);
3584     /* temporarily disabled -- needs to degrade over time
3585      * skew = ntohs(ap->maxSkew); */
3586
3587     /* Ignore ack packets received out of order */
3588     if (first < call->tfirst) {
3589         return np;
3590     }
3591
3592     if (np->header.flags & RX_SLOW_START_OK) {
3593         call->flags |= RX_CALL_SLOW_START_OK;
3594     }
3595
3596     if (ap->reason == RX_ACK_PING_RESPONSE)
3597         rxi_UpdatePeerReach(conn, call);
3598
3599 #ifdef RXDEBUG
3600 #ifdef AFS_NT40_ENV
3601     if (rxdebug_active) {
3602         char msg[512];
3603         size_t len;
3604
3605         len = _snprintf(msg, sizeof(msg),
3606                         "tid[%d] RACK: reason %s serial %u previous %u seq %u skew %d first %u acks %u space %u ",
3607                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
3608                          ntohl(ap->serial), ntohl(ap->previousPacket),
3609                          (unsigned int)np->header.seq, (unsigned int)skew,
3610                          ntohl(ap->firstPacket), ap->nAcks, ntohs(ap->bufferSpace) );
3611         if (nAcks) {
3612             int offset;
3613
3614             for (offset = 0; offset < nAcks && len < sizeof(msg); offset++)
3615                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
3616         }
3617         msg[len++]='\n';
3618         msg[len] = '\0';
3619         OutputDebugString(msg);
3620     }
3621 #else /* AFS_NT40_ENV */
3622     if (rx_Log) {
3623         fprintf(rx_Log,
3624                 "RACK: reason %x previous %u seq %u serial %u skew %d first %u",
3625                 ap->reason, ntohl(ap->previousPacket),
3626                 (unsigned int)np->header.seq, (unsigned int)serial,
3627                 (unsigned int)skew, ntohl(ap->firstPacket));
3628         if (nAcks) {
3629             int offset;
3630             for (offset = 0; offset < nAcks; offset++)
3631                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
3632                      rx_Log);
3633         }
3634         putc('\n', rx_Log);
3635     }
3636 #endif /* AFS_NT40_ENV */
3637 #endif
3638
3639     /* Update the outgoing packet skew value to the latest value of
3640      * the peer's incoming packet skew value.  The ack packet, of
3641      * course, could arrive out of order, but that won't affect things
3642      * much */
3643     MUTEX_ENTER(&peer->peer_lock);
3644     peer->outPacketSkew = skew;
3645
3646     /* Check for packets that no longer need to be transmitted, and
3647      * discard them.  This only applies to packets positively
3648      * acknowledged as having been sent to the peer's upper level.
3649      * All other packets must be retained.  So only packets with
3650      * sequence numbers < ap->firstPacket are candidates. */
3651     for (queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3652         if (tp->header.seq >= first)
3653             break;
3654         call->tfirst = tp->header.seq + 1;
3655         if (serial
3656             && (tp->header.serial == serial || tp->firstSerial == serial))
3657             rxi_ComputePeerNetStats(call, tp, ap, np);
3658         if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3659             newAckCount++;
3660         }
3661 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3662         /* XXX Hack. Because we have to release the global rx lock when sending
3663          * packets (osi_NetSend) we drop all acks while we're traversing the tq
3664          * in rxi_Start sending packets out because packets may move to the
3665          * freePacketQueue as result of being here! So we drop these packets until
3666          * we're safely out of the traversing. Really ugly!
3667          * To make it even uglier, if we're using fine grain locking, we can
3668          * set the ack bits in the packets and have rxi_Start remove the packets
3669          * when it's done transmitting.
3670          */
3671         if (call->flags & RX_CALL_TQ_BUSY) {
3672 #ifdef RX_ENABLE_LOCKS
3673             tp->flags |= RX_PKTFLAG_ACKED;
3674             call->flags |= RX_CALL_TQ_SOME_ACKED;
3675 #else /* RX_ENABLE_LOCKS */
3676             break;
3677 #endif /* RX_ENABLE_LOCKS */
3678         } else
3679 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3680         {
3681             queue_Remove(tp);
3682             rxi_FreePacket(tp); /* rxi_FreePacket mustn't wake up anyone, preemptively. */
3683         }
3684     }
3685
3686 #ifdef ADAPT_WINDOW
3687     /* Give rate detector a chance to respond to ping requests */
3688     if (ap->reason == RX_ACK_PING_RESPONSE) {
3689         rxi_ComputeRate(peer, call, 0, np, ap->reason);
3690     }
3691 #endif
3692
3693     /* N.B. we don't turn off any timers here.  They'll go away by themselves, anyway */
3694
3695     /* Now go through explicit acks/nacks and record the results in
3696      * the waiting packets.  These are packets that can't be released
3697      * yet, even with a positive acknowledge.  This positive
3698      * acknowledge only means the packet has been received by the
3699      * peer, not that it will be retained long enough to be sent to
3700      * the peer's upper level.  In addition, reset the transmit timers
3701      * of any missing packets (those packets that must be missing
3702      * because this packet was out of sequence) */
3703
3704     call->nSoftAcked = 0;
3705     for (missing = 0, queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3706         /* Update round trip time if the ack was stimulated on receipt
3707          * of this packet */
3708 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
3709 #ifdef RX_ENABLE_LOCKS
3710         if (tp->header.seq >= first)
3711 #endif /* RX_ENABLE_LOCKS */
3712 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3713             if (serial
3714                 && (tp->header.serial == serial || tp->firstSerial == serial))
3715                 rxi_ComputePeerNetStats(call, tp, ap, np);
3716
3717         /* Set the acknowledge flag per packet based on the
3718          * information in the ack packet. An acknowlegded packet can
3719          * be downgraded when the server has discarded a packet it
3720          * soacked previously, or when an ack packet is received
3721          * out of sequence. */
3722         if (tp->header.seq < first) {
3723             /* Implicit ack information */
3724             if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3725                 newAckCount++;
3726             }
3727             tp->flags |= RX_PKTFLAG_ACKED;
3728         } else if (tp->header.seq < first + nAcks) {
3729             /* Explicit ack information:  set it in the packet appropriately */
3730             if (ap->acks[tp->header.seq - first] == RX_ACK_TYPE_ACK) {
3731                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3732                     newAckCount++;
3733                     tp->flags |= RX_PKTFLAG_ACKED;
3734                 }
3735                 if (missing) {
3736                     nNacked++;
3737                 } else {
3738                     call->nSoftAcked++;
3739                 }
3740             } else /* RX_ACK_TYPE_NACK */ {
3741                 tp->flags &= ~RX_PKTFLAG_ACKED;
3742                 missing = 1;
3743             }
3744         } else {
3745             tp->flags &= ~RX_PKTFLAG_ACKED;
3746             missing = 1;
3747         }
3748
3749         /* If packet isn't yet acked, and it has been transmitted at least
3750          * once, reset retransmit time using latest timeout
3751          * ie, this should readjust the retransmit timer for all outstanding
3752          * packets...  So we don't just retransmit when we should know better*/
3753
3754         if (!(tp->flags & RX_PKTFLAG_ACKED) && !clock_IsZero(&tp->retryTime)) {
3755             tp->retryTime = tp->timeSent;
3756             clock_Add(&tp->retryTime, &peer->timeout);
3757             /* shift by eight because one quarter-sec ~ 256 milliseconds */
3758             clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8);
3759         }
3760     }
3761
3762     /* If the window has been extended by this acknowledge packet,
3763      * then wakeup a sender waiting in alloc for window space, or try
3764      * sending packets now, if he's been sitting on packets due to
3765      * lack of window space */
3766     if (call->tnext < (call->tfirst + call->twind)) {
3767 #ifdef  RX_ENABLE_LOCKS
3768         CV_SIGNAL(&call->cv_twind);
3769 #else
3770         if (call->flags & RX_CALL_WAIT_WINDOW_ALLOC) {
3771             call->flags &= ~RX_CALL_WAIT_WINDOW_ALLOC;
3772             osi_rxWakeup(&call->twind);
3773         }
3774 #endif
3775         if (call->flags & RX_CALL_WAIT_WINDOW_SEND) {
3776             call->flags &= ~RX_CALL_WAIT_WINDOW_SEND;
3777         }
3778     }
3779
3780     /* if the ack packet has a receivelen field hanging off it,
3781      * update our state */
3782     if (np->length >= rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32)) {
3783         afs_uint32 tSize;
3784
3785         /* If the ack packet has a "recommended" size that is less than
3786          * what I am using now, reduce my size to match */
3787         rx_packetread(np, rx_AckDataSize(ap->nAcks) + sizeof(afs_int32),
3788                       (int)sizeof(afs_int32), &tSize);
3789         tSize = (afs_uint32) ntohl(tSize);
3790         peer->natMTU = rxi_AdjustIfMTU(MIN(tSize, peer->ifMTU));
3791
3792         /* Get the maximum packet size to send to this peer */
3793         rx_packetread(np, rx_AckDataSize(ap->nAcks), (int)sizeof(afs_int32),
3794                       &tSize);
3795         tSize = (afs_uint32) ntohl(tSize);
3796         tSize = (afs_uint32) MIN(tSize, rx_MyMaxSendSize);
3797         tSize = rxi_AdjustMaxMTU(peer->natMTU, tSize);
3798
3799         /* sanity check - peer might have restarted with different params.
3800          * If peer says "send less", dammit, send less...  Peer should never
3801          * be unable to accept packets of the size that prior AFS versions would
3802          * send without asking.  */
3803         if (peer->maxMTU != tSize) {
3804             if (peer->maxMTU > tSize) /* possible cong., maxMTU decreased */
3805                 peer->congestSeq++;
3806             peer->maxMTU = tSize;
3807             peer->MTU = MIN(tSize, peer->MTU);
3808             call->MTU = MIN(call->MTU, tSize);
3809         }
3810
3811         if (np->length == rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32)) {
3812             /* AFS 3.4a */
3813             rx_packetread(np,
3814                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3815                           (int)sizeof(afs_int32), &tSize);
3816             tSize = (afs_uint32) ntohl(tSize);  /* peer's receive window, if it's */
3817             if (tSize < call->twind) {  /* smaller than our send */
3818                 call->twind = tSize;    /* window, we must send less... */
3819                 call->ssthresh = MIN(call->twind, call->ssthresh);
3820                 call->conn->twind[call->channel] = call->twind;
3821             }
3822
3823             /* Only send jumbograms to 3.4a fileservers. 3.3a RX gets the
3824              * network MTU confused with the loopback MTU. Calculate the
3825              * maximum MTU here for use in the slow start code below.
3826              */
3827             maxMTU = peer->maxMTU;
3828             /* Did peer restart with older RX version? */
3829             if (peer->maxDgramPackets > 1) {
3830                 peer->maxDgramPackets = 1;
3831             }
3832         } else if (np->length >=
3833                    rx_AckDataSize(ap->nAcks) + 4 * sizeof(afs_int32)) {
3834             /* AFS 3.5 */
3835             rx_packetread(np,
3836                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3837                           sizeof(afs_int32), &tSize);
3838             tSize = (afs_uint32) ntohl(tSize);
3839             /*
3840              * As of AFS 3.5 we set the send window to match the receive window.
3841              */
3842             if (tSize < call->twind) {
3843                 call->twind = tSize;
3844                 call->conn->twind[call->channel] = call->twind;
3845                 call->ssthresh = MIN(call->twind, call->ssthresh);
3846             } else if (tSize > call->twind) {
3847                 call->twind = tSize;
3848                 call->conn->twind[call->channel] = call->twind;
3849             }
3850
3851             /*
3852              * As of AFS 3.5, a jumbogram is more than one fixed size
3853              * packet transmitted in a single UDP datagram. If the remote
3854              * MTU is smaller than our local MTU then never send a datagram
3855              * larger than the natural MTU.
3856              */
3857             rx_packetread(np,
3858                           rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32),
3859                           sizeof(afs_int32), &tSize);
3860             maxDgramPackets = (afs_uint32) ntohl(tSize);
3861             maxDgramPackets = MIN(maxDgramPackets, rxi_nDgramPackets);
3862             maxDgramPackets =
3863                 MIN(maxDgramPackets, (int)(peer->ifDgramPackets));
3864             maxDgramPackets = MIN(maxDgramPackets, tSize);
3865             if (maxDgramPackets > 1) {
3866                 peer->maxDgramPackets = maxDgramPackets;
3867                 call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
3868             } else {
3869                 peer->maxDgramPackets = 1;
3870                 call->MTU = peer->natMTU;
3871             }
3872         } else if (peer->maxDgramPackets > 1) {
3873             /* Restarted with lower version of RX */
3874             peer->maxDgramPackets = 1;
3875         }
3876     } else if (peer->maxDgramPackets > 1
3877                || peer->maxMTU != OLD_MAX_PACKET_SIZE) {
3878         /* Restarted with lower version of RX */
3879         peer->maxMTU = OLD_MAX_PACKET_SIZE;
3880         peer->natMTU = OLD_MAX_PACKET_SIZE;
3881         peer->MTU = OLD_MAX_PACKET_SIZE;
3882         peer->maxDgramPackets = 1;
3883         peer->nDgramPackets = 1;
3884         peer->congestSeq++;
3885         call->MTU = OLD_MAX_PACKET_SIZE;
3886     }
3887
3888     if (nNacked) {
3889         /*
3890          * Calculate how many datagrams were successfully received after
3891          * the first missing packet and adjust the negative ack counter
3892          * accordingly.
3893          */
3894         call->nAcks = 0;
3895         call->nNacks++;
3896         nNacked = (nNacked + call->nDgramPackets - 1) / call->nDgramPackets;
3897         if (call->nNacks < nNacked) {
3898             call->nNacks = nNacked;
3899         }
3900     } else {
3901         call->nAcks += newAckCount;
3902         call->nNacks = 0;
3903     }
3904
3905     if (call->flags & RX_CALL_FAST_RECOVER) {
3906         if (nNacked) {
3907             call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3908         } else {
3909             call->flags &= ~RX_CALL_FAST_RECOVER;
3910             call->cwind = call->nextCwind;
3911             call->nextCwind = 0;
3912             call->nAcks = 0;
3913         }
3914         call->nCwindAcks = 0;
3915     } else if (nNacked && call->nNacks >= (u_short) rx_nackThreshold) {
3916         /* Three negative acks in a row trigger congestion recovery */
3917 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3918         MUTEX_EXIT(&peer->peer_lock);
3919         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
3920             /* someone else is waiting to start recovery */
3921             return np;
3922         }
3923         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
3924         rxi_WaitforTQBusy(call);
3925         MUTEX_ENTER(&peer->peer_lock);
3926 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3927         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
3928         call->flags |= RX_CALL_FAST_RECOVER;
3929         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
3930         call->cwind =
3931             MIN((int)(call->ssthresh + rx_nackThreshold), rx_maxSendWindow);
3932         call->nDgramPackets = MAX(2, (int)call->nDgramPackets) >> 1;
3933         call->nextCwind = call->ssthresh;
3934         call->nAcks = 0;
3935         call->nNacks = 0;
3936         peer->MTU = call->MTU;
3937         peer->cwind = call->nextCwind;
3938         peer->nDgramPackets = call->nDgramPackets;
3939         peer->congestSeq++;
3940         call->congestSeq = peer->congestSeq;
3941         /* Reset the resend times on the packets that were nacked
3942          * so we will retransmit as soon as the window permits*/
3943         for (acked = 0, queue_ScanBackwards(&call->tq, tp, nxp, rx_packet)) {
3944             if (acked) {
3945                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3946                     clock_Zero(&tp->retryTime);
3947                 }
3948             } else if (tp->flags & RX_PKTFLAG_ACKED) {
3949                 acked = 1;
3950             }
3951         }
3952     } else {
3953         /* If cwind is smaller than ssthresh, then increase
3954          * the window one packet for each ack we receive (exponential
3955          * growth).
3956          * If cwind is greater than or equal to ssthresh then increase
3957          * the congestion window by one packet for each cwind acks we
3958          * receive (linear growth).  */
3959         if (call->cwind < call->ssthresh) {
3960             call->cwind =
3961                 MIN((int)call->ssthresh, (int)(call->cwind + newAckCount));
3962             call->nCwindAcks = 0;
3963         } else {
3964             call->nCwindAcks += newAckCount;
3965             if (call->nCwindAcks >= call->cwind) {
3966                 call->nCwindAcks = 0;
3967                 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3968             }
3969         }
3970         /*
3971          * If we have received several acknowledgements in a row then
3972          * it is time to increase the size of our datagrams
3973          */
3974         if ((int)call->nAcks > rx_nDgramThreshold) {
3975             if (peer->maxDgramPackets > 1) {
3976                 if (call->nDgramPackets < peer->maxDgramPackets) {
3977                     call->nDgramPackets++;
3978                 }
3979                 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
3980             } else if (call->MTU < peer->maxMTU) {
3981                 call->MTU += peer->natMTU;
3982                 call->MTU = MIN(call->MTU, peer->maxMTU);
3983             }
3984             call->nAcks = 0;
3985         }
3986     }
3987
3988     MUTEX_EXIT(&peer->peer_lock);       /* rxi_Start will lock peer. */
3989
3990     /* Servers need to hold the call until all response packets have
3991      * been acknowledged. Soft acks are good enough since clients
3992      * are not allowed to clear their receive queues. */
3993     if (call->state == RX_STATE_HOLD
3994         && call->tfirst + call->nSoftAcked >= call->tnext) {
3995         call->state = RX_STATE_DALLY;
3996         rxi_ClearTransmitQueue(call, 0);
3997         rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
3998     } else if (!queue_IsEmpty(&call->tq)) {
3999         rxi_Start(0, call, 0, istack);
4000     }
4001     return np;
4002 }
4003
4004 /* Received a response to a challenge packet */
4005 struct rx_packet *
4006 rxi_ReceiveResponsePacket(register struct rx_connection *conn,
4007                           register struct rx_packet *np, int istack)
4008 {
4009     int error;
4010
4011     /* Ignore the packet if we're the client */
4012     if (conn->type == RX_CLIENT_CONNECTION)
4013         return np;
4014
4015     /* If already authenticated, ignore the packet (it's probably a retry) */
4016     if (RXS_CheckAuthentication(conn->securityObject, conn) == 0)
4017         return np;
4018
4019     /* Otherwise, have the security object evaluate the response packet */
4020     error = RXS_CheckResponse(conn->securityObject, conn, np);
4021     if (error) {
4022         /* If the response is invalid, reset the connection, sending
4023          * an abort to the peer */
4024 #ifndef KERNEL
4025         rxi_Delay(1);
4026 #endif
4027         rxi_ConnectionError(conn, error);
4028         MUTEX_ENTER(&conn->conn_data_lock);
4029         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4030         MUTEX_EXIT(&conn->conn_data_lock);
4031         return np;
4032     } else {
4033         /* If the response is valid, any calls waiting to attach
4034          * servers can now do so */
4035         int i;
4036
4037         for (i = 0; i < RX_MAXCALLS; i++) {
4038             struct rx_call *call = conn->call[i];
4039             if (call) {
4040                 MUTEX_ENTER(&call->lock);
4041                 if (call->state == RX_STATE_PRECALL)
4042                     rxi_AttachServerProc(call, (osi_socket) - 1, NULL, NULL);
4043                 /* tnop can be null if newcallp is null */
4044                 MUTEX_EXIT(&call->lock);
4045             }
4046         }
4047
4048         /* Update the peer reachability information, just in case
4049          * some calls went into attach-wait while we were waiting
4050          * for authentication..
4051          */
4052         rxi_UpdatePeerReach(conn, NULL);
4053     }
4054     return np;
4055 }
4056
4057 /* A client has received an authentication challenge: the security
4058  * object is asked to cough up a respectable response packet to send
4059  * back to the server.  The server is responsible for retrying the
4060  * challenge if it fails to get a response. */
4061
4062 struct rx_packet *
4063 rxi_ReceiveChallengePacket(register struct rx_connection *conn,
4064                            register struct rx_packet *np, int istack)
4065 {
4066     int error;
4067
4068     /* Ignore the challenge if we're the server */
4069     if (conn->type == RX_SERVER_CONNECTION)
4070         return np;
4071
4072     /* Ignore the challenge if the connection is otherwise idle; someone's
4073      * trying to use us as an oracle. */
4074     if (!rxi_HasActiveCalls(conn))
4075         return np;
4076
4077     /* Send the security object the challenge packet.  It is expected to fill
4078      * in the response. */
4079     error = RXS_GetResponse(conn->securityObject, conn, np);
4080
4081     /* If the security object is unable to return a valid response, reset the
4082      * connection and send an abort to the peer.  Otherwise send the response
4083      * packet to the peer connection. */
4084     if (error) {
4085         rxi_ConnectionError(conn, error);
4086         MUTEX_ENTER(&conn->conn_data_lock);
4087         np = rxi_SendConnectionAbort(conn, np, istack, 0);
4088         MUTEX_EXIT(&conn->conn_data_lock);
4089     } else {
4090         np = rxi_SendSpecial((struct rx_call *)0, conn, np,
4091                              RX_PACKET_TYPE_RESPONSE, NULL, -1, istack);
4092     }
4093     return np;
4094 }
4095
4096
4097 /* Find an available server process to service the current request in
4098  * the given call structure.  If one isn't available, queue up this
4099  * call so it eventually gets one */
4100 void
4101 rxi_AttachServerProc(register struct rx_call *call,
4102                      register osi_socket socket, register int *tnop,
4103                      register struct rx_call **newcallp)
4104 {
4105     register struct rx_serverQueueEntry *sq;
4106     register struct rx_service *service = call->conn->service;
4107     register int haveQuota = 0;
4108
4109     /* May already be attached */
4110     if (call->state == RX_STATE_ACTIVE)
4111         return;
4112
4113     MUTEX_ENTER(&rx_serverPool_lock);
4114
4115     haveQuota = QuotaOK(service);
4116     if ((!haveQuota) || queue_IsEmpty(&rx_idleServerQueue)) {
4117         /* If there are no processes available to service this call,
4118          * put the call on the incoming call queue (unless it's
4119          * already on the queue).
4120          */
4121 #ifdef RX_ENABLE_LOCKS
4122         if (haveQuota)
4123             ReturnToServerPool(service);
4124 #endif /* RX_ENABLE_LOCKS */
4125
4126         if (!(call->flags & RX_CALL_WAIT_PROC)) {
4127             call->flags |= RX_CALL_WAIT_PROC;
4128             MUTEX_ENTER(&rx_stats_mutex);
4129             rx_nWaiting++;
4130             rx_nWaited++;
4131             MUTEX_EXIT(&rx_stats_mutex);
4132             rxi_calltrace(RX_CALL_ARRIVAL, call);
4133             SET_CALL_QUEUE_LOCK(call, &rx_serverPool_lock);
4134             queue_Append(&rx_incomingCallQueue, call);
4135         }
4136     } else {
4137         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
4138
4139         /* If hot threads are enabled, and both newcallp and sq->socketp
4140          * are non-null, then this thread will process the call, and the
4141          * idle server thread will start listening on this threads socket.
4142          */
4143         queue_Remove(sq);
4144         if (rx_enable_hot_thread && newcallp && sq->socketp) {
4145             *newcallp = call;
4146             *tnop = sq->tno;
4147             *sq->socketp = socket;
4148             clock_GetTime(&call->startTime);
4149             CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
4150         } else {
4151             sq->newcall = call;
4152         }
4153         if (call->flags & RX_CALL_WAIT_PROC) {
4154             /* Conservative:  I don't think this should happen */
4155             call->flags &= ~RX_CALL_WAIT_PROC;
4156             if (queue_IsOnQueue(call)) {
4157                 queue_Remove(call);
4158                 MUTEX_ENTER(&rx_stats_mutex);
4159                 rx_nWaiting--;
4160                 MUTEX_EXIT(&rx_stats_mutex);
4161             }
4162         }
4163         call->state = RX_STATE_ACTIVE;
4164         call->mode = RX_MODE_RECEIVING;
4165 #ifdef RX_KERNEL_TRACE
4166         {
4167             int glockOwner = ISAFS_GLOCK();
4168             if (!glockOwner)
4169                 AFS_GLOCK();
4170             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
4171                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
4172                        call);
4173             if (!glockOwner)
4174                 AFS_GUNLOCK();
4175         }
4176 #endif
4177         if (call->flags & RX_CALL_CLEARED) {
4178             /* send an ack now to start the packet flow up again */
4179             call->flags &= ~RX_CALL_CLEARED;
4180             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4181         }
4182 #ifdef  RX_ENABLE_LOCKS
4183         CV_SIGNAL(&sq->cv);
4184 #else
4185         service->nRequestsRunning++;
4186         if (service->nRequestsRunning <= service->minProcs)
4187             rxi_minDeficit--;
4188         rxi_availProcs--;
4189         osi_rxWakeup(sq);
4190 #endif
4191     }
4192     MUTEX_EXIT(&rx_serverPool_lock);
4193 }
4194
4195 /* Delay the sending of an acknowledge event for a short while, while
4196  * a new call is being prepared (in the case of a client) or a reply
4197  * is being prepared (in the case of a server).  Rather than sending
4198  * an ack packet, an ACKALL packet is sent. */
4199 void
4200 rxi_AckAll(struct rxevent *event, register struct rx_call *call, char *dummy)
4201 {
4202 #ifdef RX_ENABLE_LOCKS
4203     if (event) {
4204         MUTEX_ENTER(&call->lock);
4205         call->delayedAckEvent = NULL;
4206         CALL_RELE(call, RX_CALL_REFCOUNT_ACKALL);
4207     }
4208     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4209                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4210     if (event)
4211         MUTEX_EXIT(&call->lock);
4212 #else /* RX_ENABLE_LOCKS */
4213     if (event)
4214         call->delayedAckEvent = NULL;
4215     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4216                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4217 #endif /* RX_ENABLE_LOCKS */
4218 }
4219
4220 void
4221 rxi_SendDelayedAck(struct rxevent *event, register struct rx_call *call,
4222                    char *dummy)
4223 {
4224 #ifdef RX_ENABLE_LOCKS
4225     if (event) {
4226         MUTEX_ENTER(&call->lock);
4227         if (event == call->delayedAckEvent)
4228             call->delayedAckEvent = NULL;
4229         CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
4230     }
4231     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4232     if (event)
4233         MUTEX_EXIT(&call->lock);
4234 #else /* RX_ENABLE_LOCKS */
4235     if (event)
4236         call->delayedAckEvent = NULL;
4237     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4238 #endif /* RX_ENABLE_LOCKS */
4239 }
4240
4241
#ifdef RX_ENABLE_LOCKS
/*
 * Flag every packet on the call's transmit queue as acknowledged instead
 * of freeing it; rxi_Start is left to clear the queue out later.  The
 * resend event is cancelled, all sent data is treated as acknowledged
 * (tfirst = tnext), fast recovery is ended if it was in progress, and a
 * waiter on cv_twind is signalled.
 */
static void
rxi_SetAcksInTransmitQueue(register struct rx_call *call)
{
    register struct rx_packet *pkt, *next;
    int marked = 0;

    for (queue_Scan(&call->tq, pkt, next, rx_packet)) {
        marked = 1;
        pkt->flags |= RX_PKTFLAG_ACKED;
    }
    if (marked) {
        /* Tell whoever owns the queue that it needs clearing */
        call->flags |= RX_CALL_TQ_CLEARME;
        call->flags |= RX_CALL_TQ_SOME_ACKED;
    }

    rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
    call->nSoftAcked = 0;
    call->tfirst = call->tnext;

    if (call->flags & RX_CALL_FAST_RECOVER) {
        /* Leave fast recovery and adopt the window saved for afterwards */
        call->cwind = call->nextCwind;
        call->nextCwind = 0;
        call->flags &= ~RX_CALL_FAST_RECOVER;
    }

    CV_SIGNAL(&call->cv_twind);
}
#endif /* RX_ENABLE_LOCKS */
4274
4275 /* Clear out the transmit queue for the current call (all packets have
4276  * been received by peer) */
4277 void
4278 rxi_ClearTransmitQueue(register struct rx_call *call, register int force)
4279 {
4280 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4281     register struct rx_packet *p, *tp;
4282
4283     if (!force && (call->flags & RX_CALL_TQ_BUSY)) {
4284         int someAcked = 0;
4285         for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4286             p->flags |= RX_PKTFLAG_ACKED;
4287             someAcked = 1;
4288         }
4289         if (someAcked) {
4290             call->flags |= RX_CALL_TQ_CLEARME;
4291             call->flags |= RX_CALL_TQ_SOME_ACKED;
4292         }
4293     } else {
4294 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4295         rxi_FreePackets(0, &call->tq);
4296 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4297         call->flags &= ~RX_CALL_TQ_CLEARME;
4298     }
4299 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4300
4301     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4302     call->tfirst = call->tnext; /* implicitly acknowledge all data already sent */
4303     call->nSoftAcked = 0;
4304
4305     if (call->flags & RX_CALL_FAST_RECOVER) {
4306         call->flags &= ~RX_CALL_FAST_RECOVER;
4307         call->cwind = call->nextCwind;
4308     }
4309 #ifdef  RX_ENABLE_LOCKS
4310     CV_SIGNAL(&call->cv_twind);
4311 #else
4312     osi_rxWakeup(&call->twind);
4313 #endif
4314 }
4315
4316 void
4317 rxi_ClearReceiveQueue(register struct rx_call *call)
4318 {
4319     if (queue_IsNotEmpty(&call->rq)) {
4320         rx_packetReclaims += rxi_FreePackets(0, &call->rq);
4321         call->flags &= ~(RX_CALL_RECEIVE_DONE | RX_CALL_HAVE_LAST);
4322     }
4323     if (call->state == RX_STATE_PRECALL) {
4324         call->flags |= RX_CALL_CLEARED;
4325     }
4326 }
4327
4328 /* Send an abort packet for the specified call */
4329 struct rx_packet *
4330 rxi_SendCallAbort(register struct rx_call *call, struct rx_packet *packet,
4331                   int istack, int force)
4332 {
4333     afs_int32 error;
4334     struct clock when, now;
4335
4336     if (!call->error)
4337         return packet;
4338
4339     /* Clients should never delay abort messages */
4340     if (rx_IsClientConn(call->conn))
4341         force = 1;
4342
4343     if (call->abortCode != call->error) {
4344         call->abortCode = call->error;
4345         call->abortCount = 0;
4346     }
4347
4348     if (force || rxi_callAbortThreshhold == 0
4349         || call->abortCount < rxi_callAbortThreshhold) {
4350         if (call->delayedAbortEvent) {
4351             rxevent_Cancel(call->delayedAbortEvent, call,
4352                            RX_CALL_REFCOUNT_ABORT);
4353         }
4354         error = htonl(call->error);
4355         call->abortCount++;
4356         packet =
4357             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
4358                             (char *)&error, sizeof(error), istack);
4359     } else if (!call->delayedAbortEvent) {
4360         clock_GetTime(&now);
4361         when = now;
4362         clock_Addmsec(&when, rxi_callAbortDelay);
4363         CALL_HOLD(call, RX_CALL_REFCOUNT_ABORT);
4364         call->delayedAbortEvent =
4365             rxevent_PostNow(&when, &now, rxi_SendDelayedCallAbort, call, 0);
4366     }
4367     return packet;
4368 }
4369
4370 /* Send an abort packet for the specified connection.  Packet is an
4371  * optional pointer to a packet that can be used to send the abort.
4372  * Once the number of abort messages reaches the threshhold, an
4373  * event is scheduled to send the abort. Setting the force flag
4374  * overrides sending delayed abort messages.
4375  *
4376  * NOTE: Called with conn_data_lock held. conn_data_lock is dropped
4377  *       to send the abort packet.
4378  */
4379 struct rx_packet *
4380 rxi_SendConnectionAbort(register struct rx_connection *conn,
4381                         struct rx_packet *packet, int istack, int force)
4382 {
4383     afs_int32 error;
4384     struct clock when, now;
4385
4386     if (!conn->error)
4387         return packet;
4388
4389     /* Clients should never delay abort messages */
4390     if (rx_IsClientConn(conn))
4391         force = 1;
4392
4393     if (force || rxi_connAbortThreshhold == 0
4394         || conn->abortCount < rxi_connAbortThreshhold) {
4395         if (conn->delayedAbortEvent) {
4396             rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
4397         }
4398         error = htonl(conn->error);
4399         conn->abortCount++;
4400         MUTEX_EXIT(&conn->conn_data_lock);
4401         packet =
4402             rxi_SendSpecial((struct rx_call *)0, conn, packet,
4403                             RX_PACKET_TYPE_ABORT, (char *)&error,
4404                             sizeof(error), istack);
4405         MUTEX_ENTER(&conn->conn_data_lock);
4406     } else if (!conn->delayedAbortEvent) {
4407         clock_GetTime(&now);
4408         when = now;
4409         clock_Addmsec(&when, rxi_connAbortDelay);
4410         conn->delayedAbortEvent =
4411             rxevent_PostNow(&when, &now, rxi_SendDelayedConnAbort, conn, 0);
4412     }
4413     return packet;
4414 }
4415
4416 /* Associate an error all of the calls owned by a connection.  Called
4417  * with error non-zero.  This is only for really fatal things, like
4418  * bad authentication responses.  The connection itself is set in
4419  * error at this point, so that future packets received will be
4420  * rejected. */
4421 void
4422 rxi_ConnectionError(register struct rx_connection *conn,
4423                     register afs_int32 error)
4424 {
4425     if (error) {
4426         register int i;
4427
4428         dpf(("rxi_ConnectionError conn %x error %d", conn, error));
4429
4430         MUTEX_ENTER(&conn->conn_data_lock);
4431         if (conn->challengeEvent)
4432             rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
4433         if (conn->checkReachEvent) {
4434             rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
4435             conn->checkReachEvent = 0;
4436             conn->flags &= ~RX_CONN_ATTACHWAIT;
4437             conn->refCount--;
4438         }
4439         MUTEX_EXIT(&conn->conn_data_lock);
4440         for (i = 0; i < RX_MAXCALLS; i++) {
4441             struct rx_call *call = conn->call[i];
4442             if (call) {
4443                 MUTEX_ENTER(&call->lock);
4444                 rxi_CallError(call, error);
4445                 MUTEX_EXIT(&call->lock);
4446             }
4447         }
4448         conn->error = error;
4449         rx_MutexIncrement(rx_stats.fatalErrors, rx_stats_mutex);
4450     }
4451 }
4452
4453 void
4454 rxi_CallError(register struct rx_call *call, afs_int32 error)
4455 {
4456     dpf(("rxi_CallError call %x error %d call->error %d", call, error, call->error));
4457     if (call->error)
4458         error = call->error;
4459
4460 #ifdef RX_GLOBAL_RXLOCK_KERNEL
4461     if (!((call->flags & RX_CALL_TQ_BUSY) || (call->tqWaiters > 0))) {
4462         rxi_ResetCall(call, 0);
4463     }
4464 #else
4465     rxi_ResetCall(call, 0);
4466 #endif
4467     call->error = error;
4468     call->mode = RX_MODE_ERROR;
4469 }
4470
4471 /* Reset various fields in a call structure, and wakeup waiting
4472  * processes.  Some fields aren't changed: state & mode are not
4473  * touched (these must be set by the caller), and bufptr, nLeft, and
4474  * nFree are not reset, since these fields are manipulated by
4475  * unprotected macros, and may only be reset by non-interrupting code.
4476  */
4477 #ifdef ADAPT_WINDOW
4478 /* this code requires that call->conn be set properly as a pre-condition. */
4479 #endif /* ADAPT_WINDOW */
4480
4481 void
4482 rxi_ResetCall(register struct rx_call *call, register int newcall)
4483 {
4484     register int flags;
4485     register struct rx_peer *peer;
4486     struct rx_packet *packet;
4487
4488     dpf(("rxi_ResetCall(call %x, newcall %d)\n", call, newcall));
4489
4490     /* Notify anyone who is waiting for asynchronous packet arrival */
4491     if (call->arrivalProc) {
4492         (*call->arrivalProc) (call, call->arrivalProcHandle,
4493                               call->arrivalProcArg);
4494         call->arrivalProc = (void (*)())0;
4495     }
4496
4497     if (call->delayedAbortEvent) {
4498         rxevent_Cancel(call->delayedAbortEvent, call, RX_CALL_REFCOUNT_ABORT);
4499         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
4500         if (packet) {
4501             rxi_SendCallAbort(call, packet, 0, 1);
4502             rxi_FreePacket(packet);
4503         }
4504     }
4505
4506     /*
4507      * Update the peer with the congestion information in this call
4508      * so other calls on this connection can pick up where this call
4509      * left off. If the congestion sequence numbers don't match then
4510      * another call experienced a retransmission.
4511      */
4512     peer = call->conn->peer;
4513     MUTEX_ENTER(&peer->peer_lock);
4514     if (!newcall) {
4515         if (call->congestSeq == peer->congestSeq) {
4516             peer->cwind = MAX(peer->cwind, call->cwind);
4517             peer->MTU = MAX(peer->MTU, call->MTU);
4518             peer->nDgramPackets =
4519                 MAX(peer->nDgramPackets, call->nDgramPackets);
4520         }
4521     } else {
4522         call->abortCode = 0;
4523         call->abortCount = 0;
4524     }
4525     if (peer->maxDgramPackets > 1) {
4526         call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4527     } else {
4528         call->MTU = peer->MTU;
4529     }
4530     call->cwind = MIN((int)peer->cwind, (int)peer->nDgramPackets);
4531     call->ssthresh = rx_maxSendWindow;
4532     call->nDgramPackets = peer->nDgramPackets;
4533     call->congestSeq = peer->congestSeq;
4534     MUTEX_EXIT(&peer->peer_lock);
4535
4536     flags = call->flags;
4537     rxi_ClearReceiveQueue(call);
4538 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4539     if (flags & RX_CALL_TQ_BUSY) {
4540         call->flags = RX_CALL_TQ_CLEARME | RX_CALL_TQ_BUSY;
4541         call->flags |= (flags & RX_CALL_TQ_WAIT);
4542     } else
4543 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4544     {
4545         rxi_ClearTransmitQueue(call, 0);
4546         queue_Init(&call->tq);
4547         if (call->tqWaiters || (flags & RX_CALL_TQ_WAIT)) {
4548             dpf(("rcall %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
4549         }
4550         call->flags = 0;
4551         while (call->tqWaiters) {
4552 #ifdef RX_ENABLE_LOCKS
4553             CV_BROADCAST(&call->cv_tq);
4554 #else /* RX_ENABLE_LOCKS */
4555             osi_rxWakeup(&call->tq);
4556 #endif /* RX_ENABLE_LOCKS */
4557             call->tqWaiters--;
4558         }
4559     }
4560     queue_Init(&call->rq);
4561     call->error = 0;
4562     call->twind = call->conn->twind[call->channel];
4563     call->rwind = call->conn->rwind[call->channel];
4564     call->nSoftAcked = 0;
4565     call->nextCwind = 0;
4566     call->nAcks = 0;
4567     call->nNacks = 0;
4568     call->nCwindAcks = 0;
4569     call->nSoftAcks = 0;
4570     call->nHardAcks = 0;
4571
4572     call->tfirst = call->rnext = call->tnext = 1;
4573     call->rprev = 0;
4574     call->lastAcked = 0;
4575     call->localStatus = call->remoteStatus = 0;
4576
4577     if (flags & RX_CALL_READER_WAIT) {
4578 #ifdef  RX_ENABLE_LOCKS
4579         CV_BROADCAST(&call->cv_rq);
4580 #else
4581         osi_rxWakeup(&call->rq);
4582 #endif
4583     }
4584     if (flags & RX_CALL_WAIT_PACKETS) {
4585         MUTEX_ENTER(&rx_freePktQ_lock);
4586         rxi_PacketsUnWait();    /* XXX */
4587         MUTEX_EXIT(&rx_freePktQ_lock);
4588     }
4589 #ifdef  RX_ENABLE_LOCKS
4590     CV_SIGNAL(&call->cv_twind);
4591 #else
4592     if (flags & RX_CALL_WAIT_WINDOW_ALLOC)
4593         osi_rxWakeup(&call->twind);
4594 #endif
4595
4596 #ifdef RX_ENABLE_LOCKS
4597     /* The following ensures that we don't mess with any queue while some
4598      * other thread might also be doing so. The call_queue_lock field is
4599      * is only modified under the call lock. If the call is in the process
4600      * of being removed from a queue, the call is not locked until the
4601      * the queue lock is dropped and only then is the call_queue_lock field
4602      * zero'd out. So it's safe to lock the queue if call_queue_lock is set.
4603      * Note that any other routine which removes a call from a queue has to
4604      * obtain the queue lock before examing the queue and removing the call.
4605      */
4606     if (call->call_queue_lock) {
4607         MUTEX_ENTER(call->call_queue_lock);
4608         if (queue_IsOnQueue(call)) {
4609             queue_Remove(call);
4610             if (flags & RX_CALL_WAIT_PROC) {
4611                 MUTEX_ENTER(&rx_stats_mutex);
4612                 rx_nWaiting--;
4613                 MUTEX_EXIT(&rx_stats_mutex);
4614             }
4615         }
4616         MUTEX_EXIT(call->call_queue_lock);
4617         CLEAR_CALL_QUEUE_LOCK(call);
4618     }
4619 #else /* RX_ENABLE_LOCKS */
4620     if (queue_IsOnQueue(call)) {
4621         queue_Remove(call);
4622         if (flags & RX_CALL_WAIT_PROC)
4623             rx_nWaiting--;
4624     }
4625 #endif /* RX_ENABLE_LOCKS */
4626
4627     rxi_KeepAliveOff(call);
4628     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
4629 }
4630
4631 /* Send an acknowledge for the indicated packet (seq,serial) of the
4632  * indicated call, for the indicated reason (reason).  This
4633  * acknowledge will specifically acknowledge receiving the packet, and
4634  * will also specify which other packets for this call have been
4635  * received.  This routine returns the packet that was used to the
4636  * caller.  The caller is responsible for freeing it or re-using it.
4637  * This acknowledgement also returns the highest sequence number
4638  * actually read out by the higher level to the sender; the sender
4639  * promises to keep around packets that have not been read by the
4640  * higher level yet (unless, of course, the sender decides to abort
4641  * the call altogether).  Any of p, seq, serial, pflags, or reason may
4642  * be set to zero without ill effect.  That is, if they are zero, they
4643  * will not convey any information.
4644  * NOW there is a trailer field, after the ack where it will safely be
4645  * ignored by mundanes, which indicates the maximum size packet this
4646  * host can swallow.  */
4647 /*
4648     register struct rx_packet *optionalPacket;  use to send ack (or null)
4649     int seq;                     Sequence number of the packet we are acking
4650     int serial;                  Serial number of the packet
4651     int pflags;                  Flags field from packet header
4652     int reason;                  Reason an acknowledge was prompted
4653 */
4654
4655 struct rx_packet *
4656 rxi_SendAck(register struct rx_call *call,
4657             register struct rx_packet *optionalPacket, int serial, int reason,
4658             int istack)
4659 {
4660     struct rx_ackPacket *ap;
4661     register struct rx_packet *rqp;
4662     register struct rx_packet *nxp;     /* For queue_Scan */
4663     register struct rx_packet *p;
4664     u_char offset;
4665     afs_int32 templ;
4666 #ifdef RX_ENABLE_TSFPQ
4667     struct rx_ts_info_t * rx_ts_info;
4668 #endif
4669
4670     /*
4671      * Open the receive window once a thread starts reading packets
4672      */
4673     if (call->rnext > 1) {
4674         call->conn->rwind[call->channel] = call->rwind = rx_maxReceiveWindow;
4675     }
4676
4677     call->nHardAcks = 0;
4678     call->nSoftAcks = 0;
4679     if (call->rnext > call->lastAcked)
4680         call->lastAcked = call->rnext;
4681     p = optionalPacket;
4682
4683     if (p) {
4684         rx_computelen(p, p->length);    /* reset length, you never know */
4685     } /* where that's been...         */
4686 #ifdef RX_ENABLE_TSFPQ
4687     else {
4688         RX_TS_INFO_GET(rx_ts_info);
4689         if ((p = rx_ts_info->local_special_packet)) {
4690             rx_computelen(p, p->length);
4691         } else if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4692             rx_ts_info->local_special_packet = p;
4693         } else { /* We won't send the ack, but don't panic. */
4694             return optionalPacket;
4695         }
4696     }
4697 #else
4698     else if (!(p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4699         /* We won't send the ack, but don't panic. */
4700         return optionalPacket;
4701     }
4702 #endif
4703
4704     templ =
4705         rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) -
4706         rx_GetDataSize(p);
4707     if (templ > 0) {
4708         if (rxi_AllocDataBuf(p, templ, RX_PACKET_CLASS_SPECIAL) > 0) {
4709 #ifndef RX_ENABLE_TSFPQ
4710             if (!optionalPacket)
4711                 rxi_FreePacket(p);
4712 #endif
4713             return optionalPacket;
4714         }
4715         templ = rx_AckDataSize(call->rwind) + 2 * sizeof(afs_int32);
4716         if (rx_Contiguous(p) < templ) {
4717 #ifndef RX_ENABLE_TSFPQ
4718             if (!optionalPacket)
4719                 rxi_FreePacket(p);
4720 #endif
4721             return optionalPacket;
4722         }
4723     }
4724
4725
4726     /* MTUXXX failing to send an ack is very serious.  We should */
4727     /* try as hard as possible to send even a partial ack; it's */
4728     /* better than nothing. */
4729     ap = (struct rx_ackPacket *)rx_DataOf(p);
4730     ap->bufferSpace = htonl(0); /* Something should go here, sometime */
4731     ap->reason = reason;
4732
4733     /* The skew computation used to be bogus, I think it's better now. */
4734     /* We should start paying attention to skew.    XXX  */
4735     ap->serial = htonl(serial);
4736     ap->maxSkew = 0;            /* used to be peer->inPacketSkew */
4737
4738     ap->firstPacket = htonl(call->rnext);       /* First packet not yet forwarded to reader */
4739     ap->previousPacket = htonl(call->rprev);    /* Previous packet received */
4740
4741     /* No fear of running out of ack packet here because there can only be at most
4742      * one window full of unacknowledged packets.  The window size must be constrained
4743      * to be less than the maximum ack size, of course.  Also, an ack should always
4744      * fit into a single packet -- it should not ever be fragmented.  */
4745     for (offset = 0, queue_Scan(&call->rq, rqp, nxp, rx_packet)) {
4746         if (!rqp || !call->rq.next
4747             || (rqp->header.seq > (call->rnext + call->rwind))) {
4748 #ifndef RX_ENABLE_TSFPQ
4749             if (!optionalPacket)
4750                 rxi_FreePacket(p);
4751 #endif
4752             rxi_CallError(call, RX_CALL_DEAD);
4753             return optionalPacket;
4754         }
4755
4756         while (rqp->header.seq > call->rnext + offset)
4757             ap->acks[offset++] = RX_ACK_TYPE_NACK;
4758         ap->acks[offset++] = RX_ACK_TYPE_ACK;
4759
4760         if ((offset > (u_char) rx_maxReceiveWindow) || (offset > call->rwind)) {
4761 #ifndef RX_ENABLE_TSFPQ
4762             if (!optionalPacket)
4763                 rxi_FreePacket(p);
4764 #endif
4765             rxi_CallError(call, RX_CALL_DEAD);
4766             return optionalPacket;
4767         }
4768     }
4769
4770     ap->nAcks = offset;
4771     p->length = rx_AckDataSize(offset) + 4 * sizeof(afs_int32);
4772
4773     /* these are new for AFS 3.3 */
4774     templ = rxi_AdjustMaxMTU(call->conn->peer->ifMTU, rx_maxReceiveSize);
4775     templ = htonl(templ);
4776     rx_packetwrite(p, rx_AckDataSize(offset), sizeof(afs_int32), &templ);
4777     templ = htonl(call->conn->peer->ifMTU);
4778     rx_packetwrite(p, rx_AckDataSize(offset) + sizeof(afs_int32),
4779                    sizeof(afs_int32), &templ);
4780
4781     /* new for AFS 3.4 */
4782     templ = htonl(call->rwind);
4783     rx_packetwrite(p, rx_AckDataSize(offset) + 2 * sizeof(afs_int32),
4784                    sizeof(afs_int32), &templ);
4785
4786     /* new for AFS 3.5 */
4787     templ = htonl(call->conn->peer->ifDgramPackets);
4788     rx_packetwrite(p, rx_AckDataSize(offset) + 3 * sizeof(afs_int32),
4789                    sizeof(afs_int32), &templ);
4790
4791     p->header.serviceId = call->conn->serviceId;
4792     p->header.cid = (call->conn->cid | call->channel);
4793     p->header.callNumber = *call->callNumber;
4794     p->header.seq = 0;
4795     p->header.securityIndex = call->conn->securityIndex;
4796     p->header.epoch = call->conn->epoch;
4797     p->header.type = RX_PACKET_TYPE_ACK;
4798     p->header.flags = RX_SLOW_START_OK;
4799     if (reason == RX_ACK_PING) {
4800         p->header.flags |= RX_REQUEST_ACK;
4801 #ifdef ADAPT_WINDOW
4802         clock_GetTime(&call->pingRequestTime);
4803 #endif
4804     }
4805     if (call->conn->type == RX_CLIENT_CONNECTION)
4806         p->header.flags |= RX_CLIENT_INITIATED;
4807
4808 #ifdef RXDEBUG
4809 #ifdef AFS_NT40_ENV
4810     if (rxdebug_active) {
4811         char msg[512];
4812         size_t len;
4813
4814         len = _snprintf(msg, sizeof(msg),
4815                         "tid[%d] SACK: reason %s serial %u previous %u seq %u first %u acks %u space %u ",
4816                          GetCurrentThreadId(), rx_ack_reason(ap->reason),
4817                          ntohl(ap->serial), ntohl(ap->previousPacket),
4818                          (unsigned int)p->header.seq, ntohl(ap->firstPacket),
4819                          ap->nAcks, ntohs(ap->bufferSpace) );
4820         if (ap->nAcks) {
4821             int offset;
4822
4823             for (offset = 0; offset < ap->nAcks && len < sizeof(msg); offset++)
4824                 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
4825         }
4826         msg[len++]='\n';
4827         msg[len] = '\0';
4828         OutputDebugString(msg);
4829     }
4830 #else /* AFS_NT40_ENV */
4831     if (rx_Log) {
4832         fprintf(rx_Log, "SACK: reason %x previous %u seq %u first %u ",
4833                 ap->reason, ntohl(ap->previousPacket),
4834                 (unsigned int)p->header.seq, ntohl(ap->firstPacket));
4835         if (ap->nAcks) {
4836             for (offset = 0; offset < ap->nAcks; offset++)
4837                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
4838                      rx_Log);
4839         }
4840         putc('\n', rx_Log);
4841     }
4842 #endif /* AFS_NT40_ENV */
4843 #endif
4844     {
4845         register int i, nbytes = p->length;
4846
4847         for (i = 1; i < p->niovecs; i++) {      /* vec 0 is ALWAYS header */
4848             if (nbytes <= p->wirevec[i].iov_len) {
4849                 register int savelen, saven;
4850
4851                 savelen = p->wirevec[i].iov_len;
4852                 saven = p->niovecs;
4853                 p->wirevec[i].iov_len = nbytes;
4854                 p->niovecs = i + 1;
4855                 rxi_Send(call, p, istack);
4856                 p->wirevec[i].iov_len = savelen;
4857                 p->niovecs = saven;
4858                 break;
4859             } else
4860                 nbytes -= p->wirevec[i].iov_len;
4861         }
4862     }
4863     rx_MutexIncrement(rx_stats.ackPacketsSent, rx_stats_mutex);
4864 #ifndef RX_ENABLE_TSFPQ
4865     if (!optionalPacket)
4866         rxi_FreePacket(p);
4867 #endif
4868     return optionalPacket;      /* Return packet for re-use by caller */
4869 }
4870
4871 /* Send all of the packets in the list in single datagram */
4872 static void
4873 rxi_SendList(struct rx_call *call, struct rx_packet **list, int len,
4874              int istack, int moreFlag, struct clock *now,
4875              struct clock *retryTime, int resending)
4876 {
4877     int i;
4878     int requestAck = 0;
4879     int lastPacket = 0;
4880     struct rx_connection *conn = call->conn;
4881     struct rx_peer *peer = conn->peer;
4882
4883     MUTEX_ENTER(&peer->peer_lock);
4884     peer->nSent += len;
4885     if (resending)
4886         peer->reSends += len;
4887     rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4888     MUTEX_EXIT(&peer->peer_lock);
4889
4890     if (list[len - 1]->header.flags & RX_LAST_PACKET) {
4891         lastPacket = 1;
4892     }
4893
4894     /* Set the packet flags and schedule the resend events */
4895     /* Only request an ack for the last packet in the list */
4896     for (i = 0; i < len; i++) {
4897         list[i]->retryTime = *retryTime;
4898         if (list[i]->header.serial) {
4899             /* Exponentially backoff retry times */
4900             if (list[i]->backoff < MAXBACKOFF) {
4901                 /* so it can't stay == 0 */
4902                 list[i]->backoff = (list[i]->backoff << 1) + 1;
4903             } else
4904                 list[i]->backoff++;
4905             clock_Addmsec(&(list[i]->retryTime),
4906                           ((afs_uint32) list[i]->backoff) << 8);
4907         }
4908
4909         /* Wait a little extra for the ack on the last packet */
4910         if (lastPacket && !(list[i]->header.flags & RX_CLIENT_INITIATED)) {
4911             clock_Addmsec(&(list[i]->retryTime), 400);
4912         }
4913
4914         /* Record the time sent */
4915         list[i]->timeSent = *now;
4916
4917         /* Ask for an ack on retransmitted packets,  on every other packet
4918          * if the peer doesn't support slow start. Ask for an ack on every
4919          * packet until the congestion window reaches the ack rate. */
4920         if (list[i]->header.serial) {
4921             requestAck = 1;
4922             rx_MutexIncrement(rx_stats.dataPacketsReSent, rx_stats_mutex);
4923         } else {
4924             /* improved RTO calculation- not Karn */
4925             list[i]->firstSent = *now;
4926             if (!lastPacket && (call->cwind <= (u_short) (conn->ackRate + 1)
4927                                 || (!(call->flags & RX_CALL_SLOW_START_OK)
4928                                     && (list[i]->header.seq & 1)))) {
4929                 requestAck = 1;
4930             }
4931         }
4932
4933         MUTEX_ENTER(&peer->peer_lock);
4934         peer->nSent++;
4935         if (resending)
4936             peer->reSends++;
4937         rx_MutexIncrement(rx_stats.dataPacketsSent, rx_stats_mutex);
4938         MUTEX_EXIT(&peer->peer_lock);
4939
4940         /* Tag this packet as not being the last in this group,
4941          * for the receiver's benefit */
4942         if (i < len - 1 || moreFlag) {
4943             list[i]->header.flags |= RX_MORE_PACKETS;
4944         }
4945
4946         /* Install the new retransmit time for the packet, and
4947          * record the time sent */
4948         list[i]->timeSent = *now;
4949     }
4950
4951     if (requestAck) {
4952         list[len - 1]->header.flags |= RX_REQUEST_ACK;
4953     }
4954
4955     /* Since we're about to send a data packet to the peer, it's
4956      * safe to nuke any scheduled end-of-packets ack */
4957     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
4958
4959     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
4960     MUTEX_EXIT(&call->lock);
4961     if (len > 1) {
4962         rxi_SendPacketList(call, conn, list, len, istack);
4963     } else {
4964         rxi_SendPacket(call, conn, list[0], istack);
4965     }
4966     MUTEX_ENTER(&call->lock);
4967     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
4968
4969     /* Update last send time for this call (for keep-alive
4970      * processing), and for the connection (so that we can discover
4971      * idle connections) */
4972     call->lastSendData = conn->lastSendTime = call->lastSendTime = clock_Sec();
4973 }
4974
4975 /* When sending packets we need to follow these rules:
4976  * 1. Never send more than maxDgramPackets in a jumbogram.
4977  * 2. Never send a packet with more than two iovecs in a jumbogram.
4978  * 3. Never send a retransmitted packet in a jumbogram.
4979  * 4. Never send more than cwind/4 packets in a jumbogram
4980  * We always keep the last list we should have sent so we
4981  * can set the RX_MORE_PACKETS flags correctly.
4982  */
4983 static void
4984 rxi_SendXmitList(struct rx_call *call, struct rx_packet **list, int len,
4985                  int istack, struct clock *now, struct clock *retryTime,
4986                  int resending)
4987 {
4988     int i, cnt, lastCnt = 0;
4989     struct rx_packet **listP, **lastP = 0;
4990     struct rx_peer *peer = call->conn->peer;
4991     int morePackets = 0;
4992
4993     for (cnt = 0, listP = &list[0], i = 0; i < len; i++) {
4994         /* Does the current packet force us to flush the current list? */
4995         if (cnt > 0
4996             && (list[i]->header.serial || (list[i]->flags & RX_PKTFLAG_ACKED)
4997                 || list[i]->length > RX_JUMBOBUFFERSIZE)) {
4998             if (lastCnt > 0) {
4999                 rxi_SendList(call, lastP, lastCnt, istack, 1, now, retryTime,
5000                              resending);
5001                 /* If the call enters an error state stop sending, or if
5002                  * we entered congestion recovery mode, stop sending */
5003                 if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5004                     return;
5005             }
5006             lastP = listP;
5007             lastCnt = cnt;
5008             listP = &list[i];
5009             cnt = 0;
5010         }
5011         /* Add the current packet to the list if it hasn't been acked.
5012          * Otherwise adjust the list pointer to skip the current packet.  */
5013         if (!(list[i]->flags & RX_PKTFLAG_ACKED)) {
5014             cnt++;
5015             /* Do we need to flush the list? */
5016             if (cnt >= (int)peer->maxDgramPackets
5017                 || cnt >= (int)call->nDgramPackets || cnt >= (int)call->cwind
5018                 || list[i]->header.serial
5019                 || list[i]->length != RX_JUMBOBUFFERSIZE) {
5020                 if (lastCnt > 0) {
5021                     rxi_SendList(call, lastP, lastCnt, istack, 1, now,
5022                                  retryTime, resending);
5023                     /* If the call enters an error state stop sending, or if
5024                      * we entered congestion recovery mode, stop sending */
5025                     if (call->error
5026                         || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5027                         return;
5028                 }
5029                 lastP = listP;
5030                 lastCnt = cnt;
5031                 listP = &list[i + 1];
5032                 cnt = 0;
5033             }
5034         } else {
5035             if (cnt != 0) {
5036                 osi_Panic("rxi_SendList error");
5037             }
5038             listP = &list[i + 1];
5039         }
5040     }
5041
5042     /* Send the whole list when the call is in receive mode, when
5043      * the call is in eof mode, when we are in fast recovery mode,
5044      * and when we have the last packet */
5045     if ((list[len - 1]->header.flags & RX_LAST_PACKET)
5046         || call->mode == RX_MODE_RECEIVING || call->mode == RX_MODE_EOF
5047         || (call->flags & RX_CALL_FAST_RECOVER)) {
5048         /* Check for the case where the current list contains
5049          * an acked packet. Since we always send retransmissions
5050          * in a separate packet, we only need to check the first
5051          * packet in the list */
5052         if (cnt > 0 && !(listP[0]->flags & RX_PKTFLAG_ACKED)) {
5053             morePackets = 1;
5054         }
5055         if (lastCnt > 0) {
5056             rxi_SendList(call, lastP, lastCnt, istack, morePackets, now,
5057                          retryTime, resending);
5058             /* If the call enters an error state stop sending, or if
5059              * we entered congestion recovery mode, stop sending */
5060             if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
5061                 return;
5062         }
5063         if (morePackets) {
5064             rxi_SendList(call, listP, cnt, istack, 0, now, retryTime,
5065                          resending);
5066         }
5067     } else if (lastCnt > 0) {
5068         rxi_SendList(call, lastP, lastCnt, istack, 0, now, retryTime,
5069                      resending);
5070     }
5071 }
5072
5073 #ifdef  RX_ENABLE_LOCKS
5074 /* Call rxi_Start, below, but with the call lock held. */
5075 void
5076 rxi_StartUnlocked(struct rxevent *event, register struct rx_call *call,
5077                   void *arg1, int istack)
5078 {
5079     MUTEX_ENTER(&call->lock);
5080     rxi_Start(event, call, arg1, istack);
5081     MUTEX_EXIT(&call->lock);
5082 }
5083 #endif /* RX_ENABLE_LOCKS */
5084
5085 /* This routine is called when new packets are readied for
5086  * transmission and when retransmission may be necessary, or when the
5087  * transmission window or burst count are favourable.  This should be
5088  * better optimized for new packets, the usual case, now that we've
5089  * got rid of queues of send packets. XXXXXXXXXXX */
5090 void
5091 rxi_Start(struct rxevent *event, register struct rx_call *call,
5092           void *arg1, int istack)
5093 {
5094     struct rx_packet *p;
5095     register struct rx_packet *nxp;     /* Next pointer for queue_Scan */
5096     struct rx_peer *peer = call->conn->peer;
5097     struct clock now, usenow, retryTime;
5098     int haveEvent;
5099     int nXmitPackets;
5100     int maxXmitPackets;
5101     struct rx_packet **xmitList;
5102     int resending = 0;
5103
5104     /* If rxi_Start is being called as a result of a resend event,
5105      * then make sure that the event pointer is removed from the call
5106      * structure, since there is no longer a per-call retransmission
5107      * event pending. */
5108     if (event && event == call->resendEvent) {
5109         CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
5110         call->resendEvent = NULL;
5111         resending = 1;
5112         if (queue_IsEmpty(&call->tq)) {
5113             /* Nothing to do */
5114             return;
5115         }
5116         /* Timeouts trigger congestion recovery */
5117 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5118         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5119             /* someone else is waiting to start recovery */
5120             return;
5121         }
5122         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
5123         rxi_WaitforTQBusy(call);
5124 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5125         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
5126         call->flags |= RX_CALL_FAST_RECOVER;
5127         if (peer->maxDgramPackets > 1) {
5128             call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
5129         } else {
5130             call->MTU = MIN(peer->natMTU, peer->maxMTU);
5131         }
5132         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
5133         call->nDgramPackets = 1;
5134         call->cwind = 1;
5135         call->nextCwind = 1;
5136         call->nAcks = 0;
5137         call->nNacks = 0;
5138         MUTEX_ENTER(&peer->peer_lock);
5139         peer->MTU = call->MTU;
5140         peer->cwind = call->cwind;
5141         peer->nDgramPackets = 1;
5142         peer->congestSeq++;
5143         call->congestSeq = peer->congestSeq;
5144         MUTEX_EXIT(&peer->peer_lock);
5145         /* Clear retry times on packets. Otherwise, it's possible for
5146          * some packets in the queue to force resends at rates faster
5147          * than recovery rates.
5148          */
5149         for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5150             if (!(p->flags & RX_PKTFLAG_ACKED)) {
5151                 clock_Zero(&p->retryTime);
5152             }
5153         }
5154     }
5155     if (call->error) {
5156 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5157         rx_MutexIncrement(rx_tq_debug.rxi_start_in_error, rx_stats_mutex);
5158 #endif
5159         return;
5160     }
5161
5162     if (queue_IsNotEmpty(&call->tq)) {  /* If we have anything to send */
5163         /* Get clock to compute the re-transmit time for any packets
5164          * in this burst.  Note, if we back off, it's reasonable to
5165          * back off all of the packets in the same manner, even if
5166          * some of them have been retransmitted more times than more
5167          * recent additions.
5168          * Do a dance to avoid blocking after setting now. */
5169         clock_Zero(&retryTime);
5170         MUTEX_ENTER(&peer->peer_lock);
5171         clock_Add(&retryTime, &peer->timeout);
5172         MUTEX_EXIT(&peer->peer_lock);
5173         clock_GetTime(&now);
5174         clock_Add(&retryTime, &now);
5175         usenow = now;
5176         /* Send (or resend) any packets that need it, subject to
5177          * window restrictions and congestion burst control
5178          * restrictions.  Ask for an ack on the last packet sent in
5179          * this burst.  For now, we're relying upon the window being
5180          * considerably bigger than the largest number of packets that
5181          * are typically sent at once by one initial call to
5182          * rxi_Start.  This is probably bogus (perhaps we should ask
5183          * for an ack when we're half way through the current
5184          * window?).  Also, for non file transfer applications, this
5185          * may end up asking for an ack for every packet.  Bogus. XXXX
5186          */
5187         /*
5188          * But check whether we're here recursively, and let the other guy
5189          * do the work.
5190          */
5191 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5192         if (!(call->flags & RX_CALL_TQ_BUSY)) {
5193             call->flags |= RX_CALL_TQ_BUSY;
5194             do {
5195 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5196             restart:
5197 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5198                 call->flags &= ~RX_CALL_NEED_START;
5199 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5200                 nXmitPackets = 0;
5201                 maxXmitPackets = MIN(call->twind, call->cwind);
5202                 xmitList = (struct rx_packet **)
5203                     osi_Alloc(maxXmitPackets * sizeof(struct rx_packet *));
5204                 if (xmitList == NULL)
5205                     osi_Panic("rxi_Start, failed to allocate xmit list");
5206                 for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5207                     if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5208                         /* We shouldn't be sending packets if a thread is waiting
5209                          * to initiate congestion recovery */
5210                         break;
5211                     }
5212                     if ((nXmitPackets)
5213                         && (call->flags & RX_CALL_FAST_RECOVER)) {
5214                         /* Only send one packet during fast recovery */
5215                         break;
5216                     }
5217                     if ((p->flags & RX_PKTFLAG_FREE)
5218                         || (!queue_IsEnd(&call->tq, nxp)
5219                             && (nxp->flags & RX_PKTFLAG_FREE))
5220                         || (p == (struct rx_packet *)&rx_freePacketQueue)
5221                         || (nxp == (struct rx_packet *)&rx_freePacketQueue)) {
5222                         osi_Panic("rxi_Start: xmit queue clobbered");
5223                     }
5224                     if (p->flags & RX_PKTFLAG_ACKED) {
5225                         /* Since we may block, don't trust this */
5226                         usenow.sec = usenow.usec = 0;
5227                         rx_MutexIncrement(rx_stats.ignoreAckedPacket, rx_stats_mutex);
5228                         continue;       /* Ignore this packet if it has been acknowledged */
5229                     }
5230
5231                     /* Turn off all flags except these ones, which are the same
5232                      * on each transmission */
5233                     p->header.flags &= RX_PRESET_FLAGS;
5234
5235                     if (p->header.seq >=
5236                         call->tfirst + MIN((int)call->twind,
5237                                            (int)(call->nSoftAcked +
5238                                                  call->cwind))) {
5239                         call->flags |= RX_CALL_WAIT_WINDOW_SEND;        /* Wait for transmit window */
5240                         /* Note: if we're waiting for more window space, we can
5241                          * still send retransmits; hence we don't return here, but
5242                          * break out to schedule a retransmit event */
5243                         dpf(("call %d waiting for window",
5244                              *(call->callNumber)));
5245                         break;
5246                     }
5247
5248                     /* Transmit the packet if it needs to be sent. */
5249                     if (!clock_Lt(&now, &p->retryTime)) {
5250                         if (nXmitPackets == maxXmitPackets) {
5251                             rxi_SendXmitList(call, xmitList, nXmitPackets,
5252                                              istack, &now, &retryTime,
5253                                              resending);
5254                             osi_Free(xmitList, maxXmitPackets *
5255                                      sizeof(struct rx_packet *));
5256                             goto restart;
5257                         }
5258                         xmitList[nXmitPackets++] = p;
5259                     }
5260                 }
5261
5262                 /* xmitList now hold pointers to all of the packets that are
5263                  * ready to send. Now we loop to send the packets */
5264                 if (nXmitPackets > 0) {
5265                     rxi_SendXmitList(call, xmitList, nXmitPackets, istack,
5266                                      &now, &retryTime, resending);
5267                 }
5268                 osi_Free(xmitList,
5269                          maxXmitPackets * sizeof(struct rx_packet *));
5270
5271 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5272                 /*
5273                  * TQ references no longer protected by this flag; they must remain
5274                  * protected by the global lock.
5275                  */
5276                 if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5277                     call->flags &= ~RX_CALL_TQ_BUSY;
5278                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5279                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5280 #ifdef RX_ENABLE_LOCKS
5281                         osirx_AssertMine(&call->lock, "rxi_Start start");
5282                         CV_BROADCAST(&call->cv_tq);
5283 #else /* RX_ENABLE_LOCKS */
5284                         osi_rxWakeup(&call->tq);
5285 #endif /* RX_ENABLE_LOCKS */
5286                     }
5287                     return;
5288                 }
5289                 if (call->error) {
5290                     /* We went into the error state while sending packets. Now is
5291                      * the time to reset the call. This will also inform the using
5292                      * process that the call is in an error state.
5293                      */
5294                     rx_MutexIncrement(rx_tq_debug.rxi_start_aborted, rx_stats_mutex);
5295                     call->flags &= ~RX_CALL_TQ_BUSY;
5296                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5297                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5298 #ifdef RX_ENABLE_LOCKS
5299                         osirx_AssertMine(&call->lock, "rxi_Start middle");
5300                         CV_BROADCAST(&call->cv_tq);
5301 #else /* RX_ENABLE_LOCKS */
5302                         osi_rxWakeup(&call->tq);
5303 #endif /* RX_ENABLE_LOCKS */
5304                     }
5305                     rxi_CallError(call, call->error);
5306                     return;
5307                 }
5308 #ifdef RX_ENABLE_LOCKS
5309                 if (call->flags & RX_CALL_TQ_SOME_ACKED) {
5310                     register int missing;
5311                     call->flags &= ~RX_CALL_TQ_SOME_ACKED;
5312                     /* Some packets have received acks. If they all have, we can clear
5313                      * the transmit queue.
5314                      */
5315                     for (missing =
5316                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5317                         if (p->header.seq < call->tfirst
5318                             && (p->flags & RX_PKTFLAG_ACKED)) {
5319                             queue_Remove(p);
5320                             rxi_FreePacket(p);
5321                         } else
5322                             missing = 1;
5323                     }
5324                     if (!missing)
5325                         call->flags |= RX_CALL_TQ_CLEARME;
5326                 }
5327 #endif /* RX_ENABLE_LOCKS */
5328                 /* Don't bother doing retransmits if the TQ is cleared. */
5329                 if (call->flags & RX_CALL_TQ_CLEARME) {
5330                     rxi_ClearTransmitQueue(call, 1);
5331                 } else
5332 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5333                 {
5334
5335                     /* Always post a resend event, if there is anything in the
5336                      * queue, and resend is possible.  There should be at least
5337                      * one unacknowledged packet in the queue ... otherwise none
5338                      * of these packets should be on the queue in the first place.
5339                      */
5340                     if (call->resendEvent) {
5341                         /* Cancel the existing event and post a new one */
5342                         rxevent_Cancel(call->resendEvent, call,
5343                                        RX_CALL_REFCOUNT_RESEND);
5344                     }
5345
5346                     /* The retry time is the retry time on the first unacknowledged
5347                      * packet inside the current window */
5348                     for (haveEvent =
5349                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5350                         /* Don't set timers for packets outside the window */
5351                         if (p->header.seq >= call->tfirst + call->twind) {
5352                             break;
5353                         }
5354
5355                         if (!(p->flags & RX_PKTFLAG_ACKED)
5356                             && !clock_IsZero(&p->retryTime)) {
5357                             haveEvent = 1;
5358                             retryTime = p->retryTime;
5359                             break;
5360                         }
5361                     }
5362
5363                     /* Post a new event to re-run rxi_Start when retries may be needed */
5364                     if (haveEvent && !(call->flags & RX_CALL_NEED_START)) {
5365 #ifdef RX_ENABLE_LOCKS
5366                         CALL_HOLD(call, RX_CALL_REFCOUNT_RESEND);
5367                         call->resendEvent =
5368                             rxevent_PostNow2(&retryTime, &usenow,
5369                                              rxi_StartUnlocked,
5370                                              (void *)call, 0, istack);
5371 #else /* RX_ENABLE_LOCKS */
5372                         call->resendEvent =
5373                             rxevent_PostNow2(&retryTime, &usenow, rxi_Start,
5374                                              (void *)call, 0, istack);
5375 #endif /* RX_ENABLE_LOCKS */
5376                     }
5377                 }
5378 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5379             } while (call->flags & RX_CALL_NEED_START);
5380             /*
5381              * TQ references no longer protected by this flag; they must remain
5382              * protected by the global lock.
5383              */
5384             call->flags &= ~RX_CALL_TQ_BUSY;
5385             if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5386                 dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5387 #ifdef RX_ENABLE_LOCKS
5388                 osirx_AssertMine(&call->lock, "rxi_Start end");
5389                 CV_BROADCAST(&call->cv_tq);
5390 #else /* RX_ENABLE_LOCKS */
5391                 osi_rxWakeup(&call->tq);
5392 #endif /* RX_ENABLE_LOCKS */
5393             }
5394         } else {
5395             call->flags |= RX_CALL_NEED_START;
5396         }
5397 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5398     } else {
5399         if (call->resendEvent) {
5400             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5401         }
5402     }
5403 }
5404
5405 /* Also adjusts the keep alive parameters for the call, to reflect
5406  * that we have just sent a packet (so keep alives aren't sent
5407  * immediately) */
5408 void
5409 rxi_Send(register struct rx_call *call, register struct rx_packet *p,
5410          int istack)
5411 {
5412     register struct rx_connection *conn = call->conn;
5413
5414     /* Stamp each packet with the user supplied status */
5415     p->header.userStatus = call->localStatus;
5416
5417     /* Allow the security object controlling this call's security to
5418      * make any last-minute changes to the packet */
5419     RXS_SendPacket(conn->securityObject, call, p);
5420
5421     /* Since we're about to send SOME sort of packet to the peer, it's
5422      * safe to nuke any scheduled end-of-packets ack */
5423     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5424
5425     /* Actually send the packet, filling in more connection-specific fields */
5426     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5427     MUTEX_EXIT(&call->lock);
5428     rxi_SendPacket(call, conn, p, istack);
5429     MUTEX_ENTER(&call->lock);
5430     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5431
5432     /* Update last send time for this call (for keep-alive
5433      * processing), and for the connection (so that we can discover
5434      * idle connections) */
5435     conn->lastSendTime = call->lastSendTime = clock_Sec();
5436     /* Don't count keepalives here, so idleness can be tracked. */
5437     if (p->header.type != RX_PACKET_TYPE_ACK)
5438         call->lastSendData = call->lastSendTime;
5439 }
5440
5441
5442 /* Check if a call needs to be destroyed.  Called by keep-alive code to ensure
5443  * that things are fine.  Also called periodically to guarantee that nothing
5444  * falls through the cracks (e.g. (error + dally) connections have keepalive
5445  * turned off.  Returns 0 if conn is well, -1 otherwise.  If otherwise, call
5446  *  may be freed!
5447  * haveCTLock Set if calling from rxi_ReapConnections
5448  */
5449 #ifdef RX_ENABLE_LOCKS
5450 int
5451 rxi_CheckCall(register struct rx_call *call, int haveCTLock)
5452 #else /* RX_ENABLE_LOCKS */
5453 int
5454 rxi_CheckCall(register struct rx_call *call)
5455 #endif                          /* RX_ENABLE_LOCKS */
5456 {
5457     register struct rx_connection *conn = call->conn;
5458     afs_uint32 now;
5459     afs_uint32 deadTime;
5460
5461 #ifdef RX_GLOBAL_RXLOCK_KERNEL
5462     if (call->flags & RX_CALL_TQ_BUSY) {
5463         /* Call is active and will be reset by rxi_Start if it's
5464          * in an error state.
5465          */
5466         return 0;
5467     }
5468 #endif
5469     /* dead time + RTT + 8*MDEV, rounded up to next second. */
5470     deadTime =
5471         (((afs_uint32) conn->secondsUntilDead << 10) +
5472          ((afs_uint32) conn->peer->rtt >> 3) +
5473          ((afs_uint32) conn->peer->rtt_dev << 1) + 1023) >> 10;
5474     now = clock_Sec();
5475     /* These are computed to the second (+- 1 second).  But that's
5476      * good enough for these values, which should be a significant
5477      * number of seconds. */
5478     if (now > (call->lastReceiveTime + deadTime)) {
5479         if (call->state == RX_STATE_ACTIVE) {
5480 #ifdef ADAPT_PMTU
5481 #if defined(KERNEL) && defined(AFS_SUN57_ENV)
5482             ire_t *ire;
5483 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5484             netstack_t *ns =  netstack_find_by_stackid(GLOBAL_NETSTACKID);
5485             ip_stack_t *ipst = ns->netstack_ip;
5486 #endif
5487             ire = ire_cache_lookup(call->conn->peer->host
5488 #if defined(AFS_SUN510_ENV) && defined(ALL_ZONES)
5489                                    , ALL_ZONES
5490 #if defined(AFS_SUN510_ENV) && (defined(ICL_3_ARG) || defined(GLOBAL_NETSTACKID))
5491                                    , NULL
5492 #if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
5493                                    , ipst
5494 #endif
5495 #endif
5496 #endif
5497                 );
5498
5499             if (ire && ire->ire_max_frag > 0)
5500                 rxi_SetPeerMtu(call->conn->peer->host, 0, ire->ire_max_frag);
5501 #if defined(GLOBAL_NETSTACKID)
5502             netstack_rele(ns);
5503 #endif
5504 #endif
5505 #endif /* ADAPT_PMTU */
5506             rxi_CallError(call, RX_CALL_DEAD);
5507             return -1;
5508         } else {
5509 #ifdef RX_ENABLE_LOCKS
5510             /* Cancel pending events */
5511             rxevent_Cancel(call->delayedAckEvent, call,
5512                            RX_CALL_REFCOUNT_DELAY);
5513             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5514             rxevent_Cancel(call->keepAliveEvent, call,
5515                            RX_CALL_REFCOUNT_ALIVE);
5516             if (call->refCount == 0) {
5517                 rxi_FreeCall(call, haveCTLock);
5518                 return -2;
5519             }
5520             return -1;
5521 #else /* RX_ENABLE_LOCKS */
5522             rxi_FreeCall(call);
5523             return -2;
5524 #endif /* RX_ENABLE_LOCKS */
5525         }
5526         /* Non-active calls are destroyed if they are not responding
5527          * to pings; active calls are simply flagged in error, so the
5528          * attached process can die reasonably gracefully. */
5529     }
5530     /* see if we have a non-activity timeout */
5531     if (call->startWait && conn->idleDeadTime
5532         && ((call->startWait + conn->idleDeadTime) < now)) {
5533         if (call->state == RX_STATE_ACTIVE) {
5534             rxi_CallError(call, RX_CALL_TIMEOUT);
5535             return -1;
5536         }
5537     }
5538     if (call->lastSendData && conn->idleDeadTime && (conn->idleDeadErr != 0)
5539         && ((call->lastSendData + conn->idleDeadTime) < now)) {
5540         if (call->state == RX_STATE_ACTIVE) {
5541             rxi_CallError(call, conn->idleDeadErr);
5542             return -1;
5543         }
5544     }
5545     /* see if we have a hard timeout */
5546     if (conn->hardDeadTime
5547         && (now > (conn->hardDeadTime + call->startTime.sec))) {
5548         if (call->state == RX_STATE_ACTIVE)
5549             rxi_CallError(call, RX_CALL_TIMEOUT);
5550         return -1;
5551     }
5552     return 0;
5553 }
5554
5555
5556 /* When a call is in progress, this routine is called occasionally to
5557  * make sure that some traffic has arrived (or been sent to) the peer.
5558  * If nothing has arrived in a reasonable amount of time, the call is
5559  * declared dead; if nothing has been sent for a while, we send a
5560  * keep-alive packet (if we're actually trying to keep the call alive)
5561  */
5562 void
5563 rxi_KeepAliveEvent(struct rxevent *event, register struct rx_call *call,
5564                    char *dummy)
5565 {
5566     struct rx_connection *conn;
5567     afs_uint32 now;
5568
5569     MUTEX_ENTER(&call->lock);
5570     CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
5571     if (event == call->keepAliveEvent)
5572         call->keepAliveEvent = NULL;
5573     now = clock_Sec();
5574
5575 #ifdef RX_ENABLE_LOCKS
5576     if (rxi_CheckCall(call, 0)) {
5577         MUTEX_EXIT(&call->lock);
5578         return;
5579     }
5580 #else /* RX_ENABLE_LOCKS */
5581     if (rxi_CheckCall(call))
5582         return;
5583 #endif /* RX_ENABLE_LOCKS */
5584
5585     /* Don't try to keep alive dallying calls */
5586     if (call->state == RX_STATE_DALLY) {
5587         MUTEX_EXIT(&call->lock);
5588         return;
5589     }
5590
5591     conn = call->conn;
5592     if ((now - call->lastSendTime) > conn->secondsUntilPing) {
5593         /* Don't try to send keepalives if there is unacknowledged data */
5594         /* the rexmit code should be good enough, this little hack
5595          * doesn't quite work XXX */
5596         (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
5597     }
5598     rxi_ScheduleKeepAliveEvent(call);
5599     MUTEX_EXIT(&call->lock);
5600 }
5601
5602
5603 void
5604 rxi_ScheduleKeepAliveEvent(register struct rx_call *call)
5605 {
5606     if (!call->keepAliveEvent) {
5607         struct clock when, now;
5608         clock_GetTime(&now);
5609         when = now;
5610         when.sec += call->conn->secondsUntilPing;
5611         CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE);
5612         call->keepAliveEvent =
5613             rxevent_PostNow(&when, &now, rxi_KeepAliveEvent, call, 0);
5614     }
5615 }
5616
5617 /* N.B. rxi_KeepAliveOff:  is defined earlier as a macro */
5618 void
5619 rxi_KeepAliveOn(register struct rx_call *call)
5620 {
5621     /* Pretend last packet received was received now--i.e. if another
5622      * packet isn't received within the keep alive time, then the call
5623      * will die; Initialize last send time to the current time--even
5624      * if a packet hasn't been sent yet.  This will guarantee that a
5625      * keep-alive is sent within the ping time */
5626     call->lastReceiveTime = call->lastSendTime = clock_Sec();
5627     rxi_ScheduleKeepAliveEvent(call);
5628 }
5629
5630 /* This routine is called to send connection abort messages
5631  * that have been delayed to throttle looping clients. */
5632 void
5633 rxi_SendDelayedConnAbort(struct rxevent *event,
5634                          register struct rx_connection *conn, char *dummy)
5635 {
5636     afs_int32 error;
5637     struct rx_packet *packet;
5638
5639     MUTEX_ENTER(&conn->conn_data_lock);
5640     conn->delayedAbortEvent = NULL;
5641     error = htonl(conn->error);
5642     conn->abortCount++;
5643     MUTEX_EXIT(&conn->conn_data_lock);
5644     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5645     if (packet) {
5646         packet =
5647             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5648                             RX_PACKET_TYPE_ABORT, (char *)&error,
5649                             sizeof(error), 0);
5650         rxi_FreePacket(packet);
5651     }
5652 }
5653
5654 /* This routine is called to send call abort messages
5655  * that have been delayed to throttle looping clients. */
5656 void
5657 rxi_SendDelayedCallAbort(struct rxevent *event, register struct rx_call *call,
5658                          char *dummy)
5659 {
5660     afs_int32 error;
5661     struct rx_packet *packet;
5662
5663     MUTEX_ENTER(&call->lock);
5664     call->delayedAbortEvent = NULL;
5665     error = htonl(call->error);
5666     call->abortCount++;
5667     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5668     if (packet) {
5669         packet =
5670             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
5671                             (char *)&error, sizeof(error), 0);
5672         rxi_FreePacket(packet);
5673     }
5674     CALL_RELE(call, RX_CALL_REFCOUNT_ABORT);
5675     MUTEX_EXIT(&call->lock);
5676 }
5677
5678 /* This routine is called periodically (every RX_AUTH_REQUEST_TIMEOUT
5679  * seconds) to ask the client to authenticate itself.  The routine
5680  * issues a challenge to the client, which is obtained from the
5681  * security object associated with the connection */
5682 void
5683 rxi_ChallengeEvent(struct rxevent *event, register struct rx_connection *conn,
5684                    void *arg1, int tries)
5685 {
5686     conn->challengeEvent = NULL;
5687     if (RXS_CheckAuthentication(conn->securityObject, conn) != 0) {
5688         register struct rx_packet *packet;
5689         struct clock when, now;
5690
5691         if (tries <= 0) {
5692             /* We've failed to authenticate for too long.
5693              * Reset any calls waiting for authentication;
5694              * they are all in RX_STATE_PRECALL.
5695              */
5696             int i;
5697
5698             MUTEX_ENTER(&conn->conn_call_lock);
5699             for (i = 0; i < RX_MAXCALLS; i++) {
5700                 struct rx_call *call = conn->call[i];
5701                 if (call) {
5702                     MUTEX_ENTER(&call->lock);
5703                     if (call->state == RX_STATE_PRECALL) {
5704                         rxi_CallError(call, RX_CALL_DEAD);
5705                         rxi_SendCallAbort(call, NULL, 0, 0);
5706                     }
5707                     MUTEX_EXIT(&call->lock);
5708                 }
5709             }
5710             MUTEX_EXIT(&conn->conn_call_lock);
5711             return;
5712         }
5713
5714         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5715         if (packet) {
5716             /* If there's no packet available, do this later. */
5717             RXS_GetChallenge(conn->securityObject, conn, packet);
5718             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5719                             RX_PACKET_TYPE_CHALLENGE, NULL, -1, 0);
5720             rxi_FreePacket(packet);
5721         }
5722         clock_GetTime(&now);
5723         when = now;
5724         when.sec += RX_CHALLENGE_TIMEOUT;
5725         conn->challengeEvent =
5726             rxevent_PostNow2(&when, &now, rxi_ChallengeEvent, conn, 0,
5727                          (tries - 1));
5728     }
5729 }
5730
5731 /* Call this routine to start requesting the client to authenticate
5732  * itself.  This will continue until authentication is established,
5733  * the call times out, or an invalid response is returned.  The
5734  * security object associated with the connection is asked to create
5735  * the challenge at this time.  N.B.  rxi_ChallengeOff is a macro,
5736  * defined earlier. */
5737 void
5738 rxi_ChallengeOn(register struct rx_connection *conn)
5739 {
5740     if (!conn->challengeEvent) {
5741         RXS_CreateChallenge(conn->securityObject, conn);
5742         rxi_ChallengeEvent(NULL, conn, 0, RX_CHALLENGE_MAXTRIES);
5743     };
5744 }
5745
5746
5747 /* Compute round trip time of the packet provided, in *rttp.
5748  */
5749
5750 /* rxi_ComputeRoundTripTime is called with peer locked. */
5751 /* sentp and/or peer may be null */
5752 void
5753 rxi_ComputeRoundTripTime(register struct rx_packet *p,
5754                          register struct clock *sentp,
5755                          register struct rx_peer *peer)
5756 {
5757     struct clock thisRtt, *rttp = &thisRtt;
5758
5759     register int rtt_timeout;
5760
5761     clock_GetTime(rttp);
5762
5763     if (clock_Lt(rttp, sentp)) {
5764         clock_Zero(rttp);
5765         return;                 /* somebody set the clock back, don't count this time. */
5766     }
5767     clock_Sub(rttp, sentp);
5768     MUTEX_ENTER(&rx_stats_mutex);
5769     if (clock_Lt(rttp, &rx_stats.minRtt))
5770         rx_stats.minRtt = *rttp;
5771     if (clock_Gt(rttp, &rx_stats.maxRtt)) {
5772         if (rttp->sec > 60) {
5773             MUTEX_EXIT(&rx_stats_mutex);
5774             return;             /* somebody set the clock ahead */
5775         }
5776         rx_stats.maxRtt = *rttp;
5777     }
5778     clock_Add(&rx_stats.totalRtt, rttp);
5779     rx_stats.nRttSamples++;
5780     MUTEX_EXIT(&rx_stats_mutex);
5781
5782     /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */
5783
5784     /* Apply VanJacobson round-trip estimations */
5785     if (peer->rtt) {
5786         register int delta;
5787
5788         /*
5789          * srtt (peer->rtt) is in units of one-eighth-milliseconds.
5790          * srtt is stored as fixed point with 3 bits after the binary
5791          * point (i.e., scaled by 8). The following magic is
5792          * equivalent to the smoothing algorithm in rfc793 with an
5793          * alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed point).
5794          * srtt*8 = srtt*8 + rtt - srtt
5795          * srtt = srtt + rtt/8 - srtt/8
5796          */
5797
5798         delta = MSEC(rttp) - (peer->rtt >> 3);
5799         peer->rtt += delta;
5800
5801         /*
5802          * We accumulate a smoothed rtt variance (actually, a smoothed
5803          * mean difference), then set the retransmit timer to smoothed
5804          * rtt + 4 times the smoothed variance (was 2x in van's original
5805          * paper, but 4x works better for me, and apparently for him as
5806          * well).
5807          * rttvar is stored as
5808          * fixed point with 2 bits after the binary point (scaled by
5809          * 4).  The following is equivalent to rfc793 smoothing with
5810          * an alpha of .75 (rttvar = rttvar*3/4 + |delta| / 4).  This
5811          * replaces rfc793's wired-in beta.
5812          * dev*4 = dev*4 + (|actual - expected| - dev)
5813          */
5814
5815         if (delta < 0)
5816             delta = -delta;
5817
5818         delta -= (peer->rtt_dev >> 2);
5819         peer->rtt_dev += delta;
5820     } else {
5821         /* I don't have a stored RTT so I start with this value.  Since I'm
5822          * probably just starting a call, and will be pushing more data down
5823          * this, I expect congestion to increase rapidly.  So I fudge a
5824          * little, and I set deviance to half the rtt.  In practice,
5825          * deviance tends to approach something a little less than
5826          * half the smoothed rtt. */
5827         peer->rtt = (MSEC(rttp) << 3) + 8;
5828         peer->rtt_dev = peer->rtt >> 2; /* rtt/2: they're scaled differently */
5829     }
5830     /* the timeout is RTT + 4*MDEV + 0.35 sec   This is because one end or
5831      * the other of these connections is usually in a user process, and can
5832      * be switched and/or swapped out.  So on fast, reliable networks, the
5833      * timeout would otherwise be too short.
5834      */
5835     rtt_timeout = (peer->rtt >> 3) + peer->rtt_dev + 350;
5836     clock_Zero(&(peer->timeout));
5837     clock_Addmsec(&(peer->timeout), rtt_timeout);
5838
5839     dpf(("rxi_ComputeRoundTripTime(rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%0.3d sec)\n", MSEC(rttp), peer->rtt >> 3, peer->rtt_dev >> 2, (peer->timeout.sec), (peer->timeout.usec)));
5840 }
5841
5842
5843 /* Find all server connections that have not been active for a long time, and
5844  * toss them */
5845 void
5846 rxi_ReapConnections(void)
5847 {
5848     struct clock now, when;
5849     clock_GetTime(&now);
5850
5851     /* Find server connection structures that haven't been used for
5852      * greater than rx_idleConnectionTime */
5853     {
5854         struct rx_connection **conn_ptr, **conn_end;
5855         int i, havecalls = 0;
5856         MUTEX_ENTER(&rx_connHashTable_lock);
5857         for (conn_ptr = &rx_connHashTable[0], conn_end =
5858              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
5859              conn_ptr++) {
5860             struct rx_connection *conn, *next;
5861             struct rx_call *call;
5862             int result;
5863
5864           rereap:
5865             for (conn = *conn_ptr; conn; conn = next) {
5866                 /* XXX -- Shouldn't the connection be locked? */
5867                 next = conn->next;
5868                 havecalls = 0;
5869                 for (i = 0; i < RX_MAXCALLS; i++) {
5870                     call = conn->call[i];
5871                     if (call) {
5872                         havecalls = 1;
5873                         MUTEX_ENTER(&call->lock);
5874 #ifdef RX_ENABLE_LOCKS
5875                         result = rxi_CheckCall(call, 1);
5876 #else /* RX_ENABLE_LOCKS */
5877                         result = rxi_CheckCall(call);
5878 #endif /* RX_ENABLE_LOCKS */
5879                         MUTEX_EXIT(&call->lock);
5880                         if (result == -2) {
5881                             /* If CheckCall freed the call, it might
5882                              * have destroyed  the connection as well,
5883                              * which screws up the linked lists.
5884                              */
5885                             goto rereap;
5886                         }
5887                     }
5888                 }
5889                 if (conn->type == RX_SERVER_CONNECTION) {
5890                     /* This only actually destroys the connection if
5891                      * there are no outstanding calls */
5892                     MUTEX_ENTER(&conn->conn_data_lock);
5893                     if (!havecalls && !conn->refCount
5894                         && ((conn->lastSendTime + rx_idleConnectionTime) <
5895                             now.sec)) {
5896                         conn->refCount++;       /* it will be decr in rx_DestroyConn */
5897                         MUTEX_EXIT(&conn->conn_data_lock);
5898 #ifdef RX_ENABLE_LOCKS
5899                         rxi_DestroyConnectionNoLock(conn);
5900 #else /* RX_ENABLE_LOCKS */
5901                         rxi_DestroyConnection(conn);
5902 #endif /* RX_ENABLE_LOCKS */
5903                     }
5904 #ifdef RX_ENABLE_LOCKS
5905                     else {
5906                         MUTEX_EXIT(&conn->conn_data_lock);
5907                     }
5908 #endif /* RX_ENABLE_LOCKS */
5909                 }
5910             }
5911         }
5912 #ifdef RX_ENABLE_LOCKS
5913         while (rx_connCleanup_list) {
5914             struct rx_connection *conn;
5915             conn = rx_connCleanup_list;
5916             rx_connCleanup_list = rx_connCleanup_list->next;
5917             MUTEX_EXIT(&rx_connHashTable_lock);
5918             rxi_CleanupConnection(conn);
5919             MUTEX_ENTER(&rx_connHashTable_lock);
5920         }
5921         MUTEX_EXIT(&rx_connHashTable_lock);
5922 #endif /* RX_ENABLE_LOCKS */
5923     }
5924
5925     /* Find any peer structures that haven't been used (haven't had an
5926      * associated connection) for greater than rx_idlePeerTime */
5927     {
5928         struct rx_peer **peer_ptr, **peer_end;
5929         int code;
5930         MUTEX_ENTER(&rx_rpc_stats);
5931         MUTEX_ENTER(&rx_peerHashTable_lock);
5932         for (peer_ptr = &rx_peerHashTable[0], peer_end =
5933              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
5934              peer_ptr++) {
5935             struct rx_peer *peer, *next, *prev;
5936             for (prev = peer = *peer_ptr; peer; peer = next) {
5937                 next = peer->next;
5938                 code = MUTEX_TRYENTER(&peer->peer_lock);
5939                 if ((code) && (peer->refCount == 0)
5940                     && ((peer->idleWhen + rx_idlePeerTime) < now.sec)) {
5941                     rx_interface_stat_p rpc_stat, nrpc_stat;
5942                     size_t space;
5943                     MUTEX_EXIT(&peer->peer_lock);
5944                     MUTEX_DESTROY(&peer->peer_lock);
5945                     for (queue_Scan
5946                          (&peer->rpcStats, rpc_stat, nrpc_stat,
5947                           rx_interface_stat)) {
5948                         unsigned int num_funcs;
5949                         if (!rpc_stat)
5950                             break;
5951                         queue_Remove(&rpc_stat->queue_header);
5952                         queue_Remove(&rpc_stat->all_peers);
5953                         num_funcs = rpc_stat->stats[0].func_total;
5954                         space =
5955                             sizeof(rx_interface_stat_t) +
5956                             rpc_stat->stats[0].func_total *
5957                             sizeof(rx_function_entry_v1_t);
5958
5959                         rxi_Free(rpc_stat, space);
5960                         rxi_rpc_peer_stat_cnt -= num_funcs;
5961                     }
5962                     rxi_FreePeer(peer);
5963                     rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
5964                     if (peer == *peer_ptr) {
5965                         *peer_ptr = next;
5966                         prev = next;
5967                     } else
5968                         prev->next = next;
5969                 } else {
5970                     if (code) {
5971                         MUTEX_EXIT(&peer->peer_lock);
5972                     }
5973                     prev = peer;
5974                 }
5975             }
5976         }
5977         MUTEX_EXIT(&rx_peerHashTable_lock);
5978         MUTEX_EXIT(&rx_rpc_stats);
5979     }
5980
5981     /* THIS HACK IS A TEMPORARY HACK.  The idea is that the race condition in
5982      * rxi_AllocSendPacket, if it hits, will be handled at the next conn
5983      * GC, just below.  Really, we shouldn't have to keep moving packets from
5984      * one place to another, but instead ought to always know if we can
5985      * afford to hold onto a packet in its particular use.  */
5986     MUTEX_ENTER(&rx_freePktQ_lock);
5987     if (rx_waitingForPackets) {
5988         rx_waitingForPackets = 0;
5989 #ifdef  RX_ENABLE_LOCKS
5990         CV_BROADCAST(&rx_waitingForPackets_cv);
5991 #else
5992         osi_rxWakeup(&rx_waitingForPackets);
5993 #endif
5994     }
5995     MUTEX_EXIT(&rx_freePktQ_lock);
5996
5997     when = now;
5998     when.sec += RX_REAP_TIME;   /* Check every RX_REAP_TIME seconds */
5999     rxevent_Post(&when, rxi_ReapConnections, 0, 0);
6000 }
6001
6002
/*
 * rxs_Release - function form of the RXS_Close macro from rx.h, provided
 * because the macro name is sort of strange; it isn't strictly necessary.
 * Call it with a security object before the object is discarded.  Each
 * connection using a security object has its own refcount to the object,
 * so it won't actually be freed until the last connection is destroyed.
 *
 * Returns whatever RXS_Close yields for aobj.
 *
 * This is the only rxs module call.  A hold could also be written but no
 * one needs it.
 */

int
rxs_Release(struct rx_securityClass *aobj)
{
    int code;

    code = RXS_Close(aobj);
    return code;
}
6017
#ifdef ADAPT_WINDOW
#define RXRATE_PKT_OH   (RX_HEADER_SIZE + RX_IPUDP_SIZE)
#define RXRATE_SMALL_PKT    (RXRATE_PKT_OH + sizeof(struct rx_ackPacket))
#define RXRATE_AVG_SMALL_PKT    (RXRATE_PKT_OH + (sizeof(struct rx_ackPacket)/2))
#define RXRATE_LARGE_PKT    (RXRATE_SMALL_PKT + 256)

/* Adjust our estimate of the transmission rate to this peer, given
 * that the packet p was just acked. We can adjust peer->timeout and
 * call->twind. Pragmatically, this is called
 * only with packets of maximal length.
 * Called with peer and call locked.
 *
 * Parameters:
 *   peer       peer whose smRtt / timeout estimates are updated
 *   call       call the ack belongs to (its conn must be set)
 *   p          the acked data packet; NULL when the sample is a ping
 *              response rather than a data ack
 *   ackp       the ack packet (not examined here)
 *   ackReason  only RX_ACK_REQUESTED and RX_ACK_PING_RESPONSE produce a
 *              sample, every other reason is ignored
 */

static void
rxi_ComputeRate(register struct rx_peer *peer, register struct rx_call *call,
                struct rx_packet *p, struct rx_packet *ackp, u_char ackReason)
{
    afs_int32 xferSize, xferMs;
    register afs_int32 minTime;
    struct clock newTO;

    /* Count down packets */
    if (peer->rateFlag > 0)
        peer->rateFlag--;
    /* Do nothing until we're enabled */
    if (peer->rateFlag != 0)
        return;
    if (!call->conn)
        return;

    /* Count only when the ack seems legitimate */
    switch (ackReason) {
    case RX_ACK_REQUESTED:
        if (!p)                 /* a data ack sample needs the data packet */
            return;
        xferSize =
            p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize;
        xferMs = peer->rtt;
        break;

    case RX_ACK_PING_RESPONSE:
        if (p)                  /* want the response to ping-request, not data send */
            return;
        clock_GetTime(&newTO);
        if (clock_Gt(&newTO, &call->pingRequestTime)) {
            clock_Sub(&newTO, &call->pingRequestTime);
            xferMs = (newTO.sec * 1000) + (newTO.usec / 1000);
        } else {
            return;
        }
        xferSize = rx_AckDataSize(rx_Window) + RX_HEADER_SIZE;
        break;

    default:
        return;
    }

    dpf(("CONG peer %lx/%u: sample (%s) size %ld, %ld ms (to %lu.%06lu, rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), (ackReason == RX_ACK_REQUESTED ? "dataack" : "pingack"), xferSize, xferMs, peer->timeout.sec, peer->timeout.usec, peer->smRtt, peer->ifMTU));

    /* Track only packets that are big enough.  xferSize already holds the
     * on-the-wire size of the sample; p itself must not be touched here
     * because it is NULL for ping responses. */
    if (xferSize < peer->ifMTU)
        return;

    /* absorb RTT data (in milliseconds) for these big packets */
    if (peer->smRtt == 0) {
        peer->smRtt = xferMs;
    } else {
        peer->smRtt = ((peer->smRtt * 15) + xferMs + 4) >> 4;
    }
    /* Never leave smRtt at zero: it is used as a divisor below, and a
     * first sample of 0 ms would otherwise slip through. */
    if (!peer->smRtt)
        peer->smRtt = 1;

    if (peer->countDown) {
        peer->countDown--;
        return;
    }
    peer->countDown = 10;       /* recalculate only every so often */

    /* In practice, we can measure only the RTT for full packets,
     * because of the way Rx acks the data that it receives.  (If it's
     * smaller than a full packet, it often gets implicitly acked
     * either by the call response (from a server) or by the next call
     * (from a client), and either case confuses transmission times
     * with processing times.)  Therefore, replace the above
     * more-sophisticated processing with a simpler version, where the
     * smoothed RTT is kept for full-size packets, and the time to
     * transmit a windowful of full-size packets is simply RTT *
     * windowSize. Again, we take two steps:
     - ensure the timeout is large enough for a single packet's RTT;
     - ensure that the window is small enough to fit in the desired timeout.*/

    /* First, the timeout check. */
    minTime = peer->smRtt;
    /* Get a reasonable estimate for a timeout period */
    minTime += minTime;
    newTO.sec = minTime / 1000;
    newTO.usec = (minTime - (newTO.sec * 1000)) * 1000;

    /* Increase the timeout period so that we can always do at least
     * one packet exchange */
    if (clock_Gt(&newTO, &peer->timeout)) {

        dpf(("CONG peer %lx/%u: timeout %lu.%06lu ==> %lu.%06lu (rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), peer->timeout.sec, peer->timeout.usec, newTO.sec, newTO.usec, peer->smRtt, peer->packetSize));

        peer->timeout = newTO;
    }

    /* Now, get an estimate for the transmit window size. */
    minTime = peer->timeout.sec * 1000 + (peer->timeout.usec / 1000);
    /* Now, convert to the number of full packets that could fit in a
     * reasonable fraction of that interval */
    minTime /= (peer->smRtt << 1);
    xferSize = minTime;         /* (make a copy) */

    /* Now clamp the size to reasonable bounds. */
    if (minTime <= 1)
        minTime = 1;
    else if (minTime > rx_Window)
        minTime = rx_Window;
/*    if (minTime != peer->maxWindow) {
      dpf(("CONG peer %lx/%u: windowsize %lu ==> %lu (to %lu.%06lu, rtt %u, ps %u)",
             ntohl(peer->host), ntohs(peer->port), peer->maxWindow, minTime,
             peer->timeout.sec, peer->timeout.usec, peer->smRtt,
             peer->packetSize));
      peer->maxWindow = minTime;
        elide... call->twind = minTime;
    }
*/

    /* Cut back on the peer timeout if it had earlier grown unreasonably.
     * Discern this by calculating the timeout necessary for rx_Window
     * packets. */
    if ((xferSize > rx_Window) && (peer->timeout.sec >= 3)) {
        /* calculate estimate for transmission interval in milliseconds */
        minTime = rx_Window * peer->smRtt;
        if (minTime < 1000) {
            dpf(("CONG peer %lx/%u: cut TO %lu.%06lu by 0.5 (rtt %u, ps %u)",
                 ntohl(peer->host), ntohs(peer->port), peer->timeout.sec,
                 peer->timeout.usec, peer->smRtt, peer->packetSize));

            newTO.sec = 0;      /* cut back on timeout by half a second */
            newTO.usec = 500000;
            clock_Sub(&peer->timeout, &newTO);
        }
    }

    return;
}                               /* end of rxi_ComputeRate */
#endif /* ADAPT_WINDOW */
6166
6167
6168 #ifdef RXDEBUG
/*
 * Initialize the rx debug-trace switch.
 *
 * On AFS_NT40_ENV builds rxdebug_active is cleared and then set to 1 when
 * the TRACE_OPTION_DEBUGLOG bit is present in the "TraceOption" registry
 * value found under AFSREG_CLT_SVC_PARAM_SUBKEY in HKEY_LOCAL_MACHINE.  If
 * the key cannot be opened or the value cannot be read, rxdebug_active
 * stays 0.  On every other build this routine has an empty body.
 */
void
rxi_DebugInit(void)
{
#ifdef AFS_NT40_ENV
#define TRACE_OPTION_DEBUGLOG 4
    HKEY clientParams;
    DWORD valueSize = sizeof(DWORD);
    DWORD traceBits;
    long status;

    /* Tracing is off unless the registry explicitly asks for it. */
    rxdebug_active = 0;

    status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
                          0, KEY_QUERY_VALUE, &clientParams);
    if (status == ERROR_SUCCESS) {
        status = RegQueryValueEx(clientParams, "TraceOption", NULL, NULL,
                                 (BYTE *) &traceBits, &valueSize);
        if (status == ERROR_SUCCESS && (traceBits & TRACE_OPTION_DEBUGLOG))
            rxdebug_active = 1;
        RegCloseKey(clientParams);
    }
#endif /* AFS_NT40_ENV */
}
6195
6196 #ifdef AFS_NT40_ENV
6197 void
6198 rx_DebugOnOff(int on)
6199 {
6200     rxdebug_active = on;
6201 }
6202 #endif /* AFS_NT40_ENV */
6203
6204
6205 /* Don't call this debugging routine directly; use dpf */
6206 void
6207 rxi_DebugPrint(char *format, int a1, int a2, int a3, int a4, int a5, int a6,
6208                int a7, int a8, int a9, int a10, int a11, int a12, int a13,
6209                int a14, int a15)
6210 {
6211 #ifdef AFS_NT40_ENV
6212     char msg[512];
6213     char tformat[256];
6214     size_t len;
6215
6216     len = _snprintf(tformat, sizeof(tformat), "tid[%d] %s", GetCurrentThreadId(), format);
6217
6218     if (len > 0) {
6219         len = _snprintf(msg, sizeof(msg)-2,
6220                         tformat, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
6221                         a11, a12, a13, a14, a15);
6222         if (len > 0) {
6223             if (msg[len-1] != '\n') {
6224                 msg[len] = '\n';
6225                 msg[len+1] = '\0';
6226             }
6227             OutputDebugString(msg);
6228         }
6229     }
6230 #else
6231     struct clock now;
6232     clock_GetTime(&now);
6233     fprintf(rx_Log, " %u.%.3u:", (unsigned int)now.sec,
6234             (unsigned int)now.usec / 1000);
6235     fprintf(rx_Log, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
6236             a13, a14, a15);
6237     putc('\n', rx_Log);
6238 #endif
6239 }
6240
6241 /*
6242  * This function is used to process the rx_stats structure that is local
6243  * to a process as well as an rx_stats structure received from a remote
6244  * process (via rxdebug).  Therefore, it needs to do minimal version
6245  * checking.
6246  */
6247 void
6248 rx_PrintTheseStats(FILE * file, struct rx_stats *s, int size,
6249                    afs_int32 freePackets, char version)
6250 {
6251     int i;
6252
6253     if (size != sizeof(struct rx_stats)) {
6254         fprintf(file,
6255                 "Unexpected size of stats structure: was %d, expected %d\n",
6256                 size, sizeof(struct rx_stats));
6257     }
6258
6259     fprintf(file, "rx stats: free packets %d, allocs %d, ", (int)freePackets,
6260             s->packetRequests);
6261
6262     if (version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6263         fprintf(file, "alloc-failures(rcv %d/%d,send %d/%d,ack %d)\n",
6264                 s->receivePktAllocFailures, s->receiveCbufPktAllocFailures,
6265                 s->sendPktAllocFailures, s->sendCbufPktAllocFailures,
6266                 s->specialPktAllocFailures);
6267     } else {
6268         fprintf(file, "alloc-failures(rcv %d,send %d,ack %d)\n",
6269                 s->receivePktAllocFailures, s->sendPktAllocFailures,
6270                 s->specialPktAllocFailures);
6271     }
6272
6273     fprintf(file,
6274             "   greedy %d, " "bogusReads %d (last from host %x), "
6275             "noPackets %d, " "noBuffers %d, " "selects %d, "
6276             "sendSelects %d\n", s->socketGreedy, s->bogusPacketOnRead,
6277             s->bogusHost, s->noPacketOnRead, s->noPacketBuffersOnRead,
6278             s->selects, s->sendSelects);
6279
6280     fprintf(file, "   packets read: ");
6281     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6282         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsRead[i]);
6283     }
6284     fprintf(file, "\n");
6285
6286     fprintf(file,
6287             "   other read counters: data %d, " "ack %d, " "dup %d "
6288             "spurious %d " "dally %d\n", s->dataPacketsRead,
6289             s->ackPacketsRead, s->dupPacketsRead, s->spuriousPacketsRead,
6290             s->ignorePacketDally);
6291
6292     fprintf(file, "   packets sent: ");
6293     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6294         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsSent[i]);
6295     }
6296     fprintf(file, "\n");
6297
6298     fprintf(file,
6299             "   other send counters: ack %d, " "data %d (not resends), "
6300             "resends %d, " "pushed %d, " "acked&ignored %d\n",
6301             s->ackPacketsSent, s->dataPacketsSent, s->dataPacketsReSent,
6302             s->dataPacketsPushed, s->ignoreAckedPacket);
6303
6304     fprintf(file,
6305             "   \t(these should be small) sendFailed %d, " "fatalErrors %d\n",
6306             s->netSendFailures, (int)s->fatalErrors);
6307
6308     if (s->nRttSamples) {
6309         fprintf(file, "   Average rtt is %0.3f, with %d samples\n",
6310                 clock_Float(&s->totalRtt) / s->nRttSamples, s->nRttSamples);
6311
6312         fprintf(file, "   Minimum rtt is %0.3f, maximum is %0.3f\n",
6313                 clock_Float(&s->minRtt), clock_Float(&s->maxRtt));
6314     }
6315
6316     fprintf(file,
6317             "   %d server connections, " "%d client connections, "
6318             "%d peer structs, " "%d call structs, " "%d free call structs\n",
6319             s->nServerConns, s->nClientConns, s->nPeerStructs,
6320             s->nCallStructs, s->nFreeCallStructs);
6321
6322 #if     !defined(AFS_PTHREAD_ENV) && !defined(AFS_USE_GETTIMEOFDAY)
6323     fprintf(file, "   %d clock updates\n", clock_nUpdates);
6324 #endif
6325
6326 }
6327
6328 /* for backward compatibility */
6329 void
6330 rx_PrintStats(FILE * file)
6331 {
6332     MUTEX_ENTER(&rx_stats_mutex);
6333     rx_PrintTheseStats(file, &rx_stats, sizeof(rx_stats), rx_nFreePackets,
6334                        RX_DEBUGI_VERSION);
6335     MUTEX_EXIT(&rx_stats_mutex);
6336 }
6337
6338 void
6339 rx_PrintPeerStats(FILE * file, struct rx_peer *peer)
6340 {
6341     fprintf(file, "Peer %x.%d.  " "Burst size %d, " "burst wait %u.%d.\n",
6342             ntohl(peer->host), (int)peer->port, (int)peer->burstSize,
6343             (int)peer->burstWait.sec, (int)peer->burstWait.usec);
6344
6345     fprintf(file,
6346             "   Rtt %d, " "retry time %u.%06d, " "total sent %d, "
6347             "resent %d\n", peer->rtt, (int)peer->timeout.sec,
6348             (int)peer->timeout.usec, peer->nSent, peer->reSends);
6349
6350     fprintf(file,
6351             "   Packet size %d, " "max in packet skew %d, "
6352             "max out packet skew %d\n", peer->ifMTU, (int)peer->inPacketSkew,
6353             (int)peer->outPacketSkew);
6354 }
6355
#ifdef AFS_PTHREAD_ENV
/*
 * This mutex protects the following static variables:
 * counter
 *
 * The lock and unlock calls are made outside of assert() so that they are
 * still executed if assertions are ever compiled out; only the check of
 * the result is an assertion.  Both macros expand to a single statement
 * and are meant to be followed by a semicolon.
 */

#define LOCK_RX_DEBUG \
    do { \
        int rx_debug_lock_rc_ = pthread_mutex_lock(&rx_debug_mutex); \
        assert(rx_debug_lock_rc_ == 0); \
        (void)rx_debug_lock_rc_; \
    } while (0)
#define UNLOCK_RX_DEBUG \
    do { \
        int rx_debug_unlock_rc_ = pthread_mutex_unlock(&rx_debug_mutex); \
        assert(rx_debug_unlock_rc_ == 0); \
        (void)rx_debug_unlock_rc_; \
    } while (0)
#else
/* Without pthreads there is nothing to lock. */
#define LOCK_RX_DEBUG
#define UNLOCK_RX_DEBUG
#endif /* AFS_PTHREAD_ENV */
6368
6369 static int
6370 MakeDebugCall(osi_socket socket, afs_uint32 remoteAddr, afs_uint16 remotePort,
6371               u_char type, void *inputData, size_t inputLength,
6372               void *outputData, size_t outputLength)
6373 {
6374     static afs_int32 counter = 100;
6375     time_t waitTime, waitCount, startTime;
6376     struct rx_header theader;
6377     char tbuffer[1500];
6378     register afs_int32 code;
6379     struct timeval tv_now, tv_wake, tv_delta;
6380     struct sockaddr_in taddr, faddr;
6381     int faddrLen;
6382     fd_set imask;
6383     register char *tp;
6384
6385     startTime = time(0);
6386     waitTime = 1;
6387     waitCount = 5;
6388     LOCK_RX_DEBUG;
6389     counter++;
6390     UNLOCK_RX_DEBUG;
6391     tp = &tbuffer[sizeof(struct rx_header)];
6392     taddr.sin_family = AF_INET;
6393     taddr.sin_port = remotePort;
6394     taddr.sin_addr.s_addr = remoteAddr;
6395 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
6396     taddr.sin_len = sizeof(struct sockaddr_in);
6397 #endif
6398     while (1) {
6399         memset(&theader, 0, sizeof(theader));
6400         theader.epoch = htonl(999);
6401         theader.cid = 0;
6402         theader.callNumber = htonl(counter);
6403         theader.seq = 0;
6404         theader.serial = 0;
6405         theader.type = type;
6406         theader.flags = RX_CLIENT_INITIATED | RX_LAST_PACKET;
6407         theader.serviceId = 0;
6408
6409         memcpy(tbuffer, &theader, sizeof(theader));
6410         memcpy(tp, inputData, inputLength);
6411         code =
6412             sendto(socket, tbuffer, inputLength + sizeof(struct rx_header), 0,
6413                    (struct sockaddr *)&taddr, sizeof(struct sockaddr_in));
6414
6415         /* see if there's a packet available */
6416         gettimeofday(&tv_wake,0);
6417         tv_wake.tv_sec += waitTime;
6418         for (;;) {
6419             FD_ZERO(&imask);
6420             FD_SET(socket, &imask);
6421             tv_delta.tv_sec = tv_wake.tv_sec;
6422             tv_delta.tv_usec = tv_wake.tv_usec;
6423             gettimeofday(&tv_now, 0);
6424
6425             if (tv_delta.tv_usec < tv_now.tv_usec) {
6426                 /* borrow */
6427                 tv_delta.tv_usec += 1000000;
6428                 tv_delta.tv_sec--;
6429             }
6430             tv_delta.tv_usec -= tv_now.tv_usec;
6431
6432             if (tv_delta.tv_sec < tv_now.tv_sec) {
6433                 /* time expired */
6434                 break;
6435             }
6436             tv_delta.tv_sec -= tv_now.tv_sec;
6437
6438             code = select(socket + 1, &imask, 0, 0, &tv_delta);
6439             if (code == 1 && FD_ISSET(socket, &imask)) {
6440                 /* now receive a packet */
6441                 faddrLen = sizeof(struct sockaddr_in);
6442                 code =
6443                     recvfrom(socket, tbuffer, sizeof(tbuffer), 0,
6444                              (struct sockaddr *)&faddr, &faddrLen);
6445
6446                 if (code > 0) {
6447                     memcpy(&theader, tbuffer, sizeof(struct rx_header));
6448                     if (counter == ntohl(theader.callNumber))
6449                         goto success;
6450                     continue;
6451                 }
6452             }
6453             break;
6454         }
6455
6456         /* see if we've timed out */
6457         if (!--waitCount) {
6458             return -1;
6459         }
6460         waitTime <<= 1;
6461     }
6462
6463  success:
6464     code -= sizeof(struct rx_header);
6465     if (code > outputLength)
6466         code = outputLength;
6467     memcpy(outputData, tp, code);
6468     return code;
6469 }
6470
6471 afs_int32
6472 rx_GetServerDebug(osi_socket socket, afs_uint32 remoteAddr,
6473                   afs_uint16 remotePort, struct rx_debugStats * stat,
6474                   afs_uint32 * supportedValues)
6475 {
6476     struct rx_debugIn in;
6477     afs_int32 rc = 0;
6478
6479     *supportedValues = 0;
6480     in.type = htonl(RX_DEBUGI_GETSTATS);
6481     in.index = 0;
6482
6483     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6484                        &in, sizeof(in), stat, sizeof(*stat));
6485
6486     /*
6487      * If the call was successful, fixup the version and indicate
6488      * what contents of the stat structure are valid.
6489      * Also do net to host conversion of fields here.
6490      */
6491
6492     if (rc >= 0) {
6493         if (stat->version >= RX_DEBUGI_VERSION_W_SECSTATS) {
6494             *supportedValues |= RX_SERVER_DEBUG_SEC_STATS;
6495         }
6496         if (stat->version >= RX_DEBUGI_VERSION_W_GETALLCONN) {
6497             *supportedValues |= RX_SERVER_DEBUG_ALL_CONN;
6498         }
6499         if (stat->version >= RX_DEBUGI_VERSION_W_RXSTATS) {
6500             *supportedValues |= RX_SERVER_DEBUG_RX_STATS;
6501         }
6502         if (stat->version >= RX_DEBUGI_VERSION_W_WAITERS) {
6503             *supportedValues |= RX_SERVER_DEBUG_WAITER_CNT;
6504         }
6505         if (stat->version >= RX_DEBUGI_VERSION_W_IDLETHREADS) {
6506             *supportedValues |= RX_SERVER_DEBUG_IDLE_THREADS;
6507         }
6508         if (stat->version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6509             *supportedValues |= RX_SERVER_DEBUG_NEW_PACKETS;
6510         }
6511         if (stat->version >= RX_DEBUGI_VERSION_W_GETPEER) {
6512             *supportedValues |= RX_SERVER_DEBUG_ALL_PEER;
6513         }
6514         if (stat->version >= RX_DEBUGI_VERSION_W_WAITED) {
6515             *supportedValues |= RX_SERVER_DEBUG_WAITED_CNT;
6516         }
6517
6518         stat->nFreePackets = ntohl(stat->nFreePackets);
6519         stat->packetReclaims = ntohl(stat->packetReclaims);
6520         stat->callsExecuted = ntohl(stat->callsExecuted);
6521         stat->nWaiting = ntohl(stat->nWaiting);
6522         stat->idleThreads = ntohl(stat->idleThreads);
6523     }
6524
6525     return rc;
6526 }
6527
6528 afs_int32
6529 rx_GetServerStats(osi_socket socket, afs_uint32 remoteAddr,
6530                   afs_uint16 remotePort, struct rx_stats * stat,
6531                   afs_uint32 * supportedValues)
6532 {
6533     struct rx_debugIn in;
6534     afs_int32 *lp = (afs_int32 *) stat;
6535     int i;
6536     afs_int32 rc = 0;
6537
6538     /*
6539      * supportedValues is currently unused, but added to allow future
6540      * versioning of this function.
6541      */
6542
6543     *supportedValues = 0;
6544     in.type = htonl(RX_DEBUGI_RXSTATS);
6545     in.index = 0;
6546     memset(stat, 0, sizeof(*stat));
6547
6548     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6549                        &in, sizeof(in), stat, sizeof(*stat));
6550
6551     if (rc >= 0) {
6552
6553         /*
6554          * Do net to host conversion here
6555          */
6556
6557         for (i = 0; i < sizeof(*stat) / sizeof(afs_int32); i++, lp++) {
6558             *lp = ntohl(*lp);
6559         }
6560     }
6561
6562     return rc;
6563 }
6564
6565 afs_int32
6566 rx_GetServerVersion(osi_socket socket, afs_uint32 remoteAddr,
6567                     afs_uint16 remotePort, size_t version_length,
6568                     char *version)
6569 {
6570     char a[1] = { 0 };
6571     return MakeDebugCall(socket, remoteAddr, remotePort,
6572                          RX_PACKET_TYPE_VERSION, a, 1, version,
6573                          version_length);
6574 }
6575
6576 afs_int32
6577 rx_GetServerConnections(osi_socket socket, afs_uint32 remoteAddr,
6578                         afs_uint16 remotePort, afs_int32 * nextConnection,
6579                         int allConnections, afs_uint32 debugSupportedValues,
6580                         struct rx_debugConn * conn,
6581                         afs_uint32 * supportedValues)
6582 {
6583     struct rx_debugIn in;
6584     afs_int32 rc = 0;
6585     int i;
6586
6587     /*
6588      * supportedValues is currently unused, but added to allow future
6589      * versioning of this function.
6590      */
6591
6592     *supportedValues = 0;
6593     if (allConnections) {
6594         in.type = htonl(RX_DEBUGI_GETALLCONN);
6595     } else {
6596         in.type = htonl(RX_DEBUGI_GETCONN);
6597     }
6598     in.index = htonl(*nextConnection);
6599     memset(conn, 0, sizeof(*conn));
6600
6601     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6602                        &in, sizeof(in), conn, sizeof(*conn));
6603
6604     if (rc >= 0) {
6605         *nextConnection += 1;
6606
6607         /*
6608          * Convert old connection format to new structure.
6609          */
6610
6611         if (debugSupportedValues & RX_SERVER_DEBUG_OLD_CONN) {
6612             struct rx_debugConn_vL *vL = (struct rx_debugConn_vL *)conn;
6613 #define MOVEvL(a) (conn->a = vL->a)
6614
6615             /* any old or unrecognized version... */
6616             for (i = 0; i < RX_MAXCALLS; i++) {
6617                 MOVEvL(callState[i]);
6618                 MOVEvL(callMode[i]);
6619                 MOVEvL(callFlags[i]);
6620                 MOVEvL(callOther[i]);
6621             }
6622             if (debugSupportedValues & RX_SERVER_DEBUG_SEC_STATS) {
6623                 MOVEvL(secStats.type);
6624                 MOVEvL(secStats.level);
6625                 MOVEvL(secStats.flags);
6626                 MOVEvL(secStats.expires);
6627                 MOVEvL(secStats.packetsReceived);
6628                 MOVEvL(secStats.packetsSent);
6629                 MOVEvL(secStats.bytesReceived);
6630                 MOVEvL(secStats.bytesSent);
6631             }
6632         }
6633
6634         /*
6635          * Do net to host conversion here
6636          * NOTE:
6637          *    I don't convert host or port since we are most likely
6638          *    going to want these in NBO.
6639          */
6640         conn->cid = ntohl(conn->cid);
6641         conn->serial = ntohl(conn->serial);
6642         for (i = 0; i < RX_MAXCALLS; i++) {
6643             conn->callNumber[i] = ntohl(conn->callNumber[i]);
6644         }
6645         conn->error = ntohl(conn->error);
6646         conn->secStats.flags = ntohl(conn->secStats.flags);
6647         conn->secStats.expires = ntohl(conn->secStats.expires);
6648         conn->secStats.packetsReceived =
6649             ntohl(conn->secStats.packetsReceived);
6650         conn->secStats.packetsSent = ntohl(conn->secStats.packetsSent);
6651         conn->secStats.bytesReceived = ntohl(conn->secStats.bytesReceived);
6652         conn->secStats.bytesSent = ntohl(conn->secStats.bytesSent);
6653         conn->epoch = ntohl(conn->epoch);
6654         conn->natMTU = ntohl(conn->natMTU);
6655     }
6656
6657     return rc;
6658 }
6659
6660 afs_int32
6661 rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr,
6662                   afs_uint16 remotePort, afs_int32 * nextPeer,
6663                   afs_uint32 debugSupportedValues, struct rx_debugPeer * peer,
6664                   afs_uint32 * supportedValues)
6665 {
6666     struct rx_debugIn in;
6667     afs_int32 rc = 0;
6668
6669     /*
6670      * supportedValues is currently unused, but added to allow future
6671      * versioning of this function.
6672      */
6673
6674     *supportedValues = 0;
6675     in.type = htonl(RX_DEBUGI_GETPEER);
6676     in.index = htonl(*nextPeer);
6677     memset(peer, 0, sizeof(*peer));
6678
6679     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6680                        &in, sizeof(in), peer, sizeof(*peer));
6681
6682     if (rc >= 0) {
6683         *nextPeer += 1;
6684
6685         /*
6686          * Do net to host conversion here
6687          * NOTE:
6688          *    I don't convert host or port since we are most likely
6689          *    going to want these in NBO.
6690          */
6691         peer->ifMTU = ntohs(peer->ifMTU);
6692         peer->idleWhen = ntohl(peer->idleWhen);
6693         peer->refCount = ntohs(peer->refCount);
6694         peer->burstWait.sec = ntohl(peer->burstWait.sec);
6695         peer->burstWait.usec = ntohl(peer->burstWait.usec);
6696         peer->rtt = ntohl(peer->rtt);
6697         peer->rtt_dev = ntohl(peer->rtt_dev);
6698         peer->timeout.sec = ntohl(peer->timeout.sec);
6699         peer->timeout.usec = ntohl(peer->timeout.usec);
6700         peer->nSent = ntohl(peer->nSent);
6701         peer->reSends = ntohl(peer->reSends);
6702         peer->inPacketSkew = ntohl(peer->inPacketSkew);
6703         peer->outPacketSkew = ntohl(peer->outPacketSkew);
6704         peer->rateFlag = ntohl(peer->rateFlag);
6705         peer->natMTU = ntohs(peer->natMTU);
6706         peer->maxMTU = ntohs(peer->maxMTU);
6707         peer->maxDgramPackets = ntohs(peer->maxDgramPackets);
6708         peer->ifDgramPackets = ntohs(peer->ifDgramPackets);
6709         peer->MTU = ntohs(peer->MTU);
6710         peer->cwind = ntohs(peer->cwind);
6711         peer->nDgramPackets = ntohs(peer->nDgramPackets);
6712         peer->congestSeq = ntohs(peer->congestSeq);
6713         peer->bytesSent.high = ntohl(peer->bytesSent.high);
6714         peer->bytesSent.low = ntohl(peer->bytesSent.low);
6715         peer->bytesReceived.high = ntohl(peer->bytesReceived.high);
6716         peer->bytesReceived.low = ntohl(peer->bytesReceived.low);
6717     }
6718
6719     return rc;
6720 }
6721 #endif /* RXDEBUG */
6722
6723 void
6724 shutdown_rx(void)
6725 {
6726     struct rx_serverQueueEntry *np;
6727     register int i, j;
6728 #ifndef KERNEL
6729     register struct rx_call *call;
6730     register struct rx_serverQueueEntry *sq;
6731 #endif /* KERNEL */
6732
6733     LOCK_RX_INIT;
6734     if (rxinit_status == 1) {
6735         UNLOCK_RX_INIT;
6736         return;                 /* Already shutdown. */
6737     }
6738 #ifndef KERNEL
6739     rx_port = 0;
6740 #ifndef AFS_PTHREAD_ENV
6741     FD_ZERO(&rx_selectMask);
6742 #endif /* AFS_PTHREAD_ENV */
6743     rxi_dataQuota = RX_MAX_QUOTA;
6744 #ifndef AFS_PTHREAD_ENV
6745     rxi_StopListener();
6746 #endif /* AFS_PTHREAD_ENV */
6747     shutdown_rxevent();
6748     rx_SetEpoch(0);
6749 #ifndef AFS_PTHREAD_ENV
6750 #ifndef AFS_USE_GETTIMEOFDAY
6751     clock_UnInit();
6752 #endif /* AFS_USE_GETTIMEOFDAY */
6753 #endif /* AFS_PTHREAD_ENV */
6754
6755     while (!queue_IsEmpty(&rx_freeCallQueue)) {
6756         call = queue_First(&rx_freeCallQueue, rx_call);
6757         queue_Remove(call);
6758         rxi_Free(call, sizeof(struct rx_call));
6759     }
6760
6761     while (!queue_IsEmpty(&rx_idleServerQueue)) {
6762         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
6763         queue_Remove(sq);
6764     }
6765 #endif /* KERNEL */
6766
6767     {
6768         struct rx_peer **peer_ptr, **peer_end;
6769         for (peer_ptr = &rx_peerHashTable[0], peer_end =
6770              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
6771              peer_ptr++) {
6772             struct rx_peer *peer, *next;
6773             for (peer = *peer_ptr; peer; peer = next) {
6774                 rx_interface_stat_p rpc_stat, nrpc_stat;
6775                 size_t space;
6776                 for (queue_Scan
6777                      (&peer->rpcStats, rpc_stat, nrpc_stat,
6778                       rx_interface_stat)) {
6779                     unsigned int num_funcs;
6780                     if (!rpc_stat)
6781                         break;
6782                     queue_Remove(&rpc_stat->queue_header);
6783                     queue_Remove(&rpc_stat->all_peers);
6784                     num_funcs = rpc_stat->stats[0].func_total;
6785                     space =
6786                         sizeof(rx_interface_stat_t) +
6787                         rpc_stat->stats[0].func_total *
6788                         sizeof(rx_function_entry_v1_t);
6789
6790                     rxi_Free(rpc_stat, space);
6791                     MUTEX_ENTER(&rx_rpc_stats);
6792                     rxi_rpc_peer_stat_cnt -= num_funcs;
6793                     MUTEX_EXIT(&rx_rpc_stats);
6794                 }
6795                 next = peer->next;
6796                 rxi_FreePeer(peer);
6797                 rx_MutexDecrement(rx_stats.nPeerStructs, rx_stats_mutex);
6798             }
6799         }
6800     }
6801     for (i = 0; i < RX_MAX_SERVICES; i++) {
6802         if (rx_services[i])
6803             rxi_Free(rx_services[i], sizeof(*rx_services[i]));
6804     }
6805     for (i = 0; i < rx_hashTableSize; i++) {
6806         register struct rx_connection *tc, *ntc;
6807         MUTEX_ENTER(&rx_connHashTable_lock);
6808         for (tc = rx_connHashTable[i]; tc; tc = ntc) {
6809             ntc = tc->next;
6810             for (j = 0; j < RX_MAXCALLS; j++) {
6811                 if (tc->call[j]) {
6812                     rxi_Free(tc->call[j], sizeof(*tc->call[j]));
6813                 }
6814             }
6815             rxi_Free(tc, sizeof(*tc));
6816         }
6817         MUTEX_EXIT(&rx_connHashTable_lock);
6818     }
6819
6820     MUTEX_ENTER(&freeSQEList_lock);
6821
6822     while ((np = rx_FreeSQEList)) {
6823         rx_FreeSQEList = *(struct rx_serverQueueEntry **)np;
6824         MUTEX_DESTROY(&np->lock);
6825         rxi_Free(np, sizeof(*np));
6826     }
6827
6828     MUTEX_EXIT(&freeSQEList_lock);
6829     MUTEX_DESTROY(&freeSQEList_lock);
6830     MUTEX_DESTROY(&rx_freeCallQueue_lock);
6831     MUTEX_DESTROY(&rx_connHashTable_lock);
6832     MUTEX_DESTROY(&rx_peerHashTable_lock);
6833     MUTEX_DESTROY(&rx_serverPool_lock);
6834
6835     osi_Free(rx_connHashTable,
6836              rx_hashTableSize * sizeof(struct rx_connection *));
6837     osi_Free(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6838
6839     UNPIN(rx_connHashTable,
6840           rx_hashTableSize * sizeof(struct rx_connection *));
6841     UNPIN(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6842
6843     rxi_FreeAllPackets();
6844
6845     MUTEX_ENTER(&rx_stats_mutex);
6846     rxi_dataQuota = RX_MAX_QUOTA;
6847     rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0;
6848     MUTEX_EXIT(&rx_stats_mutex);
6849
6850     rxinit_status = 1;
6851     UNLOCK_RX_INIT;
6852 }
6853
6854 #ifdef RX_ENABLE_LOCKS
6855 void
6856 osirx_AssertMine(afs_kmutex_t * lockaddr, char *msg)
6857 {
6858     if (!MUTEX_ISMINE(lockaddr))
6859         osi_Panic("Lock not held: %s", msg);
6860 }
6861 #endif /* RX_ENABLE_LOCKS */
6862
6863 #ifndef KERNEL
6864
6865 /*
6866  * Routines to implement connection specific data.
6867  */
6868
6869 int
6870 rx_KeyCreate(rx_destructor_t rtn)
6871 {
6872     int key;
6873     MUTEX_ENTER(&rxi_keyCreate_lock);
6874     key = rxi_keyCreate_counter++;
6875     rxi_keyCreate_destructor = (rx_destructor_t *)
6876         realloc((void *)rxi_keyCreate_destructor,
6877                 (key + 1) * sizeof(rx_destructor_t));
6878     rxi_keyCreate_destructor[key] = rtn;
6879     MUTEX_EXIT(&rxi_keyCreate_lock);
6880     return key;
6881 }
6882
6883 void
6884 rx_SetSpecific(struct rx_connection *conn, int key, void *ptr)
6885 {
6886     int i;
6887     MUTEX_ENTER(&conn->conn_data_lock);
6888     if (!conn->specific) {
6889         conn->specific = (void **)malloc((key + 1) * sizeof(void *));
6890         for (i = 0; i < key; i++)
6891             conn->specific[i] = NULL;
6892         conn->nSpecific = key + 1;
6893         conn->specific[key] = ptr;
6894     } else if (key >= conn->nSpecific) {
6895         conn->specific = (void **)
6896             realloc(conn->specific, (key + 1) * sizeof(void *));
6897         for (i = conn->nSpecific; i < key; i++)
6898             conn->specific[i] = NULL;
6899         conn->nSpecific = key + 1;
6900         conn->specific[key] = ptr;
6901     } else {
6902         if (conn->specific[key] && rxi_keyCreate_destructor[key])
6903             (*rxi_keyCreate_destructor[key]) (conn->specific[key]);
6904         conn->specific[key] = ptr;
6905     }
6906     MUTEX_EXIT(&conn->conn_data_lock);
6907 }
6908
6909 void *
6910 rx_GetSpecific(struct rx_connection *conn, int key)
6911 {
6912     void *ptr;
6913     MUTEX_ENTER(&conn->conn_data_lock);
6914     if (key >= conn->nSpecific)
6915         ptr = NULL;
6916     else
6917         ptr = conn->specific[key];
6918     MUTEX_EXIT(&conn->conn_data_lock);
6919     return ptr;
6920 }
6921
6922 #endif /* !KERNEL */
6923
6924 /*
6925  * processStats is a queue used to store the statistics for the local
6926  * process.  Its contents are similar to the contents of the rpcStats
6927  * queue on a rx_peer structure, but the actual data stored within
6928  * this queue contains totals across the lifetime of the process (assuming
6929  * the stats have not been reset) - unlike the per peer structures
6930  * which can come and go based upon the peer lifetime.
6931  */
6932
6933 static struct rx_queue processStats = { &processStats, &processStats };
6934
6935 /*
6936  * peerStats is a queue used to store the statistics for all peer structs.
6937  * Its contents are the union of all the peer rpcStats queues.
6938  */
6939
6940 static struct rx_queue peerStats = { &peerStats, &peerStats };
6941
6942 /*
6943  * rxi_monitor_processStats is used to turn process wide stat collection
6944  * on and off
6945  */
6946
6947 static int rxi_monitor_processStats = 0;
6948
6949 /*
6950  * rxi_monitor_peerStats is used to turn per peer stat collection on and off
6951  */
6952
6953 static int rxi_monitor_peerStats = 0;
6954
6955 /*
6956  * rxi_AddRpcStat - given all of the information for a particular rpc
6957  * call, create (if needed) and update the stat totals for the rpc.
6958  *
6959  * PARAMETERS
6960  *
6961  * IN stats - the queue of stats that will be updated with the new value
6962  *
6963  * IN rxInterface - a unique number that identifies the rpc interface
6964  *
6965  * IN currentFunc - the index of the function being invoked
6966  *
6967  * IN totalFunc - the total number of functions in this interface
6968  *
6969  * IN queueTime - the amount of time this function waited for a thread
6970  *
6971  * IN execTime - the amount of time this function invocation took to execute
6972  *
6973  * IN bytesSent - the number of bytes sent by this invocation
6974  *
6975  * IN bytesRcvd - the number of bytes received by this invocation
6976  *
6977  * IN isServer - if true, this invocation was made to a server
6978  *
6979  * IN remoteHost - the ip address of the remote host
6980  *
6981  * IN remotePort - the port of the remote host
6982  *
6983  * IN addToPeerList - if != 0, add newly created stat to the global peer list
6984  *
6985  * INOUT counter - if a new stats structure is allocated, the counter will
6986  * be updated with the new number of allocated stat structures
6987  *
6988  * RETURN CODES
6989  *
6990  * Returns 0 on success, or 1 if a needed stat structure cannot be allocated.
6991  */
6992
6993 static int
6994 rxi_AddRpcStat(struct rx_queue *stats, afs_uint32 rxInterface,
6995                afs_uint32 currentFunc, afs_uint32 totalFunc,
6996                struct clock *queueTime, struct clock *execTime,
6997                afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd, int isServer,
6998                afs_uint32 remoteHost, afs_uint32 remotePort,
6999                int addToPeerList, unsigned int *counter)
7000 {
7001     int rc = 0;
7002     rx_interface_stat_p rpc_stat, nrpc_stat;
7003
7004     /*
7005      * See if there's already a structure for this interface
7006      */
7007
7008     for (queue_Scan(stats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7009         if ((rpc_stat->stats[0].interfaceId == rxInterface)
7010             && (rpc_stat->stats[0].remote_is_server == isServer))
7011             break;
7012     }
7013
7014     /*
7015      * Didn't find a match so allocate a new structure and add it to the
7016      * queue.
7017      */
7018
7019     if (queue_IsEnd(stats, rpc_stat) || (rpc_stat == NULL)
7020         || (rpc_stat->stats[0].interfaceId != rxInterface)
7021         || (rpc_stat->stats[0].remote_is_server != isServer)) {
7022         int i;
7023         size_t space;
7024
7025         space =
7026             sizeof(rx_interface_stat_t) +
7027             totalFunc * sizeof(rx_function_entry_v1_t);
7028
7029         rpc_stat = (rx_interface_stat_p) rxi_Alloc(space);
7030         if (rpc_stat == NULL) {
7031             rc = 1;
7032             goto fail;
7033         }
7034         *counter += totalFunc;
7035         for (i = 0; i < totalFunc; i++) {
7036             rpc_stat->stats[i].remote_peer = remoteHost;
7037             rpc_stat->stats[i].remote_port = remotePort;
7038             rpc_stat->stats[i].remote_is_server = isServer;
7039             rpc_stat->stats[i].interfaceId = rxInterface;
7040             rpc_stat->stats[i].func_total = totalFunc;
7041             rpc_stat->stats[i].func_index = i;
7042             hzero(rpc_stat->stats[i].invocations);
7043             hzero(rpc_stat->stats[i].bytes_sent);
7044             hzero(rpc_stat->stats[i].bytes_rcvd);
7045             rpc_stat->stats[i].queue_time_sum.sec = 0;
7046             rpc_stat->stats[i].queue_time_sum.usec = 0;
7047             rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7048             rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7049             rpc_stat->stats[i].queue_time_min.sec = 9999999;
7050             rpc_stat->stats[i].queue_time_min.usec = 9999999;
7051             rpc_stat->stats[i].queue_time_max.sec = 0;
7052             rpc_stat->stats[i].queue_time_max.usec = 0;
7053             rpc_stat->stats[i].execution_time_sum.sec = 0;
7054             rpc_stat->stats[i].execution_time_sum.usec = 0;
7055             rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7056             rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7057             rpc_stat->stats[i].execution_time_min.sec = 9999999;
7058             rpc_stat->stats[i].execution_time_min.usec = 9999999;
7059             rpc_stat->stats[i].execution_time_max.sec = 0;
7060             rpc_stat->stats[i].execution_time_max.usec = 0;
7061         }
7062         queue_Prepend(stats, rpc_stat);
7063         if (addToPeerList) {
7064             queue_Prepend(&peerStats, &rpc_stat->all_peers);
7065         }
7066     }
7067
7068     /*
7069      * Increment the stats for this function
7070      */
7071
7072     hadd32(rpc_stat->stats[currentFunc].invocations, 1);
7073     hadd(rpc_stat->stats[currentFunc].bytes_sent, *bytesSent);
7074     hadd(rpc_stat->stats[currentFunc].bytes_rcvd, *bytesRcvd);
7075     clock_Add(&rpc_stat->stats[currentFunc].queue_time_sum, queueTime);
7076     clock_AddSq(&rpc_stat->stats[currentFunc].queue_time_sum_sqr, queueTime);
7077     if (clock_Lt(queueTime, &rpc_stat->stats[currentFunc].queue_time_min)) {
7078         rpc_stat->stats[currentFunc].queue_time_min = *queueTime;
7079     }
7080     if (clock_Gt(queueTime, &rpc_stat->stats[currentFunc].queue_time_max)) {
7081         rpc_stat->stats[currentFunc].queue_time_max = *queueTime;
7082     }
7083     clock_Add(&rpc_stat->stats[currentFunc].execution_time_sum, execTime);
7084     clock_AddSq(&rpc_stat->stats[currentFunc].execution_time_sum_sqr,
7085                 execTime);
7086     if (clock_Lt(execTime, &rpc_stat->stats[currentFunc].execution_time_min)) {
7087         rpc_stat->stats[currentFunc].execution_time_min = *execTime;
7088     }
7089     if (clock_Gt(execTime, &rpc_stat->stats[currentFunc].execution_time_max)) {
7090         rpc_stat->stats[currentFunc].execution_time_max = *execTime;
7091     }
7092
7093   fail:
7094     return rc;
7095 }
7096
7097 /*
7098  * rx_IncrementTimeAndCount - increment the times and count for a particular
7099  * rpc function.
7100  *
7101  * PARAMETERS
7102  *
7103  * IN peer - the peer who invoked the rpc
7104  *
7105  * IN rxInterface - a unique number that identifies the rpc interface
7106  *
7107  * IN currentFunc - the index of the function being invoked
7108  *
7109  * IN totalFunc - the total number of functions in this interface
7110  *
7111  * IN queueTime - the amount of time this function waited for a thread
7112  *
7113  * IN execTime - the amount of time this function invocation took to execute
7114  *
7115  * IN bytesSent - the number bytes sent by this invocation
7116  *
7117  * IN bytesRcvd - the number bytes received by this invocation
7118  *
7119  * IN isServer - if true, this invocation was made to a server
7120  *
7121  * RETURN CODES
7122  *
7123  * Returns void.
7124  */
7125
7126 void
7127 rx_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
7128                          afs_uint32 currentFunc, afs_uint32 totalFunc,
7129                          struct clock *queueTime, struct clock *execTime,
7130                          afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd,
7131                          int isServer)
7132 {
7133
7134     if (!(rxi_monitor_peerStats || rxi_monitor_processStats))
7135         return;
7136
7137     MUTEX_ENTER(&rx_rpc_stats);
7138     MUTEX_ENTER(&peer->peer_lock);
7139
7140     if (rxi_monitor_peerStats) {
7141         rxi_AddRpcStat(&peer->rpcStats, rxInterface, currentFunc, totalFunc,
7142                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7143                        peer->host, peer->port, 1, &rxi_rpc_peer_stat_cnt);
7144     }
7145
7146     if (rxi_monitor_processStats) {
7147         rxi_AddRpcStat(&processStats, rxInterface, currentFunc, totalFunc,
7148                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
7149                        0xffffffff, 0xffffffff, 0, &rxi_rpc_process_stat_cnt);
7150     }
7151
7152     MUTEX_EXIT(&peer->peer_lock);
7153     MUTEX_EXIT(&rx_rpc_stats);
7154
7155 }
7156
7157 /*
7158  * rx_MarshallProcessRPCStats - marshall an array of rpc statistics
7159  *
7160  * PARAMETERS
7161  *
7162  * IN callerVersion - the rpc stat version of the caller.
7163  *
7164  * IN count - the number of entries to marshall.
7165  *
7166  * IN stats - pointer to stats to be marshalled.
7167  *
7168  * OUT ptr - Where to store the marshalled data.
7169  *
7170  * RETURN CODES
7171  *
7172  * Returns void.
7173  */
7174 void
7175 rx_MarshallProcessRPCStats(afs_uint32 callerVersion, int count,
7176                            rx_function_entry_v1_t * stats, afs_uint32 ** ptrP)
7177 {
7178     int i;
7179     afs_uint32 *ptr;
7180
7181     /*
7182      * We only support the first version
7183      */
7184     for (ptr = *ptrP, i = 0; i < count; i++, stats++) {
7185         *(ptr++) = stats->remote_peer;
7186         *(ptr++) = stats->remote_port;
7187         *(ptr++) = stats->remote_is_server;
7188         *(ptr++) = stats->interfaceId;
7189         *(ptr++) = stats->func_total;
7190         *(ptr++) = stats->func_index;
7191         *(ptr++) = hgethi(stats->invocations);
7192         *(ptr++) = hgetlo(stats->invocations);
7193         *(ptr++) = hgethi(stats->bytes_sent);
7194         *(ptr++) = hgetlo(stats->bytes_sent);
7195         *(ptr++) = hgethi(stats->bytes_rcvd);
7196         *(ptr++) = hgetlo(stats->bytes_rcvd);
7197         *(ptr++) = stats->queue_time_sum.sec;
7198         *(ptr++) = stats->queue_time_sum.usec;
7199         *(ptr++) = stats->queue_time_sum_sqr.sec;
7200         *(ptr++) = stats->queue_time_sum_sqr.usec;
7201         *(ptr++) = stats->queue_time_min.sec;
7202         *(ptr++) = stats->queue_time_min.usec;
7203         *(ptr++) = stats->queue_time_max.sec;
7204         *(ptr++) = stats->queue_time_max.usec;
7205         *(ptr++) = stats->execution_time_sum.sec;
7206         *(ptr++) = stats->execution_time_sum.usec;
7207         *(ptr++) = stats->execution_time_sum_sqr.sec;
7208         *(ptr++) = stats->execution_time_sum_sqr.usec;
7209         *(ptr++) = stats->execution_time_min.sec;
7210         *(ptr++) = stats->execution_time_min.usec;
7211         *(ptr++) = stats->execution_time_max.sec;
7212         *(ptr++) = stats->execution_time_max.usec;
7213     }
7214     *ptrP = ptr;
7215 }
7216
7217 /*
7218  * rx_RetrieveProcessRPCStats - retrieve all of the rpc statistics for
7219  * this process
7220  *
7221  * PARAMETERS
7222  *
7223  * IN callerVersion - the rpc stat version of the caller
7224  *
7225  * OUT myVersion - the rpc stat version of this function
7226  *
7227  * OUT clock_sec - local time seconds
7228  *
7229  * OUT clock_usec - local time microseconds
7230  *
7231  * OUT allocSize - the number of bytes allocated to contain stats
7232  *
7233  * OUT statCount - the number stats retrieved from this process.
7234  *
7235  * OUT stats - the actual stats retrieved from this process.
7236  *
7237  * RETURN CODES
7238  *
7239  * Returns void.  If successful, stats will != NULL.
7240  */
7241
7242 int
7243 rx_RetrieveProcessRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7244                            afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7245                            size_t * allocSize, afs_uint32 * statCount,
7246                            afs_uint32 ** stats)
7247 {
7248     size_t space = 0;
7249     afs_uint32 *ptr;
7250     struct clock now;
7251     int rc = 0;
7252
7253     *stats = 0;
7254     *allocSize = 0;
7255     *statCount = 0;
7256     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7257
7258     /*
7259      * Check to see if stats are enabled
7260      */
7261
7262     MUTEX_ENTER(&rx_rpc_stats);
7263     if (!rxi_monitor_processStats) {
7264         MUTEX_EXIT(&rx_rpc_stats);
7265         return rc;
7266     }
7267
7268     clock_GetTime(&now);
7269     *clock_sec = now.sec;
7270     *clock_usec = now.usec;
7271
7272     /*
7273      * Allocate the space based upon the caller version
7274      *
7275      * If the client is at an older version than we are,
7276      * we return the statistic data in the older data format, but
7277      * we still return our version number so the client knows we
7278      * are maintaining more data than it can retrieve.
7279      */
7280
7281     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7282         space = rxi_rpc_process_stat_cnt * sizeof(rx_function_entry_v1_t);
7283         *statCount = rxi_rpc_process_stat_cnt;
7284     } else {
7285         /*
7286          * This can't happen yet, but in the future version changes
7287          * can be handled by adding additional code here
7288          */
7289     }
7290
7291     if (space > (size_t) 0) {
7292         *allocSize = space;
7293         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7294
7295         if (ptr != NULL) {
7296             rx_interface_stat_p rpc_stat, nrpc_stat;
7297
7298
7299             for (queue_Scan
7300                  (&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7301                 /*
7302                  * Copy the data based upon the caller version
7303                  */
7304                 rx_MarshallProcessRPCStats(callerVersion,
7305                                            rpc_stat->stats[0].func_total,
7306                                            rpc_stat->stats, &ptr);
7307             }
7308         } else {
7309             rc = ENOMEM;
7310         }
7311     }
7312     MUTEX_EXIT(&rx_rpc_stats);
7313     return rc;
7314 }
7315
7316 /*
7317  * rx_RetrievePeerRPCStats - retrieve all of the rpc statistics for the peers
7318  *
7319  * PARAMETERS
7320  *
7321  * IN callerVersion - the rpc stat version of the caller
7322  *
7323  * OUT myVersion - the rpc stat version of this function
7324  *
7325  * OUT clock_sec - local time seconds
7326  *
7327  * OUT clock_usec - local time microseconds
7328  *
7329  * OUT allocSize - the number of bytes allocated to contain stats
7330  *
7331  * OUT statCount - the number of stats retrieved from the individual
7332  * peer structures.
7333  *
7334  * OUT stats - the actual stats retrieved from the individual peer structures.
7335  *
7336  * RETURN CODES
7337  *
7338  * Returns void.  If successful, stats will != NULL.
7339  */
7340
7341 int
7342 rx_RetrievePeerRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7343                         afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7344                         size_t * allocSize, afs_uint32 * statCount,
7345                         afs_uint32 ** stats)
7346 {
7347     size_t space = 0;
7348     afs_uint32 *ptr;
7349     struct clock now;
7350     int rc = 0;
7351
7352     *stats = 0;
7353     *statCount = 0;
7354     *allocSize = 0;
7355     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7356
7357     /*
7358      * Check to see if stats are enabled
7359      */
7360
7361     MUTEX_ENTER(&rx_rpc_stats);
7362     if (!rxi_monitor_peerStats) {
7363         MUTEX_EXIT(&rx_rpc_stats);
7364         return rc;
7365     }
7366
7367     clock_GetTime(&now);
7368     *clock_sec = now.sec;
7369     *clock_usec = now.usec;
7370
7371     /*
7372      * Allocate the space based upon the caller version
7373      *
7374      * If the client is at an older version than we are,
7375      * we return the statistic data in the older data format, but
7376      * we still return our version number so the client knows we
7377      * are maintaining more data than it can retrieve.
7378      */
7379
7380     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7381         space = rxi_rpc_peer_stat_cnt * sizeof(rx_function_entry_v1_t);
7382         *statCount = rxi_rpc_peer_stat_cnt;
7383     } else {
7384         /*
7385          * This can't happen yet, but in the future version changes
7386          * can be handled by adding additional code here
7387          */
7388     }
7389
7390     if (space > (size_t) 0) {
7391         *allocSize = space;
7392         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7393
7394         if (ptr != NULL) {
7395             rx_interface_stat_p rpc_stat, nrpc_stat;
7396             char *fix_offset;
7397
7398             for (queue_Scan
7399                  (&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7400                 /*
7401                  * We have to fix the offset of rpc_stat since we are
7402                  * keeping this structure on two rx_queues.  The rx_queue
7403                  * package assumes that the rx_queue member is the first
7404                  * member of the structure.  That is, rx_queue assumes that
7405                  * any one item is only on one queue at a time.  We are
7406                  * breaking that assumption and so we have to do a little
7407                  * math to fix our pointers.
7408                  */
7409
7410                 fix_offset = (char *)rpc_stat;
7411                 fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7412                 rpc_stat = (rx_interface_stat_p) fix_offset;
7413
7414                 /*
7415                  * Copy the data based upon the caller version
7416                  */
7417                 rx_MarshallProcessRPCStats(callerVersion,
7418                                            rpc_stat->stats[0].func_total,
7419                                            rpc_stat->stats, &ptr);
7420             }
7421         } else {
7422             rc = ENOMEM;
7423         }
7424     }
7425     MUTEX_EXIT(&rx_rpc_stats);
7426     return rc;
7427 }
7428
7429 /*
7430  * rx_FreeRPCStats - free memory allocated by
7431  *                   rx_RetrieveProcessRPCStats and rx_RetrievePeerRPCStats
7432  *
7433  * PARAMETERS
7434  *
7435  * IN stats - stats previously returned by rx_RetrieveProcessRPCStats or
7436  * rx_RetrievePeerRPCStats
7437  *
7438  * IN allocSize - the number of bytes in stats.
7439  *
7440  * RETURN CODES
7441  *
7442  * Returns void.
7443  */
7444
7445 void
7446 rx_FreeRPCStats(afs_uint32 * stats, size_t allocSize)
7447 {
7448     rxi_Free(stats, allocSize);
7449 }
7450
7451 /*
7452  * rx_queryProcessRPCStats - see if process rpc stat collection is
7453  * currently enabled.
7454  *
7455  * PARAMETERS
7456  *
7457  * RETURN CODES
7458  *
7459  * Returns 0 if stats are not enabled != 0 otherwise
7460  */
7461
7462 int
7463 rx_queryProcessRPCStats(void)
7464 {
7465     int rc;
7466     MUTEX_ENTER(&rx_rpc_stats);
7467     rc = rxi_monitor_processStats;
7468     MUTEX_EXIT(&rx_rpc_stats);
7469     return rc;
7470 }
7471
7472 /*
7473  * rx_queryPeerRPCStats - see if peer stat collection is currently enabled.
7474  *
7475  * PARAMETERS
7476  *
7477  * RETURN CODES
7478  *
7479  * Returns 0 if stats are not enabled != 0 otherwise
7480  */
7481
7482 int
7483 rx_queryPeerRPCStats(void)
7484 {
7485     int rc;
7486     MUTEX_ENTER(&rx_rpc_stats);
7487     rc = rxi_monitor_peerStats;
7488     MUTEX_EXIT(&rx_rpc_stats);
7489     return rc;
7490 }
7491
7492 /*
7493  * rx_enableProcessRPCStats - begin rpc stat collection for entire process
7494  *
7495  * PARAMETERS
7496  *
7497  * RETURN CODES
7498  *
7499  * Returns void.
7500  */
7501
7502 void
7503 rx_enableProcessRPCStats(void)
7504 {
7505     MUTEX_ENTER(&rx_rpc_stats);
7506     rx_enable_stats = 1;
7507     rxi_monitor_processStats = 1;
7508     MUTEX_EXIT(&rx_rpc_stats);
7509 }
7510
7511 /*
7512  * rx_enablePeerRPCStats - begin rpc stat collection per peer structure
7513  *
7514  * PARAMETERS
7515  *
7516  * RETURN CODES
7517  *
7518  * Returns void.
7519  */
7520
7521 void
7522 rx_enablePeerRPCStats(void)
7523 {
7524     MUTEX_ENTER(&rx_rpc_stats);
7525     rx_enable_stats = 1;
7526     rxi_monitor_peerStats = 1;
7527     MUTEX_EXIT(&rx_rpc_stats);
7528 }
7529
7530 /*
7531  * rx_disableProcessRPCStats - stop rpc stat collection for entire process
7532  *
7533  * PARAMETERS
7534  *
7535  * RETURN CODES
7536  *
7537  * Returns void.
7538  */
7539
7540 void
7541 rx_disableProcessRPCStats(void)
7542 {
7543     rx_interface_stat_p rpc_stat, nrpc_stat;
7544     size_t space;
7545
7546     MUTEX_ENTER(&rx_rpc_stats);
7547
7548     /*
7549      * Turn off process statistics and if peer stats is also off, turn
7550      * off everything
7551      */
7552
7553     rxi_monitor_processStats = 0;
7554     if (rxi_monitor_peerStats == 0) {
7555         rx_enable_stats = 0;
7556     }
7557
7558     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7559         unsigned int num_funcs = 0;
7560         if (!rpc_stat)
7561             break;
7562         queue_Remove(rpc_stat);
7563         num_funcs = rpc_stat->stats[0].func_total;
7564         space =
7565             sizeof(rx_interface_stat_t) +
7566             rpc_stat->stats[0].func_total * sizeof(rx_function_entry_v1_t);
7567
7568         rxi_Free(rpc_stat, space);
7569         rxi_rpc_process_stat_cnt -= num_funcs;
7570     }
7571     MUTEX_EXIT(&rx_rpc_stats);
7572 }
7573
7574 /*
7575  * rx_disablePeerRPCStats - stop rpc stat collection for peers
7576  *
7577  * PARAMETERS
7578  *
7579  * RETURN CODES
7580  *
7581  * Returns void.
7582  */
7583
7584 void
7585 rx_disablePeerRPCStats(void)
7586 {
7587     struct rx_peer **peer_ptr, **peer_end;
7588     int code;
7589
7590     MUTEX_ENTER(&rx_rpc_stats);
7591
7592     /*
7593      * Turn off peer statistics and if process stats is also off, turn
7594      * off everything
7595      */
7596
7597     rxi_monitor_peerStats = 0;
7598     if (rxi_monitor_processStats == 0) {
7599         rx_enable_stats = 0;
7600     }
7601
7602     MUTEX_ENTER(&rx_peerHashTable_lock);
7603     for (peer_ptr = &rx_peerHashTable[0], peer_end =
7604          &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
7605          peer_ptr++) {
7606         struct rx_peer *peer, *next, *prev;
7607         for (prev = peer = *peer_ptr; peer; peer = next) {
7608             next = peer->next;
7609             code = MUTEX_TRYENTER(&peer->peer_lock);
7610             if (code) {
7611                 rx_interface_stat_p rpc_stat, nrpc_stat;
7612                 size_t space;
7613                 for (queue_Scan
7614                      (&peer->rpcStats, rpc_stat, nrpc_stat,
7615                       rx_interface_stat)) {
7616                     unsigned int num_funcs = 0;
7617                     if (!rpc_stat)
7618                         break;
7619                     queue_Remove(&rpc_stat->queue_header);
7620                     queue_Remove(&rpc_stat->all_peers);
7621                     num_funcs = rpc_stat->stats[0].func_total;
7622                     space =
7623                         sizeof(rx_interface_stat_t) +
7624                         rpc_stat->stats[0].func_total *
7625                         sizeof(rx_function_entry_v1_t);
7626
7627                     rxi_Free(rpc_stat, space);
7628                     rxi_rpc_peer_stat_cnt -= num_funcs;
7629                 }
7630                 MUTEX_EXIT(&peer->peer_lock);
7631                 if (prev == *peer_ptr) {
7632                     *peer_ptr = next;
7633                     prev = next;
7634                 } else
7635                     prev->next = next;
7636             } else {
7637                 prev = peer;
7638             }
7639         }
7640     }
7641     MUTEX_EXIT(&rx_peerHashTable_lock);
7642     MUTEX_EXIT(&rx_rpc_stats);
7643 }
7644
7645 /*
7646  * rx_clearProcessRPCStats - clear the contents of the rpc stats according
7647  * to clearFlag
7648  *
7649  * PARAMETERS
7650  *
7651  * IN clearFlag - flag indicating which stats to clear
7652  *
7653  * RETURN CODES
7654  *
7655  * Returns void.
7656  */
7657
7658 void
7659 rx_clearProcessRPCStats(afs_uint32 clearFlag)
7660 {
7661     rx_interface_stat_p rpc_stat, nrpc_stat;
7662
7663     MUTEX_ENTER(&rx_rpc_stats);
7664
7665     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7666         unsigned int num_funcs = 0, i;
7667         num_funcs = rpc_stat->stats[0].func_total;
7668         for (i = 0; i < num_funcs; i++) {
7669             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7670                 hzero(rpc_stat->stats[i].invocations);
7671             }
7672             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7673                 hzero(rpc_stat->stats[i].bytes_sent);
7674             }
7675             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7676                 hzero(rpc_stat->stats[i].bytes_rcvd);
7677             }
7678             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7679                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7680                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7681             }
7682             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7683                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7684                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7685             }
7686             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7687                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7688                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7689             }
7690             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7691                 rpc_stat->stats[i].queue_time_max.sec = 0;
7692                 rpc_stat->stats[i].queue_time_max.usec = 0;
7693             }
7694             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7695                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7696                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7697             }
7698             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7699                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7700                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7701             }
7702             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7703                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7704                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7705             }
7706             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7707                 rpc_stat->stats[i].execution_time_max.sec = 0;
7708                 rpc_stat->stats[i].execution_time_max.usec = 0;
7709             }
7710         }
7711     }
7712
7713     MUTEX_EXIT(&rx_rpc_stats);
7714 }
7715
7716 /*
7717  * rx_clearPeerRPCStats - clear the contents of the rpc stats according
7718  * to clearFlag
7719  *
7720  * PARAMETERS
7721  *
7722  * IN clearFlag - flag indicating which stats to clear
7723  *
7724  * RETURN CODES
7725  *
7726  * Returns void.
7727  */
7728
7729 void
7730 rx_clearPeerRPCStats(afs_uint32 clearFlag)
7731 {
7732     rx_interface_stat_p rpc_stat, nrpc_stat;
7733
7734     MUTEX_ENTER(&rx_rpc_stats);
7735
7736     for (queue_Scan(&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7737         unsigned int num_funcs = 0, i;
7738         char *fix_offset;
7739         /*
7740          * We have to fix the offset of rpc_stat since we are
7741          * keeping this structure on two rx_queues.  The rx_queue
7742          * package assumes that the rx_queue member is the first
7743          * member of the structure.  That is, rx_queue assumes that
7744          * any one item is only on one queue at a time.  We are
7745          * breaking that assumption and so we have to do a little
7746          * math to fix our pointers.
7747          */
7748
7749         fix_offset = (char *)rpc_stat;
7750         fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7751         rpc_stat = (rx_interface_stat_p) fix_offset;
7752
7753         num_funcs = rpc_stat->stats[0].func_total;
7754         for (i = 0; i < num_funcs; i++) {
7755             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7756                 hzero(rpc_stat->stats[i].invocations);
7757             }
7758             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7759                 hzero(rpc_stat->stats[i].bytes_sent);
7760             }
7761             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7762                 hzero(rpc_stat->stats[i].bytes_rcvd);
7763             }
7764             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7765                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7766                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7767             }
7768             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7769                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7770                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7771             }
7772             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7773                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7774                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7775             }
7776             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7777                 rpc_stat->stats[i].queue_time_max.sec = 0;
7778                 rpc_stat->stats[i].queue_time_max.usec = 0;
7779             }
7780             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7781                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7782                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7783             }
7784             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7785                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7786                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7787             }
7788             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7789                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7790                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7791             }
7792             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7793                 rpc_stat->stats[i].execution_time_max.sec = 0;
7794                 rpc_stat->stats[i].execution_time_max.usec = 0;
7795             }
7796         }
7797     }
7798
7799     MUTEX_EXIT(&rx_rpc_stats);
7800 }
7801
/*
 * rxi_rxstat_userok holds the routine consulted to decide whether a caller
 * is authorized to enable/disable/clear RX statistics (it is expected to
 * return 1 when the caller is authorized).  It stays NULL until
 * rx_SetRxStatUserOk installs a routine.
 */
static int (*rxi_rxstat_userok) (struct rx_call * call) = NULL;

/*
 * rx_SetRxStatUserOk - install the authorization routine used by
 * rx_RxStatUserOk.
 *
 * PARAMETERS
 *
 * IN proc - the routine to install; stored as given, with no locking and
 * no validation.
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_SetRxStatUserOk(int (*proc) (struct rx_call * call))
{
    rxi_rxstat_userok = proc;
}
7813
7814 int
7815 rx_RxStatUserOk(struct rx_call *call)
7816 {
7817     if (!rxi_rxstat_userok)
7818         return 0;
7819     return rxi_rxstat_userok(call);
7820 }
7821
#ifdef AFS_NT40_ENV
/*
 * DllMain() -- Entry-point function called by the DllMainCRTStartup()
 *     function in the MSVC runtime DLL (msvcrt.dll).
 *
 *     Runs INIT_PTHREAD_LOCKS when the library is attached to a process.
 *     Returns TRUE for process attach and process detach, FALSE for any
 *     other reason code.
 *
 *     Note: the system serializes calls to this function.
 */
BOOL WINAPI
DllMain(HINSTANCE dllInstHandle,        /* instance handle for this DLL module */
        DWORD reason,                   /* reason function is being called */
        LPVOID reserved)                /* reserved for future use */
{
    if (reason == DLL_PROCESS_ATTACH) {
        /* library is being attached to a process */
        INIT_PTHREAD_LOCKS;
        return TRUE;
    }

    if (reason == DLL_PROCESS_DETACH) {
        /* nothing to tear down on detach */
        return TRUE;
    }

    /* every other notification is reported as FALSE */
    return FALSE;
}
#endif
7848