src/rx/rx.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /* RX:  Extended Remote Procedure Call */
  11
  12 #include <afsconfig.h>
  13 #ifdef  KERNEL
  14 #include "afs/param.h"
  15 #else
  16 #include <afs/param.h>
  17 #endif
  18
  19 RCSID
  20     ("$Header$");
  21
  22 #ifdef KERNEL
  23 #include "afs/sysincludes.h"
  24 #include "afsincludes.h"
  25 #ifndef UKERNEL
  26 #include "h/types.h"
  27 #include "h/time.h"
  28 #include "h/stat.h"
  29 #ifdef  AFS_OSF_ENV
  30 #include <net/net_globals.h>
  31 #endif /* AFS_OSF_ENV */
  32 #ifdef AFS_LINUX20_ENV
  33 #include "h/socket.h"
  34 #endif
  35 #include "netinet/in.h"
  36 #include "afs/afs_args.h"
  37 #include "afs/afs_osi.h"
  38 #ifdef RX_KERNEL_TRACE
  39 #include "rx_kcommon.h"
  40 #endif
  41 #if     (defined(AFS_AUX_ENV) || defined(AFS_AIX_ENV))
  42 #include "h/systm.h"
  43 #endif
  44 #ifdef RXDEBUG
  45 #undef RXDEBUG                  /* turn off debugging */
  46 #endif /* RXDEBUG */
  47 #if defined(AFS_SGI_ENV)
  48 #include "sys/debug.h"
  49 #endif
  50 #include "afsint.h"
  51 #ifdef  AFS_OSF_ENV
  52 #undef kmem_alloc
  53 #undef kmem_free
  54 #undef mem_alloc
  55 #undef mem_free
  56 #undef register
  57 #endif /* AFS_OSF_ENV */
  58 #else /* !UKERNEL */
  59 #include "afs/sysincludes.h"
  60 #include "afsincludes.h"
  61 #endif /* !UKERNEL */
  62 #include "afs/lock.h"
  63 #include "rx_kmutex.h"
  64 #include "rx_kernel.h"
  65 #include "rx_clock.h"
  66 #include "rx_queue.h"
  67 #include "rx.h"
  68 #include "rx_globals.h"
  69 #include "rx_trace.h"
  70 #define AFSOP_STOP_RXCALLBACK   210     /* Stop CALLBACK process */
  71 #define AFSOP_STOP_AFS          211     /* Stop AFS process */
  72 #define AFSOP_STOP_BKG          212     /* Stop BKG process */
  73 #include "afsint.h"
  74 extern afs_int32 afs_termState;
  75 #ifdef AFS_AIX41_ENV
  76 #include "sys/lockl.h"
  77 #include "sys/lock_def.h"
  78 #endif /* AFS_AIX41_ENV */
  79 # include "rxgen_consts.h"
  80 #else /* KERNEL */
  81 # include <sys/types.h>
  82 # include <errno.h>
  83 #ifdef AFS_NT40_ENV
  84 # include <stdlib.h>
  85 # include <fcntl.h>
  86 # include <afs/afsutil.h>
  87 #else
  88 # include <sys/socket.h>
  89 # include <sys/file.h>
  90 # include <netdb.h>
  91 # include <sys/stat.h>
  92 # include <netinet/in.h>
  93 # include <sys/time.h>
  94 #endif
  95 #ifdef HAVE_STRING_H
  96 #include <string.h>
  97 #else
  98 #ifdef HAVE_STRINGS_H
  99 #include <strings.h>
 100 #endif
 101 #endif
 102 # include "rx.h"
 103 # include "rx_user.h"
 104 # include "rx_clock.h"
 105 # include "rx_queue.h"
 106 # include "rx_globals.h"
 107 # include "rx_trace.h"
 108 # include <afs/rxgen_consts.h>
 109 #endif /* KERNEL */
 110
 111 int (*registerProgram) () = 0;
 112 int (*swapNameProgram) () = 0;
 113
 114 /* Local static routines */
 115 static void rxi_DestroyConnectionNoLock(register struct rx_connection *conn);
 116 #ifdef RX_ENABLE_LOCKS
 117 static void rxi_SetAcksInTransmitQueue(register struct rx_call *call);
 118 #endif
 119
 120 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
 121 struct rx_tq_debug {
 122     afs_int32 rxi_start_aborted;        /* rxi_start awoke after rxi_Send in error. */
 123     afs_int32 rxi_start_in_error;
 124 } rx_tq_debug;
 125 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
 126
 127 /*
 128  * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
 129  * currently allocated within rx.  This number is used to allocate the
 130  * memory required to return the statistics when queried.
 131  */
 132
 133 static unsigned int rxi_rpc_peer_stat_cnt;
 134
 135 /*
 136  * rxi_rpc_process_stat_cnt counts the total number of local process stat
 137  * structures currently allocated within rx.  The number is used to allocate
 138  * the memory required to return the statistics when queried.
 139  */
 140
 141 static unsigned int rxi_rpc_process_stat_cnt;
 142
 143 #if !defined(offsetof)
 144 #include <stddef.h>             /* for definition of offsetof() */
 145 #endif
 146
 147 #ifdef AFS_PTHREAD_ENV
 148 #include <assert.h>
 149
 150 /*
 151  * Use procedural initialization of mutexes/condition variables
 152  * to ease NT porting
 153  */
 154
 155 extern pthread_mutex_t rx_stats_mutex;
 156 extern pthread_mutex_t des_init_mutex;
 157 extern pthread_mutex_t des_random_mutex;
 158 extern pthread_mutex_t rx_clock_mutex;
 159 extern pthread_mutex_t rxi_connCacheMutex;
 160 extern pthread_mutex_t rx_event_mutex;
 161 extern pthread_mutex_t osi_malloc_mutex;
 162 extern pthread_mutex_t event_handler_mutex;
 163 extern pthread_mutex_t listener_mutex;
 164 extern pthread_mutex_t rx_if_init_mutex;
 165 extern pthread_mutex_t rx_if_mutex;
 166 extern pthread_mutex_t rxkad_client_uid_mutex;
 167 extern pthread_mutex_t rxkad_random_mutex;
 168
 169 extern pthread_cond_t rx_event_handler_cond;
 170 extern pthread_cond_t rx_listener_cond;
 171
 172 static pthread_mutex_t epoch_mutex;
 173 static pthread_mutex_t rx_init_mutex;
 174 static pthread_mutex_t rx_debug_mutex;
 175
 176 static void
 177 rxi_InitPthread(void)
 178 {
 179     assert(pthread_mutex_init(&rx_clock_mutex, (const pthread_mutexattr_t *)0)
 180            == 0);
 181     assert(pthread_mutex_init(&rx_stats_mutex, (const pthread_mutexattr_t *)0)
 182            == 0);
 183     assert(pthread_mutex_init
 184            (&rxi_connCacheMutex, (const pthread_mutexattr_t *)0) == 0);
 185     assert(pthread_mutex_init(&rx_init_mutex, (const pthread_mutexattr_t *)0)
 186            == 0);
 187     assert(pthread_mutex_init(&epoch_mutex, (const pthread_mutexattr_t *)0) ==
 188            0);
 189     assert(pthread_mutex_init(&rx_event_mutex, (const pthread_mutexattr_t *)0)
 190            == 0);
 191     assert(pthread_mutex_init(&des_init_mutex, (const pthread_mutexattr_t *)0)
 192            == 0);
 193     assert(pthread_mutex_init
 194            (&des_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 195     assert(pthread_mutex_init
 196            (&osi_malloc_mutex, (const pthread_mutexattr_t *)0) == 0);
 197     assert(pthread_mutex_init
 198            (&event_handler_mutex, (const pthread_mutexattr_t *)0) == 0);
 199     assert(pthread_mutex_init(&listener_mutex, (const pthread_mutexattr_t *)0)
 200            == 0);
 201     assert(pthread_mutex_init
 202            (&rx_if_init_mutex, (const pthread_mutexattr_t *)0) == 0);
 203     assert(pthread_mutex_init(&rx_if_mutex, (const pthread_mutexattr_t *)0) ==
 204            0);
 205     assert(pthread_mutex_init
 206            (&rxkad_client_uid_mutex, (const pthread_mutexattr_t *)0) == 0);
 207     assert(pthread_mutex_init
 208            (&rxkad_random_mutex, (const pthread_mutexattr_t *)0) == 0);
 209     assert(pthread_mutex_init(&rx_debug_mutex, (const pthread_mutexattr_t *)0)
 210            == 0);
 211
 212     assert(pthread_cond_init
 213            (&rx_event_handler_cond, (const pthread_condattr_t *)0) == 0);
 214     assert(pthread_cond_init(&rx_listener_cond, (const pthread_condattr_t *)0)
 215            == 0);
 216     assert(pthread_key_create(&rx_thread_id_key, NULL) == 0);
 217     assert(pthread_key_create(&rx_ts_info_key, NULL) == 0);
 218
 219     rxkad_global_stats_init();
 220 }
 221
 222 pthread_once_t rx_once_init = PTHREAD_ONCE_INIT;
 223 #define INIT_PTHREAD_LOCKS \
 224 assert(pthread_once(&rx_once_init, rxi_InitPthread)==0)
 225 /*
 226  * The rx_stats_mutex mutex protects the following global variables:
 227  * rxi_dataQuota
 228  * rxi_minDeficit
 229  * rxi_availProcs
 230  * rxi_totalMin
 231  * rxi_lowConnRefCount
 232  * rxi_lowPeerRefCount
 233  * rxi_nCalls
 234  * rxi_Alloccnt
 235  * rxi_Allocsize
 236  * rx_nFreePackets
 237  * rx_tq_debug
 238  * rx_stats
 239  */
 240 #else
 241 #define INIT_PTHREAD_LOCKS
 242 #endif
 243
 244
 245 /* Variables for handling the minProcs implementation.  availProcs gives the
 246  * number of threads available in the pool at this moment (not counting dudes
 247  * executing right now).  totalMin gives the total number of procs required
 248  * for handling all minProcs requests.  minDeficit is a dynamic variable
 249  * tracking the # of procs required to satisfy all of the remaining minProcs
 250  * demands.
 251  * For fine grain locking to work, the quota check and the reservation of
 252  * a server thread has to come while rxi_availProcs and rxi_minDeficit
 253  * are locked. To this end, the code has been modified under #ifdef
 254  * RX_ENABLE_LOCKS so that quota checks and reservation occur at the
 255  * same time. A new function, ReturnToServerPool() returns the allocation.
 256  *
 257  * A call can be on several queue's (but only one at a time). When
 258  * rxi_ResetCall wants to remove the call from a queue, it has to ensure
 259  * that no one else is touching the queue. To this end, we store the address
 260  * of the queue lock in the call structure (under the call lock) when we
 261  * put the call on a queue, and we clear the call_queue_lock when the
 262  * call is removed from a queue (once the call lock has been obtained).
 263  * This allows rxi_ResetCall to safely synchronize with others wishing
 264  * to manipulate the queue.
 265  */
 266
 267 #ifdef RX_ENABLE_LOCKS
 268 static afs_kmutex_t rx_rpc_stats;
 269 void rxi_StartUnlocked();
 270 #endif
 271
 272 /* We keep a "last conn pointer" in rxi_FindConnection. The odds are
 273 ** pretty good that the next packet coming in is from the same connection
 274 ** as the last packet, since we're send multiple packets in a transmit window.
 275 */
 276 struct rx_connection *rxLastConn = 0;
 277
 278 #ifdef RX_ENABLE_LOCKS
 279 /* The locking hierarchy for rx fine grain locking is composed of these
 280  * tiers:
 281  *
 282  * rx_connHashTable_lock - synchronizes conn creation, rx_connHashTable access
 283  * conn_call_lock - used to synchonize rx_EndCall and rx_NewCall
 284  * call->lock - locks call data fields.
 285  * These are independent of each other:
 286  *      rx_freeCallQueue_lock
 287  *      rxi_keyCreate_lock
 288  * rx_serverPool_lock
 289  * freeSQEList_lock
 290  *
 291  * serverQueueEntry->lock
 292  * rx_rpc_stats
 293  * rx_peerHashTable_lock - locked under rx_connHashTable_lock
 294  * peer->lock - locks peer data fields.
 295  * conn_data_lock - that more than one thread is not updating a conn data
 296  *                  field at the same time.
 297  * rx_freePktQ_lock
 298  *
 299  * lowest level:
 300  *      multi_handle->lock
 301  *      rxevent_lock
 302  *      rx_stats_mutex
 303  *
 304  * Do we need a lock to protect the peer field in the conn structure?
 305  *      conn->peer was previously a constant for all intents and so has no
 306  *      lock protecting this field. The multihomed client delta introduced
 307  *      a RX code change : change the peer field in the connection structure
 308  *      to that remote inetrface from which the last packet for this
 309  *      connection was sent out. This may become an issue if further changes
 310  *      are made.
 311  */
 312 #define SET_CALL_QUEUE_LOCK(C, L) (C)->call_queue_lock = (L)
 313 #define CLEAR_CALL_QUEUE_LOCK(C) (C)->call_queue_lock = NULL
 314 #ifdef RX_LOCKS_DB
 315 /* rxdb_fileID is used to identify the lock location, along with line#. */
 316 static int rxdb_fileID = RXDB_FILE_RX;
 317 #endif /* RX_LOCKS_DB */
 318 #else /* RX_ENABLE_LOCKS */
 319 #define SET_CALL_QUEUE_LOCK(C, L)
 320 #define CLEAR_CALL_QUEUE_LOCK(C)
 321 #endif /* RX_ENABLE_LOCKS */
 322 struct rx_serverQueueEntry *rx_waitForPacket = 0;
 323 struct rx_serverQueueEntry *rx_waitingForPacket = 0;
 324
 325 /* ------------Exported Interfaces------------- */
 326
 327 /* This function allows rxkad to set the epoch to a suitably random number
 328  * which rx_NewConnection will use in the future.  The principle purpose is to
 329  * get rxnull connections to use the same epoch as the rxkad connections do, at
 330  * least once the first rxkad connection is established.  This is important now
 331  * that the host/port addresses aren't used in FindConnection: the uniqueness
 332  * of epoch/cid matters and the start time won't do. */
 333
 334 #ifdef AFS_PTHREAD_ENV
 335 /*
 336  * This mutex protects the following global variables:
 337  * rx_epoch
 338  */
 339
 340 #define LOCK_EPOCH assert(pthread_mutex_lock(&epoch_mutex)==0)
 341 #define UNLOCK_EPOCH assert(pthread_mutex_unlock(&epoch_mutex)==0)
 342 #else
 343 #define LOCK_EPOCH
 344 #define UNLOCK_EPOCH
 345 #endif /* AFS_PTHREAD_ENV */
 346
 347 void
 348 rx_SetEpoch(afs_uint32 epoch)
 349 {
 350     LOCK_EPOCH;
 351     rx_epoch = epoch;
 352     UNLOCK_EPOCH;
 353 }
 354
 355 /* Initialize rx.  A port number may be mentioned, in which case this
 356  * becomes the default port number for any service installed later.
 357  * If 0 is provided for the port number, a random port will be chosen
 358  * by the kernel.  Whether this will ever overlap anything in
 359  * /etc/services is anybody's guess...  Returns 0 on success, -1 on
 360  * error. */
 361 static int rxinit_status = 1;
 362 #ifdef AFS_PTHREAD_ENV
 363 /*
 364  * This mutex protects the following global variables:
 365  * rxinit_status
 366  */
 367
 368 #define LOCK_RX_INIT assert(pthread_mutex_lock(&rx_init_mutex)==0)
 369 #define UNLOCK_RX_INIT assert(pthread_mutex_unlock(&rx_init_mutex)==0)
 370 #else
 371 #define LOCK_RX_INIT
 372 #define UNLOCK_RX_INIT
 373 #endif
 374
 375 int
 376 rx_InitHost(u_int host, u_int port)
 377 {
 378 #ifdef KERNEL
 379     osi_timeval_t tv;
 380 #else /* KERNEL */
 381     struct timeval tv;
 382 #endif /* KERNEL */
 383     char *htable, *ptable;
 384     int tmp_status;
 385
 386 #if defined(AFS_DJGPP_ENV) && !defined(DEBUG)
 387     __djgpp_set_quiet_socket(1);
 388 #endif
 389
 390     SPLVAR;
 391
 392     INIT_PTHREAD_LOCKS;
 393     LOCK_RX_INIT;
 394     if (rxinit_status == 0) {
 395         tmp_status = rxinit_status;
 396         UNLOCK_RX_INIT;
 397         return tmp_status;      /* Already started; return previous error code. */
 398     }
 399 #ifdef AFS_NT40_ENV
 400     if (afs_winsockInit() < 0)
 401         return -1;
 402 #endif
 403
 404 #ifndef KERNEL
 405     /*
 406      * Initialize anything necessary to provide a non-premptive threading
 407      * environment.
 408      */
 409     rxi_InitializeThreadSupport();
 410 #endif
 411
 412     /* Allocate and initialize a socket for client and perhaps server
 413      * connections. */
 414
 415     rx_socket = rxi_GetHostUDPSocket(host, (u_short) port);
 416     if (rx_socket == OSI_NULLSOCKET) {
 417         UNLOCK_RX_INIT;
 418         return RX_ADDRINUSE;
 419     }
 420 #ifdef  RX_ENABLE_LOCKS
 421 #ifdef RX_LOCKS_DB
 422     rxdb_init();
 423 #endif /* RX_LOCKS_DB */
 424     MUTEX_INIT(&rx_stats_mutex, "rx_stats_mutex", MUTEX_DEFAULT, 0);
 425     MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
 426     MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
 427     MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
 428     MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
 429                0);
 430     CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
 431             0);
 432     MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
 433                0);
 434     MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
 435                0);
 436     MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
 437 #ifndef KERNEL
 438     MUTEX_INIT(&rxi_keyCreate_lock, "rxi_keyCreate_lock", MUTEX_DEFAULT, 0);
 439 #endif /* !KERNEL */
 440 #if defined(KERNEL) && defined(AFS_HPUX110_ENV)
 441     if (!uniprocessor)
 442         rx_sleepLock = alloc_spinlock(LAST_HELD_ORDER - 10, "rx_sleepLock");
 443 #endif /* KERNEL && AFS_HPUX110_ENV */
 444 #else /* RX_ENABLE_LOCKS */
 445 #if defined(KERNEL) && defined(AFS_GLOBAL_SUNLOCK) && !defined(AFS_HPUX_ENV) && !defined(AFS_OBSD_ENV)
 446     mutex_init(&afs_rxglobal_lock, "afs_rxglobal_lock", MUTEX_DEFAULT, NULL);
 447 #endif /* AFS_GLOBAL_SUNLOCK */
 448 #endif /* RX_ENABLE_LOCKS */
 449
 450     rxi_nCalls = 0;
 451     rx_connDeadTime = 12;
 452     rx_tranquil = 0;            /* reset flag */
 453     memset((char *)&rx_stats, 0, sizeof(struct rx_stats));
 454     htable = (char *)
 455         osi_Alloc(rx_hashTableSize * sizeof(struct rx_connection *));
 456     PIN(htable, rx_hashTableSize * sizeof(struct rx_connection *));     /* XXXXX */
 457     memset(htable, 0, rx_hashTableSize * sizeof(struct rx_connection *));
 458     ptable = (char *)osi_Alloc(rx_hashTableSize * sizeof(struct rx_peer *));
 459     PIN(ptable, rx_hashTableSize * sizeof(struct rx_peer *));   /* XXXXX */
 460     memset(ptable, 0, rx_hashTableSize * sizeof(struct rx_peer *));
 461
 462     /* Malloc up a bunch of packets & buffers */
 463     rx_nFreePackets = 0;
 464     queue_Init(&rx_freePacketQueue);
 465     rxi_NeedMorePackets = FALSE;
 466 #ifdef RX_ENABLE_TSFPQ
 467     rx_nPackets = 0;    /* in TSFPQ version, rx_nPackets is managed by rxi_MorePackets* */
 468     rxi_MorePacketsTSFPQ(rx_extraPackets + RX_MAX_QUOTA + 2, RX_TS_FPQ_FLUSH_GLOBAL, 0);
 469 #else /* RX_ENABLE_TSFPQ */
 470     rx_nPackets = rx_extraPackets + RX_MAX_QUOTA + 2;   /* fudge */
 471     rxi_MorePackets(rx_nPackets);
 472 #endif /* RX_ENABLE_TSFPQ */
 473     rx_CheckPackets();
 474
 475     NETPRI;
 476
 477     clock_Init();
 478
 479 #if defined(AFS_NT40_ENV) && !defined(AFS_PTHREAD_ENV)
 480     tv.tv_sec = clock_now.sec;
 481     tv.tv_usec = clock_now.usec;
 482     srand((unsigned int)tv.tv_usec);
 483 #else
 484     osi_GetTime(&tv);
 485 #endif
 486     if (port) {
 487         rx_port = port;
 488     } else {
 489 #if defined(KERNEL) && !defined(UKERNEL)
 490         /* Really, this should never happen in a real kernel */
 491         rx_port = 0;
 492 #else
 493         struct sockaddr_in addr;
 494         int addrlen = sizeof(addr);
 495         if (getsockname((int)rx_socket, (struct sockaddr *)&addr, &addrlen)) {
 496             rx_Finalize();
 497             return -1;
 498         }
 499         rx_port = addr.sin_port;
 500 #endif
 501     }
 502     rx_stats.minRtt.sec = 9999999;
 503 #ifdef  KERNEL
 504     rx_SetEpoch(tv.tv_sec | 0x80000000);
 505 #else
 506     rx_SetEpoch(tv.tv_sec);     /* Start time of this package, rxkad
 507                                  * will provide a randomer value. */
 508 #endif
 509     MUTEX_ENTER(&rx_stats_mutex);
 510     rxi_dataQuota += rx_extraQuota;     /* + extra pkts caller asked to rsrv */
 511     MUTEX_EXIT(&rx_stats_mutex);
 512     /* *Slightly* random start time for the cid.  This is just to help
 513      * out with the hashing function at the peer */
 514     rx_nextCid = ((tv.tv_sec ^ tv.tv_usec) << RX_CIDSHIFT);
 515     rx_connHashTable = (struct rx_connection **)htable;
 516     rx_peerHashTable = (struct rx_peer **)ptable;
 517
 518     rx_lastAckDelay.sec = 0;
 519     rx_lastAckDelay.usec = 400000;      /* 400 milliseconds */
 520     rx_hardAckDelay.sec = 0;
 521     rx_hardAckDelay.usec = 100000;      /* 100 milliseconds */
 522     rx_softAckDelay.sec = 0;
 523     rx_softAckDelay.usec = 100000;      /* 100 milliseconds */
 524
 525     rxevent_Init(20, rxi_ReScheduleEvents);
 526
 527     /* Initialize various global queues */
 528     queue_Init(&rx_idleServerQueue);
 529     queue_Init(&rx_incomingCallQueue);
 530     queue_Init(&rx_freeCallQueue);
 531
 532 #if defined(AFS_NT40_ENV) && !defined(KERNEL)
 533     /* Initialize our list of usable IP addresses. */
 534     rx_GetIFInfo();
 535 #endif
 536
 537     /* Start listener process (exact function is dependent on the
 538      * implementation environment--kernel or user space) */
 539     rxi_StartListener();
 540
 541     USERPRI;
 542     tmp_status = rxinit_status = 0;
 543     UNLOCK_RX_INIT;
 544     return tmp_status;
 545 }
 546
 547 int
 548 rx_Init(u_int port)
 549 {
 550     return rx_InitHost(htonl(INADDR_ANY), port);
 551 }
 552
 553 /* called with unincremented nRequestsRunning to see if it is OK to start
 554  * a new thread in this service.  Could be "no" for two reasons: over the
 555  * max quota, or would prevent others from reaching their min quota.
 556  */
 557 #ifdef RX_ENABLE_LOCKS
 558 /* This verion of QuotaOK reserves quota if it's ok while the
 559  * rx_serverPool_lock is held.  Return quota using ReturnToServerPool().
 560  */
 561 static int
 562 QuotaOK(register struct rx_service *aservice)
 563 {
 564     /* check if over max quota */
 565     if (aservice->nRequestsRunning >= aservice->maxProcs) {
 566         return 0;
 567     }
 568
 569     /* under min quota, we're OK */
 570     /* otherwise, can use only if there are enough to allow everyone
 571      * to go to their min quota after this guy starts.
 572      */
 573     MUTEX_ENTER(&rx_stats_mutex);
 574     if ((aservice->nRequestsRunning < aservice->minProcs)
 575         || (rxi_availProcs > rxi_minDeficit)) {
 576         aservice->nRequestsRunning++;
 577         /* just started call in minProcs pool, need fewer to maintain
 578          * guarantee */
 579         if (aservice->nRequestsRunning <= aservice->minProcs)
 580             rxi_minDeficit--;
 581         rxi_availProcs--;
 582         MUTEX_EXIT(&rx_stats_mutex);
 583         return 1;
 584     }
 585     MUTEX_EXIT(&rx_stats_mutex);
 586
 587     return 0;
 588 }
 589
 590 static void
 591 ReturnToServerPool(register struct rx_service *aservice)
 592 {
 593     aservice->nRequestsRunning--;
 594     MUTEX_ENTER(&rx_stats_mutex);
 595     if (aservice->nRequestsRunning < aservice->minProcs)
 596         rxi_minDeficit++;
 597     rxi_availProcs++;
 598     MUTEX_EXIT(&rx_stats_mutex);
 599 }
 600
 601 #else /* RX_ENABLE_LOCKS */
 602 static int
 603 QuotaOK(register struct rx_service *aservice)
 604 {
 605     int rc = 0;
 606     /* under min quota, we're OK */
 607     if (aservice->nRequestsRunning < aservice->minProcs)
 608         return 1;
 609
 610     /* check if over max quota */
 611     if (aservice->nRequestsRunning >= aservice->maxProcs)
 612         return 0;
 613
 614     /* otherwise, can use only if there are enough to allow everyone
 615      * to go to their min quota after this guy starts.
 616      */
 617     if (rxi_availProcs > rxi_minDeficit)
 618         rc = 1;
 619     return rc;
 620 }
 621 #endif /* RX_ENABLE_LOCKS */
 622
 623 #ifndef KERNEL
 624 /* Called by rx_StartServer to start up lwp's to service calls.
 625    NExistingProcs gives the number of procs already existing, and which
 626    therefore needn't be created. */
 627 void
 628 rxi_StartServerProcs(int nExistingProcs)
 629 {
 630     register struct rx_service *service;
 631     register int i;
 632     int maxdiff = 0;
 633     int nProcs = 0;
 634
 635     /* For each service, reserve N processes, where N is the "minimum"
 636      * number of processes that MUST be able to execute a request in parallel,
 637      * at any time, for that process.  Also compute the maximum difference
 638      * between any service's maximum number of processes that can run
 639      * (i.e. the maximum number that ever will be run, and a guarantee
 640      * that this number will run if other services aren't running), and its
 641      * minimum number.  The result is the extra number of processes that
 642      * we need in order to provide the latter guarantee */
 643     for (i = 0; i < RX_MAX_SERVICES; i++) {
 644         int diff;
 645         service = rx_services[i];
 646         if (service == (struct rx_service *)0)
 647             break;
 648         nProcs += service->minProcs;
 649         diff = service->maxProcs - service->minProcs;
 650         if (diff > maxdiff)
 651             maxdiff = diff;
 652     }
 653     nProcs += maxdiff;          /* Extra processes needed to allow max number requested to run in any given service, under good conditions */
 654     nProcs -= nExistingProcs;   /* Subtract the number of procs that were previously created for use as server procs */
 655     for (i = 0; i < nProcs; i++) {
 656         rxi_StartServerProc(rx_ServerProc, rx_stackSize);
 657     }
 658 }
 659 #endif /* KERNEL */
 660
 661 #ifdef AFS_NT40_ENV
 662 /* This routine is only required on Windows */
 663 void
 664 rx_StartClientThread(void)
 665 {
 666 #ifdef AFS_PTHREAD_ENV
 667     int pid;
 668     pid = (int) pthread_self();
 669 #endif /* AFS_PTHREAD_ENV */
 670 }
 671 #endif /* AFS_NT40_ENV */
 672
 673 /* This routine must be called if any services are exported.  If the
 674  * donateMe flag is set, the calling process is donated to the server
 675  * process pool */
 676 void
 677 rx_StartServer(int donateMe)
 678 {
 679     register struct rx_service *service;
 680     register int i;
 681     SPLVAR;
 682     clock_NewTime();
 683
 684     NETPRI;
 685     /* Start server processes, if necessary (exact function is dependent
 686      * on the implementation environment--kernel or user space).  DonateMe
 687      * will be 1 if there is 1 pre-existing proc, i.e. this one.  In this
 688      * case, one less new proc will be created rx_StartServerProcs.
 689      */
 690     rxi_StartServerProcs(donateMe);
 691
 692     /* count up the # of threads in minProcs, and add set the min deficit to
 693      * be that value, too.
 694      */
 695     for (i = 0; i < RX_MAX_SERVICES; i++) {
 696         service = rx_services[i];
 697         if (service == (struct rx_service *)0)
 698             break;
 699         MUTEX_ENTER(&rx_stats_mutex);
 700         rxi_totalMin += service->minProcs;
 701         /* below works even if a thread is running, since minDeficit would
 702          * still have been decremented and later re-incremented.
 703          */
 704         rxi_minDeficit += service->minProcs;
 705         MUTEX_EXIT(&rx_stats_mutex);
 706     }
 707
 708     /* Turn on reaping of idle server connections */
 709     rxi_ReapConnections();
 710
 711     USERPRI;
 712
 713     if (donateMe) {
 714 #ifndef AFS_NT40_ENV
 715 #ifndef KERNEL
 716         char name[32];
 717         static int nProcs;
 718 #ifdef AFS_PTHREAD_ENV
 719         pid_t pid;
 720         pid = (pid_t) pthread_self();
 721 #else /* AFS_PTHREAD_ENV */
 722         PROCESS pid;
 723         LWP_CurrentProcess(&pid);
 724 #endif /* AFS_PTHREAD_ENV */
 725
 726         sprintf(name, "srv_%d", ++nProcs);
 727         if (registerProgram)
 728             (*registerProgram) (pid, name);
 729 #endif /* KERNEL */
 730 #endif /* AFS_NT40_ENV */
 731         rx_ServerProc();        /* Never returns */
 732     }
 733 #ifdef RX_ENABLE_TSFPQ
 734     /* no use leaving packets around in this thread's local queue if
 735      * it isn't getting donated to the server thread pool.
 736      */
 737     rxi_FlushLocalPacketsTSFPQ();
 738 #endif /* RX_ENABLE_TSFPQ */
 739     return;
 740 }
 741
 742 /* Create a new client connection to the specified service, using the
 743  * specified security object to implement the security model for this
 744  * connection. */
 745 struct rx_connection *
 746 rx_NewConnection(register afs_uint32 shost, u_short sport, u_short sservice,
 747                  register struct rx_securityClass *securityObject,
 748                  int serviceSecurityIndex)
 749 {
 750     int hashindex;
 751     afs_int32 cid;
 752     register struct rx_connection *conn;
 753
 754     SPLVAR;
 755
 756     clock_NewTime();
 757     dpf(("rx_NewConnection(host %x, port %u, service %u, securityObject %x, serviceSecurityIndex %d)\n", shost, sport, sservice, securityObject, serviceSecurityIndex));
 758
 759     /* Vasilsi said: "NETPRI protects Cid and Alloc", but can this be true in
 760      * the case of kmem_alloc? */
 761     conn = rxi_AllocConnection();
 762 #ifdef  RX_ENABLE_LOCKS
 763     MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
 764     MUTEX_INIT(&conn->conn_data_lock, "conn call lock", MUTEX_DEFAULT, 0);
 765     CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
 766 #endif
 767     NETPRI;
 768     MUTEX_ENTER(&rx_connHashTable_lock);
 769     cid = (rx_nextCid += RX_MAXCALLS);
 770     conn->type = RX_CLIENT_CONNECTION;
 771     conn->cid = cid;
 772     conn->epoch = rx_epoch;
 773     conn->peer = rxi_FindPeer(shost, sport, 0, 1);
 774     conn->serviceId = sservice;
 775     conn->securityObject = securityObject;
 776     /* This doesn't work in all compilers with void (they're buggy), so fake it
 777      * with VOID */
 778     conn->securityData = (VOID *) 0;
 779     conn->securityIndex = serviceSecurityIndex;
 780     rx_SetConnDeadTime(conn, rx_connDeadTime);
 781     conn->ackRate = RX_FAST_ACK_RATE;
 782     conn->nSpecific = 0;
 783     conn->specific = NULL;
 784     conn->challengeEvent = NULL;
 785     conn->delayedAbortEvent = NULL;
 786     conn->abortCount = 0;
 787     conn->error = 0;
 788
 789     RXS_NewConnection(securityObject, conn);
 790     hashindex =
 791         CONN_HASH(shost, sport, conn->cid, conn->epoch, RX_CLIENT_CONNECTION);
 792
 793     conn->refCount++;           /* no lock required since only this thread knows... */
 794     conn->next = rx_connHashTable[hashindex];
 795     rx_connHashTable[hashindex] = conn;
 796     MUTEX_ENTER(&rx_stats_mutex);
 797     rx_stats.nClientConns++;
 798     MUTEX_EXIT(&rx_stats_mutex);
 799
 800     MUTEX_EXIT(&rx_connHashTable_lock);
 801     USERPRI;
 802     return conn;
 803 }
 804
 805 void
 806 rx_SetConnDeadTime(register struct rx_connection *conn, register int seconds)
 807 {
 808     /* The idea is to set the dead time to a value that allows several
 809      * keepalives to be dropped without timing out the connection. */
 810     conn->secondsUntilDead = MAX(seconds, 6);
 811     conn->secondsUntilPing = conn->secondsUntilDead / 6;
 812 }
 813
 814 int rxi_lowPeerRefCount = 0;
 815 int rxi_lowConnRefCount = 0;
 816
 817 /*
 818  * Cleanup a connection that was destroyed in rxi_DestroyConnectioNoLock.
 819  * NOTE: must not be called with rx_connHashTable_lock held.
 820  */
 821 void
 822 rxi_CleanupConnection(struct rx_connection *conn)
 823 {
 824     /* Notify the service exporter, if requested, that this connection
 825      * is being destroyed */
 826     if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
 827         (*conn->service->destroyConnProc) (conn);
 828
 829     /* Notify the security module that this connection is being destroyed */
 830     RXS_DestroyConnection(conn->securityObject, conn);
 831
 832     /* If this is the last connection using the rx_peer struct, set its
 833      * idle time to now. rxi_ReapConnections will reap it if it's still
 834      * idle (refCount == 0) after rx_idlePeerTime (60 seconds) have passed.
 835      */
 836     MUTEX_ENTER(&rx_peerHashTable_lock);
 837     if (conn->peer->refCount < 2) {
 838         conn->peer->idleWhen = clock_Sec();
 839         if (conn->peer->refCount < 1) {
 840             conn->peer->refCount = 1;
 841             MUTEX_ENTER(&rx_stats_mutex);
 842             rxi_lowPeerRefCount++;
 843             MUTEX_EXIT(&rx_stats_mutex);
 844         }
 845     }
 846     conn->peer->refCount--;
 847     MUTEX_EXIT(&rx_peerHashTable_lock);
 848
 849     MUTEX_ENTER(&rx_stats_mutex);
 850     if (conn->type == RX_SERVER_CONNECTION)
 851         rx_stats.nServerConns--;
 852     else
 853         rx_stats.nClientConns--;
 854     MUTEX_EXIT(&rx_stats_mutex);
 855
 856 #ifndef KERNEL
 857     if (conn->specific) {
 858         int i;
 859         for (i = 0; i < conn->nSpecific; i++) {
 860             if (conn->specific[i] && rxi_keyCreate_destructor[i])
 861                 (*rxi_keyCreate_destructor[i]) (conn->specific[i]);
 862             conn->specific[i] = NULL;
 863         }
 864         free(conn->specific);
 865     }
 866     conn->specific = NULL;
 867     conn->nSpecific = 0;
 868 #endif /* !KERNEL */
 869
 870     MUTEX_DESTROY(&conn->conn_call_lock);
 871     MUTEX_DESTROY(&conn->conn_data_lock);
 872     CV_DESTROY(&conn->conn_call_cv);
 873
 874     rxi_FreeConnection(conn);
 875 }
 876
 877 /* Destroy the specified connection */
 878 void
 879 rxi_DestroyConnection(register struct rx_connection *conn)
 880 {
 881     MUTEX_ENTER(&rx_connHashTable_lock);
 882     rxi_DestroyConnectionNoLock(conn);
 883     /* conn should be at the head of the cleanup list */
 884     if (conn == rx_connCleanup_list) {
 885         rx_connCleanup_list = rx_connCleanup_list->next;
 886         MUTEX_EXIT(&rx_connHashTable_lock);
 887         rxi_CleanupConnection(conn);
 888     }
 889 #ifdef RX_ENABLE_LOCKS
 890     else {
 891         MUTEX_EXIT(&rx_connHashTable_lock);
 892     }
 893 #endif /* RX_ENABLE_LOCKS */
 894 }
 895
 896 static void
 897 rxi_DestroyConnectionNoLock(register struct rx_connection *conn)
 898 {
 899     register struct rx_connection **conn_ptr;
 900     register int havecalls = 0;
 901     struct rx_packet *packet;
 902     int i;
 903     SPLVAR;
 904
 905     clock_NewTime();
 906
 907     NETPRI;
 908     MUTEX_ENTER(&conn->conn_data_lock);
 909     if (conn->refCount > 0)
 910         conn->refCount--;
 911     else {
 912         MUTEX_ENTER(&rx_stats_mutex);
 913         rxi_lowConnRefCount++;
 914         MUTEX_EXIT(&rx_stats_mutex);
 915     }
 916
 917     if ((conn->refCount > 0) || (conn->flags & RX_CONN_BUSY)) {
 918         /* Busy; wait till the last guy before proceeding */
 919         MUTEX_EXIT(&conn->conn_data_lock);
 920         USERPRI;
 921         return;
 922     }
 923
 924     /* If the client previously called rx_NewCall, but it is still
 925      * waiting, treat this as a running call, and wait to destroy the
 926      * connection later when the call completes. */
 927     if ((conn->type == RX_CLIENT_CONNECTION)
 928         && (conn->flags & RX_CONN_MAKECALL_WAITING)) {
 929         conn->flags |= RX_CONN_DESTROY_ME;
 930         MUTEX_EXIT(&conn->conn_data_lock);
 931         USERPRI;
 932         return;
 933     }
 934     MUTEX_EXIT(&conn->conn_data_lock);
 935
 936     /* Check for extant references to this connection */
 937     for (i = 0; i < RX_MAXCALLS; i++) {
 938         register struct rx_call *call = conn->call[i];
 939         if (call) {
 940             havecalls = 1;
 941             if (conn->type == RX_CLIENT_CONNECTION) {
 942                 MUTEX_ENTER(&call->lock);
 943                 if (call->delayedAckEvent) {
 944                     /* Push the final acknowledgment out now--there
 945                      * won't be a subsequent call to acknowledge the
 946                      * last reply packets */
 947                     rxevent_Cancel(call->delayedAckEvent, call,
 948                                    RX_CALL_REFCOUNT_DELAY);
 949                     if (call->state == RX_STATE_PRECALL
 950                         || call->state == RX_STATE_ACTIVE) {
 951                         rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
 952                     } else {
 953                         rxi_AckAll(NULL, call, 0);
 954                     }
 955                 }
 956                 MUTEX_EXIT(&call->lock);
 957             }
 958         }
 959     }
 960 #ifdef RX_ENABLE_LOCKS
 961     if (!havecalls) {
 962         if (MUTEX_TRYENTER(&conn->conn_data_lock)) {
 963             MUTEX_EXIT(&conn->conn_data_lock);
 964         } else {
 965             /* Someone is accessing a packet right now. */
 966             havecalls = 1;
 967         }
 968     }
 969 #endif /* RX_ENABLE_LOCKS */
 970
 971     if (havecalls) {
 972         /* Don't destroy the connection if there are any call
 973          * structures still in use */
 974         MUTEX_ENTER(&conn->conn_data_lock);
 975         conn->flags |= RX_CONN_DESTROY_ME;
 976         MUTEX_EXIT(&conn->conn_data_lock);
 977         USERPRI;
 978         return;
 979     }
 980
 981     if (conn->delayedAbortEvent) {
 982         rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
 983         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
 984         if (packet) {
 985             MUTEX_ENTER(&conn->conn_data_lock);
 986             rxi_SendConnectionAbort(conn, packet, 0, 1);
 987             MUTEX_EXIT(&conn->conn_data_lock);
 988             rxi_FreePacket(packet);
 989         }
 990     }
 991
 992     /* Remove from connection hash table before proceeding */
 993     conn_ptr =
 994         &rx_connHashTable[CONN_HASH
 995                           (peer->host, peer->port, conn->cid, conn->epoch,
 996                            conn->type)];
 997     for (; *conn_ptr; conn_ptr = &(*conn_ptr)->next) {
 998         if (*conn_ptr == conn) {
 999             *conn_ptr = conn->next;
1000             break;
1001         }
1002     }
1003     /* if the conn that we are destroying was the last connection, then we
1004      * clear rxLastConn as well */
1005     if (rxLastConn == conn)
1006         rxLastConn = 0;
1007
1008     /* Make sure the connection is completely reset before deleting it. */
1009     /* get rid of pending events that could zap us later */
1010     if (conn->challengeEvent)
1011         rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
1012     if (conn->checkReachEvent)
1013         rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
1014
1015     /* Add the connection to the list of destroyed connections that
1016      * need to be cleaned up. This is necessary to avoid deadlocks
1017      * in the routines we call to inform others that this connection is
1018      * being destroyed. */
1019     conn->next = rx_connCleanup_list;
1020     rx_connCleanup_list = conn;
1021 }
1022
1023 /* Externally available version */
1024 void
1025 rx_DestroyConnection(register struct rx_connection *conn)
1026 {
1027     SPLVAR;
1028
1029     NETPRI;
1030     rxi_DestroyConnection(conn);
1031     USERPRI;
1032 }
1033
1034 void
1035 rx_GetConnection(register struct rx_connection *conn)
1036 {
1037     SPLVAR;
1038
1039     NETPRI;
1040     MUTEX_ENTER(&conn->conn_data_lock);
1041     conn->refCount++;
1042     MUTEX_EXIT(&conn->conn_data_lock);
1043     USERPRI;
1044 }
1045
1046 /* Start a new rx remote procedure call, on the specified connection.
1047  * If wait is set to 1, wait for a free call channel; otherwise return
1048  * 0.  Maxtime gives the maximum number of seconds this call may take,
1049  * after rx_MakeCall returns.  After this time interval, a call to any
1050  * of rx_SendData, rx_ReadData, etc. will fail with RX_CALL_TIMEOUT.
1051  * For fine grain locking, we hold the conn_call_lock in order to
1052  * to ensure that we don't get signalle after we found a call in an active
1053  * state and before we go to sleep.
1054  */
1055 struct rx_call *
1056 rx_NewCall(register struct rx_connection *conn)
1057 {
1058     register int i;
1059     register struct rx_call *call;
1060     struct clock queueTime;
1061     SPLVAR;
1062
1063     clock_NewTime();
1064     dpf(("rx_MakeCall(conn %x)\n", conn));
1065
1066     NETPRI;
1067     clock_GetTime(&queueTime);
1068     MUTEX_ENTER(&conn->conn_call_lock);
1069
1070     /*
1071      * Check if there are others waiting for a new call.
1072      * If so, let them go first to avoid starving them.
1073      * This is a fairly simple scheme, and might not be
1074      * a complete solution for large numbers of waiters.
1075      *
1076      * makeCallWaiters keeps track of the number of
1077      * threads waiting to make calls and the
1078      * RX_CONN_MAKECALL_WAITING flag bit is used to
1079      * indicate that there are indeed calls waiting.
1080      * The flag is set when the waiter is incremented.
1081      * It is only cleared in rx_EndCall when
1082      * makeCallWaiters is 0.  This prevents us from
1083      * accidently destroying the connection while it
1084      * is potentially about to be used.
1085      */
1086     MUTEX_ENTER(&conn->conn_data_lock);
1087     if (conn->makeCallWaiters) {
1088         conn->flags |= RX_CONN_MAKECALL_WAITING;
1089         conn->makeCallWaiters++;
1090         MUTEX_EXIT(&conn->conn_data_lock);
1091
1092 #ifdef  RX_ENABLE_LOCKS
1093         CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1094 #else
1095         osi_rxSleep(conn);
1096 #endif
1097         MUTEX_ENTER(&conn->conn_data_lock);
1098         conn->makeCallWaiters--;
1099     }
1100     MUTEX_EXIT(&conn->conn_data_lock);
1101
1102     for (;;) {
1103         for (i = 0; i < RX_MAXCALLS; i++) {
1104             call = conn->call[i];
1105             if (call) {
1106                 MUTEX_ENTER(&call->lock);
1107                 if (call->state == RX_STATE_DALLY) {
1108                     rxi_ResetCall(call, 0);
1109                     (*call->callNumber)++;
1110                     break;
1111                 }
1112                 MUTEX_EXIT(&call->lock);
1113             } else {
1114                 call = rxi_NewCall(conn, i);
1115                 break;
1116             }
1117         }
1118         if (i < RX_MAXCALLS) {
1119             break;
1120         }
1121         MUTEX_ENTER(&conn->conn_data_lock);
1122         conn->flags |= RX_CONN_MAKECALL_WAITING;
1123         conn->makeCallWaiters++;
1124         MUTEX_EXIT(&conn->conn_data_lock);
1125
1126 #ifdef  RX_ENABLE_LOCKS
1127         CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
1128 #else
1129         osi_rxSleep(conn);
1130 #endif
1131         MUTEX_ENTER(&conn->conn_data_lock);
1132         conn->makeCallWaiters--;
1133         MUTEX_EXIT(&conn->conn_data_lock);
1134     }
1135     /*
1136      * Wake up anyone else who might be giving us a chance to
1137      * run (see code above that avoids resource starvation).
1138      */
1139 #ifdef  RX_ENABLE_LOCKS
1140     CV_BROADCAST(&conn->conn_call_cv);
1141 #else
1142     osi_rxWakeup(conn);
1143 #endif
1144
1145     CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1146
1147     /* Client is initially in send mode */
1148     call->state = RX_STATE_ACTIVE;
1149     call->error = conn->error;
1150     if (call->error)
1151         call->mode = RX_MODE_ERROR;
1152     else
1153         call->mode = RX_MODE_SENDING;
1154
1155     /* remember start time for call in case we have hard dead time limit */
1156     call->queueTime = queueTime;
1157     clock_GetTime(&call->startTime);
1158     hzero(call->bytesSent);
1159     hzero(call->bytesRcvd);
1160
1161     /* Turn on busy protocol. */
1162     rxi_KeepAliveOn(call);
1163
1164     MUTEX_EXIT(&call->lock);
1165     MUTEX_EXIT(&conn->conn_call_lock);
1166     USERPRI;
1167
1168 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
1169     /* Now, if TQ wasn't cleared earlier, do it now. */
1170     MUTEX_ENTER(&call->lock);
1171     while (call->flags & RX_CALL_TQ_BUSY) {
1172         call->flags |= RX_CALL_TQ_WAIT;
1173         call->tqWaiters++;
1174 #ifdef RX_ENABLE_LOCKS
1175         osirx_AssertMine(&call->lock, "rxi_Start lock4");
1176         CV_WAIT(&call->cv_tq, &call->lock);
1177 #else /* RX_ENABLE_LOCKS */
1178         osi_rxSleep(&call->tq);
1179 #endif /* RX_ENABLE_LOCKS */
1180         call->tqWaiters--;
1181         if (call->tqWaiters == 0) {
1182             call->flags &= ~RX_CALL_TQ_WAIT;
1183         }
1184     }
1185     if (call->flags & RX_CALL_TQ_CLEARME) {
1186         rxi_ClearTransmitQueue(call, 0);
1187         queue_Init(&call->tq);
1188     }
1189     MUTEX_EXIT(&call->lock);
1190 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
1191
1192     return call;
1193 }
1194
1195 int
1196 rxi_HasActiveCalls(register struct rx_connection *aconn)
1197 {
1198     register int i;
1199     register struct rx_call *tcall;
1200     SPLVAR;
1201
1202     NETPRI;
1203     for (i = 0; i < RX_MAXCALLS; i++) {
1204         if ((tcall = aconn->call[i])) {
1205             if ((tcall->state == RX_STATE_ACTIVE)
1206                 || (tcall->state == RX_STATE_PRECALL)) {
1207                 USERPRI;
1208                 return 1;
1209             }
1210         }
1211     }
1212     USERPRI;
1213     return 0;
1214 }
1215
1216 int
1217 rxi_GetCallNumberVector(register struct rx_connection *aconn,
1218                         register afs_int32 * aint32s)
1219 {
1220     register int i;
1221     register struct rx_call *tcall;
1222     SPLVAR;
1223
1224     NETPRI;
1225     for (i = 0; i < RX_MAXCALLS; i++) {
1226         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1227             aint32s[i] = aconn->callNumber[i] + 1;
1228         else
1229             aint32s[i] = aconn->callNumber[i];
1230     }
1231     USERPRI;
1232     return 0;
1233 }
1234
1235 int
1236 rxi_SetCallNumberVector(register struct rx_connection *aconn,
1237                         register afs_int32 * aint32s)
1238 {
1239     register int i;
1240     register struct rx_call *tcall;
1241     SPLVAR;
1242
1243     NETPRI;
1244     for (i = 0; i < RX_MAXCALLS; i++) {
1245         if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
1246             aconn->callNumber[i] = aint32s[i] - 1;
1247         else
1248             aconn->callNumber[i] = aint32s[i];
1249     }
1250     USERPRI;
1251     return 0;
1252 }
1253
1254 /* Advertise a new service.  A service is named locally by a UDP port
1255  * number plus a 16-bit service id.  Returns (struct rx_service *) 0
1256  * on a failure.
1257  *
1258      char *serviceName;  Name for identification purposes (e.g. the
1259                          service name might be used for probing for
1260                          statistics) */
1261 struct rx_service *
1262 rx_NewService(u_short port, u_short serviceId, char *serviceName,
1263               struct rx_securityClass **securityObjects, int nSecurityObjects,
1264               afs_int32(*serviceProc) (struct rx_call * acall))
1265 {
1266     osi_socket socket = OSI_NULLSOCKET;
1267     register struct rx_service *tservice;
1268     register int i;
1269     SPLVAR;
1270
1271     clock_NewTime();
1272
1273     if (serviceId == 0) {
1274         (osi_Msg
1275          "rx_NewService:  service id for service %s is not non-zero.\n",
1276          serviceName);
1277         return 0;
1278     }
1279     if (port == 0) {
1280         if (rx_port == 0) {
1281             (osi_Msg
1282              "rx_NewService: A non-zero port must be specified on this call if a non-zero port was not provided at Rx initialization (service %s).\n",
1283              serviceName);
1284             return 0;
1285         }
1286         port = rx_port;
1287         socket = rx_socket;
1288     }
1289
1290     tservice = rxi_AllocService();
1291     NETPRI;
1292     for (i = 0; i < RX_MAX_SERVICES; i++) {
1293         register struct rx_service *service = rx_services[i];
1294         if (service) {
1295             if (port == service->servicePort) {
1296                 if (service->serviceId == serviceId) {
1297                     /* The identical service has already been
1298                      * installed; if the caller was intending to
1299                      * change the security classes used by this
1300                      * service, he/she loses. */
1301                     (osi_Msg
1302                      "rx_NewService: tried to install service %s with service id %d, which is already in use for service %s\n",
1303                      serviceName, serviceId, service->serviceName);
1304                     USERPRI;
1305                     rxi_FreeService(tservice);
1306                     return service;
1307                 }
1308                 /* Different service, same port: re-use the socket
1309                  * which is bound to the same port */
1310                 socket = service->socket;
1311             }
1312         } else {
1313             if (socket == OSI_NULLSOCKET) {
1314                 /* If we don't already have a socket (from another
1315                  * service on same port) get a new one */
1316                 socket = rxi_GetHostUDPSocket(htonl(INADDR_ANY), port);
1317                 if (socket == OSI_NULLSOCKET) {
1318                     USERPRI;
1319                     rxi_FreeService(tservice);
1320                     return 0;
1321                 }
1322             }
1323             service = tservice;
1324             service->socket = socket;
1325             service->servicePort = port;
1326             service->serviceId = serviceId;
1327             service->serviceName = serviceName;
1328             service->nSecurityObjects = nSecurityObjects;
1329             service->securityObjects = securityObjects;
1330             service->minProcs = 0;
1331             service->maxProcs = 1;
1332             service->idleDeadTime = 60;
1333             service->connDeadTime = rx_connDeadTime;
1334             service->executeRequestProc = serviceProc;
1335             service->checkReach = 0;
1336             rx_services[i] = service;   /* not visible until now */
1337             USERPRI;
1338             return service;
1339         }
1340     }
1341     USERPRI;
1342     rxi_FreeService(tservice);
1343     (osi_Msg "rx_NewService: cannot support > %d services\n",
1344      RX_MAX_SERVICES);
1345     return 0;
1346 }
1347
1348 /* Generic request processing loop. This routine should be called
1349  * by the implementation dependent rx_ServerProc. If socketp is
1350  * non-null, it will be set to the file descriptor that this thread
1351  * is now listening on. If socketp is null, this routine will never
1352  * returns. */
1353 void
1354 rxi_ServerProc(int threadID, struct rx_call *newcall, osi_socket * socketp)
1355 {
1356     register struct rx_call *call;
1357     register afs_int32 code;
1358     register struct rx_service *tservice = NULL;
1359
1360     for (;;) {
1361         if (newcall) {
1362             call = newcall;
1363             newcall = NULL;
1364         } else {
1365             call = rx_GetCall(threadID, tservice, socketp);
1366             if (socketp && *socketp != OSI_NULLSOCKET) {
1367                 /* We are now a listener thread */
1368                 return;
1369             }
1370         }
1371
1372         /* if server is restarting( typically smooth shutdown) then do not
1373          * allow any new calls.
1374          */
1375
1376         if (rx_tranquil && (call != NULL)) {
1377             SPLVAR;
1378
1379             NETPRI;
1380             MUTEX_ENTER(&call->lock);
1381
1382             rxi_CallError(call, RX_RESTARTING);
1383             rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1384
1385             MUTEX_EXIT(&call->lock);
1386             USERPRI;
1387         }
1388 #ifdef  KERNEL
1389         if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1390 #ifdef RX_ENABLE_LOCKS
1391             AFS_GLOCK();
1392 #endif /* RX_ENABLE_LOCKS */
1393             afs_termState = AFSOP_STOP_AFS;
1394             afs_osi_Wakeup(&afs_termState);
1395 #ifdef RX_ENABLE_LOCKS
1396             AFS_GUNLOCK();
1397 #endif /* RX_ENABLE_LOCKS */
1398             return;
1399         }
1400 #endif
1401
1402         tservice = call->conn->service;
1403
1404         if (tservice->beforeProc)
1405             (*tservice->beforeProc) (call);
1406
1407         code = call->conn->service->executeRequestProc(call);
1408
1409         if (tservice->afterProc)
1410             (*tservice->afterProc) (call, code);
1411
1412         rx_EndCall(call, code);
1413         MUTEX_ENTER(&rx_stats_mutex);
1414         rxi_nCalls++;
1415         MUTEX_EXIT(&rx_stats_mutex);
1416     }
1417 }
1418
1419
1420 void
1421 rx_WakeupServerProcs(void)
1422 {
1423     struct rx_serverQueueEntry *np, *tqp;
1424     SPLVAR;
1425
1426     NETPRI;
1427     MUTEX_ENTER(&rx_serverPool_lock);
1428
1429 #ifdef RX_ENABLE_LOCKS
1430     if (rx_waitForPacket)
1431         CV_BROADCAST(&rx_waitForPacket->cv);
1432 #else /* RX_ENABLE_LOCKS */
1433     if (rx_waitForPacket)
1434         osi_rxWakeup(rx_waitForPacket);
1435 #endif /* RX_ENABLE_LOCKS */
1436     MUTEX_ENTER(&freeSQEList_lock);
1437     for (np = rx_FreeSQEList; np; np = tqp) {
1438         tqp = *(struct rx_serverQueueEntry **)np;
1439 #ifdef RX_ENABLE_LOCKS
1440         CV_BROADCAST(&np->cv);
1441 #else /* RX_ENABLE_LOCKS */
1442         osi_rxWakeup(np);
1443 #endif /* RX_ENABLE_LOCKS */
1444     }
1445     MUTEX_EXIT(&freeSQEList_lock);
1446     for (queue_Scan(&rx_idleServerQueue, np, tqp, rx_serverQueueEntry)) {
1447 #ifdef RX_ENABLE_LOCKS
1448         CV_BROADCAST(&np->cv);
1449 #else /* RX_ENABLE_LOCKS */
1450         osi_rxWakeup(np);
1451 #endif /* RX_ENABLE_LOCKS */
1452     }
1453     MUTEX_EXIT(&rx_serverPool_lock);
1454     USERPRI;
1455 }
1456
1457 /* meltdown:
1458  * One thing that seems to happen is that all the server threads get
1459  * tied up on some empty or slow call, and then a whole bunch of calls
1460  * arrive at once, using up the packet pool, so now there are more
1461  * empty calls.  The most critical resources here are server threads
1462  * and the free packet pool.  The "doreclaim" code seems to help in
1463  * general.  I think that eventually we arrive in this state: there
1464  * are lots of pending calls which do have all their packets present,
1465  * so they won't be reclaimed, are multi-packet calls, so they won't
1466  * be scheduled until later, and thus are tying up most of the free
1467  * packet pool for a very long time.
1468  * future options:
1469  * 1.  schedule multi-packet calls if all the packets are present.
1470  * Probably CPU-bound operation, useful to return packets to pool.
1471  * Do what if there is a full window, but the last packet isn't here?
1472  * 3.  preserve one thread which *only* runs "best" calls, otherwise
1473  * it sleeps and waits for that type of call.
1474  * 4.  Don't necessarily reserve a whole window for each thread.  In fact,
1475  * the current dataquota business is badly broken.  The quota isn't adjusted
1476  * to reflect how many packets are presently queued for a running call.
1477  * So, when we schedule a queued call with a full window of packets queued
1478  * up for it, that *should* free up a window full of packets for other 2d-class
1479  * calls to be able to use from the packet pool.  But it doesn't.
1480  *
1481  * NB.  Most of the time, this code doesn't run -- since idle server threads
1482  * sit on the idle server queue and are assigned by "...ReceivePacket" as soon
1483  * as a new call arrives.
1484  */
1485 /* Sleep until a call arrives.  Returns a pointer to the call, ready
1486  * for an rx_Read. */
1487 #ifdef RX_ENABLE_LOCKS
1488 struct rx_call *
1489 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1490 {
1491     struct rx_serverQueueEntry *sq;
1492     register struct rx_call *call = (struct rx_call *)0;
1493     struct rx_service *service = NULL;
1494     SPLVAR;
1495
1496     MUTEX_ENTER(&freeSQEList_lock);
1497
1498     if ((sq = rx_FreeSQEList)) {
1499         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1500         MUTEX_EXIT(&freeSQEList_lock);
1501     } else {                    /* otherwise allocate a new one and return that */
1502         MUTEX_EXIT(&freeSQEList_lock);
1503         sq = (struct rx_serverQueueEntry *)
1504             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1505         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1506         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1507     }
1508
1509     MUTEX_ENTER(&rx_serverPool_lock);
1510     if (cur_service != NULL) {
1511         ReturnToServerPool(cur_service);
1512     }
1513     while (1) {
1514         if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1515             register struct rx_call *tcall, *ncall, *choice2 = NULL;
1516
1517             /* Scan for eligible incoming calls.  A call is not eligible
1518              * if the maximum number of calls for its service type are
1519              * already executing */
1520             /* One thread will process calls FCFS (to prevent starvation),
1521              * while the other threads may run ahead looking for calls which
1522              * have all their input data available immediately.  This helps
1523              * keep threads from blocking, waiting for data from the client. */
1524             for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1525                 service = tcall->conn->service;
1526                 if (!QuotaOK(service)) {
1527                     continue;
1528                 }
1529                 if (tno == rxi_fcfs_thread_num
1530                     || !tcall->queue_item_header.next) {
1531                     /* If we're the fcfs thread , then  we'll just use
1532                      * this call. If we haven't been able to find an optimal
1533                      * choice, and we're at the end of the list, then use a
1534                      * 2d choice if one has been identified.  Otherwise... */
1535                     call = (choice2 ? choice2 : tcall);
1536                     service = call->conn->service;
1537                 } else if (!queue_IsEmpty(&tcall->rq)) {
1538                     struct rx_packet *rp;
1539                     rp = queue_First(&tcall->rq, rx_packet);
1540                     if (rp->header.seq == 1) {
1541                         if (!meltdown_1pkt
1542                             || (rp->header.flags & RX_LAST_PACKET)) {
1543                             call = tcall;
1544                         } else if (rxi_2dchoice && !choice2
1545                                    && !(tcall->flags & RX_CALL_CLEARED)
1546                                    && (tcall->rprev > rxi_HardAckRate)) {
1547                             choice2 = tcall;
1548                         } else
1549                             rxi_md2cnt++;
1550                     }
1551                 }
1552                 if (call) {
1553                     break;
1554                 } else {
1555                     ReturnToServerPool(service);
1556                 }
1557             }
1558         }
1559
1560         if (call) {
1561             queue_Remove(call);
1562             MUTEX_EXIT(&rx_serverPool_lock);
1563             MUTEX_ENTER(&call->lock);
1564
1565             if (call->flags & RX_CALL_WAIT_PROC) {
1566                 call->flags &= ~RX_CALL_WAIT_PROC;
1567                 MUTEX_ENTER(&rx_stats_mutex);
1568                 rx_nWaiting--;
1569                 MUTEX_EXIT(&rx_stats_mutex);
1570             }
1571
1572             if (call->state != RX_STATE_PRECALL || call->error) {
1573                 MUTEX_EXIT(&call->lock);
1574                 MUTEX_ENTER(&rx_serverPool_lock);
1575                 ReturnToServerPool(service);
1576                 call = NULL;
1577                 continue;
1578             }
1579
1580             if (queue_IsEmpty(&call->rq)
1581                 || queue_First(&call->rq, rx_packet)->header.seq != 1)
1582                 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1583
1584             CLEAR_CALL_QUEUE_LOCK(call);
1585             break;
1586         } else {
1587             /* If there are no eligible incoming calls, add this process
1588              * to the idle server queue, to wait for one */
1589             sq->newcall = 0;
1590             sq->tno = tno;
1591             if (socketp) {
1592                 *socketp = OSI_NULLSOCKET;
1593             }
1594             sq->socketp = socketp;
1595             queue_Append(&rx_idleServerQueue, sq);
1596 #ifndef AFS_AIX41_ENV
1597             rx_waitForPacket = sq;
1598 #else
1599             rx_waitingForPacket = sq;
1600 #endif /* AFS_AIX41_ENV */
1601             do {
1602                 CV_WAIT(&sq->cv, &rx_serverPool_lock);
1603 #ifdef  KERNEL
1604                 if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1605                     MUTEX_EXIT(&rx_serverPool_lock);
1606                     return (struct rx_call *)0;
1607                 }
1608 #endif
1609             } while (!(call = sq->newcall)
1610                      && !(socketp && *socketp != OSI_NULLSOCKET));
1611             MUTEX_EXIT(&rx_serverPool_lock);
1612             if (call) {
1613                 MUTEX_ENTER(&call->lock);
1614             }
1615             break;
1616         }
1617     }
1618
1619     MUTEX_ENTER(&freeSQEList_lock);
1620     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1621     rx_FreeSQEList = sq;
1622     MUTEX_EXIT(&freeSQEList_lock);
1623
1624     if (call) {
1625         clock_GetTime(&call->startTime);
1626         call->state = RX_STATE_ACTIVE;
1627         call->mode = RX_MODE_RECEIVING;
1628 #ifdef RX_KERNEL_TRACE
1629         if (ICL_SETACTIVE(afs_iclSetp)) {
1630             int glockOwner = ISAFS_GLOCK();
1631             if (!glockOwner)
1632                 AFS_GLOCK();
1633             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1634                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1635                        call);
1636             if (!glockOwner)
1637                 AFS_GUNLOCK();
1638         }
1639 #endif
1640
1641         rxi_calltrace(RX_CALL_START, call);
1642         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1643              call->conn->service->servicePort, call->conn->service->serviceId,
1644              call));
1645
1646         CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
1647         MUTEX_EXIT(&call->lock);
1648     } else {
1649         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1650     }
1651
1652     return call;
1653 }
1654 #else /* RX_ENABLE_LOCKS */
1655 struct rx_call *
1656 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
1657 {
1658     struct rx_serverQueueEntry *sq;
1659     register struct rx_call *call = (struct rx_call *)0, *choice2;
1660     struct rx_service *service = NULL;
1661     SPLVAR;
1662
1663     NETPRI;
1664     MUTEX_ENTER(&freeSQEList_lock);
1665
1666     if ((sq = rx_FreeSQEList)) {
1667         rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
1668         MUTEX_EXIT(&freeSQEList_lock);
1669     } else {                    /* otherwise allocate a new one and return that */
1670         MUTEX_EXIT(&freeSQEList_lock);
1671         sq = (struct rx_serverQueueEntry *)
1672             rxi_Alloc(sizeof(struct rx_serverQueueEntry));
1673         MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
1674         CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
1675     }
1676     MUTEX_ENTER(&sq->lock);
1677
1678     if (cur_service != NULL) {
1679         cur_service->nRequestsRunning--;
1680         if (cur_service->nRequestsRunning < cur_service->minProcs)
1681             rxi_minDeficit++;
1682         rxi_availProcs++;
1683     }
1684     if (queue_IsNotEmpty(&rx_incomingCallQueue)) {
1685         register struct rx_call *tcall, *ncall;
1686         /* Scan for eligible incoming calls.  A call is not eligible
1687          * if the maximum number of calls for its service type are
1688          * already executing */
1689         /* One thread will process calls FCFS (to prevent starvation),
1690          * while the other threads may run ahead looking for calls which
1691          * have all their input data available immediately.  This helps
1692          * keep threads from blocking, waiting for data from the client. */
1693         choice2 = (struct rx_call *)0;
1694         for (queue_Scan(&rx_incomingCallQueue, tcall, ncall, rx_call)) {
1695             service = tcall->conn->service;
1696             if (QuotaOK(service)) {
1697                 if (tno == rxi_fcfs_thread_num
1698                     || !tcall->queue_item_header.next) {
1699                     /* If we're the fcfs thread, then  we'll just use
1700                      * this call. If we haven't been able to find an optimal
1701                      * choice, and we're at the end of the list, then use a
1702                      * 2d choice if one has been identified.  Otherwise... */
1703                     call = (choice2 ? choice2 : tcall);
1704                     service = call->conn->service;
1705                 } else if (!queue_IsEmpty(&tcall->rq)) {
1706                     struct rx_packet *rp;
1707                     rp = queue_First(&tcall->rq, rx_packet);
1708                     if (rp->header.seq == 1
1709                         && (!meltdown_1pkt
1710                             || (rp->header.flags & RX_LAST_PACKET))) {
1711                         call = tcall;
1712                     } else if (rxi_2dchoice && !choice2
1713                                && !(tcall->flags & RX_CALL_CLEARED)
1714                                && (tcall->rprev > rxi_HardAckRate)) {
1715                         choice2 = tcall;
1716                     } else
1717                         rxi_md2cnt++;
1718                 }
1719             }
1720             if (call)
1721                 break;
1722         }
1723     }
1724
1725     if (call) {
1726         queue_Remove(call);
1727         /* we can't schedule a call if there's no data!!! */
1728         /* send an ack if there's no data, if we're missing the
1729          * first packet, or we're missing something between first
1730          * and last -- there's a "hole" in the incoming data. */
1731         if (queue_IsEmpty(&call->rq)
1732             || queue_First(&call->rq, rx_packet)->header.seq != 1
1733             || call->rprev != queue_Last(&call->rq, rx_packet)->header.seq)
1734             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
1735
1736         call->flags &= (~RX_CALL_WAIT_PROC);
1737         service->nRequestsRunning++;
1738         /* just started call in minProcs pool, need fewer to maintain
1739          * guarantee */
1740         if (service->nRequestsRunning <= service->minProcs)
1741             rxi_minDeficit--;
1742         rxi_availProcs--;
1743         rx_nWaiting--;
1744         /* MUTEX_EXIT(&call->lock); */
1745     } else {
1746         /* If there are no eligible incoming calls, add this process
1747          * to the idle server queue, to wait for one */
1748         sq->newcall = 0;
1749         if (socketp) {
1750             *socketp = OSI_NULLSOCKET;
1751         }
1752         sq->socketp = socketp;
1753         queue_Append(&rx_idleServerQueue, sq);
1754         do {
1755             osi_rxSleep(sq);
1756 #ifdef  KERNEL
1757             if (afs_termState == AFSOP_STOP_RXCALLBACK) {
1758                 USERPRI;
1759                 rxi_Free(sq, sizeof(struct rx_serverQueueEntry));
1760                 return (struct rx_call *)0;
1761             }
1762 #endif
1763         } while (!(call = sq->newcall)
1764                  && !(socketp && *socketp != OSI_NULLSOCKET));
1765     }
1766     MUTEX_EXIT(&sq->lock);
1767
1768     MUTEX_ENTER(&freeSQEList_lock);
1769     *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
1770     rx_FreeSQEList = sq;
1771     MUTEX_EXIT(&freeSQEList_lock);
1772
1773     if (call) {
1774         clock_GetTime(&call->startTime);
1775         call->state = RX_STATE_ACTIVE;
1776         call->mode = RX_MODE_RECEIVING;
1777 #ifdef RX_KERNEL_TRACE
1778         if (ICL_SETACTIVE(afs_iclSetp)) {
1779             int glockOwner = ISAFS_GLOCK();
1780             if (!glockOwner)
1781                 AFS_GLOCK();
1782             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
1783                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
1784                        call);
1785             if (!glockOwner)
1786                 AFS_GUNLOCK();
1787         }
1788 #endif
1789
1790         rxi_calltrace(RX_CALL_START, call);
1791         dpf(("rx_GetCall(port=%d, service=%d) ==> call %x\n",
1792              call->conn->service->servicePort, call->conn->service->serviceId,
1793              call));
1794     } else {
1795         dpf(("rx_GetCall(socketp=0x%x, *socketp=0x%x)\n", socketp, *socketp));
1796     }
1797
1798     USERPRI;
1799
1800     return call;
1801 }
1802 #endif /* RX_ENABLE_LOCKS */
1803
1804
1805
1806 /* Establish a procedure to be called when a packet arrives for a
1807  * call.  This routine will be called at most once after each call,
1808  * and will also be called if there is an error condition on the or
1809  * the call is complete.  Used by multi rx to build a selection
1810  * function which determines which of several calls is likely to be a
1811  * good one to read from.
1812  * NOTE: the way this is currently implemented it is probably only a
1813  * good idea to (1) use it immediately after a newcall (clients only)
1814  * and (2) only use it once.  Other uses currently void your warranty
1815  */
1816 void
1817 rx_SetArrivalProc(register struct rx_call *call,
1818                   register void (*proc) (register struct rx_call * call,
1819                                         register VOID * mh,
1820                                         register int index),
1821                   register VOID * handle, register int arg)
1822 {
1823     call->arrivalProc = proc;
1824     call->arrivalProcHandle = handle;
1825     call->arrivalProcArg = arg;
1826 }
1827
1828 /* Call is finished (possibly prematurely).  Return rc to the peer, if
1829  * appropriate, and return the final error code from the conversation
1830  * to the caller */
1831
1832 afs_int32
1833 rx_EndCall(register struct rx_call *call, afs_int32 rc)
1834 {
1835     register struct rx_connection *conn = call->conn;
1836     register struct rx_service *service;
1837     register struct rx_packet *tp;      /* Temporary packet pointer */
1838     register struct rx_packet *nxp;     /* Next packet pointer, for queue_Scan */
1839     afs_int32 error;
1840     SPLVAR;
1841
1842     dpf(("rx_EndCall(call %x)\n", call));
1843
1844     NETPRI;
1845     MUTEX_ENTER(&call->lock);
1846
1847     if (rc == 0 && call->error == 0) {
1848         call->abortCode = 0;
1849         call->abortCount = 0;
1850     }
1851
1852     call->arrivalProc = (void (*)())0;
1853     if (rc && call->error == 0) {
1854         rxi_CallError(call, rc);
1855         /* Send an abort message to the peer if this error code has
1856          * only just been set.  If it was set previously, assume the
1857          * peer has already been sent the error code or will request it
1858          */
1859         rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
1860     }
1861     if (conn->type == RX_SERVER_CONNECTION) {
1862         /* Make sure reply or at least dummy reply is sent */
1863         if (call->mode == RX_MODE_RECEIVING) {
1864             rxi_WriteProc(call, 0, 0);
1865         }
1866         if (call->mode == RX_MODE_SENDING) {
1867             rxi_FlushWrite(call);
1868         }
1869         service = conn->service;
1870         rxi_calltrace(RX_CALL_END, call);
1871         /* Call goes to hold state until reply packets are acknowledged */
1872         if (call->tfirst + call->nSoftAcked < call->tnext) {
1873             call->state = RX_STATE_HOLD;
1874         } else {
1875             call->state = RX_STATE_DALLY;
1876             rxi_ClearTransmitQueue(call, 0);
1877             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
1878             rxevent_Cancel(call->keepAliveEvent, call,
1879                            RX_CALL_REFCOUNT_ALIVE);
1880         }
1881     } else {                    /* Client connection */
1882         char dummy;
1883         /* Make sure server receives input packets, in the case where
1884          * no reply arguments are expected */
1885         if ((call->mode == RX_MODE_SENDING)
1886             || (call->mode == RX_MODE_RECEIVING && call->rnext == 1)) {
1887             (void)rxi_ReadProc(call, &dummy, 1);
1888         }
1889
1890         /* If we had an outstanding delayed ack, be nice to the server
1891          * and force-send it now.
1892          */
1893         if (call->delayedAckEvent) {
1894             rxevent_Cancel(call->delayedAckEvent, call,
1895                            RX_CALL_REFCOUNT_DELAY);
1896             call->delayedAckEvent = NULL;
1897             rxi_SendDelayedAck(NULL, call, NULL);
1898         }
1899
1900         /* We need to release the call lock since it's lower than the
1901          * conn_call_lock and we don't want to hold the conn_call_lock
1902          * over the rx_ReadProc call. The conn_call_lock needs to be held
1903          * here for the case where rx_NewCall is perusing the calls on
1904          * the connection structure. We don't want to signal until
1905          * rx_NewCall is in a stable state. Otherwise, rx_NewCall may
1906          * have checked this call, found it active and by the time it
1907          * goes to sleep, will have missed the signal.
1908          *
1909          * Do not clear the RX_CONN_MAKECALL_WAITING flag as long as
1910          * there are threads waiting to use the conn object.
1911          */
1912         MUTEX_EXIT(&call->lock);
1913         MUTEX_ENTER(&conn->conn_call_lock);
1914         MUTEX_ENTER(&call->lock);
1915         MUTEX_ENTER(&conn->conn_data_lock);
1916         conn->flags |= RX_CONN_BUSY;
1917         if (conn->flags & RX_CONN_MAKECALL_WAITING) {
1918             if (conn->makeCallWaiters == 0)
1919                 conn->flags &= (~RX_CONN_MAKECALL_WAITING);
1920             MUTEX_EXIT(&conn->conn_data_lock);
1921 #ifdef  RX_ENABLE_LOCKS
1922             CV_BROADCAST(&conn->conn_call_cv);
1923 #else
1924             osi_rxWakeup(conn);
1925 #endif
1926         }
1927 #ifdef RX_ENABLE_LOCKS
1928         else {
1929             MUTEX_EXIT(&conn->conn_data_lock);
1930         }
1931 #endif /* RX_ENABLE_LOCKS */
1932         call->state = RX_STATE_DALLY;
1933     }
1934     error = call->error;
1935
1936     /* currentPacket, nLeft, and NFree must be zeroed here, because
1937      * ResetCall cannot: ResetCall may be called at splnet(), in the
1938      * kernel version, and may interrupt the macros rx_Read or
1939      * rx_Write, which run at normal priority for efficiency. */
1940     if (call->currentPacket) {
1941         queue_Prepend(&call->iovq, call->currentPacket);
1942         call->currentPacket = (struct rx_packet *)0;
1943     }
1944
1945     call->nLeft = call->nFree = call->curlen = 0;
1946
1947     /* Free any packets from the last call to ReadvProc/WritevProc */
1948     rxi_FreePackets(0, &call->iovq);
1949
1950     CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
1951     MUTEX_EXIT(&call->lock);
1952     if (conn->type == RX_CLIENT_CONNECTION) {
1953         MUTEX_EXIT(&conn->conn_call_lock);
1954         conn->flags &= ~RX_CONN_BUSY;
1955     }
1956     USERPRI;
1957     /*
1958      * Map errors to the local host's errno.h format.
1959      */
1960     error = ntoh_syserr_conv(error);
1961     return error;
1962 }
1963
1964 #if !defined(KERNEL)
1965
1966 /* Call this routine when shutting down a server or client (especially
1967  * clients).  This will allow Rx to gracefully garbage collect server
1968  * connections, and reduce the number of retries that a server might
1969  * make to a dead client.
1970  * This is not quite right, since some calls may still be ongoing and
1971  * we can't lock them to destroy them. */
1972 void
1973 rx_Finalize(void)
1974 {
1975     register struct rx_connection **conn_ptr, **conn_end;
1976
1977     INIT_PTHREAD_LOCKS;
1978     LOCK_RX_INIT;
1979     if (rxinit_status == 1) {
1980         UNLOCK_RX_INIT;
1981         return;                 /* Already shutdown. */
1982     }
1983     rxi_DeleteCachedConnections();
1984     if (rx_connHashTable) {
1985         MUTEX_ENTER(&rx_connHashTable_lock);
1986         for (conn_ptr = &rx_connHashTable[0], conn_end =
1987              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
1988              conn_ptr++) {
1989             struct rx_connection *conn, *next;
1990             for (conn = *conn_ptr; conn; conn = next) {
1991                 next = conn->next;
1992                 if (conn->type == RX_CLIENT_CONNECTION) {
1993                     /* MUTEX_ENTER(&conn->conn_data_lock); when used in kernel */
1994                     conn->refCount++;
1995                     /* MUTEX_EXIT(&conn->conn_data_lock); when used in kernel */
1996 #ifdef RX_ENABLE_LOCKS
1997                     rxi_DestroyConnectionNoLock(conn);
1998 #else /* RX_ENABLE_LOCKS */
1999                     rxi_DestroyConnection(conn);
2000 #endif /* RX_ENABLE_LOCKS */
2001                 }
2002             }
2003         }
2004 #ifdef RX_ENABLE_LOCKS
2005         while (rx_connCleanup_list) {
2006             struct rx_connection *conn;
2007             conn = rx_connCleanup_list;
2008             rx_connCleanup_list = rx_connCleanup_list->next;
2009             MUTEX_EXIT(&rx_connHashTable_lock);
2010             rxi_CleanupConnection(conn);
2011             MUTEX_ENTER(&rx_connHashTable_lock);
2012         }
2013         MUTEX_EXIT(&rx_connHashTable_lock);
2014 #endif /* RX_ENABLE_LOCKS */
2015     }
2016     rxi_flushtrace();
2017
2018     rxinit_status = 1;
2019     UNLOCK_RX_INIT;
2020 }
2021 #endif
2022
2023 /* if we wakeup packet waiter too often, can get in loop with two
2024     AllocSendPackets each waking each other up (from ReclaimPacket calls) */
2025 void
2026 rxi_PacketsUnWait(void)
2027 {
2028     if (!rx_waitingForPackets) {
2029         return;
2030     }
2031 #ifdef KERNEL
2032     if (rxi_OverQuota(RX_PACKET_CLASS_SEND)) {
2033         return;                 /* still over quota */
2034     }
2035 #endif /* KERNEL */
2036     rx_waitingForPackets = 0;
2037 #ifdef  RX_ENABLE_LOCKS
2038     CV_BROADCAST(&rx_waitingForPackets_cv);
2039 #else
2040     osi_rxWakeup(&rx_waitingForPackets);
2041 #endif
2042     return;
2043 }
2044
2045
2046 /* ------------------Internal interfaces------------------------- */
2047
2048 /* Return this process's service structure for the
2049  * specified socket and service */
2050 struct rx_service *
2051 rxi_FindService(register osi_socket socket, register u_short serviceId)
2052 {
2053     register struct rx_service **sp;
2054     for (sp = &rx_services[0]; *sp; sp++) {
2055         if ((*sp)->serviceId == serviceId && (*sp)->socket == socket)
2056             return *sp;
2057     }
2058     return 0;
2059 }
2060
2061 /* Allocate a call structure, for the indicated channel of the
2062  * supplied connection.  The mode and state of the call must be set by
2063  * the caller. Returns the call with mutex locked. */
2064 struct rx_call *
2065 rxi_NewCall(register struct rx_connection *conn, register int channel)
2066 {
2067     register struct rx_call *call;
2068 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2069     register struct rx_call *cp;        /* Call pointer temp */
2070     register struct rx_call *nxp;       /* Next call pointer, for queue_Scan */
2071 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2072
2073     /* Grab an existing call structure, or allocate a new one.
2074      * Existing call structures are assumed to have been left reset by
2075      * rxi_FreeCall */
2076     MUTEX_ENTER(&rx_freeCallQueue_lock);
2077
2078 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2079     /*
2080      * EXCEPT that the TQ might not yet be cleared out.
2081      * Skip over those with in-use TQs.
2082      */
2083     call = NULL;
2084     for (queue_Scan(&rx_freeCallQueue, cp, nxp, rx_call)) {
2085         if (!(cp->flags & RX_CALL_TQ_BUSY)) {
2086             call = cp;
2087             break;
2088         }
2089     }
2090     if (call) {
2091 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2092     if (queue_IsNotEmpty(&rx_freeCallQueue)) {
2093         call = queue_First(&rx_freeCallQueue, rx_call);
2094 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2095         queue_Remove(call);
2096         MUTEX_ENTER(&rx_stats_mutex);
2097         rx_stats.nFreeCallStructs--;
2098         MUTEX_EXIT(&rx_stats_mutex);
2099         MUTEX_EXIT(&rx_freeCallQueue_lock);
2100         MUTEX_ENTER(&call->lock);
2101         CLEAR_CALL_QUEUE_LOCK(call);
2102 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2103         /* Now, if TQ wasn't cleared earlier, do it now. */
2104         if (call->flags & RX_CALL_TQ_CLEARME) {
2105             rxi_ClearTransmitQueue(call, 0);
2106             queue_Init(&call->tq);
2107         }
2108 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2109         /* Bind the call to its connection structure */
2110         call->conn = conn;
2111         rxi_ResetCall(call, 1);
2112     } else {
2113         call = (struct rx_call *)rxi_Alloc(sizeof(struct rx_call));
2114
2115         MUTEX_EXIT(&rx_freeCallQueue_lock);
2116         MUTEX_INIT(&call->lock, "call lock", MUTEX_DEFAULT, NULL);
2117         MUTEX_ENTER(&call->lock);
2118         CV_INIT(&call->cv_twind, "call twind", CV_DEFAULT, 0);
2119         CV_INIT(&call->cv_rq, "call rq", CV_DEFAULT, 0);
2120         CV_INIT(&call->cv_tq, "call tq", CV_DEFAULT, 0);
2121
2122         MUTEX_ENTER(&rx_stats_mutex);
2123         rx_stats.nCallStructs++;
2124         MUTEX_EXIT(&rx_stats_mutex);
2125         /* Initialize once-only items */
2126         queue_Init(&call->tq);
2127         queue_Init(&call->rq);
2128         queue_Init(&call->iovq);
2129         /* Bind the call to its connection structure (prereq for reset) */
2130         call->conn = conn;
2131         rxi_ResetCall(call, 1);
2132     }
2133     call->channel = channel;
2134     call->callNumber = &conn->callNumber[channel];
2135     /* Note that the next expected call number is retained (in
2136      * conn->callNumber[i]), even if we reallocate the call structure
2137      */
2138     conn->call[channel] = call;
2139     /* if the channel's never been used (== 0), we should start at 1, otherwise
2140      * the call number is valid from the last time this channel was used */
2141     if (*call->callNumber == 0)
2142         *call->callNumber = 1;
2143
2144     return call;
2145 }
2146
2147 /* A call has been inactive long enough that so we can throw away
2148  * state, including the call structure, which is placed on the call
2149  * free list.
2150  * Call is locked upon entry.
2151  * haveCTLock set if called from rxi_ReapConnections
2152  */
2153 #ifdef RX_ENABLE_LOCKS
2154 void
2155 rxi_FreeCall(register struct rx_call *call, int haveCTLock)
2156 #else /* RX_ENABLE_LOCKS */
2157 void
2158 rxi_FreeCall(register struct rx_call *call)
2159 #endif                          /* RX_ENABLE_LOCKS */
2160 {
2161     register int channel = call->channel;
2162     register struct rx_connection *conn = call->conn;
2163
2164
2165     if (call->state == RX_STATE_DALLY || call->state == RX_STATE_HOLD)
2166         (*call->callNumber)++;
2167     rxi_ResetCall(call, 0);
2168     call->conn->call[channel] = (struct rx_call *)0;
2169
2170     MUTEX_ENTER(&rx_freeCallQueue_lock);
2171     SET_CALL_QUEUE_LOCK(call, &rx_freeCallQueue_lock);
2172 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2173     /* A call may be free even though its transmit queue is still in use.
2174      * Since we search the call list from head to tail, put busy calls at
2175      * the head of the list, and idle calls at the tail.
2176      */
2177     if (call->flags & RX_CALL_TQ_BUSY)
2178         queue_Prepend(&rx_freeCallQueue, call);
2179     else
2180         queue_Append(&rx_freeCallQueue, call);
2181 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2182     queue_Append(&rx_freeCallQueue, call);
2183 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2184     MUTEX_ENTER(&rx_stats_mutex);
2185     rx_stats.nFreeCallStructs++;
2186     MUTEX_EXIT(&rx_stats_mutex);
2187
2188     MUTEX_EXIT(&rx_freeCallQueue_lock);
2189
2190     /* Destroy the connection if it was previously slated for
2191      * destruction, i.e. the Rx client code previously called
2192      * rx_DestroyConnection (client connections), or
2193      * rxi_ReapConnections called the same routine (server
2194      * connections).  Only do this, however, if there are no
2195      * outstanding calls. Note that for fine grain locking, there appears
2196      * to be a deadlock in that rxi_FreeCall has a call locked and
2197      * DestroyConnectionNoLock locks each call in the conn. But note a
2198      * few lines up where we have removed this call from the conn.
2199      * If someone else destroys a connection, they either have no
2200      * call lock held or are going through this section of code.
2201      */
2202     if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) {
2203         MUTEX_ENTER(&conn->conn_data_lock);
2204         conn->refCount++;
2205         MUTEX_EXIT(&conn->conn_data_lock);
2206 #ifdef RX_ENABLE_LOCKS
2207         if (haveCTLock)
2208             rxi_DestroyConnectionNoLock(conn);
2209         else
2210             rxi_DestroyConnection(conn);
2211 #else /* RX_ENABLE_LOCKS */
2212         rxi_DestroyConnection(conn);
2213 #endif /* RX_ENABLE_LOCKS */
2214     }
2215 }
2216
2217 afs_int32 rxi_Alloccnt = 0, rxi_Allocsize = 0;
2218 char *
2219 rxi_Alloc(register size_t size)
2220 {
2221     register char *p;
2222
2223     MUTEX_ENTER(&rx_stats_mutex);
2224     rxi_Alloccnt++;
2225     rxi_Allocsize += size;
2226     MUTEX_EXIT(&rx_stats_mutex);
2227
2228     p = (char *)osi_Alloc(size);
2229
2230     if (!p)
2231         osi_Panic("rxi_Alloc error");
2232     memset(p, 0, size);
2233     return p;
2234 }
2235
2236 void
2237 rxi_Free(void *addr, register size_t size)
2238 {
2239     MUTEX_ENTER(&rx_stats_mutex);
2240     rxi_Alloccnt--;
2241     rxi_Allocsize -= size;
2242     MUTEX_EXIT(&rx_stats_mutex);
2243
2244     osi_Free(addr, size);
2245 }
2246
2247 /* Find the peer process represented by the supplied (host,port)
2248  * combination.  If there is no appropriate active peer structure, a
2249  * new one will be allocated and initialized
2250  * The origPeer, if set, is a pointer to a peer structure on which the
2251  * refcount will be be decremented. This is used to replace the peer
2252  * structure hanging off a connection structure */
2253 struct rx_peer *
2254 rxi_FindPeer(register afs_uint32 host, register u_short port,
2255              struct rx_peer *origPeer, int create)
2256 {
2257     register struct rx_peer *pp;
2258     int hashIndex;
2259     hashIndex = PEER_HASH(host, port);
2260     MUTEX_ENTER(&rx_peerHashTable_lock);
2261     for (pp = rx_peerHashTable[hashIndex]; pp; pp = pp->next) {
2262         if ((pp->host == host) && (pp->port == port))
2263             break;
2264     }
2265     if (!pp) {
2266         if (create) {
2267             pp = rxi_AllocPeer();       /* This bzero's *pp */
2268             pp->host = host;    /* set here or in InitPeerParams is zero */
2269             pp->port = port;
2270             MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0);
2271             queue_Init(&pp->congestionQueue);
2272             queue_Init(&pp->rpcStats);
2273             pp->next = rx_peerHashTable[hashIndex];
2274             rx_peerHashTable[hashIndex] = pp;
2275             rxi_InitPeerParams(pp);
2276             MUTEX_ENTER(&rx_stats_mutex);
2277             rx_stats.nPeerStructs++;
2278             MUTEX_EXIT(&rx_stats_mutex);
2279         }
2280     }
2281     if (pp && create) {
2282         pp->refCount++;
2283     }
2284     if (origPeer)
2285         origPeer->refCount--;
2286     MUTEX_EXIT(&rx_peerHashTable_lock);
2287     return pp;
2288 }
2289
2290
2291 /* Find the connection at (host, port) started at epoch, and with the
2292  * given connection id.  Creates the server connection if necessary.
2293  * The type specifies whether a client connection or a server
2294  * connection is desired.  In both cases, (host, port) specify the
2295  * peer's (host, pair) pair.  Client connections are not made
2296  * automatically by this routine.  The parameter socket gives the
2297  * socket descriptor on which the packet was received.  This is used,
2298  * in the case of server connections, to check that *new* connections
2299  * come via a valid (port, serviceId).  Finally, the securityIndex
2300  * parameter must match the existing index for the connection.  If a
2301  * server connection is created, it will be created using the supplied
2302  * index, if the index is valid for this service */
2303 struct rx_connection *
2304 rxi_FindConnection(osi_socket socket, register afs_int32 host,
2305                    register u_short port, u_short serviceId, afs_uint32 cid,
2306                    afs_uint32 epoch, int type, u_int securityIndex)
2307 {
2308     int hashindex, flag;
2309     register struct rx_connection *conn;
2310     hashindex = CONN_HASH(host, port, cid, epoch, type);
2311     MUTEX_ENTER(&rx_connHashTable_lock);
2312     rxLastConn ? (conn = rxLastConn, flag = 0) : (conn =
2313                                                   rx_connHashTable[hashindex],
2314                                                   flag = 1);
2315     for (; conn;) {
2316         if ((conn->type == type) && ((cid & RX_CIDMASK) == conn->cid)
2317             && (epoch == conn->epoch)) {
2318             register struct rx_peer *pp = conn->peer;
2319             if (securityIndex != conn->securityIndex) {
2320                 /* this isn't supposed to happen, but someone could forge a packet
2321                  * like this, and there seems to be some CM bug that makes this
2322                  * happen from time to time -- in which case, the fileserver
2323                  * asserts. */
2324                 MUTEX_EXIT(&rx_connHashTable_lock);
2325                 return (struct rx_connection *)0;
2326             }
2327             if (pp->host == host && pp->port == port)
2328                 break;
2329             if (type == RX_CLIENT_CONNECTION && pp->port == port)
2330                 break;
2331             /* So what happens when it's a callback connection? */
2332             if (                /*type == RX_CLIENT_CONNECTION && */
2333                    (conn->epoch & 0x80000000))
2334                 break;
2335         }
2336         if (!flag) {
2337             /* the connection rxLastConn that was used the last time is not the
2338              ** one we are looking for now. Hence, start searching in the hash */
2339             flag = 1;
2340             conn = rx_connHashTable[hashindex];
2341         } else
2342             conn = conn->next;
2343     }
2344     if (!conn) {
2345         struct rx_service *service;
2346         if (type == RX_CLIENT_CONNECTION) {
2347             MUTEX_EXIT(&rx_connHashTable_lock);
2348             return (struct rx_connection *)0;
2349         }
2350         service = rxi_FindService(socket, serviceId);
2351         if (!service || (securityIndex >= service->nSecurityObjects)
2352             || (service->securityObjects[securityIndex] == 0)) {
2353             MUTEX_EXIT(&rx_connHashTable_lock);
2354             return (struct rx_connection *)0;
2355         }
2356         conn = rxi_AllocConnection();   /* This bzero's the connection */
2357         MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
2358         MUTEX_INIT(&conn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
2359         CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
2360         conn->next = rx_connHashTable[hashindex];
2361         rx_connHashTable[hashindex] = conn;
2362         conn->peer = rxi_FindPeer(host, port, 0, 1);
2363         conn->type = RX_SERVER_CONNECTION;
2364         conn->lastSendTime = clock_Sec();       /* don't GC immediately */
2365         conn->epoch = epoch;
2366         conn->cid = cid & RX_CIDMASK;
2367         /* conn->serial = conn->lastSerial = 0; */
2368         /* conn->timeout = 0; */
2369         conn->ackRate = RX_FAST_ACK_RATE;
2370         conn->service = service;
2371         conn->serviceId = serviceId;
2372         conn->securityIndex = securityIndex;
2373         conn->securityObject = service->securityObjects[securityIndex];
2374         conn->nSpecific = 0;
2375         conn->specific = NULL;
2376         rx_SetConnDeadTime(conn, service->connDeadTime);
2377         rx_SetConnIdleDeadTime(conn, service->idleDeadTime);
2378         /* Notify security object of the new connection */
2379         RXS_NewConnection(conn->securityObject, conn);
2380         /* XXXX Connection timeout? */
2381         if (service->newConnProc)
2382             (*service->newConnProc) (conn);
2383         MUTEX_ENTER(&rx_stats_mutex);
2384         rx_stats.nServerConns++;
2385         MUTEX_EXIT(&rx_stats_mutex);
2386     }
2387
2388     MUTEX_ENTER(&conn->conn_data_lock);
2389     conn->refCount++;
2390     MUTEX_EXIT(&conn->conn_data_lock);
2391
2392     rxLastConn = conn;          /* store this connection as the last conn used */
2393     MUTEX_EXIT(&rx_connHashTable_lock);
2394     return conn;
2395 }
2396
2397 /* There are two packet tracing routines available for testing and monitoring
2398  * Rx.  One is called just after every packet is received and the other is
2399  * called just before every packet is sent.  Received packets, have had their
2400  * headers decoded, and packets to be sent have not yet had their headers
2401  * encoded.  Both take two parameters: a pointer to the packet and a sockaddr
2402  * containing the network address.  Both can be modified.  The return value, if
2403  * non-zero, indicates that the packet should be dropped.  */
2404
2405 int (*rx_justReceived) () = 0;
2406 int (*rx_almostSent) () = 0;
2407
2408 /* A packet has been received off the interface.  Np is the packet, socket is
2409  * the socket number it was received from (useful in determining which service
2410  * this packet corresponds to), and (host, port) reflect the host,port of the
2411  * sender.  This call returns the packet to the caller if it is finished with
2412  * it, rather than de-allocating it, just as a small performance hack */
2413
2414 struct rx_packet *
2415 rxi_ReceivePacket(register struct rx_packet *np, osi_socket socket,
2416                   afs_uint32 host, u_short port, int *tnop,
2417                   struct rx_call **newcallp)
2418 {
2419     register struct rx_call *call;
2420     register struct rx_connection *conn;
2421     int channel;
2422     afs_uint32 currentCallNumber;
2423     int type;
2424     int skew;
2425 #ifdef RXDEBUG
2426     char *packetType;
2427 #endif
2428     struct rx_packet *tnp;
2429
2430 #ifdef RXDEBUG
2431 /* We don't print out the packet until now because (1) the time may not be
2432  * accurate enough until now in the lwp implementation (rx_Listener only gets
2433  * the time after the packet is read) and (2) from a protocol point of view,
2434  * this is the first time the packet has been seen */
2435     packetType = (np->header.type > 0 && np->header.type < RX_N_PACKET_TYPES)
2436         ? rx_packetTypes[np->header.type - 1] : "*UNKNOWN*";
2437     dpf(("R %d %s: %x.%d.%d.%d.%d.%d.%d flags %d, packet %x",
2438          np->header.serial, packetType, host, port, np->header.serviceId,
2439          np->header.epoch, np->header.cid, np->header.callNumber,
2440          np->header.seq, np->header.flags, np));
2441 #endif
2442
2443     if (np->header.type == RX_PACKET_TYPE_VERSION) {
2444         return rxi_ReceiveVersionPacket(np, socket, host, port, 1);
2445     }
2446
2447     if (np->header.type == RX_PACKET_TYPE_DEBUG) {
2448         return rxi_ReceiveDebugPacket(np, socket, host, port, 1);
2449     }
2450 #ifdef RXDEBUG
2451     /* If an input tracer function is defined, call it with the packet and
2452      * network address.  Note this function may modify its arguments. */
2453     if (rx_justReceived) {
2454         struct sockaddr_in addr;
2455         int drop;
2456         addr.sin_family = AF_INET;
2457         addr.sin_port = port;
2458         addr.sin_addr.s_addr = host;
2459 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
2460         addr.sin_len = sizeof(addr);
2461 #endif /* AFS_OSF_ENV */
2462         drop = (*rx_justReceived) (np, &addr);
2463         /* drop packet if return value is non-zero */
2464         if (drop)
2465             return np;
2466         port = addr.sin_port;   /* in case fcn changed addr */
2467         host = addr.sin_addr.s_addr;
2468     }
2469 #endif
2470
2471     /* If packet was not sent by the client, then *we* must be the client */
2472     type = ((np->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
2473         ? RX_CLIENT_CONNECTION : RX_SERVER_CONNECTION;
2474
2475     /* Find the connection (or fabricate one, if we're the server & if
2476      * necessary) associated with this packet */
2477     conn =
2478         rxi_FindConnection(socket, host, port, np->header.serviceId,
2479                            np->header.cid, np->header.epoch, type,
2480                            np->header.securityIndex);
2481
2482     if (!conn) {
2483         /* If no connection found or fabricated, just ignore the packet.
2484          * (An argument could be made for sending an abort packet for
2485          * the conn) */
2486         return np;
2487     }
2488
2489     MUTEX_ENTER(&conn->conn_data_lock);
2490     if (conn->maxSerial < np->header.serial)
2491         conn->maxSerial = np->header.serial;
2492     MUTEX_EXIT(&conn->conn_data_lock);
2493
2494     /* If the connection is in an error state, send an abort packet and ignore
2495      * the incoming packet */
2496     if (conn->error) {
2497         /* Don't respond to an abort packet--we don't want loops! */
2498         MUTEX_ENTER(&conn->conn_data_lock);
2499         if (np->header.type != RX_PACKET_TYPE_ABORT)
2500             np = rxi_SendConnectionAbort(conn, np, 1, 0);
2501         conn->refCount--;
2502         MUTEX_EXIT(&conn->conn_data_lock);
2503         return np;
2504     }
2505
2506     /* Check for connection-only requests (i.e. not call specific). */
2507     if (np->header.callNumber == 0) {
2508         switch (np->header.type) {
2509         case RX_PACKET_TYPE_ABORT:
2510             /* What if the supplied error is zero? */
2511             rxi_ConnectionError(conn, ntohl(rx_GetInt32(np, 0)));
2512             MUTEX_ENTER(&conn->conn_data_lock);
2513             conn->refCount--;
2514             MUTEX_EXIT(&conn->conn_data_lock);
2515             return np;
2516         case RX_PACKET_TYPE_CHALLENGE:
2517             tnp = rxi_ReceiveChallengePacket(conn, np, 1);
2518             MUTEX_ENTER(&conn->conn_data_lock);
2519             conn->refCount--;
2520             MUTEX_EXIT(&conn->conn_data_lock);
2521             return tnp;
2522         case RX_PACKET_TYPE_RESPONSE:
2523             tnp = rxi_ReceiveResponsePacket(conn, np, 1);
2524             MUTEX_ENTER(&conn->conn_data_lock);
2525             conn->refCount--;
2526             MUTEX_EXIT(&conn->conn_data_lock);
2527             return tnp;
2528         case RX_PACKET_TYPE_PARAMS:
2529         case RX_PACKET_TYPE_PARAMS + 1:
2530         case RX_PACKET_TYPE_PARAMS + 2:
2531             /* ignore these packet types for now */
2532             MUTEX_ENTER(&conn->conn_data_lock);
2533             conn->refCount--;
2534             MUTEX_EXIT(&conn->conn_data_lock);
2535             return np;
2536
2537
2538         default:
2539             /* Should not reach here, unless the peer is broken: send an
2540              * abort packet */
2541             rxi_ConnectionError(conn, RX_PROTOCOL_ERROR);
2542             MUTEX_ENTER(&conn->conn_data_lock);
2543             tnp = rxi_SendConnectionAbort(conn, np, 1, 0);
2544             conn->refCount--;
2545             MUTEX_EXIT(&conn->conn_data_lock);
2546             return tnp;
2547         }
2548     }
2549
2550     channel = np->header.cid & RX_CHANNELMASK;
2551     call = conn->call[channel];
2552 #ifdef  RX_ENABLE_LOCKS
2553     if (call)
2554         MUTEX_ENTER(&call->lock);
2555     /* Test to see if call struct is still attached to conn. */
2556     if (call != conn->call[channel]) {
2557         if (call)
2558             MUTEX_EXIT(&call->lock);
2559         if (type == RX_SERVER_CONNECTION) {
2560             call = conn->call[channel];
2561             /* If we started with no call attached and there is one now,
2562              * another thread is also running this routine and has gotten
2563              * the connection channel. We should drop this packet in the tests
2564              * below. If there was a call on this connection and it's now
2565              * gone, then we'll be making a new call below.
2566              * If there was previously a call and it's now different then
2567              * the old call was freed and another thread running this routine
2568              * has created a call on this channel. One of these two threads
2569              * has a packet for the old call and the code below handles those
2570              * cases.
2571              */
2572             if (call)
2573                 MUTEX_ENTER(&call->lock);
2574         } else {
2575             /* This packet can't be for this call. If the new call address is
2576              * 0 then no call is running on this channel. If there is a call
2577              * then, since this is a client connection we're getting data for
2578              * it must be for the previous call.
2579              */
2580             MUTEX_ENTER(&rx_stats_mutex);
2581             rx_stats.spuriousPacketsRead++;
2582             MUTEX_EXIT(&rx_stats_mutex);
2583             MUTEX_ENTER(&conn->conn_data_lock);
2584             conn->refCount--;
2585             MUTEX_EXIT(&conn->conn_data_lock);
2586             return np;
2587         }
2588     }
2589 #endif
2590     currentCallNumber = conn->callNumber[channel];
2591
2592     if (type == RX_SERVER_CONNECTION) { /* We're the server */
2593         if (np->header.callNumber < currentCallNumber) {
2594             MUTEX_ENTER(&rx_stats_mutex);
2595             rx_stats.spuriousPacketsRead++;
2596             MUTEX_EXIT(&rx_stats_mutex);
2597 #ifdef  RX_ENABLE_LOCKS
2598             if (call)
2599                 MUTEX_EXIT(&call->lock);
2600 #endif
2601             MUTEX_ENTER(&conn->conn_data_lock);
2602             conn->refCount--;
2603             MUTEX_EXIT(&conn->conn_data_lock);
2604             return np;
2605         }
2606         if (!call) {
2607             MUTEX_ENTER(&conn->conn_call_lock);
2608             call = rxi_NewCall(conn, channel);
2609             MUTEX_EXIT(&conn->conn_call_lock);
2610             *call->callNumber = np->header.callNumber;
2611             if (np->header.callNumber == 0)
2612                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], conn->peer->host, conn->peer->port, np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2613
2614             call->state = RX_STATE_PRECALL;
2615             clock_GetTime(&call->queueTime);
2616             hzero(call->bytesSent);
2617             hzero(call->bytesRcvd);
2618             /*
2619              * If the number of queued calls exceeds the overload
2620              * threshold then abort this call.
2621              */
2622             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2623                 struct rx_packet *tp;
2624
2625                 rxi_CallError(call, rx_BusyError);
2626                 tp = rxi_SendCallAbort(call, np, 1, 0);
2627                 MUTEX_EXIT(&call->lock);
2628                 MUTEX_ENTER(&conn->conn_data_lock);
2629                 conn->refCount--;
2630                 MUTEX_EXIT(&conn->conn_data_lock);
2631                 MUTEX_ENTER(&rx_stats_mutex);
2632                 rx_stats.nBusies++;
2633                 MUTEX_EXIT(&rx_stats_mutex);
2634                 return tp;
2635             }
2636             rxi_KeepAliveOn(call);
2637         } else if (np->header.callNumber != currentCallNumber) {
2638             /* Wait until the transmit queue is idle before deciding
2639              * whether to reset the current call. Chances are that the
2640              * call will be in ether DALLY or HOLD state once the TQ_BUSY
2641              * flag is cleared.
2642              */
2643 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
2644             while ((call->state == RX_STATE_ACTIVE)
2645                    && (call->flags & RX_CALL_TQ_BUSY)) {
2646                 call->flags |= RX_CALL_TQ_WAIT;
2647                 call->tqWaiters++;
2648 #ifdef RX_ENABLE_LOCKS
2649                 osirx_AssertMine(&call->lock, "rxi_Start lock3");
2650                 CV_WAIT(&call->cv_tq, &call->lock);
2651 #else /* RX_ENABLE_LOCKS */
2652                 osi_rxSleep(&call->tq);
2653 #endif /* RX_ENABLE_LOCKS */
2654                 call->tqWaiters--;
2655                 if (call->tqWaiters == 0)
2656                     call->flags &= ~RX_CALL_TQ_WAIT;
2657             }
2658 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2659             /* If the new call cannot be taken right now send a busy and set
2660              * the error condition in this call, so that it terminates as
2661              * quickly as possible */
2662             if (call->state == RX_STATE_ACTIVE) {
2663                 struct rx_packet *tp;
2664
2665                 rxi_CallError(call, RX_CALL_DEAD);
2666                 tp = rxi_SendSpecial(call, conn, np, RX_PACKET_TYPE_BUSY,
2667                                      NULL, 0, 1);
2668                 MUTEX_EXIT(&call->lock);
2669                 MUTEX_ENTER(&conn->conn_data_lock);
2670                 conn->refCount--;
2671                 MUTEX_EXIT(&conn->conn_data_lock);
2672                 return tp;
2673             }
2674             rxi_ResetCall(call, 0);
2675             *call->callNumber = np->header.callNumber;
2676             if (np->header.callNumber == 0)
2677                 dpf(("RecPacket call 0 %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %lx resend %d.%0.3d len %d", np->header.serial, rx_packetTypes[np->header.type - 1], conn->peer->host, conn->peer->port, np->header.serial, np->header.epoch, np->header.cid, np->header.callNumber, np->header.seq, np->header.flags, (unsigned long)np, np->retryTime.sec, np->retryTime.usec / 1000, np->length));
2678
2679             call->state = RX_STATE_PRECALL;
2680             clock_GetTime(&call->queueTime);
2681             hzero(call->bytesSent);
2682             hzero(call->bytesRcvd);
2683             /*
2684              * If the number of queued calls exceeds the overload
2685              * threshold then abort this call.
2686              */
2687             if ((rx_BusyThreshold > 0) && (rx_nWaiting > rx_BusyThreshold)) {
2688                 struct rx_packet *tp;
2689
2690                 rxi_CallError(call, rx_BusyError);
2691                 tp = rxi_SendCallAbort(call, np, 1, 0);
2692                 MUTEX_EXIT(&call->lock);
2693                 MUTEX_ENTER(&conn->conn_data_lock);
2694                 conn->refCount--;
2695                 MUTEX_EXIT(&conn->conn_data_lock);
2696                 MUTEX_ENTER(&rx_stats_mutex);
2697                 rx_stats.nBusies++;
2698                 MUTEX_EXIT(&rx_stats_mutex);
2699                 return tp;
2700             }
2701             rxi_KeepAliveOn(call);
2702         } else {
2703             /* Continuing call; do nothing here. */
2704         }
2705     } else {                    /* we're the client */
2706         /* Ignore all incoming acknowledgements for calls in DALLY state */
2707         if (call && (call->state == RX_STATE_DALLY)
2708             && (np->header.type == RX_PACKET_TYPE_ACK)) {
2709             MUTEX_ENTER(&rx_stats_mutex);
2710             rx_stats.ignorePacketDally++;
2711             MUTEX_EXIT(&rx_stats_mutex);
2712 #ifdef  RX_ENABLE_LOCKS
2713             if (call) {
2714                 MUTEX_EXIT(&call->lock);
2715             }
2716 #endif
2717             MUTEX_ENTER(&conn->conn_data_lock);
2718             conn->refCount--;
2719             MUTEX_EXIT(&conn->conn_data_lock);
2720             return np;
2721         }
2722
2723         /* Ignore anything that's not relevant to the current call.  If there
2724          * isn't a current call, then no packet is relevant. */
2725         if (!call || (np->header.callNumber != currentCallNumber)) {
2726             MUTEX_ENTER(&rx_stats_mutex);
2727             rx_stats.spuriousPacketsRead++;
2728             MUTEX_EXIT(&rx_stats_mutex);
2729 #ifdef  RX_ENABLE_LOCKS
2730             if (call) {
2731                 MUTEX_EXIT(&call->lock);
2732             }
2733 #endif
2734             MUTEX_ENTER(&conn->conn_data_lock);
2735             conn->refCount--;
2736             MUTEX_EXIT(&conn->conn_data_lock);
2737             return np;
2738         }
2739         /* If the service security object index stamped in the packet does not
2740          * match the connection's security index, ignore the packet */
2741         if (np->header.securityIndex != conn->securityIndex) {
2742 #ifdef  RX_ENABLE_LOCKS
2743             MUTEX_EXIT(&call->lock);
2744 #endif
2745             MUTEX_ENTER(&conn->conn_data_lock);
2746             conn->refCount--;
2747             MUTEX_EXIT(&conn->conn_data_lock);
2748             return np;
2749         }
2750
2751         /* If we're receiving the response, then all transmit packets are
2752          * implicitly acknowledged.  Get rid of them. */
2753         if (np->header.type == RX_PACKET_TYPE_DATA) {
2754 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2755             /* XXX Hack. Because we must release the global rx lock when
2756              * sending packets (osi_NetSend) we drop all acks while we're
2757              * traversing the tq in rxi_Start sending packets out because
2758              * packets may move to the freePacketQueue as result of being here!
2759              * So we drop these packets until we're safely out of the
2760              * traversing. Really ugly!
2761              * For fine grain RX locking, we set the acked field in the
2762              * packets and let rxi_Start remove them from the transmit queue.
2763              */
2764             if (call->flags & RX_CALL_TQ_BUSY) {
2765 #ifdef  RX_ENABLE_LOCKS
2766                 rxi_SetAcksInTransmitQueue(call);
2767 #else
2768                 conn->refCount--;
2769                 return np;      /* xmitting; drop packet */
2770 #endif
2771             } else {
2772                 rxi_ClearTransmitQueue(call, 0);
2773             }
2774 #else /* AFS_GLOBAL_RXLOCK_KERNEL */
2775             rxi_ClearTransmitQueue(call, 0);
2776 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2777         } else {
2778             if (np->header.type == RX_PACKET_TYPE_ACK) {
2779                 /* now check to see if this is an ack packet acknowledging that the
2780                  * server actually *lost* some hard-acked data.  If this happens we
2781                  * ignore this packet, as it may indicate that the server restarted in
2782                  * the middle of a call.  It is also possible that this is an old ack
2783                  * packet.  We don't abort the connection in this case, because this
2784                  * *might* just be an old ack packet.  The right way to detect a server
2785                  * restart in the midst of a call is to notice that the server epoch
2786                  * changed, btw.  */
2787                 /* XXX I'm not sure this is exactly right, since tfirst **IS**
2788                  * XXX unacknowledged.  I think that this is off-by-one, but
2789                  * XXX I don't dare change it just yet, since it will
2790                  * XXX interact badly with the server-restart detection
2791                  * XXX code in receiveackpacket.  */
2792                 if (ntohl(rx_GetInt32(np, FIRSTACKOFFSET)) < call->tfirst) {
2793                     MUTEX_ENTER(&rx_stats_mutex);
2794                     rx_stats.spuriousPacketsRead++;
2795                     MUTEX_EXIT(&rx_stats_mutex);
2796                     MUTEX_EXIT(&call->lock);
2797                     MUTEX_ENTER(&conn->conn_data_lock);
2798                     conn->refCount--;
2799                     MUTEX_EXIT(&conn->conn_data_lock);
2800                     return np;
2801                 }
2802             }
2803         }                       /* else not a data packet */
2804     }
2805
2806     osirx_AssertMine(&call->lock, "rxi_ReceivePacket middle");
2807     /* Set remote user defined status from packet */
2808     call->remoteStatus = np->header.userStatus;
2809
2810     /* Note the gap between the expected next packet and the actual
2811      * packet that arrived, when the new packet has a smaller serial number
2812      * than expected.  Rioses frequently reorder packets all by themselves,
2813      * so this will be quite important with very large window sizes.
2814      * Skew is checked against 0 here to avoid any dependence on the type of
2815      * inPacketSkew (which may be unsigned).  In C, -1 > (unsigned) 0 is always
2816      * true!
2817      * The inPacketSkew should be a smoothed running value, not just a maximum.  MTUXXX
2818      * see CalculateRoundTripTime for an example of how to keep smoothed values.
2819      * I think using a beta of 1/8 is probably appropriate.  93.04.21
2820      */
2821     MUTEX_ENTER(&conn->conn_data_lock);
2822     skew = conn->lastSerial - np->header.serial;
2823     conn->lastSerial = np->header.serial;
2824     MUTEX_EXIT(&conn->conn_data_lock);
2825     if (skew > 0) {
2826         register struct rx_peer *peer;
2827         peer = conn->peer;
2828         if (skew > peer->inPacketSkew) {
2829             dpf(("*** In skew changed from %d to %d\n", peer->inPacketSkew,
2830                  skew));
2831             peer->inPacketSkew = skew;
2832         }
2833     }
2834
2835     /* Now do packet type-specific processing */
2836     switch (np->header.type) {
2837     case RX_PACKET_TYPE_DATA:
2838         np = rxi_ReceiveDataPacket(call, np, 1, socket, host, port, tnop,
2839                                    newcallp);
2840         break;
2841     case RX_PACKET_TYPE_ACK:
2842         /* Respond immediately to ack packets requesting acknowledgement
2843          * (ping packets) */
2844         if (np->header.flags & RX_REQUEST_ACK) {
2845             if (call->error)
2846                 (void)rxi_SendCallAbort(call, 0, 1, 0);
2847             else
2848                 (void)rxi_SendAck(call, 0, np->header.serial,
2849                                   RX_ACK_PING_RESPONSE, 1);
2850         }
2851         np = rxi_ReceiveAckPacket(call, np, 1);
2852         break;
2853     case RX_PACKET_TYPE_ABORT:
2854         /* An abort packet: reset the connection, passing the error up to
2855          * the user */
2856         /* What if error is zero? */
2857         rxi_CallError(call, ntohl(*(afs_int32 *) rx_DataOf(np)));
2858         break;
2859     case RX_PACKET_TYPE_BUSY:
2860         /* XXXX */
2861         break;
2862     case RX_PACKET_TYPE_ACKALL:
2863         /* All packets acknowledged, so we can drop all packets previously
2864          * readied for sending */
2865 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
2866         /* XXX Hack. We because we can't release the global rx lock when
2867          * sending packets (osi_NetSend) we drop all ack pkts while we're
2868          * traversing the tq in rxi_Start sending packets out because
2869          * packets may move to the freePacketQueue as result of being
2870          * here! So we drop these packets until we're safely out of the
2871          * traversing. Really ugly!
2872          * For fine grain RX locking, we set the acked field in the packets
2873          * and let rxi_Start remove the packets from the transmit queue.
2874          */
2875         if (call->flags & RX_CALL_TQ_BUSY) {
2876 #ifdef  RX_ENABLE_LOCKS
2877             rxi_SetAcksInTransmitQueue(call);
2878             break;
2879 #else /* RX_ENABLE_LOCKS */
2880             conn->refCount--;
2881             return np;          /* xmitting; drop packet */
2882 #endif /* RX_ENABLE_LOCKS */
2883         }
2884 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
2885         rxi_ClearTransmitQueue(call, 0);
2886         break;
2887     default:
2888         /* Should not reach here, unless the peer is broken: send an abort
2889          * packet */
2890         rxi_CallError(call, RX_PROTOCOL_ERROR);
2891         np = rxi_SendCallAbort(call, np, 1, 0);
2892         break;
2893     };
2894     /* Note when this last legitimate packet was received, for keep-alive
2895      * processing.  Note, we delay getting the time until now in the hope that
2896      * the packet will be delivered to the user before any get time is required
2897      * (if not, then the time won't actually be re-evaluated here). */
2898     call->lastReceiveTime = clock_Sec();
2899     MUTEX_EXIT(&call->lock);
2900     MUTEX_ENTER(&conn->conn_data_lock);
2901     conn->refCount--;
2902     MUTEX_EXIT(&conn->conn_data_lock);
2903     return np;
2904 }
2905
2906 /* return true if this is an "interesting" connection from the point of view
2907     of someone trying to debug the system */
2908 int
2909 rxi_IsConnInteresting(struct rx_connection *aconn)
2910 {
2911     register int i;
2912     register struct rx_call *tcall;
2913
2914     if (aconn->flags & (RX_CONN_MAKECALL_WAITING | RX_CONN_DESTROY_ME))
2915         return 1;
2916     for (i = 0; i < RX_MAXCALLS; i++) {
2917         tcall = aconn->call[i];
2918         if (tcall) {
2919             if ((tcall->state == RX_STATE_PRECALL)
2920                 || (tcall->state == RX_STATE_ACTIVE))
2921                 return 1;
2922             if ((tcall->mode == RX_MODE_SENDING)
2923                 || (tcall->mode == RX_MODE_RECEIVING))
2924                 return 1;
2925         }
2926     }
2927     return 0;
2928 }
2929
2930 #ifdef KERNEL
2931 /* if this is one of the last few packets AND it wouldn't be used by the
2932    receiving call to immediately satisfy a read request, then drop it on
2933    the floor, since accepting it might prevent a lock-holding thread from
2934    making progress in its reading. If a call has been cleared while in
2935    the precall state then ignore all subsequent packets until the call
2936    is assigned to a thread. */
2937
2938 static int
2939 TooLow(struct rx_packet *ap, struct rx_call *acall)
2940 {
2941     int rc = 0;
2942     MUTEX_ENTER(&rx_stats_mutex);
2943     if (((ap->header.seq != 1) && (acall->flags & RX_CALL_CLEARED)
2944          && (acall->state == RX_STATE_PRECALL))
2945         || ((rx_nFreePackets < rxi_dataQuota + 2)
2946             && !((ap->header.seq < acall->rnext + rx_initSendWindow)
2947                  && (acall->flags & RX_CALL_READER_WAIT)))) {
2948         rc = 1;
2949     }
2950     MUTEX_EXIT(&rx_stats_mutex);
2951     return rc;
2952 }
2953 #endif /* KERNEL */
2954
2955 static void
2956 rxi_CheckReachEvent(struct rxevent *event, struct rx_connection *conn,
2957                     struct rx_call *acall)
2958 {
2959     struct rx_call *call = acall;
2960     struct clock when;
2961     int i, waiting;
2962
2963     MUTEX_ENTER(&conn->conn_data_lock);
2964     conn->checkReachEvent = NULL;
2965     waiting = conn->flags & RX_CONN_ATTACHWAIT;
2966     if (event)
2967         conn->refCount--;
2968     MUTEX_EXIT(&conn->conn_data_lock);
2969
2970     if (waiting) {
2971         if (!call) {
2972             MUTEX_ENTER(&conn->conn_call_lock);
2973             MUTEX_ENTER(&conn->conn_data_lock);
2974             for (i = 0; i < RX_MAXCALLS; i++) {
2975                 struct rx_call *tc = conn->call[i];
2976                 if (tc && tc->state == RX_STATE_PRECALL) {
2977                     call = tc;
2978                     break;
2979                 }
2980             }
2981             if (!call)
2982                 /* Indicate that rxi_CheckReachEvent is no longer running by
2983                  * clearing the flag.  Must be atomic under conn_data_lock to
2984                  * avoid a new call slipping by: rxi_CheckConnReach holds
2985                  * conn_data_lock while checking RX_CONN_ATTACHWAIT.
2986                  */
2987                 conn->flags &= ~RX_CONN_ATTACHWAIT;
2988             MUTEX_EXIT(&conn->conn_data_lock);
2989             MUTEX_EXIT(&conn->conn_call_lock);
2990         }
2991
2992         if (call) {
2993             if (call != acall)
2994                 MUTEX_ENTER(&call->lock);
2995             rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
2996             if (call != acall)
2997                 MUTEX_EXIT(&call->lock);
2998
2999             clock_GetTime(&when);
3000             when.sec += RX_CHECKREACH_TIMEOUT;
3001             MUTEX_ENTER(&conn->conn_data_lock);
3002             if (!conn->checkReachEvent) {
3003                 conn->refCount++;
3004                 conn->checkReachEvent =
3005                     rxevent_Post(&when, rxi_CheckReachEvent, conn, NULL);
3006             }
3007             MUTEX_EXIT(&conn->conn_data_lock);
3008         }
3009     }
3010 }
3011
3012 static int
3013 rxi_CheckConnReach(struct rx_connection *conn, struct rx_call *call)
3014 {
3015     struct rx_service *service = conn->service;
3016     struct rx_peer *peer = conn->peer;
3017     afs_uint32 now, lastReach;
3018
3019     if (service->checkReach == 0)
3020         return 0;
3021
3022     now = clock_Sec();
3023     MUTEX_ENTER(&peer->peer_lock);
3024     lastReach = peer->lastReachTime;
3025     MUTEX_EXIT(&peer->peer_lock);
3026     if (now - lastReach < RX_CHECKREACH_TTL)
3027         return 0;
3028
3029     MUTEX_ENTER(&conn->conn_data_lock);
3030     if (conn->flags & RX_CONN_ATTACHWAIT) {
3031         MUTEX_EXIT(&conn->conn_data_lock);
3032         return 1;
3033     }
3034     conn->flags |= RX_CONN_ATTACHWAIT;
3035     MUTEX_EXIT(&conn->conn_data_lock);
3036     if (!conn->checkReachEvent)
3037         rxi_CheckReachEvent(NULL, conn, call);
3038
3039     return 1;
3040 }
3041
3042 /* try to attach call, if authentication is complete */
3043 static void
3044 TryAttach(register struct rx_call *acall, register osi_socket socket,
3045           register int *tnop, register struct rx_call **newcallp,
3046           int reachOverride)
3047 {
3048     struct rx_connection *conn = acall->conn;
3049
3050     if (conn->type == RX_SERVER_CONNECTION
3051         && acall->state == RX_STATE_PRECALL) {
3052         /* Don't attach until we have any req'd. authentication. */
3053         if (RXS_CheckAuthentication(conn->securityObject, conn) == 0) {
3054             if (reachOverride || rxi_CheckConnReach(conn, acall) == 0)
3055                 rxi_AttachServerProc(acall, socket, tnop, newcallp);
3056             /* Note:  this does not necessarily succeed; there
3057              * may not any proc available
3058              */
3059         } else {
3060             rxi_ChallengeOn(acall->conn);
3061         }
3062     }
3063 }
3064
3065 /* A data packet has been received off the interface.  This packet is
3066  * appropriate to the call (the call is in the right state, etc.).  This
3067  * routine can return a packet to the caller, for re-use */
3068
3069 struct rx_packet *
3070 rxi_ReceiveDataPacket(register struct rx_call *call,
3071                       register struct rx_packet *np, int istack,
3072                       osi_socket socket, afs_uint32 host, u_short port,
3073                       int *tnop, struct rx_call **newcallp)
3074 {
3075     int ackNeeded = 0;          /* 0 means no, otherwise ack_reason */
3076     int newPackets = 0;
3077     int didHardAck = 0;
3078     int haveLast = 0;
3079     afs_uint32 seq, serial, flags;
3080     int isFirst;
3081     struct rx_packet *tnp;
3082     struct clock when;
3083     MUTEX_ENTER(&rx_stats_mutex);
3084     rx_stats.dataPacketsRead++;
3085     MUTEX_EXIT(&rx_stats_mutex);
3086
3087 #ifdef KERNEL
3088     /* If there are no packet buffers, drop this new packet, unless we can find
3089      * packet buffers from inactive calls */
3090     if (!call->error
3091         && (rxi_OverQuota(RX_PACKET_CLASS_RECEIVE) || TooLow(np, call))) {
3092         MUTEX_ENTER(&rx_freePktQ_lock);
3093         rxi_NeedMorePackets = TRUE;
3094         MUTEX_EXIT(&rx_freePktQ_lock);
3095         MUTEX_ENTER(&rx_stats_mutex);
3096         rx_stats.noPacketBuffersOnRead++;
3097         MUTEX_EXIT(&rx_stats_mutex);
3098         call->rprev = np->header.serial;
3099         rxi_calltrace(RX_TRACE_DROP, call);
3100         dpf(("packet %x dropped on receipt - quota problems", np));
3101         if (rxi_doreclaim)
3102             rxi_ClearReceiveQueue(call);
3103         clock_GetTime(&when);
3104         clock_Add(&when, &rx_softAckDelay);
3105         if (!call->delayedAckEvent
3106             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3107             rxevent_Cancel(call->delayedAckEvent, call,
3108                            RX_CALL_REFCOUNT_DELAY);
3109             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3110             call->delayedAckEvent =
3111                 rxevent_Post(&when, rxi_SendDelayedAck, call, 0);
3112         }
3113         /* we've damaged this call already, might as well do it in. */
3114         return np;
3115     }
3116 #endif /* KERNEL */
3117
3118     /*
3119      * New in AFS 3.5, if the RX_JUMBO_PACKET flag is set then this
3120      * packet is one of several packets transmitted as a single
3121      * datagram. Do not send any soft or hard acks until all packets
3122      * in a jumbogram have been processed. Send negative acks right away.
3123      */
3124     for (isFirst = 1, tnp = NULL; isFirst || tnp; isFirst = 0) {
3125         /* tnp is non-null when there are more packets in the
3126          * current jumbo gram */
3127         if (tnp) {
3128             if (np)
3129                 rxi_FreePacket(np);
3130             np = tnp;
3131         }
3132
3133         seq = np->header.seq;
3134         serial = np->header.serial;
3135         flags = np->header.flags;
3136
3137         /* If the call is in an error state, send an abort message */
3138         if (call->error)
3139             return rxi_SendCallAbort(call, np, istack, 0);
3140
3141         /* The RX_JUMBO_PACKET is set in all but the last packet in each
3142          * AFS 3.5 jumbogram. */
3143         if (flags & RX_JUMBO_PACKET) {
3144             tnp = rxi_SplitJumboPacket(np, host, port, isFirst);
3145         } else {
3146             tnp = NULL;
3147         }
3148
3149         if (np->header.spare != 0) {
3150             MUTEX_ENTER(&call->conn->conn_data_lock);
3151             call->conn->flags |= RX_CONN_USING_PACKET_CKSUM;
3152             MUTEX_EXIT(&call->conn->conn_data_lock);
3153         }
3154
3155         /* The usual case is that this is the expected next packet */
3156         if (seq == call->rnext) {
3157
3158             /* Check to make sure it is not a duplicate of one already queued */
3159             if (queue_IsNotEmpty(&call->rq)
3160                 && queue_First(&call->rq, rx_packet)->header.seq == seq) {
3161                 MUTEX_ENTER(&rx_stats_mutex);
3162                 rx_stats.dupPacketsRead++;
3163                 MUTEX_EXIT(&rx_stats_mutex);
3164                 dpf(("packet %x dropped on receipt - duplicate", np));
3165                 rxevent_Cancel(call->delayedAckEvent, call,
3166                                RX_CALL_REFCOUNT_DELAY);
3167                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3168                 ackNeeded = 0;
3169                 call->rprev = seq;
3170                 continue;
3171             }
3172
3173             /* It's the next packet. Stick it on the receive queue
3174              * for this call. Set newPackets to make sure we wake
3175              * the reader once all packets have been processed */
3176             queue_Prepend(&call->rq, np);
3177             call->nSoftAcks++;
3178             np = NULL;          /* We can't use this anymore */
3179             newPackets = 1;
3180
3181             /* If an ack is requested then set a flag to make sure we
3182              * send an acknowledgement for this packet */
3183             if (flags & RX_REQUEST_ACK) {
3184                 ackNeeded = RX_ACK_REQUESTED;
3185             }
3186
3187             /* Keep track of whether we have received the last packet */
3188             if (flags & RX_LAST_PACKET) {
3189                 call->flags |= RX_CALL_HAVE_LAST;
3190                 haveLast = 1;
3191             }
3192
3193             /* Check whether we have all of the packets for this call */
3194             if (call->flags & RX_CALL_HAVE_LAST) {
3195                 afs_uint32 tseq;        /* temporary sequence number */
3196                 struct rx_packet *tp;   /* Temporary packet pointer */
3197                 struct rx_packet *nxp;  /* Next pointer, for queue_Scan */
3198
3199                 for (tseq = seq, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3200                     if (tseq != tp->header.seq)
3201                         break;
3202                     if (tp->header.flags & RX_LAST_PACKET) {
3203                         call->flags |= RX_CALL_RECEIVE_DONE;
3204                         break;
3205                     }
3206                     tseq++;
3207                 }
3208             }
3209
3210             /* Provide asynchronous notification for those who want it
3211              * (e.g. multi rx) */
3212             if (call->arrivalProc) {
3213                 (*call->arrivalProc) (call, call->arrivalProcHandle,
3214                                       call->arrivalProcArg);
3215                 call->arrivalProc = (void (*)())0;
3216             }
3217
3218             /* Update last packet received */
3219             call->rprev = seq;
3220
3221             /* If there is no server process serving this call, grab
3222              * one, if available. We only need to do this once. If a
3223              * server thread is available, this thread becomes a server
3224              * thread and the server thread becomes a listener thread. */
3225             if (isFirst) {
3226                 TryAttach(call, socket, tnop, newcallp, 0);
3227             }
3228         }
3229         /* This is not the expected next packet. */
3230         else {
3231             /* Determine whether this is a new or old packet, and if it's
3232              * a new one, whether it fits into the current receive window.
3233              * Also figure out whether the packet was delivered in sequence.
3234              * We use the prev variable to determine whether the new packet
3235              * is the successor of its immediate predecessor in the
3236              * receive queue, and the missing flag to determine whether
3237              * any of this packets predecessors are missing.  */
3238
3239             afs_uint32 prev;    /* "Previous packet" sequence number */
3240             struct rx_packet *tp;       /* Temporary packet pointer */
3241             struct rx_packet *nxp;      /* Next pointer, for queue_Scan */
3242             int missing;        /* Are any predecessors missing? */
3243
3244             /* If the new packet's sequence number has been sent to the
3245              * application already, then this is a duplicate */
3246             if (seq < call->rnext) {
3247                 MUTEX_ENTER(&rx_stats_mutex);
3248                 rx_stats.dupPacketsRead++;
3249                 MUTEX_EXIT(&rx_stats_mutex);
3250                 rxevent_Cancel(call->delayedAckEvent, call,
3251                                RX_CALL_REFCOUNT_DELAY);
3252                 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
3253                 ackNeeded = 0;
3254                 call->rprev = seq;
3255                 continue;
3256             }
3257
3258             /* If the sequence number is greater than what can be
3259              * accomodated by the current window, then send a negative
3260              * acknowledge and drop the packet */
3261             if ((call->rnext + call->rwind) <= seq) {
3262                 rxevent_Cancel(call->delayedAckEvent, call,
3263                                RX_CALL_REFCOUNT_DELAY);
3264                 np = rxi_SendAck(call, np, serial, RX_ACK_EXCEEDS_WINDOW,
3265                                  istack);
3266                 ackNeeded = 0;
3267                 call->rprev = seq;
3268                 continue;
3269             }
3270
3271             /* Look for the packet in the queue of old received packets */
3272             for (prev = call->rnext - 1, missing =
3273                  0, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3274                 /*Check for duplicate packet */
3275                 if (seq == tp->header.seq) {
3276                     MUTEX_ENTER(&rx_stats_mutex);
3277                     rx_stats.dupPacketsRead++;
3278                     MUTEX_EXIT(&rx_stats_mutex);
3279                     rxevent_Cancel(call->delayedAckEvent, call,
3280                                    RX_CALL_REFCOUNT_DELAY);
3281                     np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE,
3282                                      istack);
3283                     ackNeeded = 0;
3284                     call->rprev = seq;
3285                     goto nextloop;
3286                 }
3287                 /* If we find a higher sequence packet, break out and
3288                  * insert the new packet here. */
3289                 if (seq < tp->header.seq)
3290                     break;
3291                 /* Check for missing packet */
3292                 if (tp->header.seq != prev + 1) {
3293                     missing = 1;
3294                 }
3295
3296                 prev = tp->header.seq;
3297             }
3298
3299             /* Keep track of whether we have received the last packet. */
3300             if (flags & RX_LAST_PACKET) {
3301                 call->flags |= RX_CALL_HAVE_LAST;
3302             }
3303
3304             /* It's within the window: add it to the the receive queue.
3305              * tp is left by the previous loop either pointing at the
3306              * packet before which to insert the new packet, or at the
3307              * queue head if the queue is empty or the packet should be
3308              * appended. */
3309             queue_InsertBefore(tp, np);
3310             call->nSoftAcks++;
3311             np = NULL;
3312
3313             /* Check whether we have all of the packets for this call */
3314             if ((call->flags & RX_CALL_HAVE_LAST)
3315                 && !(call->flags & RX_CALL_RECEIVE_DONE)) {
3316                 afs_uint32 tseq;        /* temporary sequence number */
3317
3318                 for (tseq =
3319                      call->rnext, queue_Scan(&call->rq, tp, nxp, rx_packet)) {
3320                     if (tseq != tp->header.seq)
3321                         break;
3322                     if (tp->header.flags & RX_LAST_PACKET) {
3323                         call->flags |= RX_CALL_RECEIVE_DONE;
3324                         break;
3325                     }
3326                     tseq++;
3327                 }
3328             }
3329
3330             /* We need to send an ack of the packet is out of sequence,
3331              * or if an ack was requested by the peer. */
3332             if (seq != prev + 1 || missing || (flags & RX_REQUEST_ACK)) {
3333                 ackNeeded = RX_ACK_OUT_OF_SEQUENCE;
3334             }
3335
3336             /* Acknowledge the last packet for each call */
3337             if (flags & RX_LAST_PACKET) {
3338                 haveLast = 1;
3339             }
3340
3341             call->rprev = seq;
3342         }
3343       nextloop:;
3344     }
3345
3346     if (newPackets) {
3347         /*
3348          * If the receiver is waiting for an iovec, fill the iovec
3349          * using the data from the receive queue */
3350         if (call->flags & RX_CALL_IOVEC_WAIT) {
3351             didHardAck = rxi_FillReadVec(call, serial);
3352             /* the call may have been aborted */
3353             if (call->error) {
3354                 return NULL;
3355             }
3356             if (didHardAck) {
3357                 ackNeeded = 0;
3358             }
3359         }
3360
3361         /* Wakeup the reader if any */
3362         if ((call->flags & RX_CALL_READER_WAIT)
3363             && (!(call->flags & RX_CALL_IOVEC_WAIT) || !(call->iovNBytes)
3364                 || (call->iovNext >= call->iovMax)
3365                 || (call->flags & RX_CALL_RECEIVE_DONE))) {
3366             call->flags &= ~RX_CALL_READER_WAIT;
3367 #ifdef  RX_ENABLE_LOCKS
3368             CV_BROADCAST(&call->cv_rq);
3369 #else
3370             osi_rxWakeup(&call->rq);
3371 #endif
3372         }
3373     }
3374
3375     /*
3376      * Send an ack when requested by the peer, or once every
3377      * rxi_SoftAckRate packets until the last packet has been
3378      * received. Always send a soft ack for the last packet in
3379      * the server's reply. */
3380     if (ackNeeded) {
3381         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3382         np = rxi_SendAck(call, np, serial, ackNeeded, istack);
3383     } else if (call->nSoftAcks > (u_short) rxi_SoftAckRate) {
3384         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3385         np = rxi_SendAck(call, np, serial, RX_ACK_IDLE, istack);
3386     } else if (call->nSoftAcks) {
3387         clock_GetTime(&when);
3388         if (haveLast && !(flags & RX_CLIENT_INITIATED)) {
3389             clock_Add(&when, &rx_lastAckDelay);
3390         } else {
3391             clock_Add(&when, &rx_softAckDelay);
3392         }
3393         if (!call->delayedAckEvent
3394             || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
3395             rxevent_Cancel(call->delayedAckEvent, call,
3396                            RX_CALL_REFCOUNT_DELAY);
3397             CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
3398             call->delayedAckEvent =
3399                 rxevent_Post(&when, rxi_SendDelayedAck, call, 0);
3400         }
3401     } else if (call->flags & RX_CALL_RECEIVE_DONE) {
3402         rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
3403     }
3404
3405     return np;
3406 }
3407
3408 #ifdef  ADAPT_WINDOW
3409 static void rxi_ComputeRate();
3410 #endif
3411
3412 static void
3413 rxi_UpdatePeerReach(struct rx_connection *conn, struct rx_call *acall)
3414 {
3415     struct rx_peer *peer = conn->peer;
3416
3417     MUTEX_ENTER(&peer->peer_lock);
3418     peer->lastReachTime = clock_Sec();
3419     MUTEX_EXIT(&peer->peer_lock);
3420
3421     MUTEX_ENTER(&conn->conn_data_lock);
3422     if (conn->flags & RX_CONN_ATTACHWAIT) {
3423         int i;
3424
3425         conn->flags &= ~RX_CONN_ATTACHWAIT;
3426         MUTEX_EXIT(&conn->conn_data_lock);
3427
3428         for (i = 0; i < RX_MAXCALLS; i++) {
3429             struct rx_call *call = conn->call[i];
3430             if (call) {
3431                 if (call != acall)
3432                     MUTEX_ENTER(&call->lock);
3433                 /* tnop can be null if newcallp is null */
3434                 TryAttach(call, (osi_socket) - 1, NULL, NULL, 1);
3435                 if (call != acall)
3436                     MUTEX_EXIT(&call->lock);
3437             }
3438         }
3439     } else
3440         MUTEX_EXIT(&conn->conn_data_lock);
3441 }
3442
3443 /* rxi_ComputePeerNetStats
3444  *
3445  * Called exclusively by rxi_ReceiveAckPacket to compute network link
3446  * estimates (like RTT and throughput) based on ack packets.  Caller
3447  * must ensure that the packet in question is the right one (i.e.
3448  * serial number matches).
3449  */
3450 static void
3451 rxi_ComputePeerNetStats(struct rx_call *call, struct rx_packet *p,
3452                         struct rx_ackPacket *ap, struct rx_packet *np)
3453 {
3454     struct rx_peer *peer = call->conn->peer;
3455
3456     /* Use RTT if not delayed by client. */
3457     if (ap->reason != RX_ACK_DELAY)
3458         rxi_ComputeRoundTripTime(p, &p->timeSent, peer);
3459 #ifdef ADAPT_WINDOW
3460     rxi_ComputeRate(peer, call, p, np, ap->reason);
3461 #endif
3462 }
3463
3464 /* The real smarts of the whole thing.  */
3465 struct rx_packet *
3466 rxi_ReceiveAckPacket(register struct rx_call *call, struct rx_packet *np,
3467                      int istack)
3468 {
3469     struct rx_ackPacket *ap;
3470     int nAcks;
3471     register struct rx_packet *tp;
3472     register struct rx_packet *nxp;     /* Next packet pointer for queue_Scan */
3473     register struct rx_connection *conn = call->conn;
3474     struct rx_peer *peer = conn->peer;
3475     afs_uint32 first;
3476     afs_uint32 serial;
3477     /* because there are CM's that are bogus, sending weird values for this. */
3478     afs_uint32 skew = 0;
3479     int nbytes;
3480     int missing;
3481     int acked;
3482     int nNacked = 0;
3483     int newAckCount = 0;
3484     u_short maxMTU = 0;         /* Set if peer supports AFS 3.4a jumbo datagrams */
3485     int maxDgramPackets = 0;    /* Set if peer supports AFS 3.5 jumbo datagrams */
3486
3487     MUTEX_ENTER(&rx_stats_mutex);
3488     rx_stats.ackPacketsRead++;
3489     MUTEX_EXIT(&rx_stats_mutex);
3490     ap = (struct rx_ackPacket *)rx_DataOf(np);
3491     nbytes = rx_Contiguous(np) - ((ap->acks) - (u_char *) ap);
3492     if (nbytes < 0)
3493         return np;              /* truncated ack packet */
3494
3495     /* depends on ack packet struct */
3496     nAcks = MIN((unsigned)nbytes, (unsigned)ap->nAcks);
3497     first = ntohl(ap->firstPacket);
3498     serial = ntohl(ap->serial);
3499     /* temporarily disabled -- needs to degrade over time
3500      * skew = ntohs(ap->maxSkew); */
3501
3502     /* Ignore ack packets received out of order */
3503     if (first < call->tfirst) {
3504         return np;
3505     }
3506
3507     if (np->header.flags & RX_SLOW_START_OK) {
3508         call->flags |= RX_CALL_SLOW_START_OK;
3509     }
3510
3511     if (ap->reason == RX_ACK_PING_RESPONSE)
3512         rxi_UpdatePeerReach(conn, call);
3513
3514 #ifdef RXDEBUG
3515     if (rx_Log) {
3516         fprintf(rx_Log,
3517                 "RACK: reason %x previous %u seq %u serial %u skew %d first %u",
3518                 ap->reason, ntohl(ap->previousPacket),
3519                 (unsigned int)np->header.seq, (unsigned int)serial,
3520                 (unsigned int)skew, ntohl(ap->firstPacket));
3521         if (nAcks) {
3522             int offset;
3523             for (offset = 0; offset < nAcks; offset++)
3524                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
3525                      rx_Log);
3526         }
3527         putc('\n', rx_Log);
3528     }
3529 #endif
3530
3531     /* Update the outgoing packet skew value to the latest value of
3532      * the peer's incoming packet skew value.  The ack packet, of
3533      * course, could arrive out of order, but that won't affect things
3534      * much */
3535     MUTEX_ENTER(&peer->peer_lock);
3536     peer->outPacketSkew = skew;
3537
3538     /* Check for packets that no longer need to be transmitted, and
3539      * discard them.  This only applies to packets positively
3540      * acknowledged as having been sent to the peer's upper level.
3541      * All other packets must be retained.  So only packets with
3542      * sequence numbers < ap->firstPacket are candidates. */
3543     for (queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3544         if (tp->header.seq >= first)
3545             break;
3546         call->tfirst = tp->header.seq + 1;
3547         if (serial
3548             && (tp->header.serial == serial || tp->firstSerial == serial))
3549             rxi_ComputePeerNetStats(call, tp, ap, np);
3550 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3551         /* XXX Hack. Because we have to release the global rx lock when sending
3552          * packets (osi_NetSend) we drop all acks while we're traversing the tq
3553          * in rxi_Start sending packets out because packets may move to the
3554          * freePacketQueue as result of being here! So we drop these packets until
3555          * we're safely out of the traversing. Really ugly!
3556          * To make it even uglier, if we're using fine grain locking, we can
3557          * set the ack bits in the packets and have rxi_Start remove the packets
3558          * when it's done transmitting.
3559          */
3560         if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3561             newAckCount++;
3562         }
3563         if (call->flags & RX_CALL_TQ_BUSY) {
3564 #ifdef RX_ENABLE_LOCKS
3565             tp->flags |= RX_PKTFLAG_ACKED;
3566             call->flags |= RX_CALL_TQ_SOME_ACKED;
3567 #else /* RX_ENABLE_LOCKS */
3568             break;
3569 #endif /* RX_ENABLE_LOCKS */
3570         } else
3571 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3572         {
3573             queue_Remove(tp);
3574             rxi_FreePacket(tp); /* rxi_FreePacket mustn't wake up anyone, preemptively. */
3575         }
3576     }
3577
3578 #ifdef ADAPT_WINDOW
3579     /* Give rate detector a chance to respond to ping requests */
3580     if (ap->reason == RX_ACK_PING_RESPONSE) {
3581         rxi_ComputeRate(peer, call, 0, np, ap->reason);
3582     }
3583 #endif
3584
3585     /* N.B. we don't turn off any timers here.  They'll go away by themselves, anyway */
3586
3587     /* Now go through explicit acks/nacks and record the results in
3588      * the waiting packets.  These are packets that can't be released
3589      * yet, even with a positive acknowledge.  This positive
3590      * acknowledge only means the packet has been received by the
3591      * peer, not that it will be retained long enough to be sent to
3592      * the peer's upper level.  In addition, reset the transmit timers
3593      * of any missing packets (those packets that must be missing
3594      * because this packet was out of sequence) */
3595
3596     call->nSoftAcked = 0;
3597     for (missing = 0, queue_Scan(&call->tq, tp, nxp, rx_packet)) {
3598         /* Update round trip time if the ack was stimulated on receipt
3599          * of this packet */
3600 #ifdef AFS_GLOBAL_RXLOCK_KERNEL
3601 #ifdef RX_ENABLE_LOCKS
3602         if (tp->header.seq >= first)
3603 #endif /* RX_ENABLE_LOCKS */
3604 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3605             if (serial
3606                 && (tp->header.serial == serial || tp->firstSerial == serial))
3607                 rxi_ComputePeerNetStats(call, tp, ap, np);
3608
3609         /* Set the acknowledge flag per packet based on the
3610          * information in the ack packet. An acknowlegded packet can
3611          * be downgraded when the server has discarded a packet it
3612          * soacked previously, or when an ack packet is received
3613          * out of sequence. */
3614         if (tp->header.seq < first) {
3615             /* Implicit ack information */
3616             if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3617                 newAckCount++;
3618             }
3619             tp->flags |= RX_PKTFLAG_ACKED;
3620         } else if (tp->header.seq < first + nAcks) {
3621             /* Explicit ack information:  set it in the packet appropriately */
3622             if (ap->acks[tp->header.seq - first] == RX_ACK_TYPE_ACK) {
3623                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3624                     newAckCount++;
3625                     tp->flags |= RX_PKTFLAG_ACKED;
3626                 }
3627                 if (missing) {
3628                     nNacked++;
3629                 } else {
3630                     call->nSoftAcked++;
3631                 }
3632             } else {
3633                 tp->flags &= ~RX_PKTFLAG_ACKED;
3634                 missing = 1;
3635             }
3636         } else {
3637             tp->flags &= ~RX_PKTFLAG_ACKED;
3638             missing = 1;
3639         }
3640
3641         /* If packet isn't yet acked, and it has been transmitted at least
3642          * once, reset retransmit time using latest timeout
3643          * ie, this should readjust the retransmit timer for all outstanding
3644          * packets...  So we don't just retransmit when we should know better*/
3645
3646         if (!(tp->flags & RX_PKTFLAG_ACKED) && !clock_IsZero(&tp->retryTime)) {
3647             tp->retryTime = tp->timeSent;
3648             clock_Add(&tp->retryTime, &peer->timeout);
3649             /* shift by eight because one quarter-sec ~ 256 milliseconds */
3650             clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8);
3651         }
3652     }
3653
3654     /* If the window has been extended by this acknowledge packet,
3655      * then wakeup a sender waiting in alloc for window space, or try
3656      * sending packets now, if he's been sitting on packets due to
3657      * lack of window space */
3658     if (call->tnext < (call->tfirst + call->twind)) {
3659 #ifdef  RX_ENABLE_LOCKS
3660         CV_SIGNAL(&call->cv_twind);
3661 #else
3662         if (call->flags & RX_CALL_WAIT_WINDOW_ALLOC) {
3663             call->flags &= ~RX_CALL_WAIT_WINDOW_ALLOC;
3664             osi_rxWakeup(&call->twind);
3665         }
3666 #endif
3667         if (call->flags & RX_CALL_WAIT_WINDOW_SEND) {
3668             call->flags &= ~RX_CALL_WAIT_WINDOW_SEND;
3669         }
3670     }
3671
3672     /* if the ack packet has a receivelen field hanging off it,
3673      * update our state */
3674     if (np->length >= rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32)) {
3675         afs_uint32 tSize;
3676
3677         /* If the ack packet has a "recommended" size that is less than
3678          * what I am using now, reduce my size to match */
3679         rx_packetread(np, rx_AckDataSize(ap->nAcks) + sizeof(afs_int32),
3680                       sizeof(afs_int32), &tSize);
3681         tSize = (afs_uint32) ntohl(tSize);
3682         peer->natMTU = rxi_AdjustIfMTU(MIN(tSize, peer->ifMTU));
3683
3684         /* Get the maximum packet size to send to this peer */
3685         rx_packetread(np, rx_AckDataSize(ap->nAcks), sizeof(afs_int32),
3686                       &tSize);
3687         tSize = (afs_uint32) ntohl(tSize);
3688         tSize = (afs_uint32) MIN(tSize, rx_MyMaxSendSize);
3689         tSize = rxi_AdjustMaxMTU(peer->natMTU, tSize);
3690
3691         /* sanity check - peer might have restarted with different params.
3692          * If peer says "send less", dammit, send less...  Peer should never
3693          * be unable to accept packets of the size that prior AFS versions would
3694          * send without asking.  */
3695         if (peer->maxMTU != tSize) {
3696             peer->maxMTU = tSize;
3697             peer->MTU = MIN(tSize, peer->MTU);
3698             call->MTU = MIN(call->MTU, tSize);
3699             peer->congestSeq++;
3700         }
3701
3702         if (np->length == rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32)) {
3703             /* AFS 3.4a */
3704             rx_packetread(np,
3705                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3706                           sizeof(afs_int32), &tSize);
3707             tSize = (afs_uint32) ntohl(tSize);  /* peer's receive window, if it's */
3708             if (tSize < call->twind) {  /* smaller than our send */
3709                 call->twind = tSize;    /* window, we must send less... */
3710                 call->ssthresh = MIN(call->twind, call->ssthresh);
3711             }
3712
3713             /* Only send jumbograms to 3.4a fileservers. 3.3a RX gets the
3714              * network MTU confused with the loopback MTU. Calculate the
3715              * maximum MTU here for use in the slow start code below.
3716              */
3717             maxMTU = peer->maxMTU;
3718             /* Did peer restart with older RX version? */
3719             if (peer->maxDgramPackets > 1) {
3720                 peer->maxDgramPackets = 1;
3721             }
3722         } else if (np->length >=
3723                    rx_AckDataSize(ap->nAcks) + 4 * sizeof(afs_int32)) {
3724             /* AFS 3.5 */
3725             rx_packetread(np,
3726                           rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32),
3727                           sizeof(afs_int32), &tSize);
3728             tSize = (afs_uint32) ntohl(tSize);
3729             /*
3730              * As of AFS 3.5 we set the send window to match the receive window.
3731              */
3732             if (tSize < call->twind) {
3733                 call->twind = tSize;
3734                 call->ssthresh = MIN(call->twind, call->ssthresh);
3735             } else if (tSize > call->twind) {
3736                 call->twind = tSize;
3737             }
3738
3739             /*
3740              * As of AFS 3.5, a jumbogram is more than one fixed size
3741              * packet transmitted in a single UDP datagram. If the remote
3742              * MTU is smaller than our local MTU then never send a datagram
3743              * larger than the natural MTU.
3744              */
3745             rx_packetread(np,
3746                           rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32),
3747                           sizeof(afs_int32), &tSize);
3748             maxDgramPackets = (afs_uint32) ntohl(tSize);
3749             maxDgramPackets = MIN(maxDgramPackets, rxi_nDgramPackets);
3750             maxDgramPackets =
3751                 MIN(maxDgramPackets, (int)(peer->ifDgramPackets));
3752             maxDgramPackets = MIN(maxDgramPackets, tSize);
3753             if (maxDgramPackets > 1) {
3754                 peer->maxDgramPackets = maxDgramPackets;
3755                 call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
3756             } else {
3757                 peer->maxDgramPackets = 1;
3758                 call->MTU = peer->natMTU;
3759             }
3760         } else if (peer->maxDgramPackets > 1) {
3761             /* Restarted with lower version of RX */
3762             peer->maxDgramPackets = 1;
3763         }
3764     } else if (peer->maxDgramPackets > 1
3765                || peer->maxMTU != OLD_MAX_PACKET_SIZE) {
3766         /* Restarted with lower version of RX */
3767         peer->maxMTU = OLD_MAX_PACKET_SIZE;
3768         peer->natMTU = OLD_MAX_PACKET_SIZE;
3769         peer->MTU = OLD_MAX_PACKET_SIZE;
3770         peer->maxDgramPackets = 1;
3771         peer->nDgramPackets = 1;
3772         peer->congestSeq++;
3773         call->MTU = OLD_MAX_PACKET_SIZE;
3774     }
3775
3776     if (nNacked) {
3777         /*
3778          * Calculate how many datagrams were successfully received after
3779          * the first missing packet and adjust the negative ack counter
3780          * accordingly.
3781          */
3782         call->nAcks = 0;
3783         call->nNacks++;
3784         nNacked = (nNacked + call->nDgramPackets - 1) / call->nDgramPackets;
3785         if (call->nNacks < nNacked) {
3786             call->nNacks = nNacked;
3787         }
3788     } else {
3789         if (newAckCount) {
3790             call->nAcks++;
3791         }
3792         call->nNacks = 0;
3793     }
3794
3795     if (call->flags & RX_CALL_FAST_RECOVER) {
3796         if (nNacked) {
3797             call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3798         } else {
3799             call->flags &= ~RX_CALL_FAST_RECOVER;
3800             call->cwind = call->nextCwind;
3801             call->nextCwind = 0;
3802             call->nAcks = 0;
3803         }
3804         call->nCwindAcks = 0;
3805     } else if (nNacked && call->nNacks >= (u_short) rx_nackThreshold) {
3806         /* Three negative acks in a row trigger congestion recovery */
3807 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
3808         MUTEX_EXIT(&peer->peer_lock);
3809         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
3810             /* someone else is waiting to start recovery */
3811             return np;
3812         }
3813         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
3814         while (call->flags & RX_CALL_TQ_BUSY) {
3815             call->flags |= RX_CALL_TQ_WAIT;
3816             call->tqWaiters++;
3817 #ifdef RX_ENABLE_LOCKS
3818             osirx_AssertMine(&call->lock, "rxi_Start lock2");
3819             CV_WAIT(&call->cv_tq, &call->lock);
3820 #else /* RX_ENABLE_LOCKS */
3821             osi_rxSleep(&call->tq);
3822 #endif /* RX_ENABLE_LOCKS */
3823             call->tqWaiters--;
3824             if (call->tqWaiters == 0)
3825                 call->flags &= ~RX_CALL_TQ_WAIT;
3826         }
3827         MUTEX_ENTER(&peer->peer_lock);
3828 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
3829         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
3830         call->flags |= RX_CALL_FAST_RECOVER;
3831         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
3832         call->cwind =
3833             MIN((int)(call->ssthresh + rx_nackThreshold), rx_maxSendWindow);
3834         call->nDgramPackets = MAX(2, (int)call->nDgramPackets) >> 1;
3835         call->nextCwind = call->ssthresh;
3836         call->nAcks = 0;
3837         call->nNacks = 0;
3838         peer->MTU = call->MTU;
3839         peer->cwind = call->nextCwind;
3840         peer->nDgramPackets = call->nDgramPackets;
3841         peer->congestSeq++;
3842         call->congestSeq = peer->congestSeq;
3843         /* Reset the resend times on the packets that were nacked
3844          * so we will retransmit as soon as the window permits*/
3845         for (acked = 0, queue_ScanBackwards(&call->tq, tp, nxp, rx_packet)) {
3846             if (acked) {
3847                 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
3848                     clock_Zero(&tp->retryTime);
3849                 }
3850             } else if (tp->flags & RX_PKTFLAG_ACKED) {
3851                 acked = 1;
3852             }
3853         }
3854     } else {
3855         /* If cwind is smaller than ssthresh, then increase
3856          * the window one packet for each ack we receive (exponential
3857          * growth).
3858          * If cwind is greater than or equal to ssthresh then increase
3859          * the congestion window by one packet for each cwind acks we
3860          * receive (linear growth).  */
3861         if (call->cwind < call->ssthresh) {
3862             call->cwind =
3863                 MIN((int)call->ssthresh, (int)(call->cwind + newAckCount));
3864             call->nCwindAcks = 0;
3865         } else {
3866             call->nCwindAcks += newAckCount;
3867             if (call->nCwindAcks >= call->cwind) {
3868                 call->nCwindAcks = 0;
3869                 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
3870             }
3871         }
3872         /*
3873          * If we have received several acknowledgements in a row then
3874          * it is time to increase the size of our datagrams
3875          */
3876         if ((int)call->nAcks > rx_nDgramThreshold) {
3877             if (peer->maxDgramPackets > 1) {
3878                 if (call->nDgramPackets < peer->maxDgramPackets) {
3879                     call->nDgramPackets++;
3880                 }
3881                 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
3882             } else if (call->MTU < peer->maxMTU) {
3883                 call->MTU += peer->natMTU;
3884                 call->MTU = MIN(call->MTU, peer->maxMTU);
3885             }
3886             call->nAcks = 0;
3887         }
3888     }
3889
3890     MUTEX_EXIT(&peer->peer_lock);       /* rxi_Start will lock peer. */
3891
3892     /* Servers need to hold the call until all response packets have
3893      * been acknowledged. Soft acks are good enough since clients
3894      * are not allowed to clear their receive queues. */
3895     if (call->state == RX_STATE_HOLD
3896         && call->tfirst + call->nSoftAcked >= call->tnext) {
3897         call->state = RX_STATE_DALLY;
3898         rxi_ClearTransmitQueue(call, 0);
3899     } else if (!queue_IsEmpty(&call->tq)) {
3900         rxi_Start(0, call, 0, istack);
3901     }
3902     return np;
3903 }
3904
3905 /* Received a response to a challenge packet */
3906 struct rx_packet *
3907 rxi_ReceiveResponsePacket(register struct rx_connection *conn,
3908                           register struct rx_packet *np, int istack)
3909 {
3910     int error;
3911
3912     /* Ignore the packet if we're the client */
3913     if (conn->type == RX_CLIENT_CONNECTION)
3914         return np;
3915
3916     /* If already authenticated, ignore the packet (it's probably a retry) */
3917     if (RXS_CheckAuthentication(conn->securityObject, conn) == 0)
3918         return np;
3919
3920     /* Otherwise, have the security object evaluate the response packet */
3921     error = RXS_CheckResponse(conn->securityObject, conn, np);
3922     if (error) {
3923         /* If the response is invalid, reset the connection, sending
3924          * an abort to the peer */
3925 #ifndef KERNEL
3926         rxi_Delay(1);
3927 #endif
3928         rxi_ConnectionError(conn, error);
3929         MUTEX_ENTER(&conn->conn_data_lock);
3930         np = rxi_SendConnectionAbort(conn, np, istack, 0);
3931         MUTEX_EXIT(&conn->conn_data_lock);
3932         return np;
3933     } else {
3934         /* If the response is valid, any calls waiting to attach
3935          * servers can now do so */
3936         int i;
3937
3938         for (i = 0; i < RX_MAXCALLS; i++) {
3939             struct rx_call *call = conn->call[i];
3940             if (call) {
3941                 MUTEX_ENTER(&call->lock);
3942                 if (call->state == RX_STATE_PRECALL)
3943                     rxi_AttachServerProc(call, (osi_socket) - 1, NULL, NULL);
3944                 /* tnop can be null if newcallp is null */
3945                 MUTEX_EXIT(&call->lock);
3946             }
3947         }
3948
3949         /* Update the peer reachability information, just in case
3950          * some calls went into attach-wait while we were waiting
3951          * for authentication..
3952          */
3953         rxi_UpdatePeerReach(conn, NULL);
3954     }
3955     return np;
3956 }
3957
3958 /* A client has received an authentication challenge: the security
3959  * object is asked to cough up a respectable response packet to send
3960  * back to the server.  The server is responsible for retrying the
3961  * challenge if it fails to get a response. */
3962
3963 struct rx_packet *
3964 rxi_ReceiveChallengePacket(register struct rx_connection *conn,
3965                            register struct rx_packet *np, int istack)
3966 {
3967     int error;
3968
3969     /* Ignore the challenge if we're the server */
3970     if (conn->type == RX_SERVER_CONNECTION)
3971         return np;
3972
3973     /* Ignore the challenge if the connection is otherwise idle; someone's
3974      * trying to use us as an oracle. */
3975     if (!rxi_HasActiveCalls(conn))
3976         return np;
3977
3978     /* Send the security object the challenge packet.  It is expected to fill
3979      * in the response. */
3980     error = RXS_GetResponse(conn->securityObject, conn, np);
3981
3982     /* If the security object is unable to return a valid response, reset the
3983      * connection and send an abort to the peer.  Otherwise send the response
3984      * packet to the peer connection. */
3985     if (error) {
3986         rxi_ConnectionError(conn, error);
3987         MUTEX_ENTER(&conn->conn_data_lock);
3988         np = rxi_SendConnectionAbort(conn, np, istack, 0);
3989         MUTEX_EXIT(&conn->conn_data_lock);
3990     } else {
3991         np = rxi_SendSpecial((struct rx_call *)0, conn, np,
3992                              RX_PACKET_TYPE_RESPONSE, NULL, -1, istack);
3993     }
3994     return np;
3995 }
3996
3997
3998 /* Find an available server process to service the current request in
3999  * the given call structure.  If one isn't available, queue up this
4000  * call so it eventually gets one */
4001 void
4002 rxi_AttachServerProc(register struct rx_call *call,
4003                      register osi_socket socket, register int *tnop,
4004                      register struct rx_call **newcallp)
4005 {
4006     register struct rx_serverQueueEntry *sq;
4007     register struct rx_service *service = call->conn->service;
4008     register int haveQuota = 0;
4009
4010     /* May already be attached */
4011     if (call->state == RX_STATE_ACTIVE)
4012         return;
4013
4014     MUTEX_ENTER(&rx_serverPool_lock);
4015
4016     haveQuota = QuotaOK(service);
4017     if ((!haveQuota) || queue_IsEmpty(&rx_idleServerQueue)) {
4018         /* If there are no processes available to service this call,
4019          * put the call on the incoming call queue (unless it's
4020          * already on the queue).
4021          */
4022 #ifdef RX_ENABLE_LOCKS
4023         if (haveQuota)
4024             ReturnToServerPool(service);
4025 #endif /* RX_ENABLE_LOCKS */
4026
4027         if (!(call->flags & RX_CALL_WAIT_PROC)) {
4028             call->flags |= RX_CALL_WAIT_PROC;
4029             MUTEX_ENTER(&rx_stats_mutex);
4030             rx_nWaiting++;
4031             rx_nWaited++;
4032             MUTEX_EXIT(&rx_stats_mutex);
4033             rxi_calltrace(RX_CALL_ARRIVAL, call);
4034             SET_CALL_QUEUE_LOCK(call, &rx_serverPool_lock);
4035             queue_Append(&rx_incomingCallQueue, call);
4036         }
4037     } else {
4038         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
4039
4040         /* If hot threads are enabled, and both newcallp and sq->socketp
4041          * are non-null, then this thread will process the call, and the
4042          * idle server thread will start listening on this threads socket.
4043          */
4044         queue_Remove(sq);
4045         if (rx_enable_hot_thread && newcallp && sq->socketp) {
4046             *newcallp = call;
4047             *tnop = sq->tno;
4048             *sq->socketp = socket;
4049             clock_GetTime(&call->startTime);
4050             CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
4051         } else {
4052             sq->newcall = call;
4053         }
4054         if (call->flags & RX_CALL_WAIT_PROC) {
4055             /* Conservative:  I don't think this should happen */
4056             call->flags &= ~RX_CALL_WAIT_PROC;
4057             if (queue_IsOnQueue(call)) {
4058                 queue_Remove(call);
4059                 MUTEX_ENTER(&rx_stats_mutex);
4060                 rx_nWaiting--;
4061                 MUTEX_EXIT(&rx_stats_mutex);
4062             }
4063         }
4064         call->state = RX_STATE_ACTIVE;
4065         call->mode = RX_MODE_RECEIVING;
4066 #ifdef RX_KERNEL_TRACE
4067         {
4068             int glockOwner = ISAFS_GLOCK();
4069             if (!glockOwner)
4070                 AFS_GLOCK();
4071             afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
4072                        __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
4073                        call);
4074             if (!glockOwner)
4075                 AFS_GUNLOCK();
4076         }
4077 #endif
4078         if (call->flags & RX_CALL_CLEARED) {
4079             /* send an ack now to start the packet flow up again */
4080             call->flags &= ~RX_CALL_CLEARED;
4081             rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4082         }
4083 #ifdef  RX_ENABLE_LOCKS
4084         CV_SIGNAL(&sq->cv);
4085 #else
4086         service->nRequestsRunning++;
4087         if (service->nRequestsRunning <= service->minProcs)
4088             rxi_minDeficit--;
4089         rxi_availProcs--;
4090         osi_rxWakeup(sq);
4091 #endif
4092     }
4093     MUTEX_EXIT(&rx_serverPool_lock);
4094 }
4095
4096 /* Delay the sending of an acknowledge event for a short while, while
4097  * a new call is being prepared (in the case of a client) or a reply
4098  * is being prepared (in the case of a server).  Rather than sending
4099  * an ack packet, an ACKALL packet is sent. */
4100 void
4101 rxi_AckAll(struct rxevent *event, register struct rx_call *call, char *dummy)
4102 {
4103 #ifdef RX_ENABLE_LOCKS
4104     if (event) {
4105         MUTEX_ENTER(&call->lock);
4106         call->delayedAckEvent = NULL;
4107         CALL_RELE(call, RX_CALL_REFCOUNT_ACKALL);
4108     }
4109     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4110                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4111     if (event)
4112         MUTEX_EXIT(&call->lock);
4113 #else /* RX_ENABLE_LOCKS */
4114     if (event)
4115         call->delayedAckEvent = NULL;
4116     rxi_SendSpecial(call, call->conn, (struct rx_packet *)0,
4117                     RX_PACKET_TYPE_ACKALL, NULL, 0, 0);
4118 #endif /* RX_ENABLE_LOCKS */
4119 }
4120
4121 void
4122 rxi_SendDelayedAck(struct rxevent *event, register struct rx_call *call,
4123                    char *dummy)
4124 {
4125 #ifdef RX_ENABLE_LOCKS
4126     if (event) {
4127         MUTEX_ENTER(&call->lock);
4128         if (event == call->delayedAckEvent)
4129             call->delayedAckEvent = NULL;
4130         CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
4131     }
4132     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4133     if (event)
4134         MUTEX_EXIT(&call->lock);
4135 #else /* RX_ENABLE_LOCKS */
4136     if (event)
4137         call->delayedAckEvent = NULL;
4138     (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4139 #endif /* RX_ENABLE_LOCKS */
4140 }
4141
4142
4143 #ifdef RX_ENABLE_LOCKS
4144 /* Set ack in all packets in transmit queue. rxi_Start will deal with
4145  * clearing them out.
4146  */
4147 static void
4148 rxi_SetAcksInTransmitQueue(register struct rx_call *call)
4149 {
4150     register struct rx_packet *p, *tp;
4151     int someAcked = 0;
4152
4153     for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4154         p->flags |= RX_PKTFLAG_ACKED;
4155         someAcked = 1;
4156     }
4157     if (someAcked) {
4158         call->flags |= RX_CALL_TQ_CLEARME;
4159         call->flags |= RX_CALL_TQ_SOME_ACKED;
4160     }
4161
4162     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4163     rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
4164     call->tfirst = call->tnext;
4165     call->nSoftAcked = 0;
4166
4167     if (call->flags & RX_CALL_FAST_RECOVER) {
4168         call->flags &= ~RX_CALL_FAST_RECOVER;
4169         call->cwind = call->nextCwind;
4170         call->nextCwind = 0;
4171     }
4172
4173     CV_SIGNAL(&call->cv_twind);
4174 }
4175 #endif /* RX_ENABLE_LOCKS */
4176
4177 /* Clear out the transmit queue for the current call (all packets have
4178  * been received by peer) */
4179 void
4180 rxi_ClearTransmitQueue(register struct rx_call *call, register int force)
4181 {
4182     register struct rx_packet *p, *tp;
4183
4184 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4185     if (!force && (call->flags & RX_CALL_TQ_BUSY)) {
4186         int someAcked = 0;
4187         for (queue_Scan(&call->tq, p, tp, rx_packet)) {
4188             p->flags |= RX_PKTFLAG_ACKED;
4189             someAcked = 1;
4190         }
4191         if (someAcked) {
4192             call->flags |= RX_CALL_TQ_CLEARME;
4193             call->flags |= RX_CALL_TQ_SOME_ACKED;
4194         }
4195     } else {
4196 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4197         rxi_FreePackets(0, &call->tq);
4198 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4199         call->flags &= ~RX_CALL_TQ_CLEARME;
4200     }
4201 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4202
4203     rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
4204     rxevent_Cancel(call->keepAliveEvent, call, RX_CALL_REFCOUNT_ALIVE);
4205     call->tfirst = call->tnext; /* implicitly acknowledge all data already sent */
4206     call->nSoftAcked = 0;
4207
4208     if (call->flags & RX_CALL_FAST_RECOVER) {
4209         call->flags &= ~RX_CALL_FAST_RECOVER;
4210         call->cwind = call->nextCwind;
4211     }
4212 #ifdef  RX_ENABLE_LOCKS
4213     CV_SIGNAL(&call->cv_twind);
4214 #else
4215     osi_rxWakeup(&call->twind);
4216 #endif
4217 }
4218
4219 void
4220 rxi_ClearReceiveQueue(register struct rx_call *call)
4221 {
4222     if (queue_IsNotEmpty(&call->rq)) {
4223         rx_packetReclaims += rxi_FreePackets(0, &call->rq);
4224         call->flags &= ~(RX_CALL_RECEIVE_DONE | RX_CALL_HAVE_LAST);
4225     }
4226     if (call->state == RX_STATE_PRECALL) {
4227         call->flags |= RX_CALL_CLEARED;
4228     }
4229 }
4230
4231 /* Send an abort packet for the specified call */
4232 struct rx_packet *
4233 rxi_SendCallAbort(register struct rx_call *call, struct rx_packet *packet,
4234                   int istack, int force)
4235 {
4236     afs_int32 error;
4237     struct clock when;
4238
4239     if (!call->error)
4240         return packet;
4241
4242     /* Clients should never delay abort messages */
4243     if (rx_IsClientConn(call->conn))
4244         force = 1;
4245
4246     if (call->abortCode != call->error) {
4247         call->abortCode = call->error;
4248         call->abortCount = 0;
4249     }
4250
4251     if (force || rxi_callAbortThreshhold == 0
4252         || call->abortCount < rxi_callAbortThreshhold) {
4253         if (call->delayedAbortEvent) {
4254             rxevent_Cancel(call->delayedAbortEvent, call,
4255                            RX_CALL_REFCOUNT_ABORT);
4256         }
4257         error = htonl(call->error);
4258         call->abortCount++;
4259         packet =
4260             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
4261                             (char *)&error, sizeof(error), istack);
4262     } else if (!call->delayedAbortEvent) {
4263         clock_GetTime(&when);
4264         clock_Addmsec(&when, rxi_callAbortDelay);
4265         CALL_HOLD(call, RX_CALL_REFCOUNT_ABORT);
4266         call->delayedAbortEvent =
4267             rxevent_Post(&when, rxi_SendDelayedCallAbort, call, 0);
4268     }
4269     return packet;
4270 }
4271
4272 /* Send an abort packet for the specified connection.  Packet is an
4273  * optional pointer to a packet that can be used to send the abort.
4274  * Once the number of abort messages reaches the threshhold, an
4275  * event is scheduled to send the abort. Setting the force flag
4276  * overrides sending delayed abort messages.
4277  *
4278  * NOTE: Called with conn_data_lock held. conn_data_lock is dropped
4279  *       to send the abort packet.
4280  */
4281 struct rx_packet *
4282 rxi_SendConnectionAbort(register struct rx_connection *conn,
4283                         struct rx_packet *packet, int istack, int force)
4284 {
4285     afs_int32 error;
4286     struct clock when;
4287
4288     if (!conn->error)
4289         return packet;
4290
4291     /* Clients should never delay abort messages */
4292     if (rx_IsClientConn(conn))
4293         force = 1;
4294
4295     if (force || rxi_connAbortThreshhold == 0
4296         || conn->abortCount < rxi_connAbortThreshhold) {
4297         if (conn->delayedAbortEvent) {
4298             rxevent_Cancel(conn->delayedAbortEvent, (struct rx_call *)0, 0);
4299         }
4300         error = htonl(conn->error);
4301         conn->abortCount++;
4302         MUTEX_EXIT(&conn->conn_data_lock);
4303         packet =
4304             rxi_SendSpecial((struct rx_call *)0, conn, packet,
4305                             RX_PACKET_TYPE_ABORT, (char *)&error,
4306                             sizeof(error), istack);
4307         MUTEX_ENTER(&conn->conn_data_lock);
4308     } else if (!conn->delayedAbortEvent) {
4309         clock_GetTime(&when);
4310         clock_Addmsec(&when, rxi_connAbortDelay);
4311         conn->delayedAbortEvent =
4312             rxevent_Post(&when, rxi_SendDelayedConnAbort, conn, 0);
4313     }
4314     return packet;
4315 }
4316
4317 /* Associate an error all of the calls owned by a connection.  Called
4318  * with error non-zero.  This is only for really fatal things, like
4319  * bad authentication responses.  The connection itself is set in
4320  * error at this point, so that future packets received will be
4321  * rejected. */
4322 void
4323 rxi_ConnectionError(register struct rx_connection *conn,
4324                     register afs_int32 error)
4325 {
4326     if (error) {
4327         register int i;
4328         MUTEX_ENTER(&conn->conn_data_lock);
4329         if (conn->challengeEvent)
4330             rxevent_Cancel(conn->challengeEvent, (struct rx_call *)0, 0);
4331         if (conn->checkReachEvent) {
4332             rxevent_Cancel(conn->checkReachEvent, (struct rx_call *)0, 0);
4333             conn->checkReachEvent = 0;
4334             conn->flags &= ~RX_CONN_ATTACHWAIT;
4335             conn->refCount--;
4336         }
4337         MUTEX_EXIT(&conn->conn_data_lock);
4338         for (i = 0; i < RX_MAXCALLS; i++) {
4339             struct rx_call *call = conn->call[i];
4340             if (call) {
4341                 MUTEX_ENTER(&call->lock);
4342                 rxi_CallError(call, error);
4343                 MUTEX_EXIT(&call->lock);
4344             }
4345         }
4346         conn->error = error;
4347         MUTEX_ENTER(&rx_stats_mutex);
4348         rx_stats.fatalErrors++;
4349         MUTEX_EXIT(&rx_stats_mutex);
4350     }
4351 }
4352
4353 void
4354 rxi_CallError(register struct rx_call *call, afs_int32 error)
4355 {
4356     if (call->error)
4357         error = call->error;
4358 #ifdef RX_GLOBAL_RXLOCK_KERNEL
4359     if (!((call->flags & RX_CALL_TQ_BUSY) || (call->tqWaiters > 0))) {
4360         rxi_ResetCall(call, 0);
4361     }
4362 #else
4363     rxi_ResetCall(call, 0);
4364 #endif
4365     call->error = error;
4366     call->mode = RX_MODE_ERROR;
4367 }
4368
4369 /* Reset various fields in a call structure, and wakeup waiting
4370  * processes.  Some fields aren't changed: state & mode are not
4371  * touched (these must be set by the caller), and bufptr, nLeft, and
4372  * nFree are not reset, since these fields are manipulated by
4373  * unprotected macros, and may only be reset by non-interrupting code.
4374  */
4375 #ifdef ADAPT_WINDOW
4376 /* this code requires that call->conn be set properly as a pre-condition. */
4377 #endif /* ADAPT_WINDOW */
4378
4379 void
4380 rxi_ResetCall(register struct rx_call *call, register int newcall)
4381 {
4382     register int flags;
4383     register struct rx_peer *peer;
4384     struct rx_packet *packet;
4385
4386     /* Notify anyone who is waiting for asynchronous packet arrival */
4387     if (call->arrivalProc) {
4388         (*call->arrivalProc) (call, call->arrivalProcHandle,
4389                               call->arrivalProcArg);
4390         call->arrivalProc = (void (*)())0;
4391     }
4392
4393     if (call->delayedAbortEvent) {
4394         rxevent_Cancel(call->delayedAbortEvent, call, RX_CALL_REFCOUNT_ABORT);
4395         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
4396         if (packet) {
4397             rxi_SendCallAbort(call, packet, 0, 1);
4398             rxi_FreePacket(packet);
4399         }
4400     }
4401
4402     /*
4403      * Update the peer with the congestion information in this call
4404      * so other calls on this connection can pick up where this call
4405      * left off. If the congestion sequence numbers don't match then
4406      * another call experienced a retransmission.
4407      */
4408     peer = call->conn->peer;
4409     MUTEX_ENTER(&peer->peer_lock);
4410     if (!newcall) {
4411         if (call->congestSeq == peer->congestSeq) {
4412             peer->cwind = MAX(peer->cwind, call->cwind);
4413             peer->MTU = MAX(peer->MTU, call->MTU);
4414             peer->nDgramPackets =
4415                 MAX(peer->nDgramPackets, call->nDgramPackets);
4416         }
4417     } else {
4418         call->abortCode = 0;
4419         call->abortCount = 0;
4420     }
4421     if (peer->maxDgramPackets > 1) {
4422         call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4423     } else {
4424         call->MTU = peer->MTU;
4425     }
4426     call->cwind = MIN((int)peer->cwind, (int)peer->nDgramPackets);
4427     call->ssthresh = rx_maxSendWindow;
4428     call->nDgramPackets = peer->nDgramPackets;
4429     call->congestSeq = peer->congestSeq;
4430     MUTEX_EXIT(&peer->peer_lock);
4431
4432     flags = call->flags;
4433     rxi_ClearReceiveQueue(call);
4434 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
4435     if (flags & RX_CALL_TQ_BUSY) {
4436         call->flags = RX_CALL_TQ_CLEARME | RX_CALL_TQ_BUSY;
4437         call->flags |= (flags & RX_CALL_TQ_WAIT);
4438     } else
4439 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
4440     {
4441         rxi_ClearTransmitQueue(call, 0);
4442         queue_Init(&call->tq);
4443         if (call->tqWaiters || (flags & RX_CALL_TQ_WAIT)) {
4444             dpf(("rcall %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
4445         }
4446         call->flags = 0;
4447         while (call->tqWaiters) {
4448 #ifdef RX_ENABLE_LOCKS
4449             CV_BROADCAST(&call->cv_tq);
4450 #else /* RX_ENABLE_LOCKS */
4451             osi_rxWakeup(&call->tq);
4452 #endif /* RX_ENABLE_LOCKS */
4453             call->tqWaiters--;
4454         }
4455     }
4456     queue_Init(&call->rq);
4457     call->error = 0;
4458     call->rwind = rx_initReceiveWindow;
4459     call->twind = rx_initSendWindow;
4460     call->nSoftAcked = 0;
4461     call->nextCwind = 0;
4462     call->nAcks = 0;
4463     call->nNacks = 0;
4464     call->nCwindAcks = 0;
4465     call->nSoftAcks = 0;
4466     call->nHardAcks = 0;
4467
4468     call->tfirst = call->rnext = call->tnext = 1;
4469     call->rprev = 0;
4470     call->lastAcked = 0;
4471     call->localStatus = call->remoteStatus = 0;
4472
4473     if (flags & RX_CALL_READER_WAIT) {
4474 #ifdef  RX_ENABLE_LOCKS
4475         CV_BROADCAST(&call->cv_rq);
4476 #else
4477         osi_rxWakeup(&call->rq);
4478 #endif
4479     }
4480     if (flags & RX_CALL_WAIT_PACKETS) {
4481         MUTEX_ENTER(&rx_freePktQ_lock);
4482         rxi_PacketsUnWait();    /* XXX */
4483         MUTEX_EXIT(&rx_freePktQ_lock);
4484     }
4485 #ifdef  RX_ENABLE_LOCKS
4486     CV_SIGNAL(&call->cv_twind);
4487 #else
4488     if (flags & RX_CALL_WAIT_WINDOW_ALLOC)
4489         osi_rxWakeup(&call->twind);
4490 #endif
4491
4492 #ifdef RX_ENABLE_LOCKS
4493     /* The following ensures that we don't mess with any queue while some
4494      * other thread might also be doing so. The call_queue_lock field is
4495      * is only modified under the call lock. If the call is in the process
4496      * of being removed from a queue, the call is not locked until the
4497      * the queue lock is dropped and only then is the call_queue_lock field
4498      * zero'd out. So it's safe to lock the queue if call_queue_lock is set.
4499      * Note that any other routine which removes a call from a queue has to
4500      * obtain the queue lock before examing the queue and removing the call.
4501      */
4502     if (call->call_queue_lock) {
4503         MUTEX_ENTER(call->call_queue_lock);
4504         if (queue_IsOnQueue(call)) {
4505             queue_Remove(call);
4506             if (flags & RX_CALL_WAIT_PROC) {
4507                 MUTEX_ENTER(&rx_stats_mutex);
4508                 rx_nWaiting--;
4509                 MUTEX_EXIT(&rx_stats_mutex);
4510             }
4511         }
4512         MUTEX_EXIT(call->call_queue_lock);
4513         CLEAR_CALL_QUEUE_LOCK(call);
4514     }
4515 #else /* RX_ENABLE_LOCKS */
4516     if (queue_IsOnQueue(call)) {
4517         queue_Remove(call);
4518         if (flags & RX_CALL_WAIT_PROC)
4519             rx_nWaiting--;
4520     }
4521 #endif /* RX_ENABLE_LOCKS */
4522
4523     rxi_KeepAliveOff(call);
4524     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
4525 }
4526
4527 /* Send an acknowledge for the indicated packet (seq,serial) of the
4528  * indicated call, for the indicated reason (reason).  This
4529  * acknowledge will specifically acknowledge receiving the packet, and
4530  * will also specify which other packets for this call have been
4531  * received.  This routine returns the packet that was used to the
4532  * caller.  The caller is responsible for freeing it or re-using it.
4533  * This acknowledgement also returns the highest sequence number
4534  * actually read out by the higher level to the sender; the sender
4535  * promises to keep around packets that have not been read by the
4536  * higher level yet (unless, of course, the sender decides to abort
4537  * the call altogether).  Any of p, seq, serial, pflags, or reason may
4538  * be set to zero without ill effect.  That is, if they are zero, they
4539  * will not convey any information.
4540  * NOW there is a trailer field, after the ack where it will safely be
4541  * ignored by mundanes, which indicates the maximum size packet this
4542  * host can swallow.  */
4543 /*
4544     register struct rx_packet *optionalPacket;  use to send ack (or null)
4545     int seq;                     Sequence number of the packet we are acking
4546     int serial;                  Serial number of the packet
4547     int pflags;                  Flags field from packet header
4548     int reason;                  Reason an acknowledge was prompted
4549 */
4550
4551 struct rx_packet *
4552 rxi_SendAck(register struct rx_call *call,
4553             register struct rx_packet *optionalPacket, int serial, int reason,
4554             int istack)
4555 {
4556     struct rx_ackPacket *ap;
4557     register struct rx_packet *rqp;
4558     register struct rx_packet *nxp;     /* For queue_Scan */
4559     register struct rx_packet *p;
4560     u_char offset;
4561     afs_int32 templ;
4562 #ifdef RX_ENABLE_TSFPQ
4563     struct rx_ts_info_t * rx_ts_info;
4564 #endif
4565
4566     /*
4567      * Open the receive window once a thread starts reading packets
4568      */
4569     if (call->rnext > 1) {
4570         call->rwind = rx_maxReceiveWindow;
4571     }
4572
4573     call->nHardAcks = 0;
4574     call->nSoftAcks = 0;
4575     if (call->rnext > call->lastAcked)
4576         call->lastAcked = call->rnext;
4577     p = optionalPacket;
4578
4579     if (p) {
4580         rx_computelen(p, p->length);    /* reset length, you never know */
4581     } /* where that's been...         */
4582 #ifdef RX_ENABLE_TSFPQ
4583     else {
4584         RX_TS_INFO_GET(rx_ts_info);
4585         if ((p = rx_ts_info->local_special_packet)) {
4586             rx_computelen(p, p->length);
4587         } else if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4588             rx_ts_info->local_special_packet = p;
4589         } else { /* We won't send the ack, but don't panic. */
4590             return optionalPacket;
4591         }
4592     }
4593 #else
4594     else if (!(p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
4595         /* We won't send the ack, but don't panic. */
4596         return optionalPacket;
4597     }
4598 #endif
4599
4600     templ =
4601         rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) -
4602         rx_GetDataSize(p);
4603     if (templ > 0) {
4604         if (rxi_AllocDataBuf(p, templ, RX_PACKET_CLASS_SPECIAL) > 0) {
4605 #ifndef RX_ENABLE_TSFPQ
4606             if (!optionalPacket)
4607                 rxi_FreePacket(p);
4608 #endif
4609             return optionalPacket;
4610         }
4611         templ = rx_AckDataSize(call->rwind) + 2 * sizeof(afs_int32);
4612         if (rx_Contiguous(p) < templ) {
4613 #ifndef RX_ENABLE_TSFPQ
4614             if (!optionalPacket)
4615                 rxi_FreePacket(p);
4616 #endif
4617             return optionalPacket;
4618         }
4619     }
4620
4621
4622     /* MTUXXX failing to send an ack is very serious.  We should */
4623     /* try as hard as possible to send even a partial ack; it's */
4624     /* better than nothing. */
4625     ap = (struct rx_ackPacket *)rx_DataOf(p);
4626     ap->bufferSpace = htonl(0); /* Something should go here, sometime */
4627     ap->reason = reason;
4628
4629     /* The skew computation used to be bogus, I think it's better now. */
4630     /* We should start paying attention to skew.    XXX  */
4631     ap->serial = htonl(serial);
4632     ap->maxSkew = 0;            /* used to be peer->inPacketSkew */
4633
4634     ap->firstPacket = htonl(call->rnext);       /* First packet not yet forwarded to reader */
4635     ap->previousPacket = htonl(call->rprev);    /* Previous packet received */
4636
4637     /* No fear of running out of ack packet here because there can only be at most
4638      * one window full of unacknowledged packets.  The window size must be constrained
4639      * to be less than the maximum ack size, of course.  Also, an ack should always
4640      * fit into a single packet -- it should not ever be fragmented.  */
4641     for (offset = 0, queue_Scan(&call->rq, rqp, nxp, rx_packet)) {
4642         if (!rqp || !call->rq.next
4643             || (rqp->header.seq > (call->rnext + call->rwind))) {
4644 #ifndef RX_ENABLE_TSFPQ
4645             if (!optionalPacket)
4646                 rxi_FreePacket(p);
4647 #endif
4648             rxi_CallError(call, RX_CALL_DEAD);
4649             return optionalPacket;
4650         }
4651
4652         while (rqp->header.seq > call->rnext + offset)
4653             ap->acks[offset++] = RX_ACK_TYPE_NACK;
4654         ap->acks[offset++] = RX_ACK_TYPE_ACK;
4655
4656         if ((offset > (u_char) rx_maxReceiveWindow) || (offset > call->rwind)) {
4657 #ifndef RX_ENABLE_TSFPQ
4658             if (!optionalPacket)
4659                 rxi_FreePacket(p);
4660 #endif
4661             rxi_CallError(call, RX_CALL_DEAD);
4662             return optionalPacket;
4663         }
4664     }
4665
4666     ap->nAcks = offset;
4667     p->length = rx_AckDataSize(offset) + 4 * sizeof(afs_int32);
4668
4669     /* these are new for AFS 3.3 */
4670     templ = rxi_AdjustMaxMTU(call->conn->peer->ifMTU, rx_maxReceiveSize);
4671     templ = htonl(templ);
4672     rx_packetwrite(p, rx_AckDataSize(offset), sizeof(afs_int32), &templ);
4673     templ = htonl(call->conn->peer->ifMTU);
4674     rx_packetwrite(p, rx_AckDataSize(offset) + sizeof(afs_int32),
4675                    sizeof(afs_int32), &templ);
4676
4677     /* new for AFS 3.4 */
4678     templ = htonl(call->rwind);
4679     rx_packetwrite(p, rx_AckDataSize(offset) + 2 * sizeof(afs_int32),
4680                    sizeof(afs_int32), &templ);
4681
4682     /* new for AFS 3.5 */
4683     templ = htonl(call->conn->peer->ifDgramPackets);
4684     rx_packetwrite(p, rx_AckDataSize(offset) + 3 * sizeof(afs_int32),
4685                    sizeof(afs_int32), &templ);
4686
4687     p->header.serviceId = call->conn->serviceId;
4688     p->header.cid = (call->conn->cid | call->channel);
4689     p->header.callNumber = *call->callNumber;
4690     p->header.seq = 0;
4691     p->header.securityIndex = call->conn->securityIndex;
4692     p->header.epoch = call->conn->epoch;
4693     p->header.type = RX_PACKET_TYPE_ACK;
4694     p->header.flags = RX_SLOW_START_OK;
4695     if (reason == RX_ACK_PING) {
4696         p->header.flags |= RX_REQUEST_ACK;
4697 #ifdef ADAPT_WINDOW
4698         clock_GetTime(&call->pingRequestTime);
4699 #endif
4700     }
4701     if (call->conn->type == RX_CLIENT_CONNECTION)
4702         p->header.flags |= RX_CLIENT_INITIATED;
4703
4704 #ifdef RXDEBUG
4705     if (rx_Log) {
4706         fprintf(rx_Log, "SACK: reason %x previous %u seq %u first %u",
4707                 ap->reason, ntohl(ap->previousPacket),
4708                 (unsigned int)p->header.seq, ntohl(ap->firstPacket));
4709         if (ap->nAcks) {
4710             for (offset = 0; offset < ap->nAcks; offset++)
4711                 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
4712                      rx_Log);
4713         }
4714         putc('\n', rx_Log);
4715     }
4716 #endif
4717
4718     {
4719         register int i, nbytes = p->length;
4720
4721         for (i = 1; i < p->niovecs; i++) {      /* vec 0 is ALWAYS header */
4722             if (nbytes <= p->wirevec[i].iov_len) {
4723                 register int savelen, saven;
4724
4725                 savelen = p->wirevec[i].iov_len;
4726                 saven = p->niovecs;
4727                 p->wirevec[i].iov_len = nbytes;
4728                 p->niovecs = i + 1;
4729                 rxi_Send(call, p, istack);
4730                 p->wirevec[i].iov_len = savelen;
4731                 p->niovecs = saven;
4732                 break;
4733             } else
4734                 nbytes -= p->wirevec[i].iov_len;
4735         }
4736     }
4737     MUTEX_ENTER(&rx_stats_mutex);
4738     rx_stats.ackPacketsSent++;
4739     MUTEX_EXIT(&rx_stats_mutex);
4740 #ifndef RX_ENABLE_TSFPQ
4741     if (!optionalPacket)
4742         rxi_FreePacket(p);
4743 #endif
4744     return optionalPacket;      /* Return packet for re-use by caller */
4745 }
4746
4747 /* Send all of the packets in the list in single datagram */
4748 static void
4749 rxi_SendList(struct rx_call *call, struct rx_packet **list, int len,
4750              int istack, int moreFlag, struct clock *now,
4751              struct clock *retryTime, int resending)
4752 {
4753     int i;
4754     int requestAck = 0;
4755     int lastPacket = 0;
4756     struct rx_connection *conn = call->conn;
4757     struct rx_peer *peer = conn->peer;
4758
4759     MUTEX_ENTER(&peer->peer_lock);
4760     peer->nSent += len;
4761     if (resending)
4762         peer->reSends += len;
4763     MUTEX_ENTER(&rx_stats_mutex);
4764     rx_stats.dataPacketsSent += len;
4765     MUTEX_EXIT(&rx_stats_mutex);
4766     MUTEX_EXIT(&peer->peer_lock);
4767
4768     if (list[len - 1]->header.flags & RX_LAST_PACKET) {
4769         lastPacket = 1;
4770     }
4771
4772     /* Set the packet flags and schedule the resend events */
4773     /* Only request an ack for the last packet in the list */
4774     for (i = 0; i < len; i++) {
4775         list[i]->retryTime = *retryTime;
4776         if (list[i]->header.serial) {
4777             /* Exponentially backoff retry times */
4778             if (list[i]->backoff < MAXBACKOFF) {
4779                 /* so it can't stay == 0 */
4780                 list[i]->backoff = (list[i]->backoff << 1) + 1;
4781             } else
4782                 list[i]->backoff++;
4783             clock_Addmsec(&(list[i]->retryTime),
4784                           ((afs_uint32) list[i]->backoff) << 8);
4785         }
4786
4787         /* Wait a little extra for the ack on the last packet */
4788         if (lastPacket && !(list[i]->header.flags & RX_CLIENT_INITIATED)) {
4789             clock_Addmsec(&(list[i]->retryTime), 400);
4790         }
4791
4792         /* Record the time sent */
4793         list[i]->timeSent = *now;
4794
4795         /* Ask for an ack on retransmitted packets,  on every other packet
4796          * if the peer doesn't support slow start. Ask for an ack on every
4797          * packet until the congestion window reaches the ack rate. */
4798         if (list[i]->header.serial) {
4799             requestAck = 1;
4800             MUTEX_ENTER(&rx_stats_mutex);
4801             rx_stats.dataPacketsReSent++;
4802             MUTEX_EXIT(&rx_stats_mutex);
4803         } else {
4804             /* improved RTO calculation- not Karn */
4805             list[i]->firstSent = *now;
4806             if (!lastPacket && (call->cwind <= (u_short) (conn->ackRate + 1)
4807                                 || (!(call->flags & RX_CALL_SLOW_START_OK)
4808                                     && (list[i]->header.seq & 1)))) {
4809                 requestAck = 1;
4810             }
4811         }
4812
4813         MUTEX_ENTER(&peer->peer_lock);
4814         peer->nSent++;
4815         if (resending)
4816             peer->reSends++;
4817         MUTEX_ENTER(&rx_stats_mutex);
4818         rx_stats.dataPacketsSent++;
4819         MUTEX_EXIT(&rx_stats_mutex);
4820         MUTEX_EXIT(&peer->peer_lock);
4821
4822         /* Tag this packet as not being the last in this group,
4823          * for the receiver's benefit */
4824         if (i < len - 1 || moreFlag) {
4825             list[i]->header.flags |= RX_MORE_PACKETS;
4826         }
4827
4828         /* Install the new retransmit time for the packet, and
4829          * record the time sent */
4830         list[i]->timeSent = *now;
4831     }
4832
4833     if (requestAck) {
4834         list[len - 1]->header.flags |= RX_REQUEST_ACK;
4835     }
4836
4837     /* Since we're about to send a data packet to the peer, it's
4838      * safe to nuke any scheduled end-of-packets ack */
4839     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
4840
4841     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
4842     MUTEX_EXIT(&call->lock);
4843     if (len > 1) {
4844         rxi_SendPacketList(call, conn, list, len, istack);
4845     } else {
4846         rxi_SendPacket(call, conn, list[0], istack);
4847     }
4848     MUTEX_ENTER(&call->lock);
4849     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
4850
4851     /* Update last send time for this call (for keep-alive
4852      * processing), and for the connection (so that we can discover
4853      * idle connections) */
4854     conn->lastSendTime = call->lastSendTime = clock_Sec();
4855 }
4856
4857 /* When sending packets we need to follow these rules:
4858  * 1. Never send more than maxDgramPackets in a jumbogram.
4859  * 2. Never send a packet with more than two iovecs in a jumbogram.
4860  * 3. Never send a retransmitted packet in a jumbogram.
4861  * 4. Never send more than cwind/4 packets in a jumbogram
4862  * We always keep the last list we should have sent so we
4863  * can set the RX_MORE_PACKETS flags correctly.
4864  */
4865 static void
4866 rxi_SendXmitList(struct rx_call *call, struct rx_packet **list, int len,
4867                  int istack, struct clock *now, struct clock *retryTime,
4868                  int resending)
4869 {
4870     int i, cnt, lastCnt = 0;
4871     struct rx_packet **listP, **lastP = 0;
4872     struct rx_peer *peer = call->conn->peer;
4873     int morePackets = 0;
4874
4875     for (cnt = 0, listP = &list[0], i = 0; i < len; i++) {
4876         /* Does the current packet force us to flush the current list? */
4877         if (cnt > 0
4878             && (list[i]->header.serial || (list[i]->flags & RX_PKTFLAG_ACKED)
4879                 || list[i]->length > RX_JUMBOBUFFERSIZE)) {
4880             if (lastCnt > 0) {
4881                 rxi_SendList(call, lastP, lastCnt, istack, 1, now, retryTime,
4882                              resending);
4883                 /* If the call enters an error state stop sending, or if
4884                  * we entered congestion recovery mode, stop sending */
4885                 if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
4886                     return;
4887             }
4888             lastP = listP;
4889             lastCnt = cnt;
4890             listP = &list[i];
4891             cnt = 0;
4892         }
4893         /* Add the current packet to the list if it hasn't been acked.
4894          * Otherwise adjust the list pointer to skip the current packet.  */
4895         if (!(list[i]->flags & RX_PKTFLAG_ACKED)) {
4896             cnt++;
4897             /* Do we need to flush the list? */
4898             if (cnt >= (int)peer->maxDgramPackets
4899                 || cnt >= (int)call->nDgramPackets || cnt >= (int)call->cwind
4900                 || list[i]->header.serial
4901                 || list[i]->length != RX_JUMBOBUFFERSIZE) {
4902                 if (lastCnt > 0) {
4903                     rxi_SendList(call, lastP, lastCnt, istack, 1, now,
4904                                  retryTime, resending);
4905                     /* If the call enters an error state stop sending, or if
4906                      * we entered congestion recovery mode, stop sending */
4907                     if (call->error
4908                         || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
4909                         return;
4910                 }
4911                 lastP = listP;
4912                 lastCnt = cnt;
4913                 listP = &list[i + 1];
4914                 cnt = 0;
4915             }
4916         } else {
4917             if (cnt != 0) {
4918                 osi_Panic("rxi_SendList error");
4919             }
4920             listP = &list[i + 1];
4921         }
4922     }
4923
4924     /* Send the whole list when the call is in receive mode, when
4925      * the call is in eof mode, when we are in fast recovery mode,
4926      * and when we have the last packet */
4927     if ((list[len - 1]->header.flags & RX_LAST_PACKET)
4928         || call->mode == RX_MODE_RECEIVING || call->mode == RX_MODE_EOF
4929         || (call->flags & RX_CALL_FAST_RECOVER)) {
4930         /* Check for the case where the current list contains
4931          * an acked packet. Since we always send retransmissions
4932          * in a separate packet, we only need to check the first
4933          * packet in the list */
4934         if (cnt > 0 && !(listP[0]->flags & RX_PKTFLAG_ACKED)) {
4935             morePackets = 1;
4936         }
4937         if (lastCnt > 0) {
4938             rxi_SendList(call, lastP, lastCnt, istack, morePackets, now,
4939                          retryTime, resending);
4940             /* If the call enters an error state stop sending, or if
4941              * we entered congestion recovery mode, stop sending */
4942             if (call->error || (call->flags & RX_CALL_FAST_RECOVER_WAIT))
4943                 return;
4944         }
4945         if (morePackets) {
4946             rxi_SendList(call, listP, cnt, istack, 0, now, retryTime,
4947                          resending);
4948         }
4949     } else if (lastCnt > 0) {
4950         rxi_SendList(call, lastP, lastCnt, istack, 0, now, retryTime,
4951                      resending);
4952     }
4953 }
4954
4955 #ifdef  RX_ENABLE_LOCKS
4956 /* Call rxi_Start, below, but with the call lock held. */
4957 void
4958 rxi_StartUnlocked(struct rxevent *event, register struct rx_call *call,
4959                   void *arg1, int istack)
4960 {
4961     MUTEX_ENTER(&call->lock);
4962     rxi_Start(event, call, arg1, istack);
4963     MUTEX_EXIT(&call->lock);
4964 }
4965 #endif /* RX_ENABLE_LOCKS */
4966
4967 /* This routine is called when new packets are readied for
4968  * transmission and when retransmission may be necessary, or when the
4969  * transmission window or burst count are favourable.  This should be
4970  * better optimized for new packets, the usual case, now that we've
4971  * got rid of queues of send packets. XXXXXXXXXXX */
4972 void
4973 rxi_Start(struct rxevent *event, register struct rx_call *call,
4974           void *arg1, int istack)
4975 {
4976     struct rx_packet *p;
4977     register struct rx_packet *nxp;     /* Next pointer for queue_Scan */
4978     struct rx_peer *peer = call->conn->peer;
4979     struct clock now, retryTime;
4980     int haveEvent;
4981     int nXmitPackets;
4982     int maxXmitPackets;
4983     struct rx_packet **xmitList;
4984     int resending = 0;
4985
4986     /* If rxi_Start is being called as a result of a resend event,
4987      * then make sure that the event pointer is removed from the call
4988      * structure, since there is no longer a per-call retransmission
4989      * event pending. */
4990     if (event && event == call->resendEvent) {
4991         CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
4992         call->resendEvent = NULL;
4993         resending = 1;
4994         if (queue_IsEmpty(&call->tq)) {
4995             /* Nothing to do */
4996             return;
4997         }
4998         /* Timeouts trigger congestion recovery */
4999 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5000         if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5001             /* someone else is waiting to start recovery */
5002             return;
5003         }
5004         call->flags |= RX_CALL_FAST_RECOVER_WAIT;
5005         while (call->flags & RX_CALL_TQ_BUSY) {
5006             call->flags |= RX_CALL_TQ_WAIT;
5007             call->tqWaiters++;
5008 #ifdef RX_ENABLE_LOCKS
5009             osirx_AssertMine(&call->lock, "rxi_Start lock1");
5010             CV_WAIT(&call->cv_tq, &call->lock);
5011 #else /* RX_ENABLE_LOCKS */
5012             osi_rxSleep(&call->tq);
5013 #endif /* RX_ENABLE_LOCKS */
5014             call->tqWaiters--;
5015             if (call->tqWaiters == 0)
5016                 call->flags &= ~RX_CALL_TQ_WAIT;
5017         }
5018 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5019         call->flags &= ~RX_CALL_FAST_RECOVER_WAIT;
5020         call->flags |= RX_CALL_FAST_RECOVER;
5021         if (peer->maxDgramPackets > 1) {
5022             call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
5023         } else {
5024             call->MTU = MIN(peer->natMTU, peer->maxMTU);
5025         }
5026         call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
5027         call->nDgramPackets = 1;
5028         call->cwind = 1;
5029         call->nextCwind = 1;
5030         call->nAcks = 0;
5031         call->nNacks = 0;
5032         MUTEX_ENTER(&peer->peer_lock);
5033         peer->MTU = call->MTU;
5034         peer->cwind = call->cwind;
5035         peer->nDgramPackets = 1;
5036         peer->congestSeq++;
5037         call->congestSeq = peer->congestSeq;
5038         MUTEX_EXIT(&peer->peer_lock);
5039         /* Clear retry times on packets. Otherwise, it's possible for
5040          * some packets in the queue to force resends at rates faster
5041          * than recovery rates.
5042          */
5043         for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5044             if (!(p->flags & RX_PKTFLAG_ACKED)) {
5045                 clock_Zero(&p->retryTime);
5046             }
5047         }
5048     }
5049     if (call->error) {
5050 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5051         MUTEX_ENTER(&rx_stats_mutex);
5052         rx_tq_debug.rxi_start_in_error++;
5053         MUTEX_EXIT(&rx_stats_mutex);
5054 #endif
5055         return;
5056     }
5057
5058     if (queue_IsNotEmpty(&call->tq)) {  /* If we have anything to send */
5059         /* Get clock to compute the re-transmit time for any packets
5060          * in this burst.  Note, if we back off, it's reasonable to
5061          * back off all of the packets in the same manner, even if
5062          * some of them have been retransmitted more times than more
5063          * recent additions */
5064         clock_GetTime(&now);
5065         retryTime = now;        /* initialize before use */
5066         MUTEX_ENTER(&peer->peer_lock);
5067         clock_Add(&retryTime, &peer->timeout);
5068         MUTEX_EXIT(&peer->peer_lock);
5069
5070         /* Send (or resend) any packets that need it, subject to
5071          * window restrictions and congestion burst control
5072          * restrictions.  Ask for an ack on the last packet sent in
5073          * this burst.  For now, we're relying upon the window being
5074          * considerably bigger than the largest number of packets that
5075          * are typically sent at once by one initial call to
5076          * rxi_Start.  This is probably bogus (perhaps we should ask
5077          * for an ack when we're half way through the current
5078          * window?).  Also, for non file transfer applications, this
5079          * may end up asking for an ack for every packet.  Bogus. XXXX
5080          */
5081         /*
5082          * But check whether we're here recursively, and let the other guy
5083          * do the work.
5084          */
5085 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5086         if (!(call->flags & RX_CALL_TQ_BUSY)) {
5087             call->flags |= RX_CALL_TQ_BUSY;
5088             do {
5089 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5090             restart:
5091 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5092                 call->flags &= ~RX_CALL_NEED_START;
5093 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5094                 nXmitPackets = 0;
5095                 maxXmitPackets = MIN(call->twind, call->cwind);
5096                 xmitList = (struct rx_packet **)
5097                     osi_Alloc(maxXmitPackets * sizeof(struct rx_packet *));
5098                 if (xmitList == NULL)
5099                     osi_Panic("rxi_Start, failed to allocate xmit list");
5100                 for (queue_Scan(&call->tq, p, nxp, rx_packet)) {
5101                     if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5102                         /* We shouldn't be sending packets if a thread is waiting
5103                          * to initiate congestion recovery */
5104                         break;
5105                     }
5106                     if ((nXmitPackets)
5107                         && (call->flags & RX_CALL_FAST_RECOVER)) {
5108                         /* Only send one packet during fast recovery */
5109                         break;
5110                     }
5111                     if ((p->flags & RX_PKTFLAG_FREE)
5112                         || (!queue_IsEnd(&call->tq, nxp)
5113                             && (nxp->flags & RX_PKTFLAG_FREE))
5114                         || (p == (struct rx_packet *)&rx_freePacketQueue)
5115                         || (nxp == (struct rx_packet *)&rx_freePacketQueue)) {
5116                         osi_Panic("rxi_Start: xmit queue clobbered");
5117                     }
5118                     if (p->flags & RX_PKTFLAG_ACKED) {
5119                         MUTEX_ENTER(&rx_stats_mutex);
5120                         rx_stats.ignoreAckedPacket++;
5121                         MUTEX_EXIT(&rx_stats_mutex);
5122                         continue;       /* Ignore this packet if it has been acknowledged */
5123                     }
5124
5125                     /* Turn off all flags except these ones, which are the same
5126                      * on each transmission */
5127                     p->header.flags &= RX_PRESET_FLAGS;
5128
5129                     if (p->header.seq >=
5130                         call->tfirst + MIN((int)call->twind,
5131                                            (int)(call->nSoftAcked +
5132                                                  call->cwind))) {
5133                         call->flags |= RX_CALL_WAIT_WINDOW_SEND;        /* Wait for transmit window */
5134                         /* Note: if we're waiting for more window space, we can
5135                          * still send retransmits; hence we don't return here, but
5136                          * break out to schedule a retransmit event */
5137                         dpf(("call %d waiting for window",
5138                              *(call->callNumber)));
5139                         break;
5140                     }
5141
5142                     /* Transmit the packet if it needs to be sent. */
5143                     if (!clock_Lt(&now, &p->retryTime)) {
5144                         if (nXmitPackets == maxXmitPackets) {
5145                             rxi_SendXmitList(call, xmitList, nXmitPackets,
5146                                              istack, &now, &retryTime,
5147                                              resending);
5148                             osi_Free(xmitList, maxXmitPackets *
5149                                      sizeof(struct rx_packet *));
5150                             goto restart;
5151                         }
5152                         xmitList[nXmitPackets++] = p;
5153                     }
5154                 }
5155
5156                 /* xmitList now hold pointers to all of the packets that are
5157                  * ready to send. Now we loop to send the packets */
5158                 if (nXmitPackets > 0) {
5159                     rxi_SendXmitList(call, xmitList, nXmitPackets, istack,
5160                                      &now, &retryTime, resending);
5161                 }
5162                 osi_Free(xmitList,
5163                          maxXmitPackets * sizeof(struct rx_packet *));
5164
5165 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5166                 /*
5167                  * TQ references no longer protected by this flag; they must remain
5168                  * protected by the global lock.
5169                  */
5170                 if (call->flags & RX_CALL_FAST_RECOVER_WAIT) {
5171                     call->flags &= ~RX_CALL_TQ_BUSY;
5172                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5173                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5174                     }
5175 #ifdef RX_ENABLE_LOCKS
5176                     osirx_AssertMine(&call->lock, "rxi_Start start");
5177                     CV_BROADCAST(&call->cv_tq);
5178 #else /* RX_ENABLE_LOCKS */
5179                     osi_rxWakeup(&call->tq);
5180 #endif /* RX_ENABLE_LOCKS */
5181                     return;
5182                 }
5183                 if (call->error) {
5184                     /* We went into the error state while sending packets. Now is
5185                      * the time to reset the call. This will also inform the using
5186                      * process that the call is in an error state.
5187                      */
5188                     MUTEX_ENTER(&rx_stats_mutex);
5189                     rx_tq_debug.rxi_start_aborted++;
5190                     MUTEX_EXIT(&rx_stats_mutex);
5191                     call->flags &= ~RX_CALL_TQ_BUSY;
5192                     if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5193                         dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5194                     }
5195 #ifdef RX_ENABLE_LOCKS
5196                     osirx_AssertMine(&call->lock, "rxi_Start middle");
5197                     CV_BROADCAST(&call->cv_tq);
5198 #else /* RX_ENABLE_LOCKS */
5199                     osi_rxWakeup(&call->tq);
5200 #endif /* RX_ENABLE_LOCKS */
5201                     rxi_CallError(call, call->error);
5202                     return;
5203                 }
5204 #ifdef RX_ENABLE_LOCKS
5205                 if (call->flags & RX_CALL_TQ_SOME_ACKED) {
5206                     register int missing;
5207                     call->flags &= ~RX_CALL_TQ_SOME_ACKED;
5208                     /* Some packets have received acks. If they all have, we can clear
5209                      * the transmit queue.
5210                      */
5211                     for (missing =
5212                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5213                         if (p->header.seq < call->tfirst
5214                             && (p->flags & RX_PKTFLAG_ACKED)) {
5215                             queue_Remove(p);
5216                             rxi_FreePacket(p);
5217                         } else
5218                             missing = 1;
5219                     }
5220                     if (!missing)
5221                         call->flags |= RX_CALL_TQ_CLEARME;
5222                 }
5223 #endif /* RX_ENABLE_LOCKS */
5224                 /* Don't bother doing retransmits if the TQ is cleared. */
5225                 if (call->flags & RX_CALL_TQ_CLEARME) {
5226                     rxi_ClearTransmitQueue(call, 1);
5227                 } else
5228 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5229                 {
5230
5231                     /* Always post a resend event, if there is anything in the
5232                      * queue, and resend is possible.  There should be at least
5233                      * one unacknowledged packet in the queue ... otherwise none
5234                      * of these packets should be on the queue in the first place.
5235                      */
5236                     if (call->resendEvent) {
5237                         /* Cancel the existing event and post a new one */
5238                         rxevent_Cancel(call->resendEvent, call,
5239                                        RX_CALL_REFCOUNT_RESEND);
5240                     }
5241
5242                     /* The retry time is the retry time on the first unacknowledged
5243                      * packet inside the current window */
5244                     for (haveEvent =
5245                          0, queue_Scan(&call->tq, p, nxp, rx_packet)) {
5246                         /* Don't set timers for packets outside the window */
5247                         if (p->header.seq >= call->tfirst + call->twind) {
5248                             break;
5249                         }
5250
5251                         if (!(p->flags & RX_PKTFLAG_ACKED)
5252                             && !clock_IsZero(&p->retryTime)) {
5253                             haveEvent = 1;
5254                             retryTime = p->retryTime;
5255                             break;
5256                         }
5257                     }
5258
5259                     /* Post a new event to re-run rxi_Start when retries may be needed */
5260                     if (haveEvent && !(call->flags & RX_CALL_NEED_START)) {
5261 #ifdef RX_ENABLE_LOCKS
5262                         CALL_HOLD(call, RX_CALL_REFCOUNT_RESEND);
5263                         call->resendEvent =
5264                             rxevent_Post2(&retryTime, rxi_StartUnlocked,
5265                                          (void *)call, 0, istack);
5266 #else /* RX_ENABLE_LOCKS */
5267                         call->resendEvent =
5268                             rxevent_Post2(&retryTime, rxi_Start, (void *)call,
5269                                          0, istack);
5270 #endif /* RX_ENABLE_LOCKS */
5271                     }
5272                 }
5273 #ifdef  AFS_GLOBAL_RXLOCK_KERNEL
5274             } while (call->flags & RX_CALL_NEED_START);
5275             /*
5276              * TQ references no longer protected by this flag; they must remain
5277              * protected by the global lock.
5278              */
5279             call->flags &= ~RX_CALL_TQ_BUSY;
5280             if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
5281                 dpf(("call %x has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5282             }
5283 #ifdef RX_ENABLE_LOCKS
5284             osirx_AssertMine(&call->lock, "rxi_Start end");
5285             CV_BROADCAST(&call->cv_tq);
5286 #else /* RX_ENABLE_LOCKS */
5287             osi_rxWakeup(&call->tq);
5288 #endif /* RX_ENABLE_LOCKS */
5289         } else {
5290             call->flags |= RX_CALL_NEED_START;
5291         }
5292 #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
5293     } else {
5294         if (call->resendEvent) {
5295             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5296         }
5297     }
5298 }
5299
5300 /* Also adjusts the keep alive parameters for the call, to reflect
5301  * that we have just sent a packet (so keep alives aren't sent
5302  * immediately) */
5303 void
5304 rxi_Send(register struct rx_call *call, register struct rx_packet *p,
5305          int istack)
5306 {
5307     register struct rx_connection *conn = call->conn;
5308
5309     /* Stamp each packet with the user supplied status */
5310     p->header.userStatus = call->localStatus;
5311
5312     /* Allow the security object controlling this call's security to
5313      * make any last-minute changes to the packet */
5314     RXS_SendPacket(conn->securityObject, call, p);
5315
5316     /* Since we're about to send SOME sort of packet to the peer, it's
5317      * safe to nuke any scheduled end-of-packets ack */
5318     rxevent_Cancel(call->delayedAckEvent, call, RX_CALL_REFCOUNT_DELAY);
5319
5320     /* Actually send the packet, filling in more connection-specific fields */
5321     CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5322     MUTEX_EXIT(&call->lock);
5323     rxi_SendPacket(call, conn, p, istack);
5324     MUTEX_ENTER(&call->lock);
5325     CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5326
5327     /* Update last send time for this call (for keep-alive
5328      * processing), and for the connection (so that we can discover
5329      * idle connections) */
5330     conn->lastSendTime = call->lastSendTime = clock_Sec();
5331 }
5332
5333
5334 /* Check if a call needs to be destroyed.  Called by keep-alive code to ensure
5335  * that things are fine.  Also called periodically to guarantee that nothing
5336  * falls through the cracks (e.g. (error + dally) connections have keepalive
5337  * turned off.  Returns 0 if conn is well, -1 otherwise.  If otherwise, call
5338  *  may be freed!
5339  * haveCTLock Set if calling from rxi_ReapConnections
5340  */
5341 #ifdef RX_ENABLE_LOCKS
5342 int
5343 rxi_CheckCall(register struct rx_call *call, int haveCTLock)
5344 #else /* RX_ENABLE_LOCKS */
5345 int
5346 rxi_CheckCall(register struct rx_call *call)
5347 #endif                          /* RX_ENABLE_LOCKS */
5348 {
5349     register struct rx_connection *conn = call->conn;
5350     afs_uint32 now;
5351     afs_uint32 deadTime;
5352
5353 #ifdef RX_GLOBAL_RXLOCK_KERNEL
5354     if (call->flags & RX_CALL_TQ_BUSY) {
5355         /* Call is active and will be reset by rxi_Start if it's
5356          * in an error state.
5357          */
5358         return 0;
5359     }
5360 #endif
5361     /* dead time + RTT + 8*MDEV, rounded up to next second. */
5362     deadTime =
5363         (((afs_uint32) conn->secondsUntilDead << 10) +
5364          ((afs_uint32) conn->peer->rtt >> 3) +
5365          ((afs_uint32) conn->peer->rtt_dev << 1) + 1023) >> 10;
5366     now = clock_Sec();
5367     /* These are computed to the second (+- 1 second).  But that's
5368      * good enough for these values, which should be a significant
5369      * number of seconds. */
5370     if (now > (call->lastReceiveTime + deadTime)) {
5371         if (call->state == RX_STATE_ACTIVE) {
5372             rxi_CallError(call, RX_CALL_DEAD);
5373             return -1;
5374         } else {
5375 #ifdef RX_ENABLE_LOCKS
5376             /* Cancel pending events */
5377             rxevent_Cancel(call->delayedAckEvent, call,
5378                            RX_CALL_REFCOUNT_DELAY);
5379             rxevent_Cancel(call->resendEvent, call, RX_CALL_REFCOUNT_RESEND);
5380             rxevent_Cancel(call->keepAliveEvent, call,
5381                            RX_CALL_REFCOUNT_ALIVE);
5382             if (call->refCount == 0) {
5383                 rxi_FreeCall(call, haveCTLock);
5384                 return -2;
5385             }
5386             return -1;
5387 #else /* RX_ENABLE_LOCKS */
5388             rxi_FreeCall(call);
5389             return -2;
5390 #endif /* RX_ENABLE_LOCKS */
5391         }
5392         /* Non-active calls are destroyed if they are not responding
5393          * to pings; active calls are simply flagged in error, so the
5394          * attached process can die reasonably gracefully. */
5395     }
5396     /* see if we have a non-activity timeout */
5397     if (call->startWait && conn->idleDeadTime
5398         && ((call->startWait + conn->idleDeadTime) < now)) {
5399         if (call->state == RX_STATE_ACTIVE) {
5400             rxi_CallError(call, RX_CALL_TIMEOUT);
5401             return -1;
5402         }
5403     }
5404     /* see if we have a hard timeout */
5405     if (conn->hardDeadTime
5406         && (now > (conn->hardDeadTime + call->startTime.sec))) {
5407         if (call->state == RX_STATE_ACTIVE)
5408             rxi_CallError(call, RX_CALL_TIMEOUT);
5409         return -1;
5410     }
5411     return 0;
5412 }
5413
5414
5415 /* When a call is in progress, this routine is called occasionally to
5416  * make sure that some traffic has arrived (or been sent to) the peer.
5417  * If nothing has arrived in a reasonable amount of time, the call is
5418  * declared dead; if nothing has been sent for a while, we send a
5419  * keep-alive packet (if we're actually trying to keep the call alive)
5420  */
5421 void
5422 rxi_KeepAliveEvent(struct rxevent *event, register struct rx_call *call,
5423                    char *dummy)
5424 {
5425     struct rx_connection *conn;
5426     afs_uint32 now;
5427
5428     MUTEX_ENTER(&call->lock);
5429     CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
5430     if (event == call->keepAliveEvent)
5431         call->keepAliveEvent = NULL;
5432     now = clock_Sec();
5433
5434 #ifdef RX_ENABLE_LOCKS
5435     if (rxi_CheckCall(call, 0)) {
5436         MUTEX_EXIT(&call->lock);
5437         return;
5438     }
5439 #else /* RX_ENABLE_LOCKS */
5440     if (rxi_CheckCall(call))
5441         return;
5442 #endif /* RX_ENABLE_LOCKS */
5443
5444     /* Don't try to keep alive dallying calls */
5445     if (call->state == RX_STATE_DALLY) {
5446         MUTEX_EXIT(&call->lock);
5447         return;
5448     }
5449
5450     conn = call->conn;
5451     if ((now - call->lastSendTime) > conn->secondsUntilPing) {
5452         /* Don't try to send keepalives if there is unacknowledged data */
5453         /* the rexmit code should be good enough, this little hack
5454          * doesn't quite work XXX */
5455         (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
5456     }
5457     rxi_ScheduleKeepAliveEvent(call);
5458     MUTEX_EXIT(&call->lock);
5459 }
5460
5461
5462 void
5463 rxi_ScheduleKeepAliveEvent(register struct rx_call *call)
5464 {
5465     if (!call->keepAliveEvent) {
5466         struct clock when;
5467         clock_GetTime(&when);
5468         when.sec += call->conn->secondsUntilPing;
5469         CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE);
5470         call->keepAliveEvent =
5471             rxevent_Post(&when, rxi_KeepAliveEvent, call, 0);
5472     }
5473 }
5474
5475 /* N.B. rxi_KeepAliveOff:  is defined earlier as a macro */
5476 void
5477 rxi_KeepAliveOn(register struct rx_call *call)
5478 {
5479     /* Pretend last packet received was received now--i.e. if another
5480      * packet isn't received within the keep alive time, then the call
5481      * will die; Initialize last send time to the current time--even
5482      * if a packet hasn't been sent yet.  This will guarantee that a
5483      * keep-alive is sent within the ping time */
5484     call->lastReceiveTime = call->lastSendTime = clock_Sec();
5485     rxi_ScheduleKeepAliveEvent(call);
5486 }
5487
5488 /* This routine is called to send connection abort messages
5489  * that have been delayed to throttle looping clients. */
5490 void
5491 rxi_SendDelayedConnAbort(struct rxevent *event,
5492                          register struct rx_connection *conn, char *dummy)
5493 {
5494     afs_int32 error;
5495     struct rx_packet *packet;
5496
5497     MUTEX_ENTER(&conn->conn_data_lock);
5498     conn->delayedAbortEvent = NULL;
5499     error = htonl(conn->error);
5500     conn->abortCount++;
5501     MUTEX_EXIT(&conn->conn_data_lock);
5502     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5503     if (packet) {
5504         packet =
5505             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5506                             RX_PACKET_TYPE_ABORT, (char *)&error,
5507                             sizeof(error), 0);
5508         rxi_FreePacket(packet);
5509     }
5510 }
5511
5512 /* This routine is called to send call abort messages
5513  * that have been delayed to throttle looping clients. */
5514 void
5515 rxi_SendDelayedCallAbort(struct rxevent *event, register struct rx_call *call,
5516                          char *dummy)
5517 {
5518     afs_int32 error;
5519     struct rx_packet *packet;
5520
5521     MUTEX_ENTER(&call->lock);
5522     call->delayedAbortEvent = NULL;
5523     error = htonl(call->error);
5524     call->abortCount++;
5525     packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5526     if (packet) {
5527         packet =
5528             rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
5529                             (char *)&error, sizeof(error), 0);
5530         rxi_FreePacket(packet);
5531     }
5532     MUTEX_EXIT(&call->lock);
5533 }
5534
5535 /* This routine is called periodically (every RX_AUTH_REQUEST_TIMEOUT
5536  * seconds) to ask the client to authenticate itself.  The routine
5537  * issues a challenge to the client, which is obtained from the
5538  * security object associated with the connection */
5539 void
5540 rxi_ChallengeEvent(struct rxevent *event, register struct rx_connection *conn,
5541                    void *arg1, int tries)
5542 {
5543     conn->challengeEvent = NULL;
5544     if (RXS_CheckAuthentication(conn->securityObject, conn) != 0) {
5545         register struct rx_packet *packet;
5546         struct clock when;
5547
5548         if (tries <= 0) {
5549             /* We've failed to authenticate for too long.
5550              * Reset any calls waiting for authentication;
5551              * they are all in RX_STATE_PRECALL.
5552              */
5553             int i;
5554
5555             MUTEX_ENTER(&conn->conn_call_lock);
5556             for (i = 0; i < RX_MAXCALLS; i++) {
5557                 struct rx_call *call = conn->call[i];
5558                 if (call) {
5559                     MUTEX_ENTER(&call->lock);
5560                     if (call->state == RX_STATE_PRECALL) {
5561                         rxi_CallError(call, RX_CALL_DEAD);
5562                         rxi_SendCallAbort(call, NULL, 0, 0);
5563                     }
5564                     MUTEX_EXIT(&call->lock);
5565                 }
5566             }
5567             MUTEX_EXIT(&conn->conn_call_lock);
5568             return;
5569         }
5570
5571         packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5572         if (packet) {
5573             /* If there's no packet available, do this later. */
5574             RXS_GetChallenge(conn->securityObject, conn, packet);
5575             rxi_SendSpecial((struct rx_call *)0, conn, packet,
5576                             RX_PACKET_TYPE_CHALLENGE, NULL, -1, 0);
5577             rxi_FreePacket(packet);
5578         }
5579         clock_GetTime(&when);
5580         when.sec += RX_CHALLENGE_TIMEOUT;
5581         conn->challengeEvent =
5582             rxevent_Post2(&when, rxi_ChallengeEvent, conn, 0,
5583                          (tries - 1));
5584     }
5585 }
5586
5587 /* Call this routine to start requesting the client to authenticate
5588  * itself.  This will continue until authentication is established,
5589  * the call times out, or an invalid response is returned.  The
5590  * security object associated with the connection is asked to create
5591  * the challenge at this time.  N.B.  rxi_ChallengeOff is a macro,
5592  * defined earlier. */
5593 void
5594 rxi_ChallengeOn(register struct rx_connection *conn)
5595 {
5596     if (!conn->challengeEvent) {
5597         RXS_CreateChallenge(conn->securityObject, conn);
5598         rxi_ChallengeEvent(NULL, conn, 0, RX_CHALLENGE_MAXTRIES);
5599     };
5600 }
5601
5602
5603 /* Compute round trip time of the packet provided, in *rttp.
5604  */
5605
5606 /* rxi_ComputeRoundTripTime is called with peer locked. */
5607 /* sentp and/or peer may be null */
5608 void
5609 rxi_ComputeRoundTripTime(register struct rx_packet *p,
5610                          register struct clock *sentp,
5611                          register struct rx_peer *peer)
5612 {
5613     struct clock thisRtt, *rttp = &thisRtt;
5614
5615     register int rtt_timeout;
5616
5617     clock_GetTime(rttp);
5618
5619     if (clock_Lt(rttp, sentp)) {
5620         clock_Zero(rttp);
5621         return;                 /* somebody set the clock back, don't count this time. */
5622     }
5623     clock_Sub(rttp, sentp);
5624     MUTEX_ENTER(&rx_stats_mutex);
5625     if (clock_Lt(rttp, &rx_stats.minRtt))
5626         rx_stats.minRtt = *rttp;
5627     if (clock_Gt(rttp, &rx_stats.maxRtt)) {
5628         if (rttp->sec > 60) {
5629             MUTEX_EXIT(&rx_stats_mutex);
5630             return;             /* somebody set the clock ahead */
5631         }
5632         rx_stats.maxRtt = *rttp;
5633     }
5634     clock_Add(&rx_stats.totalRtt, rttp);
5635     rx_stats.nRttSamples++;
5636     MUTEX_EXIT(&rx_stats_mutex);
5637
5638     /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */
5639
5640     /* Apply VanJacobson round-trip estimations */
5641     if (peer->rtt) {
5642         register int delta;
5643
5644         /*
5645          * srtt (peer->rtt) is in units of one-eighth-milliseconds.
5646          * srtt is stored as fixed point with 3 bits after the binary
5647          * point (i.e., scaled by 8). The following magic is
5648          * equivalent to the smoothing algorithm in rfc793 with an
5649          * alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed point).
5650          * srtt*8 = srtt*8 + rtt - srtt
5651          * srtt = srtt + rtt/8 - srtt/8
5652          */
5653
5654         delta = MSEC(rttp) - (peer->rtt >> 3);
5655         peer->rtt += delta;
5656
5657         /*
5658          * We accumulate a smoothed rtt variance (actually, a smoothed
5659          * mean difference), then set the retransmit timer to smoothed
5660          * rtt + 4 times the smoothed variance (was 2x in van's original
5661          * paper, but 4x works better for me, and apparently for him as
5662          * well).
5663          * rttvar is stored as
5664          * fixed point with 2 bits after the binary point (scaled by
5665          * 4).  The following is equivalent to rfc793 smoothing with
5666          * an alpha of .75 (rttvar = rttvar*3/4 + |delta| / 4).  This
5667          * replaces rfc793's wired-in beta.
5668          * dev*4 = dev*4 + (|actual - expected| - dev)
5669          */
5670
5671         if (delta < 0)
5672             delta = -delta;
5673
5674         delta -= (peer->rtt_dev >> 2);
5675         peer->rtt_dev += delta;
5676     } else {
5677         /* I don't have a stored RTT so I start with this value.  Since I'm
5678          * probably just starting a call, and will be pushing more data down
5679          * this, I expect congestion to increase rapidly.  So I fudge a
5680          * little, and I set deviance to half the rtt.  In practice,
5681          * deviance tends to approach something a little less than
5682          * half the smoothed rtt. */
5683         peer->rtt = (MSEC(rttp) << 3) + 8;
5684         peer->rtt_dev = peer->rtt >> 2; /* rtt/2: they're scaled differently */
5685     }
5686     /* the timeout is RTT + 4*MDEV + 0.35 sec   This is because one end or
5687      * the other of these connections is usually in a user process, and can
5688      * be switched and/or swapped out.  So on fast, reliable networks, the
5689      * timeout would otherwise be too short.
5690      */
5691     rtt_timeout = (peer->rtt >> 3) + peer->rtt_dev + 350;
5692     clock_Zero(&(peer->timeout));
5693     clock_Addmsec(&(peer->timeout), rtt_timeout);
5694
5695     dpf(("rxi_ComputeRoundTripTime(rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%0.3d sec)\n", MSEC(rttp), peer->rtt >> 3, peer->rtt_dev >> 2, (peer->timeout.sec), (peer->timeout.usec)));
5696 }
5697
5698
5699 /* Find all server connections that have not been active for a long time, and
5700  * toss them */
5701 void
5702 rxi_ReapConnections(void)
5703 {
5704     struct clock now;
5705     clock_GetTime(&now);
5706
5707     /* Find server connection structures that haven't been used for
5708      * greater than rx_idleConnectionTime */
5709     {
5710         struct rx_connection **conn_ptr, **conn_end;
5711         int i, havecalls = 0;
5712         MUTEX_ENTER(&rx_connHashTable_lock);
5713         for (conn_ptr = &rx_connHashTable[0], conn_end =
5714              &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
5715              conn_ptr++) {
5716             struct rx_connection *conn, *next;
5717             struct rx_call *call;
5718             int result;
5719
5720           rereap:
5721             for (conn = *conn_ptr; conn; conn = next) {
5722                 /* XXX -- Shouldn't the connection be locked? */
5723                 next = conn->next;
5724                 havecalls = 0;
5725                 for (i = 0; i < RX_MAXCALLS; i++) {
5726                     call = conn->call[i];
5727                     if (call) {
5728                         havecalls = 1;
5729                         MUTEX_ENTER(&call->lock);
5730 #ifdef RX_ENABLE_LOCKS
5731                         result = rxi_CheckCall(call, 1);
5732 #else /* RX_ENABLE_LOCKS */
5733                         result = rxi_CheckCall(call);
5734 #endif /* RX_ENABLE_LOCKS */
5735                         MUTEX_EXIT(&call->lock);
5736                         if (result == -2) {
5737                             /* If CheckCall freed the call, it might
5738                              * have destroyed  the connection as well,
5739                              * which screws up the linked lists.
5740                              */
5741                             goto rereap;
5742                         }
5743                     }
5744                 }
5745                 if (conn->type == RX_SERVER_CONNECTION) {
5746                     /* This only actually destroys the connection if
5747                      * there are no outstanding calls */
5748                     MUTEX_ENTER(&conn->conn_data_lock);
5749                     if (!havecalls && !conn->refCount
5750                         && ((conn->lastSendTime + rx_idleConnectionTime) <
5751                             now.sec)) {
5752                         conn->refCount++;       /* it will be decr in rx_DestroyConn */
5753                         MUTEX_EXIT(&conn->conn_data_lock);
5754 #ifdef RX_ENABLE_LOCKS
5755                         rxi_DestroyConnectionNoLock(conn);
5756 #else /* RX_ENABLE_LOCKS */
5757                         rxi_DestroyConnection(conn);
5758 #endif /* RX_ENABLE_LOCKS */
5759                     }
5760 #ifdef RX_ENABLE_LOCKS
5761                     else {
5762                         MUTEX_EXIT(&conn->conn_data_lock);
5763                     }
5764 #endif /* RX_ENABLE_LOCKS */
5765                 }
5766             }
5767         }
5768 #ifdef RX_ENABLE_LOCKS
5769         while (rx_connCleanup_list) {
5770             struct rx_connection *conn;
5771             conn = rx_connCleanup_list;
5772             rx_connCleanup_list = rx_connCleanup_list->next;
5773             MUTEX_EXIT(&rx_connHashTable_lock);
5774             rxi_CleanupConnection(conn);
5775             MUTEX_ENTER(&rx_connHashTable_lock);
5776         }
5777         MUTEX_EXIT(&rx_connHashTable_lock);
5778 #endif /* RX_ENABLE_LOCKS */
5779     }
5780
5781     /* Find any peer structures that haven't been used (haven't had an
5782      * associated connection) for greater than rx_idlePeerTime */
5783     {
5784         struct rx_peer **peer_ptr, **peer_end;
5785         int code;
5786         MUTEX_ENTER(&rx_rpc_stats);
5787         MUTEX_ENTER(&rx_peerHashTable_lock);
5788         for (peer_ptr = &rx_peerHashTable[0], peer_end =
5789              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
5790              peer_ptr++) {
5791             struct rx_peer *peer, *next, *prev;
5792             for (prev = peer = *peer_ptr; peer; peer = next) {
5793                 next = peer->next;
5794                 code = MUTEX_TRYENTER(&peer->peer_lock);
5795                 if ((code) && (peer->refCount == 0)
5796                     && ((peer->idleWhen + rx_idlePeerTime) < now.sec)) {
5797                     rx_interface_stat_p rpc_stat, nrpc_stat;
5798                     size_t space;
5799                     MUTEX_EXIT(&peer->peer_lock);
5800                     MUTEX_DESTROY(&peer->peer_lock);
5801                     for (queue_Scan
5802                          (&peer->rpcStats, rpc_stat, nrpc_stat,
5803                           rx_interface_stat)) {
5804                         unsigned int num_funcs;
5805                         if (!rpc_stat)
5806                             break;
5807                         queue_Remove(&rpc_stat->queue_header);
5808                         queue_Remove(&rpc_stat->all_peers);
5809                         num_funcs = rpc_stat->stats[0].func_total;
5810                         space =
5811                             sizeof(rx_interface_stat_t) +
5812                             rpc_stat->stats[0].func_total *
5813                             sizeof(rx_function_entry_v1_t);
5814
5815                         rxi_Free(rpc_stat, space);
5816                         rxi_rpc_peer_stat_cnt -= num_funcs;
5817                     }
5818                     rxi_FreePeer(peer);
5819                     MUTEX_ENTER(&rx_stats_mutex);
5820                     rx_stats.nPeerStructs--;
5821                     MUTEX_EXIT(&rx_stats_mutex);
5822                     if (peer == *peer_ptr) {
5823                         *peer_ptr = next;
5824                         prev = next;
5825                     } else
5826                         prev->next = next;
5827                 } else {
5828                     if (code) {
5829                         MUTEX_EXIT(&peer->peer_lock);
5830                     }
5831                     prev = peer;
5832                 }
5833             }
5834         }
5835         MUTEX_EXIT(&rx_peerHashTable_lock);
5836         MUTEX_EXIT(&rx_rpc_stats);
5837     }
5838
5839     /* THIS HACK IS A TEMPORARY HACK.  The idea is that the race condition in
5840      * rxi_AllocSendPacket, if it hits, will be handled at the next conn
5841      * GC, just below.  Really, we shouldn't have to keep moving packets from
5842      * one place to another, but instead ought to always know if we can
5843      * afford to hold onto a packet in its particular use.  */
5844     MUTEX_ENTER(&rx_freePktQ_lock);
5845     if (rx_waitingForPackets) {
5846         rx_waitingForPackets = 0;
5847 #ifdef  RX_ENABLE_LOCKS
5848         CV_BROADCAST(&rx_waitingForPackets_cv);
5849 #else
5850         osi_rxWakeup(&rx_waitingForPackets);
5851 #endif
5852     }
5853     MUTEX_EXIT(&rx_freePktQ_lock);
5854
5855     now.sec += RX_REAP_TIME;    /* Check every RX_REAP_TIME seconds */
5856     rxevent_Post(&now, rxi_ReapConnections, 0, 0);
5857 }
5858
5859
5860 /* rxs_Release - This isn't strictly necessary but, since the macro name from
5861  * rx.h is sort of strange this is better.  This is called with a security
5862  * object before it is discarded.  Each connection using a security object has
5863  * its own refcount to the object so it won't actually be freed until the last
5864  * connection is destroyed.
5865  *
5866  * This is the only rxs module call.  A hold could also be written but no one
5867  * needs it. */
5868
5869 int
5870 rxs_Release(struct rx_securityClass *aobj)
5871 {
5872     return RXS_Close(aobj);
5873 }
5874
5875 #ifdef ADAPT_WINDOW
5876 #define RXRATE_PKT_OH   (RX_HEADER_SIZE + RX_IPUDP_SIZE)
5877 #define RXRATE_SMALL_PKT    (RXRATE_PKT_OH + sizeof(struct rx_ackPacket))
5878 #define RXRATE_AVG_SMALL_PKT    (RXRATE_PKT_OH + (sizeof(struct rx_ackPacket)/2))
5879 #define RXRATE_LARGE_PKT    (RXRATE_SMALL_PKT + 256)
5880
5881 /* Adjust our estimate of the transmission rate to this peer, given
5882  * that the packet p was just acked. We can adjust peer->timeout and
5883  * call->twind. Pragmatically, this is called
5884  * only with packets of maximal length.
5885  * Called with peer and call locked.
5886  */
5887
5888 static void
5889 rxi_ComputeRate(register struct rx_peer *peer, register struct rx_call *call,
5890                 struct rx_packet *p, struct rx_packet *ackp, u_char ackReason)
5891 {
5892     afs_int32 xferSize, xferMs;
5893     register afs_int32 minTime;
5894     struct clock newTO;
5895
5896     /* Count down packets */
5897     if (peer->rateFlag > 0)
5898         peer->rateFlag--;
5899     /* Do nothing until we're enabled */
5900     if (peer->rateFlag != 0)
5901         return;
5902     if (!call->conn)
5903         return;
5904
5905     /* Count only when the ack seems legitimate */
5906     switch (ackReason) {
5907     case RX_ACK_REQUESTED:
5908         xferSize =
5909             p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize;
5910         xferMs = peer->rtt;
5911         break;
5912
5913     case RX_ACK_PING_RESPONSE:
5914         if (p)                  /* want the response to ping-request, not data send */
5915             return;
5916         clock_GetTime(&newTO);
5917         if (clock_Gt(&newTO, &call->pingRequestTime)) {
5918             clock_Sub(&newTO, &call->pingRequestTime);
5919             xferMs = (newTO.sec * 1000) + (newTO.usec / 1000);
5920         } else {
5921             return;
5922         }
5923         xferSize = rx_AckDataSize(rx_Window) + RX_HEADER_SIZE;
5924         break;
5925
5926     default:
5927         return;
5928     }
5929
5930     dpf(("CONG peer %lx/%u: sample (%s) size %ld, %ld ms (to %lu.%06lu, rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), (ackReason == RX_ACK_REQUESTED ? "dataack" : "pingack"), xferSize, xferMs, peer->timeout.sec, peer->timeout.usec, peer->smRtt, peer->ifMTU));
5931
5932     /* Track only packets that are big enough. */
5933     if ((p->length + RX_HEADER_SIZE + call->conn->securityMaxTrailerSize) <
5934         peer->ifMTU)
5935         return;
5936
5937     /* absorb RTT data (in milliseconds) for these big packets */
5938     if (peer->smRtt == 0) {
5939         peer->smRtt = xferMs;
5940     } else {
5941         peer->smRtt = ((peer->smRtt * 15) + xferMs + 4) >> 4;
5942         if (!peer->smRtt)
5943             peer->smRtt = 1;
5944     }
5945
5946     if (peer->countDown) {
5947         peer->countDown--;
5948         return;
5949     }
5950     peer->countDown = 10;       /* recalculate only every so often */
5951
5952     /* In practice, we can measure only the RTT for full packets,
5953      * because of the way Rx acks the data that it receives.  (If it's
5954      * smaller than a full packet, it often gets implicitly acked
5955      * either by the call response (from a server) or by the next call
5956      * (from a client), and either case confuses transmission times
5957      * with processing times.)  Therefore, replace the above
5958      * more-sophisticated processing with a simpler version, where the
5959      * smoothed RTT is kept for full-size packets, and the time to
5960      * transmit a windowful of full-size packets is simply RTT *
5961      * windowSize. Again, we take two steps:
5962      - ensure the timeout is large enough for a single packet's RTT;
5963      - ensure that the window is small enough to fit in the desired timeout.*/
5964
5965     /* First, the timeout check. */
5966     minTime = peer->smRtt;
5967     /* Get a reasonable estimate for a timeout period */
5968     minTime += minTime;
5969     newTO.sec = minTime / 1000;
5970     newTO.usec = (minTime - (newTO.sec * 1000)) * 1000;
5971
5972     /* Increase the timeout period so that we can always do at least
5973      * one packet exchange */
5974     if (clock_Gt(&newTO, &peer->timeout)) {
5975
5976         dpf(("CONG peer %lx/%u: timeout %lu.%06lu ==> %lu.%06lu (rtt %u, ps %u)", ntohl(peer->host), ntohs(peer->port), peer->timeout.sec, peer->timeout.usec, newTO.sec, newTO.usec, peer->smRtt, peer->packetSize));
5977
5978         peer->timeout = newTO;
5979     }
5980
5981     /* Now, get an estimate for the transmit window size. */
5982     minTime = peer->timeout.sec * 1000 + (peer->timeout.usec / 1000);
5983     /* Now, convert to the number of full packets that could fit in a
5984      * reasonable fraction of that interval */
5985     minTime /= (peer->smRtt << 1);
5986     xferSize = minTime;         /* (make a copy) */
5987
5988     /* Now clamp the size to reasonable bounds. */
5989     if (minTime <= 1)
5990         minTime = 1;
5991     else if (minTime > rx_Window)
5992         minTime = rx_Window;
5993 /*    if (minTime != peer->maxWindow) {
5994       dpf(("CONG peer %lx/%u: windowsize %lu ==> %lu (to %lu.%06lu, rtt %u, ps %u)",
5995              ntohl(peer->host), ntohs(peer->port), peer->maxWindow, minTime,
5996              peer->timeout.sec, peer->timeout.usec, peer->smRtt,
5997              peer->packetSize));
5998       peer->maxWindow = minTime;
5999         elide... call->twind = minTime;
6000     }
6001 */
6002
6003     /* Cut back on the peer timeout if it had earlier grown unreasonably.
6004      * Discern this by calculating the timeout necessary for rx_Window
6005      * packets. */
6006     if ((xferSize > rx_Window) && (peer->timeout.sec >= 3)) {
6007         /* calculate estimate for transmission interval in milliseconds */
6008         minTime = rx_Window * peer->smRtt;
6009         if (minTime < 1000) {
6010             dpf(("CONG peer %lx/%u: cut TO %lu.%06lu by 0.5 (rtt %u, ps %u)",
6011                  ntohl(peer->host), ntohs(peer->port), peer->timeout.sec,
6012                  peer->timeout.usec, peer->smRtt, peer->packetSize));
6013
6014             newTO.sec = 0;      /* cut back on timeout by half a second */
6015             newTO.usec = 500000;
6016             clock_Sub(&peer->timeout, &newTO);
6017         }
6018     }
6019
6020     return;
6021 }                               /* end of rxi_ComputeRate */
6022 #endif /* ADAPT_WINDOW */
6023
6024
6025
6026
6027
6028
6029 #ifdef RXDEBUG
6030 /* Don't call this debugging routine directly; use dpf */
6031 void
6032 rxi_DebugPrint(char *format, int a1, int a2, int a3, int a4, int a5, int a6,
6033                int a7, int a8, int a9, int a10, int a11, int a12, int a13,
6034                int a14, int a15)
6035 {
6036 #ifdef AFS_NT40_ENV
6037     char msg[512];
6038     int len;
6039
6040     len = _snprintf(msg, sizeof(msg)-2,
6041                     format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
6042                     a11, a12, a13, a14, a15);
6043     if (len > 0) {
6044         if (msg[len-1] != '\n') {
6045             msg[len] = '\n';
6046             msg[len+1] = '\0';
6047         }
6048         OutputDebugString(msg);
6049     }
6050 #else
6051     struct clock now;
6052     clock_GetTime(&now);
6053     fprintf(rx_Log, " %u.%.3u:", (unsigned int)now.sec,
6054             (unsigned int)now.usec / 1000);
6055     fprintf(rx_Log, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
6056             a13, a14, a15);
6057     putc('\n', rx_Log);
6058 #endif
6059 }
6060
6061 /*
6062  * This function is used to process the rx_stats structure that is local
6063  * to a process as well as an rx_stats structure received from a remote
6064  * process (via rxdebug).  Therefore, it needs to do minimal version
6065  * checking.
6066  */
6067 void
6068 rx_PrintTheseStats(FILE * file, struct rx_stats *s, int size,
6069                    afs_int32 freePackets, char version)
6070 {
6071     int i;
6072
6073     if (size != sizeof(struct rx_stats)) {
6074         fprintf(file,
6075                 "Unexpected size of stats structure: was %d, expected %d\n",
6076                 size, sizeof(struct rx_stats));
6077     }
6078
6079     fprintf(file, "rx stats: free packets %d, allocs %d, ", (int)freePackets,
6080             s->packetRequests);
6081
6082     if (version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6083         fprintf(file, "alloc-failures(rcv %d/%d,send %d/%d,ack %d)\n",
6084                 s->receivePktAllocFailures, s->receiveCbufPktAllocFailures,
6085                 s->sendPktAllocFailures, s->sendCbufPktAllocFailures,
6086                 s->specialPktAllocFailures);
6087     } else {
6088         fprintf(file, "alloc-failures(rcv %d,send %d,ack %d)\n",
6089                 s->receivePktAllocFailures, s->sendPktAllocFailures,
6090                 s->specialPktAllocFailures);
6091     }
6092
6093     fprintf(file,
6094             "   greedy %d, " "bogusReads %d (last from host %x), "
6095             "noPackets %d, " "noBuffers %d, " "selects %d, "
6096             "sendSelects %d\n", s->socketGreedy, s->bogusPacketOnRead,
6097             s->bogusHost, s->noPacketOnRead, s->noPacketBuffersOnRead,
6098             s->selects, s->sendSelects);
6099
6100     fprintf(file, "   packets read: ");
6101     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6102         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsRead[i]);
6103     }
6104     fprintf(file, "\n");
6105
6106     fprintf(file,
6107             "   other read counters: data %d, " "ack %d, " "dup %d "
6108             "spurious %d " "dally %d\n", s->dataPacketsRead,
6109             s->ackPacketsRead, s->dupPacketsRead, s->spuriousPacketsRead,
6110             s->ignorePacketDally);
6111
6112     fprintf(file, "   packets sent: ");
6113     for (i = 0; i < RX_N_PACKET_TYPES; i++) {
6114         fprintf(file, "%s %d ", rx_packetTypes[i], s->packetsSent[i]);
6115     }
6116     fprintf(file, "\n");
6117
6118     fprintf(file,
6119             "   other send counters: ack %d, " "data %d (not resends), "
6120             "resends %d, " "pushed %d, " "acked&ignored %d\n",
6121             s->ackPacketsSent, s->dataPacketsSent, s->dataPacketsReSent,
6122             s->dataPacketsPushed, s->ignoreAckedPacket);
6123
6124     fprintf(file,
6125             "   \t(these should be small) sendFailed %d, " "fatalErrors %d\n",
6126             s->netSendFailures, (int)s->fatalErrors);
6127
6128     if (s->nRttSamples) {
6129         fprintf(file, "   Average rtt is %0.3f, with %d samples\n",
6130                 clock_Float(&s->totalRtt) / s->nRttSamples, s->nRttSamples);
6131
6132         fprintf(file, "   Minimum rtt is %0.3f, maximum is %0.3f\n",
6133                 clock_Float(&s->minRtt), clock_Float(&s->maxRtt));
6134     }
6135
6136     fprintf(file,
6137             "   %d server connections, " "%d client connections, "
6138             "%d peer structs, " "%d call structs, " "%d free call structs\n",
6139             s->nServerConns, s->nClientConns, s->nPeerStructs,
6140             s->nCallStructs, s->nFreeCallStructs);
6141
6142 #if     !defined(AFS_PTHREAD_ENV) && !defined(AFS_USE_GETTIMEOFDAY)
6143     fprintf(file, "   %d clock updates\n", clock_nUpdates);
6144 #endif
6145
6146 }
6147
6148 /* for backward compatibility */
6149 void
6150 rx_PrintStats(FILE * file)
6151 {
6152     MUTEX_ENTER(&rx_stats_mutex);
6153     rx_PrintTheseStats(file, &rx_stats, sizeof(rx_stats), rx_nFreePackets,
6154                        RX_DEBUGI_VERSION);
6155     MUTEX_EXIT(&rx_stats_mutex);
6156 }
6157
6158 void
6159 rx_PrintPeerStats(FILE * file, struct rx_peer *peer)
6160 {
6161     fprintf(file, "Peer %x.%d.  " "Burst size %d, " "burst wait %u.%d.\n",
6162             ntohl(peer->host), (int)peer->port, (int)peer->burstSize,
6163             (int)peer->burstWait.sec, (int)peer->burstWait.usec);
6164
6165     fprintf(file,
6166             "   Rtt %d, " "retry time %u.%06d, " "total sent %d, "
6167             "resent %d\n", peer->rtt, (int)peer->timeout.sec,
6168             (int)peer->timeout.usec, peer->nSent, peer->reSends);
6169
6170     fprintf(file,
6171             "   Packet size %d, " "max in packet skew %d, "
6172             "max out packet skew %d\n", peer->ifMTU, (int)peer->inPacketSkew,
6173             (int)peer->outPacketSkew);
6174 }
6175
6176 #ifdef AFS_PTHREAD_ENV
6177 /*
6178  * This mutex protects the following static variables:
6179  * counter
6180  */
6181
6182 #define LOCK_RX_DEBUG assert(pthread_mutex_lock(&rx_debug_mutex)==0)
6183 #define UNLOCK_RX_DEBUG assert(pthread_mutex_unlock(&rx_debug_mutex)==0)
6184 #else
6185 #define LOCK_RX_DEBUG
6186 #define UNLOCK_RX_DEBUG
6187 #endif /* AFS_PTHREAD_ENV */
6188
6189 static int
6190 MakeDebugCall(osi_socket socket, afs_uint32 remoteAddr, afs_uint16 remotePort,
6191               u_char type, void *inputData, size_t inputLength,
6192               void *outputData, size_t outputLength)
6193 {
6194     static afs_int32 counter = 100;
6195     afs_int32 endTime;
6196     struct rx_header theader;
6197     char tbuffer[1500];
6198     register afs_int32 code;
6199     struct timeval tv;
6200     struct sockaddr_in taddr, faddr;
6201     int faddrLen;
6202     fd_set imask;
6203     register char *tp;
6204
6205     endTime = time(0) + 20;     /* try for 20 seconds */
6206     LOCK_RX_DEBUG;
6207     counter++;
6208     UNLOCK_RX_DEBUG;
6209     tp = &tbuffer[sizeof(struct rx_header)];
6210     taddr.sin_family = AF_INET;
6211     taddr.sin_port = remotePort;
6212     taddr.sin_addr.s_addr = remoteAddr;
6213 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
6214     taddr.sin_len = sizeof(struct sockaddr_in);
6215 #endif
6216     while (1) {
6217         memset(&theader, 0, sizeof(theader));
6218         theader.epoch = htonl(999);
6219         theader.cid = 0;
6220         theader.callNumber = htonl(counter);
6221         theader.seq = 0;
6222         theader.serial = 0;
6223         theader.type = type;
6224         theader.flags = RX_CLIENT_INITIATED | RX_LAST_PACKET;
6225         theader.serviceId = 0;
6226
6227         memcpy(tbuffer, &theader, sizeof(theader));
6228         memcpy(tp, inputData, inputLength);
6229         code =
6230             sendto(socket, tbuffer, inputLength + sizeof(struct rx_header), 0,
6231                    (struct sockaddr *)&taddr, sizeof(struct sockaddr_in));
6232
6233         /* see if there's a packet available */
6234         FD_ZERO(&imask);
6235         FD_SET(socket, &imask);
6236         tv.tv_sec = 1;
6237         tv.tv_usec = 0;
6238         code = select(socket + 1, &imask, 0, 0, &tv);
6239         if (code == 1 && FD_ISSET(socket, &imask)) {
6240             /* now receive a packet */
6241             faddrLen = sizeof(struct sockaddr_in);
6242             code =
6243                 recvfrom(socket, tbuffer, sizeof(tbuffer), 0,
6244                          (struct sockaddr *)&faddr, &faddrLen);
6245
6246             if (code > 0) {
6247                 memcpy(&theader, tbuffer, sizeof(struct rx_header));
6248                 if (counter == ntohl(theader.callNumber))
6249                     break;
6250             }
6251         }
6252
6253         /* see if we've timed out */
6254         if (endTime < time(0))
6255             return -1;
6256     }
6257     code -= sizeof(struct rx_header);
6258     if (code > outputLength)
6259         code = outputLength;
6260     memcpy(outputData, tp, code);
6261     return code;
6262 }
6263
6264 afs_int32
6265 rx_GetServerDebug(osi_socket socket, afs_uint32 remoteAddr,
6266                   afs_uint16 remotePort, struct rx_debugStats * stat,
6267                   afs_uint32 * supportedValues)
6268 {
6269     struct rx_debugIn in;
6270     afs_int32 rc = 0;
6271
6272     *supportedValues = 0;
6273     in.type = htonl(RX_DEBUGI_GETSTATS);
6274     in.index = 0;
6275
6276     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6277                        &in, sizeof(in), stat, sizeof(*stat));
6278
6279     /*
6280      * If the call was successful, fixup the version and indicate
6281      * what contents of the stat structure are valid.
6282      * Also do net to host conversion of fields here.
6283      */
6284
6285     if (rc >= 0) {
6286         if (stat->version >= RX_DEBUGI_VERSION_W_SECSTATS) {
6287             *supportedValues |= RX_SERVER_DEBUG_SEC_STATS;
6288         }
6289         if (stat->version >= RX_DEBUGI_VERSION_W_GETALLCONN) {
6290             *supportedValues |= RX_SERVER_DEBUG_ALL_CONN;
6291         }
6292         if (stat->version >= RX_DEBUGI_VERSION_W_RXSTATS) {
6293             *supportedValues |= RX_SERVER_DEBUG_RX_STATS;
6294         }
6295         if (stat->version >= RX_DEBUGI_VERSION_W_WAITERS) {
6296             *supportedValues |= RX_SERVER_DEBUG_WAITER_CNT;
6297         }
6298         if (stat->version >= RX_DEBUGI_VERSION_W_IDLETHREADS) {
6299             *supportedValues |= RX_SERVER_DEBUG_IDLE_THREADS;
6300         }
6301         if (stat->version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
6302             *supportedValues |= RX_SERVER_DEBUG_NEW_PACKETS;
6303         }
6304         if (stat->version >= RX_DEBUGI_VERSION_W_GETPEER) {
6305             *supportedValues |= RX_SERVER_DEBUG_ALL_PEER;
6306         }
6307         if (stat->version >= RX_DEBUGI_VERSION_W_WAITED) {
6308             *supportedValues |= RX_SERVER_DEBUG_WAITED_CNT;
6309         }
6310
6311         stat->nFreePackets = ntohl(stat->nFreePackets);
6312         stat->packetReclaims = ntohl(stat->packetReclaims);
6313         stat->callsExecuted = ntohl(stat->callsExecuted);
6314         stat->nWaiting = ntohl(stat->nWaiting);
6315         stat->idleThreads = ntohl(stat->idleThreads);
6316     }
6317
6318     return rc;
6319 }
6320
6321 afs_int32
6322 rx_GetServerStats(osi_socket socket, afs_uint32 remoteAddr,
6323                   afs_uint16 remotePort, struct rx_stats * stat,
6324                   afs_uint32 * supportedValues)
6325 {
6326     struct rx_debugIn in;
6327     afs_int32 *lp = (afs_int32 *) stat;
6328     int i;
6329     afs_int32 rc = 0;
6330
6331     /*
6332      * supportedValues is currently unused, but added to allow future
6333      * versioning of this function.
6334      */
6335
6336     *supportedValues = 0;
6337     in.type = htonl(RX_DEBUGI_RXSTATS);
6338     in.index = 0;
6339     memset(stat, 0, sizeof(*stat));
6340
6341     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6342                        &in, sizeof(in), stat, sizeof(*stat));
6343
6344     if (rc >= 0) {
6345
6346         /*
6347          * Do net to host conversion here
6348          */
6349
6350         for (i = 0; i < sizeof(*stat) / sizeof(afs_int32); i++, lp++) {
6351             *lp = ntohl(*lp);
6352         }
6353     }
6354
6355     return rc;
6356 }
6357
6358 afs_int32
6359 rx_GetServerVersion(osi_socket socket, afs_uint32 remoteAddr,
6360                     afs_uint16 remotePort, size_t version_length,
6361                     char *version)
6362 {
6363     char a[1] = { 0 };
6364     return MakeDebugCall(socket, remoteAddr, remotePort,
6365                          RX_PACKET_TYPE_VERSION, a, 1, version,
6366                          version_length);
6367 }
6368
6369 afs_int32
6370 rx_GetServerConnections(osi_socket socket, afs_uint32 remoteAddr,
6371                         afs_uint16 remotePort, afs_int32 * nextConnection,
6372                         int allConnections, afs_uint32 debugSupportedValues,
6373                         struct rx_debugConn * conn,
6374                         afs_uint32 * supportedValues)
6375 {
6376     struct rx_debugIn in;
6377     afs_int32 rc = 0;
6378     int i;
6379
6380     /*
6381      * supportedValues is currently unused, but added to allow future
6382      * versioning of this function.
6383      */
6384
6385     *supportedValues = 0;
6386     if (allConnections) {
6387         in.type = htonl(RX_DEBUGI_GETALLCONN);
6388     } else {
6389         in.type = htonl(RX_DEBUGI_GETCONN);
6390     }
6391     in.index = htonl(*nextConnection);
6392     memset(conn, 0, sizeof(*conn));
6393
6394     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6395                        &in, sizeof(in), conn, sizeof(*conn));
6396
6397     if (rc >= 0) {
6398         *nextConnection += 1;
6399
6400         /*
6401          * Convert old connection format to new structure.
6402          */
6403
6404         if (debugSupportedValues & RX_SERVER_DEBUG_OLD_CONN) {
6405             struct rx_debugConn_vL *vL = (struct rx_debugConn_vL *)conn;
6406 #define MOVEvL(a) (conn->a = vL->a)
6407
6408             /* any old or unrecognized version... */
6409             for (i = 0; i < RX_MAXCALLS; i++) {
6410                 MOVEvL(callState[i]);
6411                 MOVEvL(callMode[i]);
6412                 MOVEvL(callFlags[i]);
6413                 MOVEvL(callOther[i]);
6414             }
6415             if (debugSupportedValues & RX_SERVER_DEBUG_SEC_STATS) {
6416                 MOVEvL(secStats.type);
6417                 MOVEvL(secStats.level);
6418                 MOVEvL(secStats.flags);
6419                 MOVEvL(secStats.expires);
6420                 MOVEvL(secStats.packetsReceived);
6421                 MOVEvL(secStats.packetsSent);
6422                 MOVEvL(secStats.bytesReceived);
6423                 MOVEvL(secStats.bytesSent);
6424             }
6425         }
6426
6427         /*
6428          * Do net to host conversion here
6429          * NOTE:
6430          *    I don't convert host or port since we are most likely
6431          *    going to want these in NBO.
6432          */
6433         conn->cid = ntohl(conn->cid);
6434         conn->serial = ntohl(conn->serial);
6435         for (i = 0; i < RX_MAXCALLS; i++) {
6436             conn->callNumber[i] = ntohl(conn->callNumber[i]);
6437         }
6438         conn->error = ntohl(conn->error);
6439         conn->secStats.flags = ntohl(conn->secStats.flags);
6440         conn->secStats.expires = ntohl(conn->secStats.expires);
6441         conn->secStats.packetsReceived =
6442             ntohl(conn->secStats.packetsReceived);
6443         conn->secStats.packetsSent = ntohl(conn->secStats.packetsSent);
6444         conn->secStats.bytesReceived = ntohl(conn->secStats.bytesReceived);
6445         conn->secStats.bytesSent = ntohl(conn->secStats.bytesSent);
6446         conn->epoch = ntohl(conn->epoch);
6447         conn->natMTU = ntohl(conn->natMTU);
6448     }
6449
6450     return rc;
6451 }
6452
6453 afs_int32
6454 rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr,
6455                   afs_uint16 remotePort, afs_int32 * nextPeer,
6456                   afs_uint32 debugSupportedValues, struct rx_debugPeer * peer,
6457                   afs_uint32 * supportedValues)
6458 {
6459     struct rx_debugIn in;
6460     afs_int32 rc = 0;
6461
6462     /*
6463      * supportedValues is currently unused, but added to allow future
6464      * versioning of this function.
6465      */
6466
6467     *supportedValues = 0;
6468     in.type = htonl(RX_DEBUGI_GETPEER);
6469     in.index = htonl(*nextPeer);
6470     memset(peer, 0, sizeof(*peer));
6471
6472     rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
6473                        &in, sizeof(in), peer, sizeof(*peer));
6474
6475     if (rc >= 0) {
6476         *nextPeer += 1;
6477
6478         /*
6479          * Do net to host conversion here
6480          * NOTE:
6481          *    I don't convert host or port since we are most likely
6482          *    going to want these in NBO.
6483          */
6484         peer->ifMTU = ntohs(peer->ifMTU);
6485         peer->idleWhen = ntohl(peer->idleWhen);
6486         peer->refCount = ntohs(peer->refCount);
6487         peer->burstWait.sec = ntohl(peer->burstWait.sec);
6488         peer->burstWait.usec = ntohl(peer->burstWait.usec);
6489         peer->rtt = ntohl(peer->rtt);
6490         peer->rtt_dev = ntohl(peer->rtt_dev);
6491         peer->timeout.sec = ntohl(peer->timeout.sec);
6492         peer->timeout.usec = ntohl(peer->timeout.usec);
6493         peer->nSent = ntohl(peer->nSent);
6494         peer->reSends = ntohl(peer->reSends);
6495         peer->inPacketSkew = ntohl(peer->inPacketSkew);
6496         peer->outPacketSkew = ntohl(peer->outPacketSkew);
6497         peer->rateFlag = ntohl(peer->rateFlag);
6498         peer->natMTU = ntohs(peer->natMTU);
6499         peer->maxMTU = ntohs(peer->maxMTU);
6500         peer->maxDgramPackets = ntohs(peer->maxDgramPackets);
6501         peer->ifDgramPackets = ntohs(peer->ifDgramPackets);
6502         peer->MTU = ntohs(peer->MTU);
6503         peer->cwind = ntohs(peer->cwind);
6504         peer->nDgramPackets = ntohs(peer->nDgramPackets);
6505         peer->congestSeq = ntohs(peer->congestSeq);
6506         peer->bytesSent.high = ntohl(peer->bytesSent.high);
6507         peer->bytesSent.low = ntohl(peer->bytesSent.low);
6508         peer->bytesReceived.high = ntohl(peer->bytesReceived.high);
6509         peer->bytesReceived.low = ntohl(peer->bytesReceived.low);
6510     }
6511
6512     return rc;
6513 }
6514 #endif /* RXDEBUG */
6515
6516 void
6517 shutdown_rx(void)
6518 {
6519     struct rx_serverQueueEntry *np;
6520     register int i, j;
6521 #ifndef KERNEL
6522     register struct rx_call *call;
6523     register struct rx_serverQueueEntry *sq;
6524 #endif /* KERNEL */
6525
6526     LOCK_RX_INIT;
6527     if (rxinit_status == 1) {
6528         UNLOCK_RX_INIT;
6529         return;                 /* Already shutdown. */
6530     }
6531 #ifndef KERNEL
6532     rx_port = 0;
6533 #ifndef AFS_PTHREAD_ENV
6534     FD_ZERO(&rx_selectMask);
6535 #endif /* AFS_PTHREAD_ENV */
6536     rxi_dataQuota = RX_MAX_QUOTA;
6537 #ifndef AFS_PTHREAD_ENV
6538     rxi_StopListener();
6539 #endif /* AFS_PTHREAD_ENV */
6540     shutdown_rxevent();
6541     rx_SetEpoch(0);
6542 #ifndef AFS_PTHREAD_ENV
6543 #ifndef AFS_USE_GETTIMEOFDAY
6544     clock_UnInit();
6545 #endif /* AFS_USE_GETTIMEOFDAY */
6546 #endif /* AFS_PTHREAD_ENV */
6547
6548     while (!queue_IsEmpty(&rx_freeCallQueue)) {
6549         call = queue_First(&rx_freeCallQueue, rx_call);
6550         queue_Remove(call);
6551         rxi_Free(call, sizeof(struct rx_call));
6552     }
6553
6554     while (!queue_IsEmpty(&rx_idleServerQueue)) {
6555         sq = queue_First(&rx_idleServerQueue, rx_serverQueueEntry);
6556         queue_Remove(sq);
6557     }
6558 #endif /* KERNEL */
6559
6560     {
6561         struct rx_peer **peer_ptr, **peer_end;
6562         for (peer_ptr = &rx_peerHashTable[0], peer_end =
6563              &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
6564              peer_ptr++) {
6565             struct rx_peer *peer, *next;
6566             for (peer = *peer_ptr; peer; peer = next) {
6567                 rx_interface_stat_p rpc_stat, nrpc_stat;
6568                 size_t space;
6569                 for (queue_Scan
6570                      (&peer->rpcStats, rpc_stat, nrpc_stat,
6571                       rx_interface_stat)) {
6572                     unsigned int num_funcs;
6573                     if (!rpc_stat)
6574                         break;
6575                     queue_Remove(&rpc_stat->queue_header);
6576                     queue_Remove(&rpc_stat->all_peers);
6577                     num_funcs = rpc_stat->stats[0].func_total;
6578                     space =
6579                         sizeof(rx_interface_stat_t) +
6580                         rpc_stat->stats[0].func_total *
6581                         sizeof(rx_function_entry_v1_t);
6582
6583                     rxi_Free(rpc_stat, space);
6584                     MUTEX_ENTER(&rx_rpc_stats);
6585                     rxi_rpc_peer_stat_cnt -= num_funcs;
6586                     MUTEX_EXIT(&rx_rpc_stats);
6587                 }
6588                 next = peer->next;
6589                 rxi_FreePeer(peer);
6590                 MUTEX_ENTER(&rx_stats_mutex);
6591                 rx_stats.nPeerStructs--;
6592                 MUTEX_EXIT(&rx_stats_mutex);
6593             }
6594         }
6595     }
6596     for (i = 0; i < RX_MAX_SERVICES; i++) {
6597         if (rx_services[i])
6598             rxi_Free(rx_services[i], sizeof(*rx_services[i]));
6599     }
6600     for (i = 0; i < rx_hashTableSize; i++) {
6601         register struct rx_connection *tc, *ntc;
6602         MUTEX_ENTER(&rx_connHashTable_lock);
6603         for (tc = rx_connHashTable[i]; tc; tc = ntc) {
6604             ntc = tc->next;
6605             for (j = 0; j < RX_MAXCALLS; j++) {
6606                 if (tc->call[j]) {
6607                     rxi_Free(tc->call[j], sizeof(*tc->call[j]));
6608                 }
6609             }
6610             rxi_Free(tc, sizeof(*tc));
6611         }
6612         MUTEX_EXIT(&rx_connHashTable_lock);
6613     }
6614
6615     MUTEX_ENTER(&freeSQEList_lock);
6616
6617     while ((np = rx_FreeSQEList)) {
6618         rx_FreeSQEList = *(struct rx_serverQueueEntry **)np;
6619         MUTEX_DESTROY(&np->lock);
6620         rxi_Free(np, sizeof(*np));
6621     }
6622
6623     MUTEX_EXIT(&freeSQEList_lock);
6624     MUTEX_DESTROY(&freeSQEList_lock);
6625     MUTEX_DESTROY(&rx_freeCallQueue_lock);
6626     MUTEX_DESTROY(&rx_connHashTable_lock);
6627     MUTEX_DESTROY(&rx_peerHashTable_lock);
6628     MUTEX_DESTROY(&rx_serverPool_lock);
6629
6630     osi_Free(rx_connHashTable,
6631              rx_hashTableSize * sizeof(struct rx_connection *));
6632     osi_Free(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6633
6634     UNPIN(rx_connHashTable,
6635           rx_hashTableSize * sizeof(struct rx_connection *));
6636     UNPIN(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
6637
6638     rxi_FreeAllPackets();
6639
6640     MUTEX_ENTER(&rx_stats_mutex);
6641     rxi_dataQuota = RX_MAX_QUOTA;
6642     rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0;
6643     MUTEX_EXIT(&rx_stats_mutex);
6644
6645     rxinit_status = 1;
6646     UNLOCK_RX_INIT;
6647 }
6648
6649 #ifdef RX_ENABLE_LOCKS
6650 void
6651 osirx_AssertMine(afs_kmutex_t * lockaddr, char *msg)
6652 {
6653     if (!MUTEX_ISMINE(lockaddr))
6654         osi_Panic("Lock not held: %s", msg);
6655 }
6656 #endif /* RX_ENABLE_LOCKS */
6657
6658 #ifndef KERNEL
6659
6660 /*
6661  * Routines to implement connection specific data.
6662  */
6663
6664 int
6665 rx_KeyCreate(rx_destructor_t rtn)
6666 {
6667     int key;
6668     MUTEX_ENTER(&rxi_keyCreate_lock);
6669     key = rxi_keyCreate_counter++;
6670     rxi_keyCreate_destructor = (rx_destructor_t *)
6671         realloc((void *)rxi_keyCreate_destructor,
6672                 (key + 1) * sizeof(rx_destructor_t));
6673     rxi_keyCreate_destructor[key] = rtn;
6674     MUTEX_EXIT(&rxi_keyCreate_lock);
6675     return key;
6676 }
6677
6678 void
6679 rx_SetSpecific(struct rx_connection *conn, int key, void *ptr)
6680 {
6681     int i;
6682     MUTEX_ENTER(&conn->conn_data_lock);
6683     if (!conn->specific) {
6684         conn->specific = (void **)malloc((key + 1) * sizeof(void *));
6685         for (i = 0; i < key; i++)
6686             conn->specific[i] = NULL;
6687         conn->nSpecific = key + 1;
6688         conn->specific[key] = ptr;
6689     } else if (key >= conn->nSpecific) {
6690         conn->specific = (void **)
6691             realloc(conn->specific, (key + 1) * sizeof(void *));
6692         for (i = conn->nSpecific; i < key; i++)
6693             conn->specific[i] = NULL;
6694         conn->nSpecific = key + 1;
6695         conn->specific[key] = ptr;
6696     } else {
6697         if (conn->specific[key] && rxi_keyCreate_destructor[key])
6698             (*rxi_keyCreate_destructor[key]) (conn->specific[key]);
6699         conn->specific[key] = ptr;
6700     }
6701     MUTEX_EXIT(&conn->conn_data_lock);
6702 }
6703
6704 void *
6705 rx_GetSpecific(struct rx_connection *conn, int key)
6706 {
6707     void *ptr;
6708     MUTEX_ENTER(&conn->conn_data_lock);
6709     if (key >= conn->nSpecific)
6710         ptr = NULL;
6711     else
6712         ptr = conn->specific[key];
6713     MUTEX_EXIT(&conn->conn_data_lock);
6714     return ptr;
6715 }
6716
6717 #endif /* !KERNEL */
6718
6719 /*
6720  * processStats is a queue used to store the statistics for the local
6721  * process.  Its contents are similar to the contents of the rpcStats
6722  * queue on a rx_peer structure, but the actual data stored within
6723  * this queue contains totals across the lifetime of the process (assuming
6724  * the stats have not been reset) - unlike the per peer structures
6725  * which can come and go based upon the peer lifetime.
6726  */
6727
6728 static struct rx_queue processStats = { &processStats, &processStats };
6729
6730 /*
6731  * peerStats is a queue used to store the statistics for all peer structs.
6732  * Its contents are the union of all the peer rpcStats queues.
6733  */
6734
6735 static struct rx_queue peerStats = { &peerStats, &peerStats };
6736
6737 /*
6738  * rxi_monitor_processStats is used to turn process wide stat collection
6739  * on and off
6740  */
6741
6742 static int rxi_monitor_processStats = 0;
6743
6744 /*
6745  * rxi_monitor_peerStats is used to turn per peer stat collection on and off
6746  */
6747
6748 static int rxi_monitor_peerStats = 0;
6749
6750 /*
6751  * rxi_AddRpcStat - given all of the information for a particular rpc
6752  * call, create (if needed) and update the stat totals for the rpc.
6753  *
6754  * PARAMETERS
6755  *
6756  * IN stats - the queue of stats that will be updated with the new value
6757  *
6758  * IN rxInterface - a unique number that identifies the rpc interface
6759  *
6760  * IN currentFunc - the index of the function being invoked
6761  *
6762  * IN totalFunc - the total number of functions in this interface
6763  *
6764  * IN queueTime - the amount of time this function waited for a thread
6765  *
6766  * IN execTime - the amount of time this function invocation took to execute
6767  *
6768  * IN bytesSent - the number bytes sent by this invocation
6769  *
6770  * IN bytesRcvd - the number bytes received by this invocation
6771  *
6772  * IN isServer - if true, this invocation was made to a server
6773  *
6774  * IN remoteHost - the ip address of the remote host
6775  *
6776  * IN remotePort - the port of the remote host
6777  *
6778  * IN addToPeerList - if != 0, add newly created stat to the global peer list
6779  *
6780  * INOUT counter - if a new stats structure is allocated, the counter will
6781  * be updated with the new number of allocated stat structures
6782  *
6783  * RETURN CODES
6784  *
6785  * Returns void.
6786  */
6787
6788 static int
6789 rxi_AddRpcStat(struct rx_queue *stats, afs_uint32 rxInterface,
6790                afs_uint32 currentFunc, afs_uint32 totalFunc,
6791                struct clock *queueTime, struct clock *execTime,
6792                afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd, int isServer,
6793                afs_uint32 remoteHost, afs_uint32 remotePort,
6794                int addToPeerList, unsigned int *counter)
6795 {
6796     int rc = 0;
6797     rx_interface_stat_p rpc_stat, nrpc_stat;
6798
6799     /*
6800      * See if there's already a structure for this interface
6801      */
6802
6803     for (queue_Scan(stats, rpc_stat, nrpc_stat, rx_interface_stat)) {
6804         if ((rpc_stat->stats[0].interfaceId == rxInterface)
6805             && (rpc_stat->stats[0].remote_is_server == isServer))
6806             break;
6807     }
6808
6809     /*
6810      * Didn't find a match so allocate a new structure and add it to the
6811      * queue.
6812      */
6813
6814     if (queue_IsEnd(stats, rpc_stat) || (rpc_stat == NULL)
6815         || (rpc_stat->stats[0].interfaceId != rxInterface)
6816         || (rpc_stat->stats[0].remote_is_server != isServer)) {
6817         int i;
6818         size_t space;
6819
6820         space =
6821             sizeof(rx_interface_stat_t) +
6822             totalFunc * sizeof(rx_function_entry_v1_t);
6823
6824         rpc_stat = (rx_interface_stat_p) rxi_Alloc(space);
6825         if (rpc_stat == NULL) {
6826             rc = 1;
6827             goto fail;
6828         }
6829         *counter += totalFunc;
6830         for (i = 0; i < totalFunc; i++) {
6831             rpc_stat->stats[i].remote_peer = remoteHost;
6832             rpc_stat->stats[i].remote_port = remotePort;
6833             rpc_stat->stats[i].remote_is_server = isServer;
6834             rpc_stat->stats[i].interfaceId = rxInterface;
6835             rpc_stat->stats[i].func_total = totalFunc;
6836             rpc_stat->stats[i].func_index = i;
6837             hzero(rpc_stat->stats[i].invocations);
6838             hzero(rpc_stat->stats[i].bytes_sent);
6839             hzero(rpc_stat->stats[i].bytes_rcvd);
6840             rpc_stat->stats[i].queue_time_sum.sec = 0;
6841             rpc_stat->stats[i].queue_time_sum.usec = 0;
6842             rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
6843             rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
6844             rpc_stat->stats[i].queue_time_min.sec = 9999999;
6845             rpc_stat->stats[i].queue_time_min.usec = 9999999;
6846             rpc_stat->stats[i].queue_time_max.sec = 0;
6847             rpc_stat->stats[i].queue_time_max.usec = 0;
6848             rpc_stat->stats[i].execution_time_sum.sec = 0;
6849             rpc_stat->stats[i].execution_time_sum.usec = 0;
6850             rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
6851             rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
6852             rpc_stat->stats[i].execution_time_min.sec = 9999999;
6853             rpc_stat->stats[i].execution_time_min.usec = 9999999;
6854             rpc_stat->stats[i].execution_time_max.sec = 0;
6855             rpc_stat->stats[i].execution_time_max.usec = 0;
6856         }
6857         queue_Prepend(stats, rpc_stat);
6858         if (addToPeerList) {
6859             queue_Prepend(&peerStats, &rpc_stat->all_peers);
6860         }
6861     }
6862
6863     /*
6864      * Increment the stats for this function
6865      */
6866
6867     hadd32(rpc_stat->stats[currentFunc].invocations, 1);
6868     hadd(rpc_stat->stats[currentFunc].bytes_sent, *bytesSent);
6869     hadd(rpc_stat->stats[currentFunc].bytes_rcvd, *bytesRcvd);
6870     clock_Add(&rpc_stat->stats[currentFunc].queue_time_sum, queueTime);
6871     clock_AddSq(&rpc_stat->stats[currentFunc].queue_time_sum_sqr, queueTime);
6872     if (clock_Lt(queueTime, &rpc_stat->stats[currentFunc].queue_time_min)) {
6873         rpc_stat->stats[currentFunc].queue_time_min = *queueTime;
6874     }
6875     if (clock_Gt(queueTime, &rpc_stat->stats[currentFunc].queue_time_max)) {
6876         rpc_stat->stats[currentFunc].queue_time_max = *queueTime;
6877     }
6878     clock_Add(&rpc_stat->stats[currentFunc].execution_time_sum, execTime);
6879     clock_AddSq(&rpc_stat->stats[currentFunc].execution_time_sum_sqr,
6880                 execTime);
6881     if (clock_Lt(execTime, &rpc_stat->stats[currentFunc].execution_time_min)) {
6882         rpc_stat->stats[currentFunc].execution_time_min = *execTime;
6883     }
6884     if (clock_Gt(execTime, &rpc_stat->stats[currentFunc].execution_time_max)) {
6885         rpc_stat->stats[currentFunc].execution_time_max = *execTime;
6886     }
6887
6888   fail:
6889     return rc;
6890 }
6891
6892 /*
6893  * rx_IncrementTimeAndCount - increment the times and count for a particular
6894  * rpc function.
6895  *
6896  * PARAMETERS
6897  *
6898  * IN peer - the peer who invoked the rpc
6899  *
6900  * IN rxInterface - a unique number that identifies the rpc interface
6901  *
6902  * IN currentFunc - the index of the function being invoked
6903  *
6904  * IN totalFunc - the total number of functions in this interface
6905  *
6906  * IN queueTime - the amount of time this function waited for a thread
6907  *
6908  * IN execTime - the amount of time this function invocation took to execute
6909  *
6910  * IN bytesSent - the number bytes sent by this invocation
6911  *
6912  * IN bytesRcvd - the number bytes received by this invocation
6913  *
6914  * IN isServer - if true, this invocation was made to a server
6915  *
6916  * RETURN CODES
6917  *
6918  * Returns void.
6919  */
6920
6921 void
6922 rx_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
6923                          afs_uint32 currentFunc, afs_uint32 totalFunc,
6924                          struct clock *queueTime, struct clock *execTime,
6925                          afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd,
6926                          int isServer)
6927 {
6928
6929     MUTEX_ENTER(&rx_rpc_stats);
6930     MUTEX_ENTER(&peer->peer_lock);
6931
6932     if (rxi_monitor_peerStats) {
6933         rxi_AddRpcStat(&peer->rpcStats, rxInterface, currentFunc, totalFunc,
6934                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
6935                        peer->host, peer->port, 1, &rxi_rpc_peer_stat_cnt);
6936     }
6937
6938     if (rxi_monitor_processStats) {
6939         rxi_AddRpcStat(&processStats, rxInterface, currentFunc, totalFunc,
6940                        queueTime, execTime, bytesSent, bytesRcvd, isServer,
6941                        0xffffffff, 0xffffffff, 0, &rxi_rpc_process_stat_cnt);
6942     }
6943
6944     MUTEX_EXIT(&peer->peer_lock);
6945     MUTEX_EXIT(&rx_rpc_stats);
6946
6947 }
6948
6949 /*
6950  * rx_MarshallProcessRPCStats - marshall an array of rpc statistics
6951  *
6952  * PARAMETERS
6953  *
6954  * IN callerVersion - the rpc stat version of the caller.
6955  *
6956  * IN count - the number of entries to marshall.
6957  *
6958  * IN stats - pointer to stats to be marshalled.
6959  *
6960  * OUT ptr - Where to store the marshalled data.
6961  *
6962  * RETURN CODES
6963  *
6964  * Returns void.
6965  */
6966 void
6967 rx_MarshallProcessRPCStats(afs_uint32 callerVersion, int count,
6968                            rx_function_entry_v1_t * stats, afs_uint32 ** ptrP)
6969 {
6970     int i;
6971     afs_uint32 *ptr;
6972
6973     /*
6974      * We only support the first version
6975      */
6976     for (ptr = *ptrP, i = 0; i < count; i++, stats++) {
6977         *(ptr++) = stats->remote_peer;
6978         *(ptr++) = stats->remote_port;
6979         *(ptr++) = stats->remote_is_server;
6980         *(ptr++) = stats->interfaceId;
6981         *(ptr++) = stats->func_total;
6982         *(ptr++) = stats->func_index;
6983         *(ptr++) = hgethi(stats->invocations);
6984         *(ptr++) = hgetlo(stats->invocations);
6985         *(ptr++) = hgethi(stats->bytes_sent);
6986         *(ptr++) = hgetlo(stats->bytes_sent);
6987         *(ptr++) = hgethi(stats->bytes_rcvd);
6988         *(ptr++) = hgetlo(stats->bytes_rcvd);
6989         *(ptr++) = stats->queue_time_sum.sec;
6990         *(ptr++) = stats->queue_time_sum.usec;
6991         *(ptr++) = stats->queue_time_sum_sqr.sec;
6992         *(ptr++) = stats->queue_time_sum_sqr.usec;
6993         *(ptr++) = stats->queue_time_min.sec;
6994         *(ptr++) = stats->queue_time_min.usec;
6995         *(ptr++) = stats->queue_time_max.sec;
6996         *(ptr++) = stats->queue_time_max.usec;
6997         *(ptr++) = stats->execution_time_sum.sec;
6998         *(ptr++) = stats->execution_time_sum.usec;
6999         *(ptr++) = stats->execution_time_sum_sqr.sec;
7000         *(ptr++) = stats->execution_time_sum_sqr.usec;
7001         *(ptr++) = stats->execution_time_min.sec;
7002         *(ptr++) = stats->execution_time_min.usec;
7003         *(ptr++) = stats->execution_time_max.sec;
7004         *(ptr++) = stats->execution_time_max.usec;
7005     }
7006     *ptrP = ptr;
7007 }
7008
7009 /*
7010  * rx_RetrieveProcessRPCStats - retrieve all of the rpc statistics for
7011  * this process
7012  *
7013  * PARAMETERS
7014  *
7015  * IN callerVersion - the rpc stat version of the caller
7016  *
7017  * OUT myVersion - the rpc stat version of this function
7018  *
7019  * OUT clock_sec - local time seconds
7020  *
7021  * OUT clock_usec - local time microseconds
7022  *
7023  * OUT allocSize - the number of bytes allocated to contain stats
7024  *
7025  * OUT statCount - the number stats retrieved from this process.
7026  *
7027  * OUT stats - the actual stats retrieved from this process.
7028  *
7029  * RETURN CODES
7030  *
7031  * Returns void.  If successful, stats will != NULL.
7032  */
7033
7034 int
7035 rx_RetrieveProcessRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7036                            afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7037                            size_t * allocSize, afs_uint32 * statCount,
7038                            afs_uint32 ** stats)
7039 {
7040     size_t space = 0;
7041     afs_uint32 *ptr;
7042     struct clock now;
7043     int rc = 0;
7044
7045     *stats = 0;
7046     *allocSize = 0;
7047     *statCount = 0;
7048     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7049
7050     /*
7051      * Check to see if stats are enabled
7052      */
7053
7054     MUTEX_ENTER(&rx_rpc_stats);
7055     if (!rxi_monitor_processStats) {
7056         MUTEX_EXIT(&rx_rpc_stats);
7057         return rc;
7058     }
7059
7060     clock_GetTime(&now);
7061     *clock_sec = now.sec;
7062     *clock_usec = now.usec;
7063
7064     /*
7065      * Allocate the space based upon the caller version
7066      *
7067      * If the client is at an older version than we are,
7068      * we return the statistic data in the older data format, but
7069      * we still return our version number so the client knows we
7070      * are maintaining more data than it can retrieve.
7071      */
7072
7073     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7074         space = rxi_rpc_process_stat_cnt * sizeof(rx_function_entry_v1_t);
7075         *statCount = rxi_rpc_process_stat_cnt;
7076     } else {
7077         /*
7078          * This can't happen yet, but in the future version changes
7079          * can be handled by adding additional code here
7080          */
7081     }
7082
7083     if (space > (size_t) 0) {
7084         *allocSize = space;
7085         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7086
7087         if (ptr != NULL) {
7088             rx_interface_stat_p rpc_stat, nrpc_stat;
7089
7090
7091             for (queue_Scan
7092                  (&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7093                 /*
7094                  * Copy the data based upon the caller version
7095                  */
7096                 rx_MarshallProcessRPCStats(callerVersion,
7097                                            rpc_stat->stats[0].func_total,
7098                                            rpc_stat->stats, &ptr);
7099             }
7100         } else {
7101             rc = ENOMEM;
7102         }
7103     }
7104     MUTEX_EXIT(&rx_rpc_stats);
7105     return rc;
7106 }
7107
7108 /*
7109  * rx_RetrievePeerRPCStats - retrieve all of the rpc statistics for the peers
7110  *
7111  * PARAMETERS
7112  *
7113  * IN callerVersion - the rpc stat version of the caller
7114  *
7115  * OUT myVersion - the rpc stat version of this function
7116  *
7117  * OUT clock_sec - local time seconds
7118  *
7119  * OUT clock_usec - local time microseconds
7120  *
7121  * OUT allocSize - the number of bytes allocated to contain stats
7122  *
7123  * OUT statCount - the number of stats retrieved from the individual
7124  * peer structures.
7125  *
7126  * OUT stats - the actual stats retrieved from the individual peer structures.
7127  *
7128  * RETURN CODES
7129  *
7130  * Returns void.  If successful, stats will != NULL.
7131  */
7132
7133 int
7134 rx_RetrievePeerRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
7135                         afs_uint32 * clock_sec, afs_uint32 * clock_usec,
7136                         size_t * allocSize, afs_uint32 * statCount,
7137                         afs_uint32 ** stats)
7138 {
7139     size_t space = 0;
7140     afs_uint32 *ptr;
7141     struct clock now;
7142     int rc = 0;
7143
7144     *stats = 0;
7145     *statCount = 0;
7146     *allocSize = 0;
7147     *myVersion = RX_STATS_RETRIEVAL_VERSION;
7148
7149     /*
7150      * Check to see if stats are enabled
7151      */
7152
7153     MUTEX_ENTER(&rx_rpc_stats);
7154     if (!rxi_monitor_peerStats) {
7155         MUTEX_EXIT(&rx_rpc_stats);
7156         return rc;
7157     }
7158
7159     clock_GetTime(&now);
7160     *clock_sec = now.sec;
7161     *clock_usec = now.usec;
7162
7163     /*
7164      * Allocate the space based upon the caller version
7165      *
7166      * If the client is at an older version than we are,
7167      * we return the statistic data in the older data format, but
7168      * we still return our version number so the client knows we
7169      * are maintaining more data than it can retrieve.
7170      */
7171
7172     if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
7173         space = rxi_rpc_peer_stat_cnt * sizeof(rx_function_entry_v1_t);
7174         *statCount = rxi_rpc_peer_stat_cnt;
7175     } else {
7176         /*
7177          * This can't happen yet, but in the future version changes
7178          * can be handled by adding additional code here
7179          */
7180     }
7181
7182     if (space > (size_t) 0) {
7183         *allocSize = space;
7184         ptr = *stats = (afs_uint32 *) rxi_Alloc(space);
7185
7186         if (ptr != NULL) {
7187             rx_interface_stat_p rpc_stat, nrpc_stat;
7188             char *fix_offset;
7189
7190             for (queue_Scan
7191                  (&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7192                 /*
7193                  * We have to fix the offset of rpc_stat since we are
7194                  * keeping this structure on two rx_queues.  The rx_queue
7195                  * package assumes that the rx_queue member is the first
7196                  * member of the structure.  That is, rx_queue assumes that
7197                  * any one item is only on one queue at a time.  We are
7198                  * breaking that assumption and so we have to do a little
7199                  * math to fix our pointers.
7200                  */
7201
7202                 fix_offset = (char *)rpc_stat;
7203                 fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7204                 rpc_stat = (rx_interface_stat_p) fix_offset;
7205
7206                 /*
7207                  * Copy the data based upon the caller version
7208                  */
7209                 rx_MarshallProcessRPCStats(callerVersion,
7210                                            rpc_stat->stats[0].func_total,
7211                                            rpc_stat->stats, &ptr);
7212             }
7213         } else {
7214             rc = ENOMEM;
7215         }
7216     }
7217     MUTEX_EXIT(&rx_rpc_stats);
7218     return rc;
7219 }
7220
7221 /*
7222  * rx_FreeRPCStats - free memory allocated by
7223  *                   rx_RetrieveProcessRPCStats and rx_RetrievePeerRPCStats
7224  *
7225  * PARAMETERS
7226  *
7227  * IN stats - stats previously returned by rx_RetrieveProcessRPCStats or
7228  * rx_RetrievePeerRPCStats
7229  *
7230  * IN allocSize - the number of bytes in stats.
7231  *
7232  * RETURN CODES
7233  *
7234  * Returns void.
7235  */
7236
7237 void
7238 rx_FreeRPCStats(afs_uint32 * stats, size_t allocSize)
7239 {
7240     rxi_Free(stats, allocSize);
7241 }
7242
7243 /*
7244  * rx_queryProcessRPCStats - see if process rpc stat collection is
7245  * currently enabled.
7246  *
7247  * PARAMETERS
7248  *
7249  * RETURN CODES
7250  *
7251  * Returns 0 if stats are not enabled != 0 otherwise
7252  */
7253
7254 int
7255 rx_queryProcessRPCStats(void)
7256 {
7257     int rc;
7258     MUTEX_ENTER(&rx_rpc_stats);
7259     rc = rxi_monitor_processStats;
7260     MUTEX_EXIT(&rx_rpc_stats);
7261     return rc;
7262 }
7263
7264 /*
7265  * rx_queryPeerRPCStats - see if peer stat collection is currently enabled.
7266  *
7267  * PARAMETERS
7268  *
7269  * RETURN CODES
7270  *
7271  * Returns 0 if stats are not enabled != 0 otherwise
7272  */
7273
7274 int
7275 rx_queryPeerRPCStats(void)
7276 {
7277     int rc;
7278     MUTEX_ENTER(&rx_rpc_stats);
7279     rc = rxi_monitor_peerStats;
7280     MUTEX_EXIT(&rx_rpc_stats);
7281     return rc;
7282 }
7283
7284 /*
7285  * rx_enableProcessRPCStats - begin rpc stat collection for entire process
7286  *
7287  * PARAMETERS
7288  *
7289  * RETURN CODES
7290  *
7291  * Returns void.
7292  */
7293
7294 void
7295 rx_enableProcessRPCStats(void)
7296 {
7297     MUTEX_ENTER(&rx_rpc_stats);
7298     rx_enable_stats = 1;
7299     rxi_monitor_processStats = 1;
7300     MUTEX_EXIT(&rx_rpc_stats);
7301 }
7302
7303 /*
7304  * rx_enablePeerRPCStats - begin rpc stat collection per peer structure
7305  *
7306  * PARAMETERS
7307  *
7308  * RETURN CODES
7309  *
7310  * Returns void.
7311  */
7312
7313 void
7314 rx_enablePeerRPCStats(void)
7315 {
7316     MUTEX_ENTER(&rx_rpc_stats);
7317     rx_enable_stats = 1;
7318     rxi_monitor_peerStats = 1;
7319     MUTEX_EXIT(&rx_rpc_stats);
7320 }
7321
7322 /*
7323  * rx_disableProcessRPCStats - stop rpc stat collection for entire process
7324  *
7325  * PARAMETERS
7326  *
7327  * RETURN CODES
7328  *
7329  * Returns void.
7330  */
7331
7332 void
7333 rx_disableProcessRPCStats(void)
7334 {
7335     rx_interface_stat_p rpc_stat, nrpc_stat;
7336     size_t space;
7337
7338     MUTEX_ENTER(&rx_rpc_stats);
7339
7340     /*
7341      * Turn off process statistics and if peer stats is also off, turn
7342      * off everything
7343      */
7344
7345     rxi_monitor_processStats = 0;
7346     if (rxi_monitor_peerStats == 0) {
7347         rx_enable_stats = 0;
7348     }
7349
7350     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7351         unsigned int num_funcs = 0;
7352         if (!rpc_stat)
7353             break;
7354         queue_Remove(rpc_stat);
7355         num_funcs = rpc_stat->stats[0].func_total;
7356         space =
7357             sizeof(rx_interface_stat_t) +
7358             rpc_stat->stats[0].func_total * sizeof(rx_function_entry_v1_t);
7359
7360         rxi_Free(rpc_stat, space);
7361         rxi_rpc_process_stat_cnt -= num_funcs;
7362     }
7363     MUTEX_EXIT(&rx_rpc_stats);
7364 }
7365
7366 /*
7367  * rx_disablePeerRPCStats - stop rpc stat collection for peers
7368  *
7369  * PARAMETERS
7370  *
7371  * RETURN CODES
7372  *
7373  * Returns void.
7374  */
7375
7376 void
7377 rx_disablePeerRPCStats(void)
7378 {
7379     struct rx_peer **peer_ptr, **peer_end;
7380     int code;
7381
7382     MUTEX_ENTER(&rx_rpc_stats);
7383
7384     /*
7385      * Turn off peer statistics and if process stats is also off, turn
7386      * off everything
7387      */
7388
7389     rxi_monitor_peerStats = 0;
7390     if (rxi_monitor_processStats == 0) {
7391         rx_enable_stats = 0;
7392     }
7393
7394     MUTEX_ENTER(&rx_peerHashTable_lock);
7395     for (peer_ptr = &rx_peerHashTable[0], peer_end =
7396          &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
7397          peer_ptr++) {
7398         struct rx_peer *peer, *next, *prev;
7399         for (prev = peer = *peer_ptr; peer; peer = next) {
7400             next = peer->next;
7401             code = MUTEX_TRYENTER(&peer->peer_lock);
7402             if (code) {
7403                 rx_interface_stat_p rpc_stat, nrpc_stat;
7404                 size_t space;
7405                 for (queue_Scan
7406                      (&peer->rpcStats, rpc_stat, nrpc_stat,
7407                       rx_interface_stat)) {
7408                     unsigned int num_funcs = 0;
7409                     if (!rpc_stat)
7410                         break;
7411                     queue_Remove(&rpc_stat->queue_header);
7412                     queue_Remove(&rpc_stat->all_peers);
7413                     num_funcs = rpc_stat->stats[0].func_total;
7414                     space =
7415                         sizeof(rx_interface_stat_t) +
7416                         rpc_stat->stats[0].func_total *
7417                         sizeof(rx_function_entry_v1_t);
7418
7419                     rxi_Free(rpc_stat, space);
7420                     rxi_rpc_peer_stat_cnt -= num_funcs;
7421                 }
7422                 MUTEX_EXIT(&peer->peer_lock);
7423                 if (prev == *peer_ptr) {
7424                     *peer_ptr = next;
7425                     prev = next;
7426                 } else
7427                     prev->next = next;
7428             } else {
7429                 prev = peer;
7430             }
7431         }
7432     }
7433     MUTEX_EXIT(&rx_peerHashTable_lock);
7434     MUTEX_EXIT(&rx_rpc_stats);
7435 }
7436
7437 /*
7438  * rx_clearProcessRPCStats - clear the contents of the rpc stats according
7439  * to clearFlag
7440  *
7441  * PARAMETERS
7442  *
7443  * IN clearFlag - flag indicating which stats to clear
7444  *
7445  * RETURN CODES
7446  *
7447  * Returns void.
7448  */
7449
7450 void
7451 rx_clearProcessRPCStats(afs_uint32 clearFlag)
7452 {
7453     rx_interface_stat_p rpc_stat, nrpc_stat;
7454
7455     MUTEX_ENTER(&rx_rpc_stats);
7456
7457     for (queue_Scan(&processStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7458         unsigned int num_funcs = 0, i;
7459         num_funcs = rpc_stat->stats[0].func_total;
7460         for (i = 0; i < num_funcs; i++) {
7461             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7462                 hzero(rpc_stat->stats[i].invocations);
7463             }
7464             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7465                 hzero(rpc_stat->stats[i].bytes_sent);
7466             }
7467             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7468                 hzero(rpc_stat->stats[i].bytes_rcvd);
7469             }
7470             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7471                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7472                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7473             }
7474             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7475                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7476                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7477             }
7478             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7479                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7480                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7481             }
7482             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7483                 rpc_stat->stats[i].queue_time_max.sec = 0;
7484                 rpc_stat->stats[i].queue_time_max.usec = 0;
7485             }
7486             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7487                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7488                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7489             }
7490             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7491                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7492                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7493             }
7494             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7495                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7496                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7497             }
7498             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7499                 rpc_stat->stats[i].execution_time_max.sec = 0;
7500                 rpc_stat->stats[i].execution_time_max.usec = 0;
7501             }
7502         }
7503     }
7504
7505     MUTEX_EXIT(&rx_rpc_stats);
7506 }
7507
7508 /*
7509  * rx_clearPeerRPCStats - clear the contents of the rpc stats according
7510  * to clearFlag
7511  *
7512  * PARAMETERS
7513  *
7514  * IN clearFlag - flag indicating which stats to clear
7515  *
7516  * RETURN CODES
7517  *
7518  * Returns void.
7519  */
7520
7521 void
7522 rx_clearPeerRPCStats(afs_uint32 clearFlag)
7523 {
7524     rx_interface_stat_p rpc_stat, nrpc_stat;
7525
7526     MUTEX_ENTER(&rx_rpc_stats);
7527
7528     for (queue_Scan(&peerStats, rpc_stat, nrpc_stat, rx_interface_stat)) {
7529         unsigned int num_funcs = 0, i;
7530         char *fix_offset;
7531         /*
7532          * We have to fix the offset of rpc_stat since we are
7533          * keeping this structure on two rx_queues.  The rx_queue
7534          * package assumes that the rx_queue member is the first
7535          * member of the structure.  That is, rx_queue assumes that
7536          * any one item is only on one queue at a time.  We are
7537          * breaking that assumption and so we have to do a little
7538          * math to fix our pointers.
7539          */
7540
7541         fix_offset = (char *)rpc_stat;
7542         fix_offset -= offsetof(rx_interface_stat_t, all_peers);
7543         rpc_stat = (rx_interface_stat_p) fix_offset;
7544
7545         num_funcs = rpc_stat->stats[0].func_total;
7546         for (i = 0; i < num_funcs; i++) {
7547             if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
7548                 hzero(rpc_stat->stats[i].invocations);
7549             }
7550             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
7551                 hzero(rpc_stat->stats[i].bytes_sent);
7552             }
7553             if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
7554                 hzero(rpc_stat->stats[i].bytes_rcvd);
7555             }
7556             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
7557                 rpc_stat->stats[i].queue_time_sum.sec = 0;
7558                 rpc_stat->stats[i].queue_time_sum.usec = 0;
7559             }
7560             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
7561                 rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
7562                 rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
7563             }
7564             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
7565                 rpc_stat->stats[i].queue_time_min.sec = 9999999;
7566                 rpc_stat->stats[i].queue_time_min.usec = 9999999;
7567             }
7568             if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
7569                 rpc_stat->stats[i].queue_time_max.sec = 0;
7570                 rpc_stat->stats[i].queue_time_max.usec = 0;
7571             }
7572             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
7573                 rpc_stat->stats[i].execution_time_sum.sec = 0;
7574                 rpc_stat->stats[i].execution_time_sum.usec = 0;
7575             }
7576             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
7577                 rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
7578                 rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
7579             }
7580             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
7581                 rpc_stat->stats[i].execution_time_min.sec = 9999999;
7582                 rpc_stat->stats[i].execution_time_min.usec = 9999999;
7583             }
7584             if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
7585                 rpc_stat->stats[i].execution_time_max.sec = 0;
7586                 rpc_stat->stats[i].execution_time_max.usec = 0;
7587             }
7588         }
7589     }
7590
7591     MUTEX_EXIT(&rx_rpc_stats);
7592 }
7593
7594 /*
7595  * rxi_rxstat_userok points to a routine that returns 1 if the caller
7596  * is authorized to enable/disable/clear RX statistics.
7597  */
7598 static int (*rxi_rxstat_userok) (struct rx_call * call) = NULL;
7599
7600 void
7601 rx_SetRxStatUserOk(int (*proc) (struct rx_call * call))
7602 {
7603     rxi_rxstat_userok = proc;
7604 }
7605
7606 int
7607 rx_RxStatUserOk(struct rx_call *call)
7608 {
7609     if (!rxi_rxstat_userok)
7610         return 0;
7611     return rxi_rxstat_userok(call);
7612 }