/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

/* RX:  Extended Remote Procedure Call */
#include <afsconfig.h>
#include <afs/param.h>

# include "afs/sysincludes.h"
# include "afsincludes.h"
# ifdef AFS_LINUX20_ENV
#  include "h/socket.h"
# endif
# include "netinet/in.h"
# ifdef AFS_SUN5_ENV
#  include "netinet/ip6.h"
#  include "inet/common.h"
#  include "inet/ip.h"
#  include "inet/ip_ire.h"
# endif
# include "afs/afs_args.h"
# include "afs/afs_osi.h"
# ifdef RX_KERNEL_TRACE
#  include "rx_kcommon.h"
# endif
# if defined(AFS_AIX_ENV)
#  undef RXDEBUG			/* turn off debugging */
# endif
# if defined(AFS_SGI_ENV)
#  include "sys/debug.h"
# endif
# else /* !UKERNEL */
#  include "afs/sysincludes.h"
#  include "afsincludes.h"
# endif /* !UKERNEL */
# include "afs/lock.h"
# include "rx_kmutex.h"
# include "rx_kernel.h"
# define	AFSOP_STOP_RXCALLBACK	210	/* Stop CALLBACK process */
# define	AFSOP_STOP_AFS		211	/* Stop AFS process */
# define	AFSOP_STOP_BKG		212	/* Stop BKG process */
extern afs_int32 afs_termState;
# ifdef AFS_AIX41_ENV
#  include "sys/lockl.h"
#  include "sys/lock_def.h"
# endif /* AFS_AIX41_ENV */
# include "afs/rxgen_consts.h"
# include <afs/afsutil.h>
# include <WINNT\afsreg.h>
#include <opr/queue.h>

#include "rx_atomic.h"
#include "rx_globals.h"
#include "rx_internal.h"
#include "rx_packet.h"
#include "rx_server.h"

#include <afs/rxgen_consts.h>
#ifdef AFS_PTHREAD_ENV
int (*registerProgram) (pid_t, char *) = 0;
int (*swapNameProgram) (pid_t, const char *, char *) = 0;
#else
int (*registerProgram) (PROCESS, char *) = 0;
int (*swapNameProgram) (PROCESS, const char *, char *) = 0;
#endif
/* Local static routines */
static void rxi_DestroyConnectionNoLock(struct rx_connection *conn);
static void rxi_ComputeRoundTripTime(struct rx_packet *, struct rx_ackPacket *,
                                     struct rx_call *, struct rx_peer *,
                                     struct clock *);
static void rxi_Resend(struct rxevent *event, void *arg0, void *arg1,
                       int istack);
static void rxi_SendDelayedAck(struct rxevent *event, void *call,
                               void *dummy, int dummy2);
static void rxi_SendDelayedCallAbort(struct rxevent *event, void *arg1,
                                     void *dummy, int dummy2);
static void rxi_SendDelayedConnAbort(struct rxevent *event, void *arg1,
                                     void *unused, int unused2);
static void rxi_ReapConnections(struct rxevent *unused, void *unused1,
                                void *unused2, int unused3);
static struct rx_packet *rxi_SendCallAbort(struct rx_call *call,
                                           struct rx_packet *packet,
                                           int istack, int force);
static void rxi_AckAll(struct rx_call *call);
static struct rx_connection
        *rxi_FindConnection(osi_socket socket, afs_uint32 host, u_short port,
                            u_short serviceId, afs_uint32 cid,
                            afs_uint32 epoch, int type, u_int securityIndex);
static struct rx_packet
        *rxi_ReceiveDataPacket(struct rx_call *call, struct rx_packet *np,
                               int istack, osi_socket socket,
                               afs_uint32 host, u_short port, int *tnop,
                               struct rx_call **newcallp);
static struct rx_packet
        *rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
                              int istack);
static struct rx_packet
        *rxi_ReceiveResponsePacket(struct rx_connection *conn,
                                   struct rx_packet *np, int istack);
static struct rx_packet
        *rxi_ReceiveChallengePacket(struct rx_connection *conn,
                                    struct rx_packet *np, int istack);
static void rxi_AttachServerProc(struct rx_call *call, osi_socket socket,
                                 int *tnop, struct rx_call **newcallp);
static void rxi_ClearTransmitQueue(struct rx_call *call, int force);
static void rxi_ClearReceiveQueue(struct rx_call *call);
static void rxi_ResetCall(struct rx_call *call, int newcall);
static void rxi_ScheduleKeepAliveEvent(struct rx_call *call);
static void rxi_ScheduleNatKeepAliveEvent(struct rx_connection *conn);
static void rxi_ScheduleGrowMTUEvent(struct rx_call *call, int secs);
static void rxi_KeepAliveOn(struct rx_call *call);
static void rxi_GrowMTUOn(struct rx_call *call);
static void rxi_ChallengeOn(struct rx_connection *conn);
static int rxi_CheckCall(struct rx_call *call, int haveCTLock);
static void rxi_AckAllInTransmitQueue(struct rx_call *call);
static void rxi_CancelKeepAliveEvent(struct rx_call *call);
static void rxi_CancelDelayedAbortEvent(struct rx_call *call);
static void rxi_CancelGrowMTUEvent(struct rx_call *call);
#ifdef RX_ENABLE_LOCKS
rx_atomic_t rxi_start_aborted;	/* rxi_start awoke after rxi_Send in error.*/
rx_atomic_t rxi_start_in_error;
#endif /* RX_ENABLE_LOCKS */
/* Constant delay time before sending an acknowledge of the last packet
 * received.  This is to avoid sending an extra acknowledge when the
 * client is about to make another call, anyway, or the server is
 * about to respond.
 *
 * The lastAckDelay may not exceed 400ms without causing peers to
 * unnecessarily time out.
 */
struct clock rx_lastAckDelay = {0, 400000};
/* Constant delay time before sending a soft ack when none was requested.
 * This is to make sure we send soft acks before the sender times out.
 * Normally we wait and send a hard ack when the receiver consumes the packet.
 *
 * This value has been 100ms in all shipping versions of OpenAFS. Changing it
 * will require changes to the peer's RTT calculations.
 */
struct clock rx_softAckDelay = {0, 100000};
/*
 * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
 * currently allocated within rx.  This number is used to allocate the
 * memory required to return the statistics when queried.
 * Protected by the rx_rpc_stats mutex.
 */
static unsigned int rxi_rpc_peer_stat_cnt;

/*
 * rxi_rpc_process_stat_cnt counts the total number of local process stat
 * structures currently allocated within rx.  The number is used to allocate
 * the memory required to return the statistics when queried.
 * Protected by the rx_rpc_stats mutex.
 */
static unsigned int rxi_rpc_process_stat_cnt;
/*
 * rxi_busyChannelError is a boolean.  It indicates whether or not RX_CALL_BUSY
 * errors should be reported to the application when a call channel appears busy
 * (inferred from the receipt of RX_PACKET_TYPE_BUSY packets on the channel),
 * and there are other call channels in the connection that are not busy.
 * If 0, we do not return errors upon receiving busy packets; we just keep
 * trying on the same call channel until we hit a timeout.
 */
static afs_int32 rxi_busyChannelError = 0;
rx_atomic_t rx_nWaiting = RX_ATOMIC_INIT(0);
rx_atomic_t rx_nWaited = RX_ATOMIC_INIT(0);

/* Incoming calls wait on this queue when there are no available
 * server processes */
struct opr_queue rx_incomingCallQueue;

/* Server processes wait on this queue when there are no appropriate
 * calls to process */
struct opr_queue rx_idleServerQueue;

#if !defined(offsetof)
#include <stddef.h>		/* for definition of offsetof() */
#endif
#ifdef RX_ENABLE_LOCKS
afs_kmutex_t rx_atomic_mutex;
#endif

/* Forward prototypes */
static struct rx_call * rxi_NewCall(struct rx_connection *, int);
static_inline void
putConnection (struct rx_connection *conn) {
    MUTEX_ENTER(&rx_refcnt_mutex);
    conn->refCount--;
    MUTEX_EXIT(&rx_refcnt_mutex);
}
#ifdef AFS_PTHREAD_ENV

/*
 * Use procedural initialization of mutexes/condition variables
 */
extern afs_kmutex_t rx_quota_mutex;
extern afs_kmutex_t rx_pthread_mutex;
extern afs_kmutex_t rx_packets_mutex;
extern afs_kmutex_t rx_refcnt_mutex;
extern afs_kmutex_t des_init_mutex;
extern afs_kmutex_t des_random_mutex;
extern afs_kmutex_t rx_clock_mutex;
extern afs_kmutex_t rxi_connCacheMutex;
extern afs_kmutex_t event_handler_mutex;
extern afs_kmutex_t listener_mutex;
extern afs_kmutex_t rx_if_init_mutex;
extern afs_kmutex_t rx_if_mutex;

extern afs_kcondvar_t rx_event_handler_cond;
extern afs_kcondvar_t rx_listener_cond;

static afs_kmutex_t epoch_mutex;
static afs_kmutex_t rx_init_mutex;
static afs_kmutex_t rx_debug_mutex;
static afs_kmutex_t rx_rpc_stats;
static void
rxi_InitPthread(void)
{
    MUTEX_INIT(&rx_clock_mutex, "clock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_stats_mutex, "stats", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_atomic_mutex, "atomic", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_quota_mutex, "quota", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_pthread_mutex, "pthread", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_packets_mutex, "packets", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_refcnt_mutex, "refcnts", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&epoch_mutex, "epoch", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_init_mutex, "init", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&event_handler_mutex, "event handler", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rxi_connCacheMutex, "conn cache", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&listener_mutex, "listener", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_if_init_mutex, "if init", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_if_mutex, "if", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_debug_mutex, "debug", MUTEX_DEFAULT, 0);

    CV_INIT(&rx_event_handler_cond, "evhand", CV_DEFAULT, 0);
    CV_INIT(&rx_listener_cond, "rxlisten", CV_DEFAULT, 0);

    osi_Assert(pthread_key_create(&rx_thread_id_key, NULL) == 0);
    osi_Assert(pthread_key_create(&rx_ts_info_key, NULL) == 0);

    MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
#ifdef RX_ENABLE_LOCKS
#ifdef RX_LOCKS_DB
    rxdb_init();
#endif /* RX_LOCKS_DB */
    MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
               0);
    CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
            0);
    MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
               0);
    MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
               0);
    MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rxi_keyCreate_lock, "rxi_keyCreate_lock", MUTEX_DEFAULT, 0);
#endif /* RX_ENABLE_LOCKS */
}
pthread_once_t rx_once_init = PTHREAD_ONCE_INIT;
#define INIT_PTHREAD_LOCKS osi_Assert(pthread_once(&rx_once_init, rxi_InitPthread)==0)
/*
 * The rx_stats_mutex mutex protects the following global variables:
 * rxi_lowConnRefCount
 * rxi_lowPeerRefCount
 *
 * The rx_quota_mutex mutex protects the following global variables:
 *
 * The rx_freePktQ_lock protects the following global variables:
 *
 * The rx_packets_mutex mutex protects the following global variables:
 *
 * The rx_pthread_mutex mutex protects the following global variables:
 * rxi_fcfs_thread_num
 */
#else
#define INIT_PTHREAD_LOCKS
#endif /* AFS_PTHREAD_ENV */
/* Variables for handling the minProcs implementation.  availProcs gives the
 * number of threads available in the pool at this moment (not counting dudes
 * executing right now).  totalMin gives the total number of procs required
 * for handling all minProcs requests.  minDeficit is a dynamic variable
 * tracking the # of procs required to satisfy all of the remaining minProcs
 * requests.
 *
 * For fine grain locking to work, the quota check and the reservation of
 * a server thread has to come while rxi_availProcs and rxi_minDeficit
 * are locked.  To this end, the code has been modified under #ifdef
 * RX_ENABLE_LOCKS so that quota checks and reservation occur at the
 * same time.  A new function, ReturnToServerPool() returns the allocation.
 *
 * A call can be on several queues (but only one at a time).  When
 * rxi_ResetCall wants to remove the call from a queue, it has to ensure
 * that no one else is touching the queue.  To this end, we store the address
 * of the queue lock in the call structure (under the call lock) when we
 * put the call on a queue, and we clear the call_queue_lock when the
 * call is removed from a queue (once the call lock has been obtained); see
 * the sketch below.  This allows rxi_ResetCall to safely synchronize with
 * others wishing to manipulate the queue.
 */
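/*
 * Illustrative sketch (not compiled) of the call_queue_lock convention
 * described above.  "some_queue" and "some_queue_lock" are hypothetical
 * stand-ins for a real queue, such as rx_incomingCallQueue and its lock.
 */
#if 0
    MUTEX_ENTER(&some_queue_lock);
    MUTEX_ENTER(&call->lock);
    opr_queue_Append(&some_queue, &call->entry);
    SET_CALL_QUEUE_LOCK(call, &some_queue_lock); /* remember the guarding lock */
    MUTEX_EXIT(&call->lock);
    MUTEX_EXIT(&some_queue_lock);

    /* ... and on removal, once the call lock has been obtained: */
    MUTEX_ENTER(&call->lock);
    opr_queue_Remove(&call->entry);
    CLEAR_CALL_QUEUE_LOCK(call);
    MUTEX_EXIT(&call->lock);
#endif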
#if defined(RX_ENABLE_LOCKS)
static afs_kmutex_t rx_rpc_stats;
#endif /* RX_ENABLE_LOCKS */
/* We keep a "last conn pointer" in rxi_FindConnection. The odds are
** pretty good that the next packet coming in is from the same connection
** as the last packet, since we send multiple packets in a transmit window.
*/
struct rx_connection *rxLastConn = 0;
#ifdef RX_ENABLE_LOCKS
/* The locking hierarchy for rx fine grain locking is composed of these
 * tiers:
 *
 * rx_connHashTable_lock - synchronizes conn creation, rx_connHashTable access
 * conn_call_lock - used to synchronize rx_EndCall and rx_NewCall
 * call->lock - locks call data fields.
 * These are independent of each other:
 *	rx_freeCallQueue_lock
 *	serverQueueEntry->lock
 * rx_peerHashTable_lock - locked under rx_connHashTable_lock
 * peer->lock - locks peer data fields.
 * conn_data_lock - ensures that no more than one thread updates a conn data
 *		    field at the same time.
 *
 * Do we need a lock to protect the peer field in the conn structure?
 *      conn->peer was previously a constant for all intents and so has no
 *      lock protecting this field.  The multihomed client delta introduced
 *      an RX code change: the peer field in the connection structure is now
 *      set to the remote interface from which the last packet for this
 *      connection was sent out.  This may become an issue if further changes
 *      are made.
 */
#define SET_CALL_QUEUE_LOCK(C, L) (C)->call_queue_lock = (L)
#define CLEAR_CALL_QUEUE_LOCK(C) (C)->call_queue_lock = NULL
#ifdef RX_LOCKS_DB
/* rxdb_fileID is used to identify the lock location, along with line#. */
static int rxdb_fileID = RXDB_FILE_RX;
#endif /* RX_LOCKS_DB */
#else /* RX_ENABLE_LOCKS */
#define SET_CALL_QUEUE_LOCK(C, L)
#define CLEAR_CALL_QUEUE_LOCK(C)
#endif /* RX_ENABLE_LOCKS */

struct rx_serverQueueEntry *rx_waitForPacket = 0;
struct rx_serverQueueEntry *rx_waitingForPacket = 0;
/* ------------Exported Interfaces------------- */

/* This function allows rxkad to set the epoch to a suitably random number
 * which rx_NewConnection will use in the future.  The principal purpose is to
 * get rxnull connections to use the same epoch as the rxkad connections do, at
 * least once the first rxkad connection is established.  This is important now
 * that the host/port addresses aren't used in FindConnection: the uniqueness
 * of epoch/cid matters and the start time won't do. */
#ifdef AFS_PTHREAD_ENV
/*
 * This mutex protects the following global variables:
 * rx_epoch
 */

#define LOCK_EPOCH MUTEX_ENTER(&epoch_mutex)
#define UNLOCK_EPOCH MUTEX_EXIT(&epoch_mutex)
#else
#define LOCK_EPOCH
#define UNLOCK_EPOCH
#endif /* AFS_PTHREAD_ENV */

void
rx_SetEpoch(afs_uint32 epoch)
{
    LOCK_EPOCH;
    rx_epoch = epoch;
    UNLOCK_EPOCH;
}
/* Initialize rx.  A port number may be mentioned, in which case this
 * becomes the default port number for any service installed later.
 * If 0 is provided for the port number, a random port will be chosen
 * by the kernel.  Whether this will ever overlap anything in
 * /etc/services is anybody's guess...  Returns 0 on success, -1 on
 * error. */
int rxinit_status = 1;
#ifdef AFS_PTHREAD_ENV
/*
 * This mutex protects the following global variables:
 * rxinit_status
 */

#define LOCK_RX_INIT MUTEX_ENTER(&rx_init_mutex)
#define UNLOCK_RX_INIT MUTEX_EXIT(&rx_init_mutex)
#else
#define LOCK_RX_INIT
#define UNLOCK_RX_INIT
#endif
int
rx_InitHost(u_int host, u_int port)
{
    struct timeval tv;
    char *htable, *ptable;
    int tmp_status;

    SPLVAR;

    INIT_PTHREAD_LOCKS;
    LOCK_RX_INIT;
    if (rxinit_status == 0) {
	tmp_status = rxinit_status;
	UNLOCK_RX_INIT;
	return tmp_status;	/* Already started; return previous error code. */
    }
#ifdef AFS_NT40_ENV
    if (afs_winsockInit() < 0)
	return -1;
#endif

#ifndef KERNEL
    /*
     * Initialize anything necessary to provide a non-preemptive threading
     * environment.
     */
    rxi_InitializeThreadSupport();
#endif

    /* Allocate and initialize a socket for client and perhaps server
     * connections. */

    rx_socket = rxi_GetHostUDPSocket(host, (u_short) port);
    if (rx_socket == OSI_NULLSOCKET) {
	UNLOCK_RX_INIT;
	return RX_ADDRINUSE;
    }
#if defined(RX_ENABLE_LOCKS) && defined(KERNEL)
#ifdef RX_LOCKS_DB
    rxdb_init();
#endif /* RX_LOCKS_DB */
    MUTEX_INIT(&rx_stats_mutex, "rx_stats_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_quota_mutex, "rx_quota_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_atomic_mutex, "rx_atomic_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_pthread_mutex, "rx_pthread_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_packets_mutex, "rx_packets_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_refcnt_mutex, "rx_refcnt_mutex", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_rpc_stats, "rx_rpc_stats", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_freePktQ_lock, "rx_freePktQ_lock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&freeSQEList_lock, "freeSQEList lock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&rx_freeCallQueue_lock, "rx_freeCallQueue_lock", MUTEX_DEFAULT,
	       0);
    CV_INIT(&rx_waitingForPackets_cv, "rx_waitingForPackets_cv", CV_DEFAULT,
	    0);
    MUTEX_INIT(&rx_peerHashTable_lock, "rx_peerHashTable_lock", MUTEX_DEFAULT,
	       0);
    MUTEX_INIT(&rx_connHashTable_lock, "rx_connHashTable_lock", MUTEX_DEFAULT,
	       0);
    MUTEX_INIT(&rx_serverPool_lock, "rx_serverPool_lock", MUTEX_DEFAULT, 0);
#if defined(AFS_HPUX110_ENV)
    rx_sleepLock = alloc_spinlock(LAST_HELD_ORDER - 10, "rx_sleepLock");
#endif /* AFS_HPUX110_ENV */
#endif /* RX_ENABLE_LOCKS && KERNEL */
    rx_connDeadTime = 12;
    rx_tranquil = 0;		/* reset flag */
    rxi_ResetStatistics();
    htable = osi_Alloc(rx_hashTableSize * sizeof(struct rx_connection *));
    PIN(htable, rx_hashTableSize * sizeof(struct rx_connection *));	/* XXXXX */
    memset(htable, 0, rx_hashTableSize * sizeof(struct rx_connection *));
    ptable = osi_Alloc(rx_hashTableSize * sizeof(struct rx_peer *));
    PIN(ptable, rx_hashTableSize * sizeof(struct rx_peer *));	/* XXXXX */
    memset(ptable, 0, rx_hashTableSize * sizeof(struct rx_peer *));

    /* Malloc up a bunch of packets & buffers */
    opr_queue_Init(&rx_freePacketQueue);
    rxi_NeedMorePackets = FALSE;
    rx_nPackets = 0;	/* rx_nPackets is managed by rxi_MorePackets* */

    /* enforce a minimum number of allocated packets */
    if (rx_extraPackets < rxi_nSendFrags * rx_maxSendWindow)
	rx_extraPackets = rxi_nSendFrags * rx_maxSendWindow;

    /* allocate the initial free packet pool */
#ifdef RX_ENABLE_TSFPQ
    rxi_MorePacketsTSFPQ(rx_extraPackets + RX_MAX_QUOTA + 2, RX_TS_FPQ_FLUSH_GLOBAL, 0);
#else /* RX_ENABLE_TSFPQ */
    rxi_MorePackets(rx_extraPackets + RX_MAX_QUOTA + 2);	/* fudge */
#endif /* RX_ENABLE_TSFPQ */
#if defined(AFS_NT40_ENV) && !defined(AFS_PTHREAD_ENV)
    tv.tv_sec = clock_now.sec;
    tv.tv_usec = clock_now.usec;
    srand((unsigned int)tv.tv_usec);
#else
    osi_GetTime(&tv);
#endif
    if (port) {
	rx_port = port;
    } else {
#if defined(KERNEL) && !defined(UKERNEL)
	/* Really, this should never happen in a real kernel */
	rx_port = 0;
#else
	struct sockaddr_in addr;
#ifdef AFS_NT40_ENV
	int addrlen = sizeof(addr);
#else
	socklen_t addrlen = sizeof(addr);
#endif
	if (getsockname((intptr_t)rx_socket, (struct sockaddr *)&addr, &addrlen)) {
	    rx_Finalize();
	    osi_Free(htable, rx_hashTableSize * sizeof(struct rx_connection *));
	    return -1;
	}
	rx_port = addr.sin_port;
#endif
    }
    rx_stats.minRtt.sec = 9999999;
#ifdef KERNEL
    rx_SetEpoch(tv.tv_sec | 0x80000000);
#else
    rx_SetEpoch(tv.tv_sec);	/* Start time of this package; rxkad
				 * will provide a more random value. */
#endif
    MUTEX_ENTER(&rx_quota_mutex);
    rxi_dataQuota += rx_extraQuota;	/* + extra pkts caller asked to rsrv */
    MUTEX_EXIT(&rx_quota_mutex);
    /* *Slightly* random start time for the cid.  This is just to help
     * out with the hashing function at the peer */
    rx_nextCid = ((tv.tv_sec ^ tv.tv_usec) << RX_CIDSHIFT);
    rx_connHashTable = (struct rx_connection **)htable;
    rx_peerHashTable = (struct rx_peer **)ptable;

    rx_hardAckDelay.sec = 0;
    rx_hardAckDelay.usec = 100000;	/* 100 milliseconds */

    rxevent_Init(20, rxi_ReScheduleEvents);

    /* Initialize various global queues */
    opr_queue_Init(&rx_idleServerQueue);
    opr_queue_Init(&rx_incomingCallQueue);
    opr_queue_Init(&rx_freeCallQueue);
#if defined(AFS_NT40_ENV) && !defined(KERNEL)
    /* Initialize our list of usable IP addresses. */
    rx_GetIFInfo();
#endif
#if defined(RXK_LISTENER_ENV) || !defined(KERNEL)
    /* Start listener process (exact function is dependent on the
     * implementation environment--kernel or user space) */
    rxi_StartListener();
#endif

    tmp_status = rxinit_status = 0;
    UNLOCK_RX_INIT;
    return tmp_status;
}

int
rx_Init(u_int port)
{
    return rx_InitHost(htonl(INADDR_ANY), port);
}
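/*
 * Illustrative sketch (not compiled): a user-space client typically brings
 * rx up once, early in main().  Passing 0 lets the kernel choose the port.
 */
#if 0
    if (rx_Init(0) != 0) {
	fprintf(stderr, "rx initialization failed\n");
	exit(1);
    }
#endif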
/*
 * The rxi_rto functions implement a TCP (RFC2988) style algorithm for
 * maintaining the round trip timer.
 */
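/*
 * For reference, this is the RFC 2988 update that such an algorithm applies
 * to each new round-trip sample R (rx keeps these quantities in its own
 * fixed-point units; the names below are illustrative only):
 *
 *	RTTVAR <- (3/4) * RTTVAR + (1/4) * |SRTT - R|
 *	SRTT   <- (7/8) * SRTT   + (1/8) * R
 *	RTO    <- SRTT + 4 * RTTVAR
 */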
/*!
 * Start a new RTT timer for a given call and packet.
 *
 * There must be no resendEvent already listed for this call, otherwise this
 * will leak events - intended for internal use within the RTO code only
 *
 * @param[in] call
 *	the RX call to start the timer for
 * @param[in] lastPacket
 *	a flag indicating whether the last packet has been sent or not
 *
 * @pre call must be locked before calling this function
 */
static void
rxi_rto_startTimer(struct rx_call *call, int lastPacket, int istack)
{
    struct clock now, retryTime;

    clock_GetTime(&now);
    retryTime = now;

    clock_Add(&retryTime, &call->rto);

    /* If we're sending the last packet, and we're the client, then the server
     * may wait for an additional 400ms before returning the ACK, wait for it
     * rather than hitting a timeout */
    if (lastPacket && call->conn->type == RX_CLIENT_CONNECTION)
	clock_Addmsec(&retryTime, 400);

    CALL_HOLD(call, RX_CALL_REFCOUNT_RESEND);
    call->resendEvent = rxevent_Post(&retryTime, &now, rxi_Resend,
				     call, NULL, istack);
}
/*!
 * Cancel an RTT timer for a given call.
 *
 * @param[in] call
 *	the RX call to cancel the timer for
 *
 * @pre call must be locked before calling this function
 */
static void
rxi_rto_cancel(struct rx_call *call)
{
    rxevent_Cancel(&call->resendEvent);
    CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
}
/*!
 * Tell the RTO timer that we have sent a packet.
 *
 * If the timer isn't already running, then start it.  If the timer is
 * running, then do nothing.
 *
 * @param[in] call
 *	the RX call that the packet has been sent on
 * @param[in] lastPacket
 *	A flag which is true if this is the last packet for the call
 *
 * @pre The call must be locked before calling this function
 */
static void
rxi_rto_packet_sent(struct rx_call *call, int lastPacket, int istack)
{
    if (call->resendEvent)
	return;

    rxi_rto_startTimer(call, lastPacket, istack);
}
/*!
 * Tell the RTO timer that we have received a new ACK message
 *
 * This function should be called whenever a call receives an ACK that
 * acknowledges new packets.  Whatever happens, we stop the current timer.
 * If there are unacked packets in the queue which have been sent, then
 * we restart the timer from now.  Otherwise, we leave it stopped.
 *
 * @param[in] call
 *	the RX call that the ACK has been received on
 */
static void
rxi_rto_packet_acked(struct rx_call *call, int istack)
{
    struct opr_queue *cursor;

    rxi_rto_cancel(call);

    if (opr_queue_IsEmpty(&call->tq))
	return;

    for (opr_queue_Scan(&call->tq, cursor)) {
	struct rx_packet *p = opr_queue_Entry(cursor, struct rx_packet, entry);

	if (p->header.seq > call->tfirst + call->twind)
	    return;

	if (!(p->flags & RX_PKTFLAG_ACKED) && p->flags & RX_PKTFLAG_SENT) {
	    rxi_rto_startTimer(call, p->header.flags & RX_LAST_PACKET, istack);
	    return;
	}
    }
}
/*!
 * Set an initial round trip timeout for a peer connection
 *
 * @param[in] secs The timeout to set in seconds
 */
void
rx_rto_setPeerTimeoutSecs(struct rx_peer *peer, int secs) {
    peer->rtt = secs * 8000;
}
/*!
 * Enables or disables the busy call channel error (RX_CALL_BUSY).
 *
 * @param[in] onoff Non-zero to enable busy call channel errors.
 *
 * @pre Neither rx_Init nor rx_InitHost have been called yet
 */
void
rx_SetBusyChannelError(afs_int32 onoff)
{
    osi_Assert(rxinit_status != 0);
    rxi_busyChannelError = onoff ? 1 : 0;
}
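/*
 * Illustrative sketch (not compiled): per the precondition above, busy
 * channel errors must be enabled before rx is initialized.
 */
#if 0
    rx_SetBusyChannelError(1);	/* report RX_CALL_BUSY instead of retrying */
    if (rx_Init(0) != 0)
	exit(1);
#endif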
/*!
 * Set a delayed ack event on the specified call for the given time
 *
 * @param[in] call - the call on which to set the event
 * @param[in] offset - the delay from now after which the event fires
 */
static void
rxi_PostDelayedAckEvent(struct rx_call *call, struct clock *offset)
{
    struct clock now, when;

    clock_GetTime(&now);
    when = now;
    clock_Add(&when, offset);

    if (call->delayedAckEvent && clock_Gt(&call->delayedAckTime, &when)) {
	/* The event we're cancelling already has a reference, so we don't
	 * need a new one */
	rxevent_Cancel(&call->delayedAckEvent);
	call->delayedAckEvent = rxevent_Post(&when, &now, rxi_SendDelayedAck,
					     call, NULL, 0);

	call->delayedAckTime = when;
    } else if (!call->delayedAckEvent) {
	CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
	call->delayedAckEvent = rxevent_Post(&when, &now,
					     rxi_SendDelayedAck,
					     call, NULL, 0);
	call->delayedAckTime = when;
    }
}
void
rxi_CancelDelayedAckEvent(struct rx_call *call)
{
    if (call->delayedAckEvent) {
	rxevent_Cancel(&call->delayedAckEvent);
	CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
    }
}
/* called with unincremented nRequestsRunning to see if it is OK to start
 * a new thread in this service.  Could be "no" for two reasons: over the
 * max quota, or would prevent others from reaching their min quota.
 */
#ifdef RX_ENABLE_LOCKS
/* This version of QuotaOK reserves quota if it's ok while the
 * rx_serverPool_lock is held.  Return quota using ReturnToServerPool().
 */
static int
QuotaOK(struct rx_service *aservice)
{
    /* check if over max quota */
    if (aservice->nRequestsRunning >= aservice->maxProcs) {
	return 0;
    }

    /* under min quota, we're OK */
    /* otherwise, can use only if there are enough to allow everyone
     * to go to their min quota after this guy starts.
     */
    MUTEX_ENTER(&rx_quota_mutex);
    if ((aservice->nRequestsRunning < aservice->minProcs)
	|| (rxi_availProcs > rxi_minDeficit)) {
	aservice->nRequestsRunning++;
	/* just started call in minProcs pool, need fewer to maintain
	 * guarantee */
	if (aservice->nRequestsRunning <= aservice->minProcs)
	    rxi_minDeficit--;
	rxi_availProcs--;
	MUTEX_EXIT(&rx_quota_mutex);
	return 1;
    }
    MUTEX_EXIT(&rx_quota_mutex);

    return 0;
}
static void
ReturnToServerPool(struct rx_service *aservice)
{
    aservice->nRequestsRunning--;
    MUTEX_ENTER(&rx_quota_mutex);
    if (aservice->nRequestsRunning < aservice->minProcs)
	rxi_minDeficit++;
    rxi_availProcs++;
    MUTEX_EXIT(&rx_quota_mutex);
}
#else /* RX_ENABLE_LOCKS */
static int
QuotaOK(struct rx_service *aservice)
{
    int rc = 0;
    /* under min quota, we're OK */
    if (aservice->nRequestsRunning < aservice->minProcs)
	return 1;

    /* check if over max quota */
    if (aservice->nRequestsRunning >= aservice->maxProcs)
	return 0;

    /* otherwise, can use only if there are enough to allow everyone
     * to go to their min quota after this guy starts.
     */
    MUTEX_ENTER(&rx_quota_mutex);
    if (rxi_availProcs > rxi_minDeficit)
	rc = 1;
    MUTEX_EXIT(&rx_quota_mutex);
    return rc;
}
#endif /* RX_ENABLE_LOCKS */
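/*
 * Worked example with illustrative numbers: suppose service A has
 * minProcs 2 and service B has minProcs 3.  If four of the existing
 * threads are busy, rxi_availProcs is 2, and if B is still owed three
 * threads to meet its guarantee, rxi_minDeficit is 3.  A request that
 * would take A beyond its own minimum is then refused, because
 * rxi_availProcs > rxi_minDeficit (2 > 3) is false.
 */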
/* Called by rx_StartServer to start up lwp's to service calls.
   NExistingProcs gives the number of procs already existing, and which
   therefore needn't be created. */
static void
rxi_StartServerProcs(int nExistingProcs)
{
    struct rx_service *service;
    int i;
    int maxdiff = 0;
    int nProcs = 0;

    /* For each service, reserve N processes, where N is the "minimum"
     * number of processes that MUST be able to execute a request in parallel,
     * at any time, for that process.  Also compute the maximum difference
     * between any service's maximum number of processes that can run
     * (i.e. the maximum number that ever will be run, and a guarantee
     * that this number will run if other services aren't running), and its
     * minimum number.  The result is the extra number of processes that
     * we need in order to provide the latter guarantee */
    for (i = 0; i < RX_MAX_SERVICES; i++) {
	int diff;
	service = rx_services[i];
	if (service == (struct rx_service *)0)
	    break;
	nProcs += service->minProcs;
	diff = service->maxProcs - service->minProcs;
	if (diff > maxdiff)
	    maxdiff = diff;
    }
    nProcs += maxdiff;		/* Extra processes needed to allow max number requested to run in any given service, under good conditions */
    nProcs -= nExistingProcs;	/* Subtract the number of procs that were previously created for use as server procs */
    for (i = 0; i < nProcs; i++) {
	rxi_StartServerProc(rx_ServerProc, rx_stackSize);
    }
}
#ifdef AFS_NT40_ENV
/* This routine is only required on Windows */
void
rx_StartClientThread(void)
{
#ifdef AFS_PTHREAD_ENV
    pthread_t pid;
    pid = pthread_self();
#endif /* AFS_PTHREAD_ENV */
}
#endif /* AFS_NT40_ENV */
/* This routine must be called if any services are exported.  If the
 * donateMe flag is set, the calling process is donated to the server
 * process pool */
void
rx_StartServer(int donateMe)
{
    struct rx_service *service;
    int i;
    SPLVAR;

    clock_NewTime();

    NETPRI;
    /* Start server processes, if necessary (exact function is dependent
     * on the implementation environment--kernel or user space).  DonateMe
     * will be 1 if there is 1 pre-existing proc, i.e. this one.  In this
     * case, one less new proc will be created by rx_StartServerProcs.
     */
    rxi_StartServerProcs(donateMe);
    /* count up the # of threads in minProcs, and set the min deficit to
     * be that value, too.
     */
    for (i = 0; i < RX_MAX_SERVICES; i++) {
	service = rx_services[i];
	if (service == (struct rx_service *)0)
	    break;
	MUTEX_ENTER(&rx_quota_mutex);
	rxi_totalMin += service->minProcs;
	/* below works even if a thread is running, since minDeficit would
	 * still have been decremented and later re-incremented.
	 */
	rxi_minDeficit += service->minProcs;
	MUTEX_EXIT(&rx_quota_mutex);
    }
    /* Turn on reaping of idle server connections */
    rxi_ReapConnections(NULL, NULL, NULL, 0);

    USERPRI;

    if (donateMe) {
#ifndef AFS_NT40_ENV
#ifndef KERNEL
	char name[32];
	static int nProcs;
#ifdef AFS_PTHREAD_ENV
	pid_t pid;
	pid = afs_pointer_to_int(pthread_self());
#else /* AFS_PTHREAD_ENV */
	PROCESS pid;
	LWP_CurrentProcess(&pid);
#endif /* AFS_PTHREAD_ENV */

	sprintf(name, "srv_%d", ++nProcs);
	if (registerProgram)
	    (*registerProgram) (pid, name);
#endif /* KERNEL */
#endif /* AFS_NT40_ENV */
	rx_ServerProc(NULL);	/* Never returns */
    }
#ifdef RX_ENABLE_TSFPQ
    /* no use leaving packets around in this thread's local queue if
     * it isn't getting donated to the server thread pool.
     */
    rxi_FlushLocalPacketsTSFPQ();
#endif /* RX_ENABLE_TSFPQ */
    return;
}
/* Create a new client connection to the specified service, using the
 * specified security object to implement the security model for this
 * connection. */
struct rx_connection *
rx_NewConnection(afs_uint32 shost, u_short sport, u_short sservice,
		 struct rx_securityClass *securityObject,
		 int serviceSecurityIndex)
{
    int hashindex, i;
    afs_int32 cid;
    struct rx_connection *conn;

    SPLVAR;

    clock_NewTime();
    dpf(("rx_NewConnection(host %x, port %u, service %u, securityObject %p, "
	 "serviceSecurityIndex %d)\n",
	 ntohl(shost), ntohs(sport), sservice, securityObject,
	 serviceSecurityIndex));
    /* Vasilsi said: "NETPRI protects Cid and Alloc", but can this be true in
     * the case of kmem_alloc? */
    conn = rxi_AllocConnection();
#ifdef RX_ENABLE_LOCKS
    MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
    MUTEX_INIT(&conn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
    CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
#endif
    NETPRI;
    MUTEX_ENTER(&rx_connHashTable_lock);
    cid = (rx_nextCid += RX_MAXCALLS);
    conn->type = RX_CLIENT_CONNECTION;
    conn->cid = cid;
    conn->epoch = rx_epoch;
    conn->peer = rxi_FindPeer(shost, sport, 1);
    conn->serviceId = sservice;
    conn->securityObject = securityObject;
    conn->securityData = (void *) 0;
    conn->securityIndex = serviceSecurityIndex;
    rx_SetConnDeadTime(conn, rx_connDeadTime);
    rx_SetConnSecondsUntilNatPing(conn, 0);
    conn->ackRate = RX_FAST_ACK_RATE;
    conn->nSpecific = 0;
    conn->specific = NULL;
    conn->challengeEvent = NULL;
    conn->delayedAbortEvent = NULL;
    conn->abortCount = 0;
    conn->error = 0;
    for (i = 0; i < RX_MAXCALLS; i++) {
	conn->twind[i] = rx_initSendWindow;
	conn->rwind[i] = rx_initReceiveWindow;
	conn->lastBusy[i] = 0;
    }
    RXS_NewConnection(securityObject, conn);
    hashindex =
	CONN_HASH(shost, sport, conn->cid, conn->epoch, RX_CLIENT_CONNECTION);

    conn->refCount++;		/* no lock required since only this thread knows... */
    conn->next = rx_connHashTable[hashindex];
    rx_connHashTable[hashindex] = conn;
    if (rx_stats_active)
	rx_atomic_inc(&rx_stats.nClientConns);
    MUTEX_EXIT(&rx_connHashTable_lock);
    USERPRI;
    return conn;
}
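/*
 * Illustrative client sketch (not compiled).  The loopback address, port
 * and service id 4711 are example values only; rxnull supplies the trivial
 * security class used for unauthenticated connections.
 */
#if 0
    struct rx_securityClass *secobj = rxnull_NewClientSecurityObject();
    struct rx_connection *tconn;

    tconn = rx_NewConnection(htonl(0x7f000001), htons(7000), 4711, secobj, 0);
    rx_SetConnDeadTime(tconn, 50);
    /* ... make calls on tconn ... */
    rx_DestroyConnection(tconn);
#endif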
/*!
 * Ensure a connection's timeout values are valid.
 *
 * @param[in] conn The connection to check
 *
 * @post conn->secondsUntilDead <= conn->idleDeadTime <= conn->hardDeadTime,
 *       unless idleDeadTime and/or hardDeadTime are not set
 */
static void
rxi_CheckConnTimeouts(struct rx_connection *conn)
{
    /* a connection's timeouts must have the relationship
     * deadTime <= idleDeadTime <= hardDeadTime.  Otherwise, for example, a
     * total loss of network to a peer may cause an idle timeout instead of a
     * dead timeout, simply because the idle timeout gets hit first.  Also set
     * a minimum deadTime of 6, just to ensure it doesn't get set too low. */
    /* this logic is slightly complicated by the fact that
     * idleDeadTime/hardDeadTime may not be set at all, but it's not too bad.
     */
    conn->secondsUntilDead = MAX(conn->secondsUntilDead, 6);
    if (conn->idleDeadTime) {
	conn->idleDeadTime = MAX(conn->idleDeadTime, conn->secondsUntilDead);
    }
    if (conn->hardDeadTime) {
	if (conn->idleDeadTime) {
	    conn->hardDeadTime = MAX(conn->idleDeadTime, conn->hardDeadTime);
	} else {
	    conn->hardDeadTime = MAX(conn->secondsUntilDead, conn->hardDeadTime);
	}
    }
}
void
rx_SetConnDeadTime(struct rx_connection *conn, int seconds)
{
    /* The idea is to set the dead time to a value that allows several
     * keepalives to be dropped without timing out the connection. */
    conn->secondsUntilDead = seconds;
    rxi_CheckConnTimeouts(conn);
    conn->secondsUntilPing = conn->secondsUntilDead / 6;
}

void
rx_SetConnHardDeadTime(struct rx_connection *conn, int seconds)
{
    conn->hardDeadTime = seconds;
    rxi_CheckConnTimeouts(conn);
}

void
rx_SetConnIdleDeadTime(struct rx_connection *conn, int seconds)
{
    conn->idleDeadTime = seconds;
    conn->idleDeadDetection = (seconds ? 1 : 0);
    rxi_CheckConnTimeouts(conn);
}
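/*
 * Illustrative sketch (not compiled): rxi_CheckConnTimeouts preserves the
 * invariant secondsUntilDead <= idleDeadTime <= hardDeadTime, so values
 * (in seconds) that violate it are adjusted upward as needed.
 */
#if 0
    rx_SetConnDeadTime(tconn, 50);	/* also derives secondsUntilPing */
    rx_SetConnIdleDeadTime(tconn, 60);
    rx_SetConnHardDeadTime(tconn, 120);
#endif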
int rxi_lowPeerRefCount = 0;
int rxi_lowConnRefCount = 0;

/*
 * Cleanup a connection that was destroyed in rxi_DestroyConnectionNoLock.
 * NOTE: must not be called with rx_connHashTable_lock held.
 */
static void
rxi_CleanupConnection(struct rx_connection *conn)
{
    /* Notify the service exporter, if requested, that this connection
     * is being destroyed */
    if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
	(*conn->service->destroyConnProc) (conn);

    /* Notify the security module that this connection is being destroyed */
    RXS_DestroyConnection(conn->securityObject, conn);

    /* If this is the last connection using the rx_peer struct, set its
     * idle time to now.  rxi_ReapConnections will reap it if it's still
     * idle (refCount == 0) after rx_idlePeerTime (60 seconds) have passed.
     */
    MUTEX_ENTER(&rx_peerHashTable_lock);
    if (conn->peer->refCount < 2) {
	conn->peer->idleWhen = clock_Sec();
	if (conn->peer->refCount < 1) {
	    conn->peer->refCount = 1;
	    if (rx_stats_active) {
		MUTEX_ENTER(&rx_stats_mutex);
		rxi_lowPeerRefCount++;
		MUTEX_EXIT(&rx_stats_mutex);
	    }
	}
    }
    conn->peer->refCount--;
    MUTEX_EXIT(&rx_peerHashTable_lock);
    if (rx_stats_active) {
	if (conn->type == RX_SERVER_CONNECTION)
	    rx_atomic_dec(&rx_stats.nServerConns);
	else
	    rx_atomic_dec(&rx_stats.nClientConns);
    }

#ifndef KERNEL
    if (conn->specific) {
	int i;
	for (i = 0; i < conn->nSpecific; i++) {
	    if (conn->specific[i] && rxi_keyCreate_destructor[i])
		(*rxi_keyCreate_destructor[i]) (conn->specific[i]);
	    conn->specific[i] = NULL;
	}
	free(conn->specific);
    }
    conn->specific = NULL;
    conn->nSpecific = 0;
#endif /* !KERNEL */

    MUTEX_DESTROY(&conn->conn_call_lock);
    MUTEX_DESTROY(&conn->conn_data_lock);
    CV_DESTROY(&conn->conn_call_cv);

    rxi_FreeConnection(conn);
}
/* Destroy the specified connection */
void
rxi_DestroyConnection(struct rx_connection *conn)
{
    MUTEX_ENTER(&rx_connHashTable_lock);
    rxi_DestroyConnectionNoLock(conn);
    /* conn should be at the head of the cleanup list */
    if (conn == rx_connCleanup_list) {
	rx_connCleanup_list = rx_connCleanup_list->next;
	MUTEX_EXIT(&rx_connHashTable_lock);
	rxi_CleanupConnection(conn);
    }
#ifdef RX_ENABLE_LOCKS
    else {
	MUTEX_EXIT(&rx_connHashTable_lock);
    }
#endif /* RX_ENABLE_LOCKS */
}
static void
rxi_DestroyConnectionNoLock(struct rx_connection *conn)
{
    struct rx_connection **conn_ptr;
    int havecalls = 0;
    struct rx_packet *packet;
    int i;
    SPLVAR;

    clock_NewTime();

    NETPRI;
    MUTEX_ENTER(&conn->conn_data_lock);
    MUTEX_ENTER(&rx_refcnt_mutex);
    if (conn->refCount > 0)
	conn->refCount--;
    else {
	if (rx_stats_active) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rxi_lowConnRefCount++;
	    MUTEX_EXIT(&rx_stats_mutex);
	}
    }

    if ((conn->refCount > 0) || (conn->flags & RX_CONN_BUSY)) {
	/* Busy; wait till the last guy before proceeding */
	MUTEX_EXIT(&rx_refcnt_mutex);
	MUTEX_EXIT(&conn->conn_data_lock);
	USERPRI;
	return;
    }
    /* If the client previously called rx_NewCall, but it is still
     * waiting, treat this as a running call, and wait to destroy the
     * connection later when the call completes. */
    if ((conn->type == RX_CLIENT_CONNECTION)
	&& (conn->flags & (RX_CONN_MAKECALL_WAITING|RX_CONN_MAKECALL_ACTIVE))) {
	conn->flags |= RX_CONN_DESTROY_ME;
	MUTEX_EXIT(&conn->conn_data_lock);
	MUTEX_EXIT(&rx_refcnt_mutex);
	USERPRI;
	return;
    }

    MUTEX_EXIT(&rx_refcnt_mutex);
    MUTEX_EXIT(&conn->conn_data_lock);
    /* Check for extant references to this connection */
    MUTEX_ENTER(&conn->conn_call_lock);
    for (i = 0; i < RX_MAXCALLS; i++) {
	struct rx_call *call = conn->call[i];
	if (call) {
	    havecalls = 1;
	    if (conn->type == RX_CLIENT_CONNECTION) {
		MUTEX_ENTER(&call->lock);
		if (call->delayedAckEvent) {
		    /* Push the final acknowledgment out now--there
		     * won't be a subsequent call to acknowledge the
		     * last reply packets */
		    rxi_CancelDelayedAckEvent(call);
		    if (call->state == RX_STATE_PRECALL
			|| call->state == RX_STATE_ACTIVE) {
			rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
		    } else {
			rxi_AckAll(call);
		    }
		}
		MUTEX_EXIT(&call->lock);
	    }
	}
    }
    MUTEX_EXIT(&conn->conn_call_lock);
#ifdef RX_ENABLE_LOCKS
    if (!havecalls) {
	if (MUTEX_TRYENTER(&conn->conn_data_lock)) {
	    MUTEX_EXIT(&conn->conn_data_lock);
	} else {
	    /* Someone is accessing a packet right now. */
	    havecalls = 1;
	}
    }
#endif /* RX_ENABLE_LOCKS */

    if (havecalls) {
	/* Don't destroy the connection if there are any call
	 * structures still in use */
	MUTEX_ENTER(&conn->conn_data_lock);
	conn->flags |= RX_CONN_DESTROY_ME;
	MUTEX_EXIT(&conn->conn_data_lock);
	USERPRI;
	return;
    }
    if (conn->natKeepAliveEvent) {
	rxi_NatKeepAliveOff(conn);
    }

    if (conn->delayedAbortEvent) {
	rxevent_Cancel(&conn->delayedAbortEvent);
	packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
	if (packet) {
	    MUTEX_ENTER(&conn->conn_data_lock);
	    rxi_SendConnectionAbort(conn, packet, 0, 1);
	    MUTEX_EXIT(&conn->conn_data_lock);
	    rxi_FreePacket(packet);
	}
    }
    /* Remove from connection hash table before proceeding */
    conn_ptr =
	&rx_connHashTable[CONN_HASH
			  (peer->host, peer->port, conn->cid, conn->epoch,
			   conn->type)];
    for (; *conn_ptr; conn_ptr = &(*conn_ptr)->next) {
	if (*conn_ptr == conn) {
	    *conn_ptr = conn->next;
	    break;
	}
    }
    /* if the conn that we are destroying was the last connection, then we
     * clear rxLastConn as well */
    if (rxLastConn == conn)
	rxLastConn = 0;

    /* Make sure the connection is completely reset before deleting it. */
    /* get rid of pending events that could zap us later */
    rxevent_Cancel(&conn->challengeEvent);
    rxevent_Cancel(&conn->checkReachEvent);
    rxevent_Cancel(&conn->natKeepAliveEvent);
    /* Add the connection to the list of destroyed connections that
     * need to be cleaned up.  This is necessary to avoid deadlocks
     * in the routines we call to inform others that this connection is
     * being destroyed. */
    conn->next = rx_connCleanup_list;
    rx_connCleanup_list = conn;
}
/* Externally available version */
void
rx_DestroyConnection(struct rx_connection *conn)
{
    SPLVAR;

    NETPRI;
    rxi_DestroyConnection(conn);
    USERPRI;
}

void
rx_GetConnection(struct rx_connection *conn)
{
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_refcnt_mutex);
    conn->refCount++;
    MUTEX_EXIT(&rx_refcnt_mutex);
    USERPRI;
}
#ifdef RX_ENABLE_LOCKS
/* Wait for the transmit queue to no longer be busy.
 * requires the call->lock to be held */
void
rxi_WaitforTQBusy(struct rx_call *call) {
    while (!call->error && (call->flags & RX_CALL_TQ_BUSY)) {
	call->flags |= RX_CALL_TQ_WAIT;
	call->tqWaiters++;
	MUTEX_ASSERT(&call->lock);
	CV_WAIT(&call->cv_tq, &call->lock);
	call->tqWaiters--;
	if (call->tqWaiters == 0) {
	    call->flags &= ~RX_CALL_TQ_WAIT;
	}
    }
}
#endif /* RX_ENABLE_LOCKS */
static void
rxi_WakeUpTransmitQueue(struct rx_call *call)
{
    if (call->tqWaiters || (call->flags & RX_CALL_TQ_WAIT)) {
	dpf(("call %"AFS_PTR_FMT" has %d waiters and flags %d\n",
	     call, call->tqWaiters, call->flags));
#ifdef RX_ENABLE_LOCKS
	MUTEX_ASSERT(&call->lock);
	CV_BROADCAST(&call->cv_tq);
#else /* RX_ENABLE_LOCKS */
	osi_rxWakeup(&call->tq);
#endif /* RX_ENABLE_LOCKS */
    }
}
/* Start a new rx remote procedure call, on the specified connection.
 * If wait is set to 1, wait for a free call channel; otherwise return
 * 0.  Maxtime gives the maximum number of seconds this call may take,
 * after rx_NewCall returns.  After this time interval, a call to any
 * of rx_SendData, rx_ReadData, etc. will fail with RX_CALL_TIMEOUT.
 * For fine grain locking, we hold the conn_call_lock in order to
 * ensure that we don't get signalled after we found a call in an active
 * state and before we go to sleep.
 */
struct rx_call *
rx_NewCall(struct rx_connection *conn)
{
    int i, wait, ignoreBusy = 1;
    struct rx_call *call;
    struct clock queueTime;
    afs_uint32 leastBusy = 0;
    SPLVAR;

    clock_NewTime();
    dpf(("rx_NewCall(conn %"AFS_PTR_FMT")\n", conn));

    NETPRI;
    clock_GetTime(&queueTime);
    /*
     * Check if there are others waiting for a new call.
     * If so, let them go first to avoid starving them.
     * This is a fairly simple scheme, and might not be
     * a complete solution for large numbers of waiters.
     *
     * makeCallWaiters keeps track of the number of
     * threads waiting to make calls and the
     * RX_CONN_MAKECALL_WAITING flag bit is used to
     * indicate that there are indeed calls waiting.
     * The flag is set when the waiter is incremented.
     * It is only cleared when makeCallWaiters is 0.
     * This prevents us from accidentally destroying the
     * connection while it is potentially about to be used.
     */
    MUTEX_ENTER(&conn->conn_call_lock);
    MUTEX_ENTER(&conn->conn_data_lock);
    while (conn->flags & RX_CONN_MAKECALL_ACTIVE) {
	conn->flags |= RX_CONN_MAKECALL_WAITING;
	conn->makeCallWaiters++;
	MUTEX_EXIT(&conn->conn_data_lock);

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
#else
	osi_rxSleep(conn);
#endif
	MUTEX_ENTER(&conn->conn_data_lock);
	conn->makeCallWaiters--;
	if (conn->makeCallWaiters == 0)
	    conn->flags &= ~RX_CONN_MAKECALL_WAITING;
    }

    /* We are now the active thread in rx_NewCall */
    conn->flags |= RX_CONN_MAKECALL_ACTIVE;
    MUTEX_EXIT(&conn->conn_data_lock);
    for (;;) {
	wait = 1;

	for (i = 0; i < RX_MAXCALLS; i++) {
	    call = conn->call[i];
	    if (call) {
		if (!ignoreBusy && conn->lastBusy[i] != leastBusy) {
		    /* we're not ignoring busy call slots; only look at the
		     * call slot that is the "least" busy */
		    continue;
		}

		if (call->state == RX_STATE_DALLY) {
		    MUTEX_ENTER(&call->lock);
		    if (call->state == RX_STATE_DALLY) {
			if (ignoreBusy && conn->lastBusy[i]) {
			    /* if we're ignoring busy call slots, skip any ones that
			     * have lastBusy set */
			    if (leastBusy == 0 || conn->lastBusy[i] < leastBusy) {
				leastBusy = conn->lastBusy[i];
			    }
			    MUTEX_EXIT(&call->lock);
			    continue;
			}
			/*
			 * We are setting the state to RX_STATE_RESET to
			 * ensure that no one else will attempt to use this
			 * call once we drop the conn->conn_call_lock and
			 * call->lock.  We must drop the conn->conn_call_lock
			 * before calling rxi_ResetCall because the process
			 * of clearing the transmit queue can block for an
			 * extended period of time.  If we block while holding
			 * the conn->conn_call_lock, then all rx_EndCall
			 * processing will block as well.  This has a detrimental
			 * effect on overall system performance.
			 */
			call->state = RX_STATE_RESET;
			(*call->callNumber)++;
			MUTEX_EXIT(&conn->conn_call_lock);
			CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
			rxi_ResetCall(call, 0);
			if (MUTEX_TRYENTER(&conn->conn_call_lock))
			    break;
			/*
			 * If we failed to be able to safely obtain the
			 * conn->conn_call_lock we will have to drop the
			 * call->lock to avoid a deadlock.  When the call->lock
			 * is released the state of the call can change.  If it
			 * is no longer RX_STATE_RESET then some other thread is
			 * using the call.
			 */
			MUTEX_EXIT(&call->lock);
			MUTEX_ENTER(&conn->conn_call_lock);
			MUTEX_ENTER(&call->lock);

			if (call->state == RX_STATE_RESET)
			    break;

			/*
			 * If we get here it means that after dropping
			 * the conn->conn_call_lock and call->lock that
			 * the call is no longer ours.  If we can't find
			 * a free call in the remaining slots we should
			 * not go immediately to RX_CONN_MAKECALL_WAITING
			 * because by dropping the conn->conn_call_lock
			 * we have given up synchronization with rx_EndCall.
			 * Instead, cycle through one more time to see if
			 * we can find a call that we can call our own.
			 */
			CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
			break;
		    }
		    MUTEX_EXIT(&call->lock);
		}
	    } else {
		if (ignoreBusy && conn->lastBusy[i]) {
		    /* if we're ignoring busy call slots, skip any ones that
		     * have lastBusy set */
		    if (leastBusy == 0 || conn->lastBusy[i] < leastBusy) {
			leastBusy = conn->lastBusy[i];
		    }
		    continue;
		}

		/* rxi_NewCall returns with mutex locked */
		call = rxi_NewCall(conn, i);
		CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
		break;
	    }
	}
	if (i < RX_MAXCALLS) {
	    conn->lastBusy[i] = 0;
	    call->flags &= ~RX_CALL_PEER_BUSY;
	    break;
	}
	if (!wait)
	    break;
	if (leastBusy && ignoreBusy) {
	    /* we didn't find a usable call slot, but we did see at least one
	     * 'busy' slot; look again and only use the slot with the 'least
	     * busy' value */
	    ignoreBusy = 0;
	    continue;
	}

	MUTEX_ENTER(&conn->conn_data_lock);
	conn->flags |= RX_CONN_MAKECALL_WAITING;
	conn->makeCallWaiters++;
	MUTEX_EXIT(&conn->conn_data_lock);

#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&conn->conn_call_cv, &conn->conn_call_lock);
#else
	osi_rxSleep(conn);
#endif
	MUTEX_ENTER(&conn->conn_data_lock);
	conn->makeCallWaiters--;
	if (conn->makeCallWaiters == 0)
	    conn->flags &= ~RX_CONN_MAKECALL_WAITING;
	MUTEX_EXIT(&conn->conn_data_lock);
    }
    /* Client is initially in send mode */
    call->state = RX_STATE_ACTIVE;
    call->error = conn->error;
    if (call->error)
	call->app.mode = RX_MODE_ERROR;
    else
	call->app.mode = RX_MODE_SENDING;

#ifdef AFS_RXERRQ_ENV
    /* remember how many network errors the peer has when we started, so if
     * more errors are encountered after the call starts, we know the other
     * endpoint won't be responding to us */
    call->neterr_gen = rx_atomic_read(&conn->peer->neterrs);
#endif
    /* remember start time for call in case we have hard dead time limit */
    call->queueTime = queueTime;
    clock_GetTime(&call->startTime);
    call->app.bytesSent = 0;
    call->app.bytesRcvd = 0;

    /* Turn on busy protocol. */
    rxi_KeepAliveOn(call);

    /* Attempt MTU discovery */
    rxi_GrowMTUOn(call);

    /*
     * We are no longer the active thread in rx_NewCall
     */
    MUTEX_ENTER(&conn->conn_data_lock);
    conn->flags &= ~RX_CONN_MAKECALL_ACTIVE;
    MUTEX_EXIT(&conn->conn_data_lock);
    /*
     * Wake up anyone else who might be giving us a chance to
     * run (see code above that avoids resource starvation).
     */
#ifdef RX_ENABLE_LOCKS
    if (call->flags & (RX_CALL_TQ_BUSY | RX_CALL_TQ_CLEARME)) {
	osi_Panic("rx_NewCall call about to be used without an empty tq");
    }

    CV_BROADCAST(&conn->conn_call_cv);
#else
    osi_rxWakeup(conn);
#endif
    MUTEX_EXIT(&conn->conn_call_lock);
    MUTEX_EXIT(&call->lock);
    USERPRI;

    dpf(("rx_NewCall(call %"AFS_PTR_FMT")\n", call));
    return call;
}
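/*
 * Illustrative call sketch (not compiled): one client round trip built on
 * rx_NewCall.  rx_Write/rx_Read are the stream routines from rx_rdwr.c;
 * the buffer sizes and error handling here are schematic only.
 */
#if 0
    struct rx_call *tcall = rx_NewCall(tconn);
    char buf[128];
    afs_int32 code;

    rx_Write(tcall, buf, sizeof(buf));	/* send the request */
    rx_Read(tcall, buf, sizeof(buf));	/* collect the reply */
    code = rx_EndCall(tcall, 0);	/* 0: no application-level error */
#endif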
static int
rxi_HasActiveCalls(struct rx_connection *aconn)
{
    int i;
    struct rx_call *tcall;
    SPLVAR;

    NETPRI;
    for (i = 0; i < RX_MAXCALLS; i++) {
	if ((tcall = aconn->call[i])) {
	    if ((tcall->state == RX_STATE_ACTIVE)
		|| (tcall->state == RX_STATE_PRECALL)) {
		USERPRI;
		return 1;
	    }
	}
    }
    USERPRI;
    return 0;
}
int
rxi_GetCallNumberVector(struct rx_connection *aconn,
			afs_int32 * aint32s)
{
    int i;
    struct rx_call *tcall;
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&aconn->conn_call_lock);
    for (i = 0; i < RX_MAXCALLS; i++) {
	if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
	    aint32s[i] = aconn->callNumber[i] + 1;
	else
	    aint32s[i] = aconn->callNumber[i];
    }
    MUTEX_EXIT(&aconn->conn_call_lock);
    USERPRI;
    return 0;
}
int
rxi_SetCallNumberVector(struct rx_connection *aconn,
			afs_int32 * aint32s)
{
    int i;
    struct rx_call *tcall;
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&aconn->conn_call_lock);
    for (i = 0; i < RX_MAXCALLS; i++) {
	if ((tcall = aconn->call[i]) && (tcall->state == RX_STATE_DALLY))
	    aconn->callNumber[i] = aint32s[i] - 1;
	else
	    aconn->callNumber[i] = aint32s[i];
    }
    MUTEX_EXIT(&aconn->conn_call_lock);
    USERPRI;
    return 0;
}
/* Advertise a new service.  A service is named locally by a UDP port
 * number plus a 16-bit service id.  Returns (struct rx_service *) 0
 * on error.
 * serviceName: name for identification purposes (e.g. the service name
 * might be used for probing for statistics) */
struct rx_service *
rx_NewServiceHost(afs_uint32 host, u_short port, u_short serviceId,
		  char *serviceName, struct rx_securityClass **securityObjects,
		  int nSecurityObjects,
		  afs_int32(*serviceProc) (struct rx_call * acall))
{
    osi_socket socket = OSI_NULLSOCKET;
    struct rx_service *tservice;
    int i;
    SPLVAR;

    clock_NewTime();

    if (serviceId == 0) {
	(osi_Msg
	 "rx_NewService: service id for service %s must be non-zero.\n",
	 serviceName);
	return 0;
    }
    if (port == 0) {
	if (rx_port == 0) {
	    (osi_Msg
	     "rx_NewService: A non-zero port must be specified on this call if a non-zero port was not provided at Rx initialization (service %s).\n",
	     serviceName);
	    return 0;
	}
	port = rx_port;
	socket = rx_socket;
    }
    tservice = rxi_AllocService();
    NETPRI;

    MUTEX_INIT(&tservice->svc_data_lock, "svc data lock", MUTEX_DEFAULT, 0);

    for (i = 0; i < RX_MAX_SERVICES; i++) {
	struct rx_service *service = rx_services[i];
	if (service) {
	    if (port == service->servicePort && host == service->serviceHost) {
		if (service->serviceId == serviceId) {
		    /* The identical service has already been
		     * installed; if the caller was intending to
		     * change the security classes used by this
		     * service, he/she loses. */
		    (osi_Msg
		     "rx_NewService: tried to install service %s with service id %d, which is already in use for service %s\n",
		     serviceName, serviceId, service->serviceName);
		    USERPRI;
		    rxi_FreeService(tservice);
		    return service;
		}
		/* Different service, same port: re-use the socket
		 * which is bound to the same port */
		socket = service->socket;
	    }
	} else {
	    if (socket == OSI_NULLSOCKET) {
		/* If we don't already have a socket (from another
		 * service on same port) get a new one */
		socket = rxi_GetHostUDPSocket(host, port);
		if (socket == OSI_NULLSOCKET) {
		    USERPRI;
		    rxi_FreeService(tservice);
		    return 0;
		}
	    }
	    service = tservice;
	    service->socket = socket;
	    service->serviceHost = host;
	    service->servicePort = port;
	    service->serviceId = serviceId;
	    service->serviceName = serviceName;
	    service->nSecurityObjects = nSecurityObjects;
	    service->securityObjects = securityObjects;
	    service->minProcs = 0;
	    service->maxProcs = 1;
	    service->idleDeadTime = 60;
	    service->idleDeadErr = 0;
	    service->connDeadTime = rx_connDeadTime;
	    service->executeRequestProc = serviceProc;
	    service->checkReach = 0;
	    service->nSpecific = 0;
	    service->specific = NULL;
	    rx_services[i] = service;	/* not visible until now */
	    USERPRI;
	    return service;
	}
    }
    USERPRI;
    rxi_FreeService(tservice);
    (osi_Msg "rx_NewService: cannot support > %d services\n",
     RX_MAX_SERVICES);
    return 0;
}
/* Set configuration options for all of a service's security objects */

afs_int32
rx_SetSecurityConfiguration(struct rx_service *service,
			    rx_securityConfigVariables type,
			    void *value)
{
    int i;

    for (i = 0; i < service->nSecurityObjects; i++) {
	if (service->securityObjects[i]) {
	    RXS_SetConfiguration(service->securityObjects[i], NULL, type,
				 value, NULL);
	}
    }
    return 0;
}
struct rx_service *
rx_NewService(u_short port, u_short serviceId, char *serviceName,
	      struct rx_securityClass **securityObjects, int nSecurityObjects,
	      afs_int32(*serviceProc) (struct rx_call * acall))
{
    return rx_NewServiceHost(htonl(INADDR_ANY), port, serviceId, serviceName,
			     securityObjects, nSecurityObjects, serviceProc);
}
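/*
 * Illustrative server sketch (not compiled): advertise one service and
 * donate this thread to the pool.  The port, service id/name and
 * ExampleExecuteRequest are hypothetical stand-ins for values normally
 * generated by rxgen.
 */
#if 0
    struct rx_securityClass *secobjs[1];
    struct rx_service *svc;

    rx_Init(htons(7000));
    secobjs[0] = rxnull_NewServerSecurityObject();
    svc = rx_NewService(0, 4711, "example", secobjs, 1, ExampleExecuteRequest);
    if (svc) {
	rx_SetMinProcs(svc, 2);
	rx_SetMaxProcs(svc, 8);
	rx_StartServer(1);	/* donateMe != 0: this call never returns */
    }
#endif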
/* Generic request processing loop.  This routine should be called
 * by the implementation dependent rx_ServerProc.  If socketp is
 * non-null, it will be set to the file descriptor that this thread
 * is now listening on.  If socketp is null, this routine will never
 * return. */
void
rxi_ServerProc(int threadID, struct rx_call *newcall, osi_socket * socketp)
{
    struct rx_call *call;
    afs_int32 code;
    struct rx_service *tservice = NULL;

    for (;;) {
	if (newcall) {
	    call = newcall;
	    newcall = NULL;
	} else {
	    call = rx_GetCall(threadID, tservice, socketp);
	    if (socketp && *socketp != OSI_NULLSOCKET) {
		/* We are now a listener thread */
		return;
	    }
	}
#ifdef KERNEL
	if (afs_termState == AFSOP_STOP_RXCALLBACK) {
#ifdef RX_ENABLE_LOCKS
	    AFS_GLOCK();
#endif /* RX_ENABLE_LOCKS */
	    afs_termState = AFSOP_STOP_AFS;
	    afs_osi_Wakeup(&afs_termState);
#ifdef RX_ENABLE_LOCKS
	    AFS_GUNLOCK();
#endif /* RX_ENABLE_LOCKS */
	    return;
	}
#endif
	/* if the server is restarting (typically a smooth shutdown) then do
	 * not allow any new calls.
	 */

	if (rx_tranquil && (call != NULL)) {
	    SPLVAR;

	    NETPRI;
	    MUTEX_ENTER(&call->lock);

	    rxi_CallError(call, RX_RESTARTING);
	    rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);

	    MUTEX_EXIT(&call->lock);
	    USERPRI;
	    continue;
	}
	tservice = call->conn->service;

	if (tservice->beforeProc)
	    (*tservice->beforeProc) (call);

	code = tservice->executeRequestProc(call);

	if (tservice->afterProc)
	    (*tservice->afterProc) (call, code);

	rx_EndCall(call, code);

	if (tservice->postProc)
	    (*tservice->postProc) (code);

	if (rx_stats_active) {
	    MUTEX_ENTER(&rx_stats_mutex);
	    rxi_nCalls++;
	    MUTEX_EXIT(&rx_stats_mutex);
	}
    }
}
void
rx_WakeupServerProcs(void)
{
    struct rx_serverQueueEntry *np, *tqp;
    struct opr_queue *cursor;
    SPLVAR;

    NETPRI;
    MUTEX_ENTER(&rx_serverPool_lock);

#ifdef RX_ENABLE_LOCKS
    if (rx_waitForPacket)
	CV_BROADCAST(&rx_waitForPacket->cv);
#else /* RX_ENABLE_LOCKS */
    if (rx_waitForPacket)
	osi_rxWakeup(rx_waitForPacket);
#endif /* RX_ENABLE_LOCKS */
    MUTEX_ENTER(&freeSQEList_lock);
    for (np = rx_FreeSQEList; np; np = tqp) {
	tqp = *(struct rx_serverQueueEntry **)np;
#ifdef RX_ENABLE_LOCKS
	CV_BROADCAST(&np->cv);
#else /* RX_ENABLE_LOCKS */
	osi_rxWakeup(np);
#endif /* RX_ENABLE_LOCKS */
    }
    MUTEX_EXIT(&freeSQEList_lock);
    for (opr_queue_Scan(&rx_idleServerQueue, cursor)) {
	np = opr_queue_Entry(cursor, struct rx_serverQueueEntry, entry);
#ifdef RX_ENABLE_LOCKS
	CV_BROADCAST(&np->cv);
#else /* RX_ENABLE_LOCKS */
	osi_rxWakeup(np);
#endif /* RX_ENABLE_LOCKS */
    }
    MUTEX_EXIT(&rx_serverPool_lock);
    USERPRI;
}
/* meltdown:
 * One thing that seems to happen is that all the server threads get
 * tied up on some empty or slow call, and then a whole bunch of calls
 * arrive at once, using up the packet pool, so now there are more
 * empty calls.  The most critical resources here are server threads
 * and the free packet pool.  The "doreclaim" code seems to help in
 * general.  I think that eventually we arrive in this state: there
 * are lots of pending calls which do have all their packets present,
 * so they won't be reclaimed, are multi-packet calls, so they won't
 * be scheduled until later, and thus are tying up most of the free
 * packet pool for a very long time.
 * future options:
 * 1.  schedule multi-packet calls if all the packets are present.
 * Probably CPU-bound operation, useful to return packets to pool.
 * Do what if there is a full window, but the last packet isn't here?
 * 3.  preserve one thread which *only* runs "best" calls, otherwise
 * it sleeps and waits for that type of call.
 * 4.  Don't necessarily reserve a whole window for each thread.  In fact,
 * the current dataquota business is badly broken.  The quota isn't adjusted
 * to reflect how many packets are presently queued for a running call.
 * So, when we schedule a queued call with a full window of packets queued
 * up for it, that *should* free up a window full of packets for other
 * second-class calls to be able to use from the packet pool.  But it doesn't.
 *
 * NB.  Most of the time, this code doesn't run -- since idle server threads
 * sit on the idle server queue and are assigned by "...ReceivePacket" as soon
 * as a new call arrives.
 */
/* Sleep until a call arrives.  Returns a pointer to the call, ready
 * for an rx_Read. */
#ifdef RX_ENABLE_LOCKS
struct rx_call *
rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
{
    struct rx_serverQueueEntry *sq;
    struct rx_call *call = (struct rx_call *)0;
    struct rx_service *service = NULL;

    MUTEX_ENTER(&freeSQEList_lock);

    if ((sq = rx_FreeSQEList)) {
	rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
	MUTEX_EXIT(&freeSQEList_lock);
    } else {			/* otherwise allocate a new one and return that */
	MUTEX_EXIT(&freeSQEList_lock);
	sq = rxi_Alloc(sizeof(struct rx_serverQueueEntry));
	MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
	CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
    }
    MUTEX_ENTER(&rx_serverPool_lock);
    if (cur_service != NULL) {
	ReturnToServerPool(cur_service);
    }
    while (1) {
	if (!opr_queue_IsEmpty(&rx_incomingCallQueue)) {
	    struct rx_call *tcall, *choice2 = NULL;
	    struct opr_queue *cursor;
2063 /* Scan for eligible incoming calls. A call is not eligible
2064 * if the maximum number of calls for its service type are
2065 * already executing */
2066 /* One thread will process calls FCFS (to prevent starvation),
2067 * while the other threads may run ahead looking for calls which
2068 * have all their input data available immediately. This helps
2069 * keep threads from blocking, waiting for data from the client. */
2070 for (opr_queue_Scan(&rx_incomingCallQueue, cursor)) {
2071 tcall = opr_queue_Entry(cursor, struct rx_call, entry);
2073 service = tcall->conn->service;
2074 if (!QuotaOK(service)) {
2077 MUTEX_ENTER(&rx_pthread_mutex);
2078 if (tno == rxi_fcfs_thread_num
2079 || opr_queue_IsEnd(&rx_incomingCallQueue, cursor)) {
2080 MUTEX_EXIT(&rx_pthread_mutex);
2081 /* If we're the fcfs thread, then we'll just use
2082 * this call. If we haven't been able to find an optimal
2083 * choice, and we're at the end of the list, then use a
2084 * 2d choice if one has been identified. Otherwise... */
2085 call = (choice2 ? choice2 : tcall);
2086 service = call->conn->service;
2088 MUTEX_EXIT(&rx_pthread_mutex);
2089 if (!opr_queue_IsEmpty(&tcall->rq)) {
2090 struct rx_packet *rp;
2091 rp = opr_queue_First(&tcall->rq, struct rx_packet,
2093 if (rp->header.seq == 1) {
2095 || (rp->header.flags & RX_LAST_PACKET)) {
2097 } else if (rxi_2dchoice && !choice2
2098 && !(tcall->flags & RX_CALL_CLEARED)
2099 && (tcall->rprev > rxi_HardAckRate)) {
2109 ReturnToServerPool(service);
2115 opr_queue_Remove(&call->entry);
2116 MUTEX_EXIT(&rx_serverPool_lock);
2117 MUTEX_ENTER(&call->lock);
2119 if (call->flags & RX_CALL_WAIT_PROC) {
2120 call->flags &= ~RX_CALL_WAIT_PROC;
2121 rx_atomic_dec(&rx_nWaiting);
2124 if (call->state != RX_STATE_PRECALL || call->error) {
2125 MUTEX_EXIT(&call->lock);
2126 MUTEX_ENTER(&rx_serverPool_lock);
2127 ReturnToServerPool(service);
2132 if (opr_queue_IsEmpty(&call->rq)
2133 || opr_queue_First(&call->rq, struct rx_packet, entry)->header.seq != 1)
2134 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
2136 CLEAR_CALL_QUEUE_LOCK(call);
2139 /* If there are no eligible incoming calls, add this process
2140 * to the idle server queue, to wait for one */
2144 *socketp = OSI_NULLSOCKET;
2146 sq->socketp = socketp;
2147 opr_queue_Append(&rx_idleServerQueue, &sq->entry);
2148 #ifndef AFS_AIX41_ENV
2149 rx_waitForPacket = sq;
2150 #else /* AFS_AIX41_ENV */
2151 rx_waitingForPacket = sq;
2152 #endif /* AFS_AIX41_ENV */
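    /* Classic condition-variable wait: CV_WAIT atomically drops
     * rx_serverPool_lock while sleeping and retakes it on wakeup, and
     * the surrounding loop re-tests the predicate (sq->newcall /
     * *socketp) because a broadcast does not guarantee that this
     * particular entry was satisfied. */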
2154 CV_WAIT(&sq->cv, &rx_serverPool_lock);
2156 if (afs_termState == AFSOP_STOP_RXCALLBACK) {
2157 MUTEX_EXIT(&rx_serverPool_lock);
2158 return (struct rx_call *)0;
2161 } while (!(call = sq->newcall)
2162 && !(socketp && *socketp != OSI_NULLSOCKET));
2163 MUTEX_EXIT(&rx_serverPool_lock);
2165 MUTEX_ENTER(&call->lock);
2171 MUTEX_ENTER(&freeSQEList_lock);
2172 *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
2173 rx_FreeSQEList = sq;
2174 MUTEX_EXIT(&freeSQEList_lock);
2177 clock_GetTime(&call->startTime);
2178 call->state = RX_STATE_ACTIVE;
2179 call->app.mode = RX_MODE_RECEIVING;
2180 #ifdef RX_KERNEL_TRACE
2181 if (ICL_SETACTIVE(afs_iclSetp)) {
2182 int glockOwner = ISAFS_GLOCK();
2185 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
2186 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
2193 rxi_calltrace(RX_CALL_START, call);
2194 dpf(("rx_GetCall(port=%d, service=%d) ==> call %"AFS_PTR_FMT"\n",
2195 call->conn->service->servicePort, call->conn->service->serviceId,
2198 MUTEX_EXIT(&call->lock);
2199 CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
2201 dpf(("rx_GetCall(socketp=%p, *socketp=0x%x)\n", socketp, *socketp));
2206 #else /* RX_ENABLE_LOCKS */
2208 rx_GetCall(int tno, struct rx_service *cur_service, osi_socket * socketp)
2210 struct rx_serverQueueEntry *sq;
2211 struct rx_call *call = (struct rx_call *)0, *choice2;
2212 struct rx_service *service = NULL;
2216 MUTEX_ENTER(&freeSQEList_lock);
2218 if ((sq = rx_FreeSQEList)) {
2219 rx_FreeSQEList = *(struct rx_serverQueueEntry **)sq;
2220 MUTEX_EXIT(&freeSQEList_lock);
2221 } else { /* otherwise allocate a new one and return that */
2222 MUTEX_EXIT(&freeSQEList_lock);
2223 sq = rxi_Alloc(sizeof(struct rx_serverQueueEntry));
2224 MUTEX_INIT(&sq->lock, "server Queue lock", MUTEX_DEFAULT, 0);
2225 CV_INIT(&sq->cv, "server Queue lock", CV_DEFAULT, 0);
2227 MUTEX_ENTER(&sq->lock);
2229 if (cur_service != NULL) {
2230 cur_service->nRequestsRunning--;
2231 MUTEX_ENTER(&rx_quota_mutex);
2232 if (cur_service->nRequestsRunning < cur_service->minProcs)
2235 MUTEX_EXIT(&rx_quota_mutex);
2237 if (!opr_queue_IsEmpty(&rx_incomingCallQueue)) {
2238 struct rx_call *tcall;
2239 struct opr_queue *cursor;
2240 /* Scan for eligible incoming calls. A call is not eligible
2241 * if the maximum number of calls for its service type are
2242 * already executing */
2243 /* One thread will process calls FCFS (to prevent starvation),
2244 * while the other threads may run ahead looking for calls which
2245 * have all their input data available immediately. This helps
2246 * keep threads from blocking, waiting for data from the client. */
2247 choice2 = (struct rx_call *)0;
2248 for (opr_queue_Scan(&rx_incomingCallQueue, cursor)) {
2249 tcall = opr_queue_Entry(cursor, struct rx_call, entry);
2250 service = tcall->conn->service;
2251 if (QuotaOK(service)) {
2252 MUTEX_ENTER(&rx_pthread_mutex);
2253 /* XXX - If tcall->entry.next is NULL, then we're no longer
2254 * on a queue at all. This shouldn't happen. */
2255 if (tno == rxi_fcfs_thread_num || !tcall->entry.next) {
2256 MUTEX_EXIT(&rx_pthread_mutex);
2257 /* If we're the fcfs thread, then we'll just use
2258 * this call. If we haven't been able to find an optimal
2259 * choice, and we're at the end of the list, then use a
2260 * 2d choice if one has been identified. Otherwise... */
2261 call = (choice2 ? choice2 : tcall);
2262 service = call->conn->service;
2264 MUTEX_EXIT(&rx_pthread_mutex);
2265 if (!opr_queue_IsEmpty(&tcall->rq)) {
2266 struct rx_packet *rp;
2267 rp = opr_queue_First(&tcall->rq, struct rx_packet,
2269 if (rp->header.seq == 1
2271 || (rp->header.flags & RX_LAST_PACKET))) {
2273 } else if (rxi_2dchoice && !choice2
2274 && !(tcall->flags & RX_CALL_CLEARED)
2275 && (tcall->rprev > rxi_HardAckRate)) {
2288 opr_queue_Remove(&call->entry);
2289 /* we can't schedule a call if there's no data!!! */
2290 /* send an ack if there's no data, if we're missing the
2291 * first packet, or we're missing something between first
2292 * and last -- there's a "hole" in the incoming data. */
2293 if (opr_queue_IsEmpty(&call->rq)
2294 || opr_queue_First(&call->rq, struct rx_packet, entry)->header.seq != 1
2295 || call->rprev != opr_queue_Last(&call->rq, struct rx_packet, entry)->header.seq)
2296 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
2298 call->flags &= (~RX_CALL_WAIT_PROC);
2299 service->nRequestsRunning++;
2300 /* just started call in minProcs pool, need fewer to maintain
2301 * guarantee */
2302 MUTEX_ENTER(&rx_quota_mutex);
2303 if (service->nRequestsRunning <= service->minProcs)
2306 MUTEX_EXIT(&rx_quota_mutex);
2307 rx_atomic_dec(&rx_nWaiting);
2308 /* MUTEX_EXIT(&call->lock); */
2310 /* If there are no eligible incoming calls, add this process
2311 * to the idle server queue, to wait for one */
2314 *socketp = OSI_NULLSOCKET;
2316 sq->socketp = socketp;
2317 opr_queue_Append(&rx_idleServerQueue, &sq->entry);
2321 if (afs_termState == AFSOP_STOP_RXCALLBACK) {
2323 rxi_Free(sq, sizeof(struct rx_serverQueueEntry));
2324 return (struct rx_call *)0;
2327 } while (!(call = sq->newcall)
2328 && !(socketp && *socketp != OSI_NULLSOCKET));
2330 MUTEX_EXIT(&sq->lock);
2332 MUTEX_ENTER(&freeSQEList_lock);
2333 *(struct rx_serverQueueEntry **)sq = rx_FreeSQEList;
2334 rx_FreeSQEList = sq;
2335 MUTEX_EXIT(&freeSQEList_lock);
2338 clock_GetTime(&call->startTime);
2339 call->state = RX_STATE_ACTIVE;
2340 call->app.mode = RX_MODE_RECEIVING;
2341 #ifdef RX_KERNEL_TRACE
2342 if (ICL_SETACTIVE(afs_iclSetp)) {
2343 int glockOwner = ISAFS_GLOCK();
2346 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
2347 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
2354 rxi_calltrace(RX_CALL_START, call);
2355 dpf(("rx_GetCall(port=%d, service=%d) ==> call %p\n",
2356 call->conn->service->servicePort, call->conn->service->serviceId,
2359 dpf(("rx_GetCall(socketp=%p, *socketp=0x%x)\n", socketp, *socketp));
2366 #endif /* RX_ENABLE_LOCKS */
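#if 0
/* A minimal sketch (not part of Rx) of the worker loop a server thread
 * runs on top of rx_GetCall.  "example_ExecuteRequest" is hypothetical;
 * real servers normally register an executeRequestProc with their
 * rx_service and let rx_StartServer drive this loop for them. */
static void example_ExecuteRequest(struct rx_call *call);

static void
example_ServerLoop(int threadID)
{
    struct rx_service *service = NULL;
    osi_socket socket = OSI_NULLSOCKET;
    struct rx_call *call;

    for (;;) {
	/* Passing the previous call's service lets rx_GetCall return
	 * this thread to that service's pool (ReturnToServerPool). */
	call = rx_GetCall(threadID, service, &socket);
	if (!call)
	    break;		/* e.g. AFSOP_STOP_RXCALLBACK shutdown */
	service = call->conn->service;
	example_ExecuteRequest(call);	/* rx_Read / rx_Write the args */
	rx_EndCall(call, 0);
    }
}
#endif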
2370 /* Establish a procedure to be called when a packet arrives for a
2371 * call. This routine will be called at most once after each call,
2372 * and will also be called if there is an error condition on the call or
2373 * the call is complete. Used by multi rx to build a selection
2374 * function which determines which of several calls is likely to be a
2375 * good one to read from.
2376 * NOTE: the way this is currently implemented, it is probably only a
2377 * good idea to (1) use it immediately after a newcall (clients only)
2378 * and (2) only use it once. Other uses currently void your warranty
2381 rx_SetArrivalProc(struct rx_call *call,
2382 void (*proc) (struct rx_call * call,
2385 void * handle, int arg)
2387 call->arrivalProc = proc;
2388 call->arrivalProcHandle = handle;
2389 call->arrivalProcArg = arg;
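#if 0
/* A minimal sketch of an arrival procedure, matching the invocation in
 * rxi_ReceiveDataPacket below:
 *     (*call->arrivalProc)(call, call->arrivalProcHandle,
 *                          call->arrivalProcArg);
 * "example_ready" is hypothetical state for a multi-rx style selector. */
static int example_ready[RX_MAXCALLS];

static void
example_CallArrived(struct rx_call *call, void *handle, int index)
{
    example_ready[index] = 1;	/* this call now has data to read */
}

/* Per the note above: register once, immediately after rx_NewCall:
 *     rx_SetArrivalProc(call, example_CallArrived, NULL, i);
 */
#endif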
2392 /* Call is finished (possibly prematurely). Return rc to the peer, if
2393 * appropriate, and return the final error code from the conversation
2397 rx_EndCall(struct rx_call *call, afs_int32 rc)
2399 struct rx_connection *conn = call->conn;
2403 dpf(("rx_EndCall(call %"AFS_PTR_FMT" rc %d error %d abortCode %d)\n",
2404 call, rc, call->error, call->abortCode));
2407 MUTEX_ENTER(&call->lock);
2409 if (rc == 0 && call->error == 0) {
2410 call->abortCode = 0;
2411 call->abortCount = 0;
2414 call->arrivalProc = (void (*)())0;
2415 if (rc && call->error == 0) {
2416 rxi_CallError(call, rc);
2417 call->app.mode = RX_MODE_ERROR;
2418 /* Send an abort message to the peer if this error code has
2419 * only just been set. If it was set previously, assume the
2420 * peer has already been sent the error code or will request it
2422 rxi_SendCallAbort(call, (struct rx_packet *)0, 0, 0);
2424 if (conn->type == RX_SERVER_CONNECTION) {
2425 /* Make sure reply or at least dummy reply is sent */
2426 if (call->app.mode == RX_MODE_RECEIVING) {
2427 MUTEX_EXIT(&call->lock);
2428 rxi_WriteProc(call, 0, 0);
2429 MUTEX_ENTER(&call->lock);
2431 if (call->app.mode == RX_MODE_SENDING) {
2432 MUTEX_EXIT(&call->lock);
2433 rxi_FlushWrite(call);
2434 MUTEX_ENTER(&call->lock);
2436 rxi_calltrace(RX_CALL_END, call);
2437 /* Call goes to hold state until reply packets are acknowledged */
2438 if (call->tfirst + call->nSoftAcked < call->tnext) {
2439 call->state = RX_STATE_HOLD;
2441 call->state = RX_STATE_DALLY;
2442 rxi_ClearTransmitQueue(call, 0);
2443 rxi_rto_cancel(call);
2444 rxi_CancelKeepAliveEvent(call);
2446 } else { /* Client connection */
2448 /* Make sure server receives input packets, in the case where
2449 * no reply arguments are expected */
2451 if ((call->app.mode == RX_MODE_SENDING)
2452 || (call->app.mode == RX_MODE_RECEIVING && call->rnext == 1)) {
2453 MUTEX_EXIT(&call->lock);
2454 (void)rxi_ReadProc(call, &dummy, 1);
2455 MUTEX_ENTER(&call->lock);
2458 /* If we had an outstanding delayed ack, be nice to the server
2459 * and force-send it now.
2461 if (call->delayedAckEvent) {
2462 rxi_CancelDelayedAckEvent(call);
2463 rxi_SendDelayedAck(NULL, call, NULL, 0);
2466 /* We need to release the call lock since it's lower than the
2467 * conn_call_lock and we don't want to hold the conn_call_lock
2468 * over the rx_ReadProc call. The conn_call_lock needs to be held
2469 * here for the case where rx_NewCall is perusing the calls on
2470 * the connection structure. We don't want to signal until
2471 * rx_NewCall is in a stable state. Otherwise, rx_NewCall may
2472 * have checked this call, found it active and by the time it
2473 * goes to sleep, will have missed the signal.
2475 MUTEX_EXIT(&call->lock);
2476 MUTEX_ENTER(&conn->conn_call_lock);
2477 MUTEX_ENTER(&call->lock);
2479 if (!(call->flags & RX_CALL_PEER_BUSY)) {
2480 conn->lastBusy[call->channel] = 0;
2483 MUTEX_ENTER(&conn->conn_data_lock);
2484 conn->flags |= RX_CONN_BUSY;
2485 if (conn->flags & RX_CONN_MAKECALL_WAITING) {
2486 MUTEX_EXIT(&conn->conn_data_lock);
2487 #ifdef RX_ENABLE_LOCKS
2488 CV_BROADCAST(&conn->conn_call_cv);
2493 #ifdef RX_ENABLE_LOCKS
2495 MUTEX_EXIT(&conn->conn_data_lock);
2497 #endif /* RX_ENABLE_LOCKS */
2498 call->state = RX_STATE_DALLY;
2500 error = call->error;
2502 /* currentPacket, nLeft, and nFree must be zeroed here, because
2503 * ResetCall cannot: ResetCall may be called at splnet(), in the
2504 * kernel version, and may interrupt the macros rx_Read or
2505 * rx_Write, which run at normal priority for efficiency. */
2506 if (call->app.currentPacket) {
2507 #ifdef RX_TRACK_PACKETS
2508 call->app.currentPacket->flags &= ~RX_PKTFLAG_CP;
2510 rxi_FreePacket(call->app.currentPacket);
2511 call->app.currentPacket = (struct rx_packet *)0;
2514 call->app.nLeft = call->app.nFree = call->app.curlen = 0;
2516 /* Free any packets from the last call to ReadvProc/WritevProc */
2517 #ifdef RXDEBUG_PACKET
2519 #endif /* RXDEBUG_PACKET */
2520 rxi_FreePackets(0, &call->app.iovq);
2521 MUTEX_EXIT(&call->lock);
2523 CALL_RELE(call, RX_CALL_REFCOUNT_BEGIN);
2524 if (conn->type == RX_CLIENT_CONNECTION) {
2525 MUTEX_ENTER(&conn->conn_data_lock);
2526 conn->flags &= ~RX_CONN_BUSY;
2527 MUTEX_EXIT(&conn->conn_data_lock);
2528 MUTEX_EXIT(&conn->conn_call_lock);
2532 * Map errors to the local host's errno.h format.
2534 error = ntoh_syserr_conv(error);
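#if 0
/* A minimal sketch of the client-side pattern around rx_EndCall.  The
 * marshalling calls are elided; "code" comes back already translated to
 * the local errno.h format by ntoh_syserr_conv above. */
static afs_int32
example_RunCall(struct rx_connection *conn)
{
    struct rx_call *call = rx_NewCall(conn);
    afs_int32 code;

    /* ... rx_Write() the request, then rx_Read() the reply ... */

    code = rx_EndCall(call, 0);	/* 0: no application-level error */
    return code;		/* non-zero: call or transport error */
}
#endif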
2538 #if !defined(KERNEL)
2540 /* Call this routine when shutting down a server or client (especially
2541 * clients). This will allow Rx to gracefully garbage collect server
2542 * connections, and reduce the number of retries that a server might
2543 * make to a dead client.
2544 * This is not quite right, since some calls may still be ongoing and
2545 * we can't lock them to destroy them. */
2549 struct rx_connection **conn_ptr, **conn_end;
2553 if (rxinit_status == 1) {
2555 return; /* Already shutdown. */
2557 rxi_DeleteCachedConnections();
2558 if (rx_connHashTable) {
2559 MUTEX_ENTER(&rx_connHashTable_lock);
2560 for (conn_ptr = &rx_connHashTable[0], conn_end =
2561 &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
2563 struct rx_connection *conn, *next;
2564 for (conn = *conn_ptr; conn; conn = next) {
2566 if (conn->type == RX_CLIENT_CONNECTION) {
2567 MUTEX_ENTER(&rx_refcnt_mutex);
2569 MUTEX_EXIT(&rx_refcnt_mutex);
2570 #ifdef RX_ENABLE_LOCKS
2571 rxi_DestroyConnectionNoLock(conn);
2572 #else /* RX_ENABLE_LOCKS */
2573 rxi_DestroyConnection(conn);
2574 #endif /* RX_ENABLE_LOCKS */
2578 #ifdef RX_ENABLE_LOCKS
2579 while (rx_connCleanup_list) {
2580 struct rx_connection *conn;
2581 conn = rx_connCleanup_list;
2582 rx_connCleanup_list = rx_connCleanup_list->next;
2583 MUTEX_EXIT(&rx_connHashTable_lock);
2584 rxi_CleanupConnection(conn);
2585 MUTEX_ENTER(&rx_connHashTable_lock);
2587 MUTEX_EXIT(&rx_connHashTable_lock);
2588 #endif /* RX_ENABLE_LOCKS */
2593 afs_winsockCleanup();
2601 /* If we wake up the packet waiter too often, we can get into a loop with two
2602 AllocSendPackets each waking the other up (from ReclaimPacket calls) */
2604 rxi_PacketsUnWait(void)
2606 if (!rx_waitingForPackets) {
2610 if (rxi_OverQuota(RX_PACKET_CLASS_SEND)) {
2611 return; /* still over quota */
2614 rx_waitingForPackets = 0;
2615 #ifdef RX_ENABLE_LOCKS
2616 CV_BROADCAST(&rx_waitingForPackets_cv);
2618 osi_rxWakeup(&rx_waitingForPackets);
2624 /* ------------------Internal interfaces------------------------- */
2626 /* Return this process's service structure for the
2627 * specified socket and service */
2628 static struct rx_service *
2629 rxi_FindService(osi_socket socket, u_short serviceId)
2631 struct rx_service **sp;
2632 for (sp = &rx_services[0]; *sp; sp++) {
2633 if ((*sp)->serviceId == serviceId && (*sp)->socket == socket)
2639 #ifdef RXDEBUG_PACKET
2640 #ifdef KDUMP_RX_LOCK
2641 static struct rx_call_rx_lock *rx_allCallsp = 0;
2643 static struct rx_call *rx_allCallsp = 0;
2645 #endif /* RXDEBUG_PACKET */
2647 /* Allocate a call structure, for the indicated channel of the
2648 * supplied connection. The mode and state of the call must be set by
2649 * the caller. Returns the call with mutex locked. */
2650 static struct rx_call *
2651 rxi_NewCall(struct rx_connection *conn, int channel)
2653 struct rx_call *call;
2654 #ifdef RX_ENABLE_LOCKS
2655 struct rx_call *cp; /* Call pointer temp */
2656 struct opr_queue *cursor;
2659 dpf(("rxi_NewCall(conn %"AFS_PTR_FMT", channel %d)\n", conn, channel));
2661 /* Grab an existing call structure, or allocate a new one.
2662 * Existing call structures are assumed to have been left reset by
2664 MUTEX_ENTER(&rx_freeCallQueue_lock);
2666 #ifdef RX_ENABLE_LOCKS
2668 * EXCEPT that the TQ might not yet be cleared out.
2669 * Skip over those with in-use TQs.
2672 for (opr_queue_Scan(&rx_freeCallQueue, cursor)) {
2673 cp = opr_queue_Entry(cursor, struct rx_call, entry);
2674 if (!(cp->flags & RX_CALL_TQ_BUSY)) {
2680 #else /* RX_ENABLE_LOCKS */
2681 if (!opr_queue_IsEmpty(&rx_freeCallQueue)) {
2682 call = opr_queue_First(&rx_freeCallQueue, struct rx_call, entry);
2683 #endif /* RX_ENABLE_LOCKS */
2684 opr_queue_Remove(&call->entry);
2685 if (rx_stats_active)
2686 rx_atomic_dec(&rx_stats.nFreeCallStructs);
2687 MUTEX_EXIT(&rx_freeCallQueue_lock);
2688 MUTEX_ENTER(&call->lock);
2689 CLEAR_CALL_QUEUE_LOCK(call);
2690 #ifdef RX_ENABLE_LOCKS
2691 /* Now, if TQ wasn't cleared earlier, do it now. */
2692 rxi_WaitforTQBusy(call);
2693 if (call->flags & RX_CALL_TQ_CLEARME) {
2694 rxi_ClearTransmitQueue(call, 1);
2695 /*queue_Init(&call->tq);*/
2697 #endif /* RX_ENABLE_LOCKS */
2698 /* Bind the call to its connection structure */
2700 rxi_ResetCall(call, 1);
2703 call = rxi_Alloc(sizeof(struct rx_call));
2704 #ifdef RXDEBUG_PACKET
2705 call->allNextp = rx_allCallsp;
2706 rx_allCallsp = call;
2708 rx_atomic_inc_and_read(&rx_stats.nCallStructs);
2709 #else /* RXDEBUG_PACKET */
2710 rx_atomic_inc(&rx_stats.nCallStructs);
2711 #endif /* RXDEBUG_PACKET */
2713 MUTEX_EXIT(&rx_freeCallQueue_lock);
2714 MUTEX_INIT(&call->lock, "call lock", MUTEX_DEFAULT, NULL);
2715 MUTEX_ENTER(&call->lock);
2716 CV_INIT(&call->cv_twind, "call twind", CV_DEFAULT, 0);
2717 CV_INIT(&call->cv_rq, "call rq", CV_DEFAULT, 0);
2718 CV_INIT(&call->cv_tq, "call tq", CV_DEFAULT, 0);
2720 /* Initialize once-only items */
2721 opr_queue_Init(&call->tq);
2722 opr_queue_Init(&call->rq);
2723 opr_queue_Init(&call->app.iovq);
2724 #ifdef RXDEBUG_PACKET
2725 call->rqc = call->tqc = call->iovqc = 0;
2726 #endif /* RXDEBUG_PACKET */
2727 /* Bind the call to its connection structure (prereq for reset) */
2729 rxi_ResetCall(call, 1);
2731 call->channel = channel;
2732 call->callNumber = &conn->callNumber[channel];
2733 call->rwind = conn->rwind[channel];
2734 call->twind = conn->twind[channel];
2735 /* Note that the next expected call number is retained (in
2736 * conn->callNumber[i]), even if we reallocate the call structure
2738 conn->call[channel] = call;
2739 /* if the channel's never been used (== 0), we should start at 1, otherwise
2740 * the call number is valid from the last time this channel was used */
2741 if (*call->callNumber == 0)
2742 *call->callNumber = 1;
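/* Illustration: with RX_MAXCALLS call channels per connection, a packet's
 * cid selects the connection plus a channel (cid & RX_CHANNELMASK), and
 * comparing header.callNumber against conn->callNumber[channel]
 * distinguishes a brand-new call (greater) from traffic for the previous
 * call on that channel (smaller); see rxi_ReceiveServerCall below. */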
2747 /* A call has been inactive long enough that we can throw away
2748 * state, including the call structure, which is placed on the call
2749 * free queue.
2751 * call->lock and rx_refcnt_mutex are held upon entry.
2752 * haveCTLock is set when called from rxi_ReapConnections.
2754 * return 1 if the call is freed, 0 if not.
2757 rxi_FreeCall(struct rx_call *call, int haveCTLock)
2759 int channel = call->channel;
2760 struct rx_connection *conn = call->conn;
2761 u_char state = call->state;
2764 * We are setting the state to RX_STATE_RESET to
2765 * ensure that no one else will attempt to use this
2766 * call once we drop the refcnt lock. We must drop
2767 * the refcnt lock before calling rxi_ResetCall
2768 * because it cannot be held across acquiring the
2769 * freepktQ lock. NewCall does the same.
2771 call->state = RX_STATE_RESET;
2772 MUTEX_EXIT(&rx_refcnt_mutex);
2773 rxi_ResetCall(call, 0);
2775 if (MUTEX_TRYENTER(&conn->conn_call_lock))
2777 if (state == RX_STATE_DALLY || state == RX_STATE_HOLD)
2778 (*call->callNumber)++;
2780 if (call->conn->call[channel] == call)
2781 call->conn->call[channel] = 0;
2782 MUTEX_EXIT(&conn->conn_call_lock);
2785 * We couldn't obtain the conn_call_lock so we can't
2786 * disconnect the call from the connection. Set the
2787 * call state to dally so that the call can be reused.
2789 MUTEX_ENTER(&rx_refcnt_mutex);
2790 call->state = RX_STATE_DALLY;
2794 MUTEX_ENTER(&rx_freeCallQueue_lock);
2795 SET_CALL_QUEUE_LOCK(call, &rx_freeCallQueue_lock);
2796 #ifdef RX_ENABLE_LOCKS
2797 /* A call may be free even though its transmit queue is still in use.
2798 * Since we search the call list from head to tail, put busy calls at
2799 * the head of the list, and idle calls at the tail.
2801 if (call->flags & RX_CALL_TQ_BUSY)
2802 opr_queue_Prepend(&rx_freeCallQueue, &call->entry);
2804 opr_queue_Append(&rx_freeCallQueue, &call->entry);
2805 #else /* RX_ENABLE_LOCKS */
2806 opr_queue_Append(&rx_freeCallQueue, &call->entry);
2807 #endif /* RX_ENABLE_LOCKS */
2808 if (rx_stats_active)
2809 rx_atomic_inc(&rx_stats.nFreeCallStructs);
2810 MUTEX_EXIT(&rx_freeCallQueue_lock);
2812 /* Destroy the connection if it was previously slated for
2813 * destruction, i.e. the Rx client code previously called
2814 * rx_DestroyConnection (client connections), or
2815 * rxi_ReapConnections called the same routine (server
2816 * connections). Only do this, however, if there are no
2817 * outstanding calls. Note that for fine grain locking, there appears
2818 * to be a deadlock in that rxi_FreeCall has a call locked and
2819 * DestroyConnectionNoLock locks each call in the conn. But note a
2820 * few lines up where we have removed this call from the conn.
2821 * If someone else destroys a connection, they either have no
2822 * call lock held or are going through this section of code.
2824 MUTEX_ENTER(&conn->conn_data_lock);
2825 if (conn->flags & RX_CONN_DESTROY_ME && !(conn->flags & RX_CONN_MAKECALL_WAITING)) {
2826 MUTEX_ENTER(&rx_refcnt_mutex);
2828 MUTEX_EXIT(&rx_refcnt_mutex);
2829 MUTEX_EXIT(&conn->conn_data_lock);
2830 #ifdef RX_ENABLE_LOCKS
2832 rxi_DestroyConnectionNoLock(conn);
2834 rxi_DestroyConnection(conn);
2835 #else /* RX_ENABLE_LOCKS */
2836 rxi_DestroyConnection(conn);
2837 #endif /* RX_ENABLE_LOCKS */
2839 MUTEX_EXIT(&conn->conn_data_lock);
2841 MUTEX_ENTER(&rx_refcnt_mutex);
2845 rx_atomic_t rxi_Allocsize = RX_ATOMIC_INIT(0);
2846 rx_atomic_t rxi_Alloccnt = RX_ATOMIC_INIT(0);
2849 rxi_Alloc(size_t size)
2853 if (rx_stats_active) {
2854 rx_atomic_add(&rxi_Allocsize, (int) size);
2855 rx_atomic_inc(&rxi_Alloccnt);
2859 #if defined(KERNEL) && !defined(UKERNEL) && defined(AFS_FBSD80_ENV)
2860 afs_osi_Alloc_NoSleep(size);
2865 osi_Panic("rxi_Alloc error");
2871 rxi_Free(void *addr, size_t size)
2873 if (rx_stats_active) {
2874 rx_atomic_sub(&rxi_Allocsize, (int) size);
2875 rx_atomic_dec(&rxi_Alloccnt);
2877 osi_Free(addr, size);
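#if 0
/* A minimal sketch: rxi_Free must be given the same size that was passed
 * to the matching rxi_Alloc, or the rxi_Allocsize/rxi_Alloccnt accounting
 * above drifts (and osi_Free needs the true size on some platforms). */
static void
example_AllocFree(void)
{
    struct rx_serverQueueEntry *sq = rxi_Alloc(sizeof(*sq));

    /* ... use sq ... */

    rxi_Free(sq, sizeof(*sq));
}
#endif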
2881 rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, afs_uint32 port, int mtu)
2883 struct rx_peer **peer_ptr = NULL, **peer_end = NULL;
2884 struct rx_peer *next = NULL;
2888 MUTEX_ENTER(&rx_peerHashTable_lock);
2890 peer_ptr = &rx_peerHashTable[0];
2891 peer_end = &rx_peerHashTable[rx_hashTableSize];
2894 for ( ; peer_ptr < peer_end; peer_ptr++) {
2897 for ( ; peer; peer = next) {
2899 if (host == peer->host)
2904 hashIndex = PEER_HASH(host, port);
2905 for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
2906 if ((peer->host == host) && (peer->port == port))
2911 MUTEX_ENTER(&rx_peerHashTable_lock);
2916 MUTEX_EXIT(&rx_peerHashTable_lock);
2918 MUTEX_ENTER(&peer->peer_lock);
2919 /* We don't handle dropping below the minimum, so clamp to RX_MIN_PACKET_SIZE */
2920 mtu = MAX(mtu, RX_MIN_PACKET_SIZE);
2921 peer->ifMTU=MIN(mtu, peer->ifMTU);
2922 peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
2923 /* if we tweaked this down, need to tune our peer MTU too */
2924 peer->MTU = MIN(peer->MTU, peer->natMTU);
2925 /* if we discovered a sub-1500 mtu, degrade */
2926 if (peer->ifMTU < OLD_MAX_PACKET_SIZE)
2927 peer->maxDgramPackets = 1;
2928 /* We no longer have valid peer packet information */
2929 if (peer->maxPacketSize-RX_IPUDP_SIZE > peer->ifMTU)
2930 peer->maxPacketSize = 0;
2931 MUTEX_EXIT(&peer->peer_lock);
2933 MUTEX_ENTER(&rx_peerHashTable_lock);
2935 if (host && !port) {
2937 /* pick up where we left off */
2941 MUTEX_EXIT(&rx_peerHashTable_lock);
2944 #ifdef AFS_RXERRQ_ENV
2946 rxi_SetPeerDead(struct sock_extended_err *err, afs_uint32 host, afs_uint16 port)
2948 int hashIndex = PEER_HASH(host, port);
2949 struct rx_peer *peer;
2951 MUTEX_ENTER(&rx_peerHashTable_lock);
2953 for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
2954 if (peer->host == host && peer->port == port) {
2960 MUTEX_EXIT(&rx_peerHashTable_lock);
2963 rx_atomic_inc(&peer->neterrs);
2964 MUTEX_ENTER(&peer->peer_lock);
2965 peer->last_err_origin = RX_NETWORK_ERROR_ORIGIN_ICMP;
2966 peer->last_err_type = err->ee_type;
2967 peer->last_err_code = err->ee_code;
2968 MUTEX_EXIT(&peer->peer_lock);
2970 MUTEX_ENTER(&rx_peerHashTable_lock);
2972 MUTEX_EXIT(&rx_peerHashTable_lock);
2977 rxi_ProcessNetError(struct sock_extended_err *err, afs_uint32 addr, afs_uint16 port)
2979 # ifdef AFS_ADAPT_PMTU
2980 if (err->ee_errno == EMSGSIZE && err->ee_info >= 68) {
2981 rxi_SetPeerMtu(NULL, addr, port, err->ee_info - RX_IPUDP_SIZE);
2985 if (err->ee_origin == SO_EE_ORIGIN_ICMP && err->ee_type == ICMP_DEST_UNREACH) {
2986 switch (err->ee_code) {
2987 case ICMP_NET_UNREACH:
2988 case ICMP_HOST_UNREACH:
2989 case ICMP_PORT_UNREACH:
2992 rxi_SetPeerDead(err, addr, port);
2999 rxi_TranslateICMP(int type, int code)
3002 case ICMP_DEST_UNREACH:
3004 case ICMP_NET_UNREACH:
3005 return "Destination Net Unreachable";
3006 case ICMP_HOST_UNREACH:
3007 return "Destination Host Unreachable";
3008 case ICMP_PROT_UNREACH:
3009 return "Destination Protocol Unreachable";
3010 case ICMP_PORT_UNREACH:
3011 return "Destination Port Unreachable";
3013 return "Destination Net Prohibited";
3015 return "Destination Host Prohibited";
3021 #endif /* AFS_RXERRQ_ENV */
3024 * Get the last network error for a connection
3026 * A "network error" here means an error retrieved from ICMP, or some other
3027 * mechanism outside of Rx that informs us of errors in network reachability.
3029 * If a peer associated with the given Rx connection has received a network
3030 * error recently, this function allows the caller to know what error
3031 * specifically occurred. This can be useful to know, since e.g. ICMP errors
3032 * can cause calls to that peer to be quickly aborted. So, this function can
3033 * help see why a call was aborted due to network errors.
3035 * If we have received traffic from a peer since the last network error, we
3036 * treat that peer as if we had not received a network error for it.
3038 * @param[in] conn The Rx connection to examine
3039 * @param[out] err_origin The origin of the last network error (e.g. ICMP);
3040 * one of the RX_NETWORK_ERROR_ORIGIN_* constants
3041 * @param[out] err_type The type of the last error
3042 * @param[out] err_code The code of the last error
3043 * @param[out] msg Human-readable error message, if applicable; NULL otherwise
3045 * @return If we have an error
3046 * @retval -1 No error to get; 'out' params are undefined
3047 * @retval 0 We have an error; 'out' params contain the last error
3050 rx_GetNetworkError(struct rx_connection *conn, int *err_origin, int *err_type,
3051 int *err_code, const char **msg)
3053 #ifdef AFS_RXERRQ_ENV
3054 struct rx_peer *peer = conn->peer;
3055 if (rx_atomic_read(&peer->neterrs)) {
3056 MUTEX_ENTER(&peer->peer_lock);
3057 *err_origin = peer->last_err_origin;
3058 *err_type = peer->last_err_type;
3059 *err_code = peer->last_err_code;
3060 MUTEX_EXIT(&peer->peer_lock);
3063 if (*err_origin == RX_NETWORK_ERROR_ORIGIN_ICMP) {
3064 *msg = rxi_TranslateICMP(*err_type, *err_code);
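#if 0
/* A minimal sketch of calling rx_GetNetworkError after a call aborts,
 * per the contract documented above (-1 means nothing is recorded). */
static void
example_ReportNetError(struct rx_connection *conn)
{
    int origin, type, code;
    const char *msg;

    if (rx_GetNetworkError(conn, &origin, &type, &code, &msg) == 0) {
	/* an error is recorded; msg is non-NULL only for ICMP-origin
	 * errors with a known translation (rxi_TranslateICMP) */
    }
}
#endif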
3073 /* Find the peer process represented by the supplied (host,port)
3074 * combination. If there is no appropriate active peer structure, a
3075 * new one will be allocated and initialized
3078 rxi_FindPeer(afs_uint32 host, u_short port, int create)
3082 hashIndex = PEER_HASH(host, port);
3083 MUTEX_ENTER(&rx_peerHashTable_lock);
3084 for (pp = rx_peerHashTable[hashIndex]; pp; pp = pp->next) {
3085 if ((pp->host == host) && (pp->port == port))
3090 pp = rxi_AllocPeer(); /* This bzero's *pp */
3091 pp->host = host; /* set here or in InitPeerParams is zero */
3093 #ifdef AFS_RXERRQ_ENV
3094 rx_atomic_set(&pp->neterrs, 0);
3096 MUTEX_INIT(&pp->peer_lock, "peer_lock", MUTEX_DEFAULT, 0);
3097 opr_queue_Init(&pp->rpcStats);
3098 pp->next = rx_peerHashTable[hashIndex];
3099 rx_peerHashTable[hashIndex] = pp;
3100 rxi_InitPeerParams(pp);
3101 if (rx_stats_active)
3102 rx_atomic_inc(&rx_stats.nPeerStructs);
3108 MUTEX_EXIT(&rx_peerHashTable_lock);
3113 /* Find the connection at (host, port) started at epoch, and with the
3114 * given connection id. Creates the server connection if necessary.
3115 * The type specifies whether a client connection or a server
3116 * connection is desired. In both cases, (host, port) specify the
3117 * peer's (host, port) pair. Client connections are not made
3118 * automatically by this routine. The parameter socket gives the
3119 * socket descriptor on which the packet was received. This is used,
3120 * in the case of server connections, to check that *new* connections
3121 * come via a valid (port, serviceId). Finally, the securityIndex
3122 * parameter must match the existing index for the connection. If a
3123 * server connection is created, it will be created using the supplied
3124 * index, if the index is valid for this service */
3125 static struct rx_connection *
3126 rxi_FindConnection(osi_socket socket, afs_uint32 host,
3127 u_short port, u_short serviceId, afs_uint32 cid,
3128 afs_uint32 epoch, int type, u_int securityIndex)
3130 int hashindex, flag, i;
3131 struct rx_connection *conn;
3132 hashindex = CONN_HASH(host, port, cid, epoch, type);
3133 MUTEX_ENTER(&rx_connHashTable_lock);
3134 rxLastConn ? (conn = rxLastConn, flag = 0) : (conn =
3135 rx_connHashTable[hashindex],
3138 if ((conn->type == type) && ((cid & RX_CIDMASK) == conn->cid)
3139 && (epoch == conn->epoch)) {
3140 struct rx_peer *pp = conn->peer;
3141 if (securityIndex != conn->securityIndex) {
3142 /* this isn't supposed to happen, but someone could forge a packet
3143 * like this, and there seems to be some CM bug that makes this
3144 * happen from time to time -- in which case, the fileserver
3146 MUTEX_EXIT(&rx_connHashTable_lock);
3147 return (struct rx_connection *)0;
3149 if (pp->host == host && pp->port == port)
3151 if (type == RX_CLIENT_CONNECTION && pp->port == port)
3153 /* So what happens when it's a callback connection? */
3154 if ( /*type == RX_CLIENT_CONNECTION && */
3155 (conn->epoch & 0x80000000))
3159 /* the connection rxLastConn that was used the last time is not the
3160 ** one we are looking for now. Hence, start searching in the hash */
3162 conn = rx_connHashTable[hashindex];
3167 struct rx_service *service;
3168 if (type == RX_CLIENT_CONNECTION) {
3169 MUTEX_EXIT(&rx_connHashTable_lock);
3170 return (struct rx_connection *)0;
3172 service = rxi_FindService(socket, serviceId);
3173 if (!service || (securityIndex >= service->nSecurityObjects)
3174 || (service->securityObjects[securityIndex] == 0)) {
3175 MUTEX_EXIT(&rx_connHashTable_lock);
3176 return (struct rx_connection *)0;
3178 conn = rxi_AllocConnection(); /* This bzero's the connection */
3179 MUTEX_INIT(&conn->conn_call_lock, "conn call lock", MUTEX_DEFAULT, 0);
3180 MUTEX_INIT(&conn->conn_data_lock, "conn data lock", MUTEX_DEFAULT, 0);
3181 CV_INIT(&conn->conn_call_cv, "conn call cv", CV_DEFAULT, 0);
3182 conn->next = rx_connHashTable[hashindex];
3183 rx_connHashTable[hashindex] = conn;
3184 conn->peer = rxi_FindPeer(host, port, 1);
3185 conn->type = RX_SERVER_CONNECTION;
3186 conn->lastSendTime = clock_Sec(); /* don't GC immediately */
3187 conn->epoch = epoch;
3188 conn->cid = cid & RX_CIDMASK;
3189 conn->ackRate = RX_FAST_ACK_RATE;
3190 conn->service = service;
3191 conn->serviceId = serviceId;
3192 conn->securityIndex = securityIndex;
3193 conn->securityObject = service->securityObjects[securityIndex];
3194 conn->nSpecific = 0;
3195 conn->specific = NULL;
3196 rx_SetConnDeadTime(conn, service->connDeadTime);
3197 conn->idleDeadTime = service->idleDeadTime;
3198 conn->idleDeadDetection = service->idleDeadErr ? 1 : 0;
3199 for (i = 0; i < RX_MAXCALLS; i++) {
3200 conn->twind[i] = rx_initSendWindow;
3201 conn->rwind[i] = rx_initReceiveWindow;
3203 /* Notify security object of the new connection */
3204 RXS_NewConnection(conn->securityObject, conn);
3205 /* XXXX Connection timeout? */
3206 if (service->newConnProc)
3207 (*service->newConnProc) (conn);
3208 if (rx_stats_active)
3209 rx_atomic_inc(&rx_stats.nServerConns);
3212 MUTEX_ENTER(&rx_refcnt_mutex);
3214 MUTEX_EXIT(&rx_refcnt_mutex);
3216 rxLastConn = conn; /* store this connection as the last conn used */
3217 MUTEX_EXIT(&rx_connHashTable_lock);
3222 * Timeout a call on a busy call channel if appropriate.
3224 * @param[in] call The busy call.
3226 * @pre 'call' is marked as busy (namely,
3227 * call->conn->lastBusy[call->channel] != 0)
3229 * @pre call->lock is held
3230 * @pre rxi_busyChannelError is nonzero
3232 * @note call->lock is dropped and reacquired
3235 rxi_CheckBusy(struct rx_call *call)
3237 struct rx_connection *conn = call->conn;
3238 int channel = call->channel;
3239 int freechannel = 0;
3242 MUTEX_EXIT(&call->lock);
3244 MUTEX_ENTER(&conn->conn_call_lock);
3246 /* Are there any other call slots on this conn that we should try? Look for
3247 * slots that are empty and are either non-busy, or were marked as busy
3248 * longer than conn->secondsUntilDead seconds before this call started. */
3250 for (i = 0; i < RX_MAXCALLS && !freechannel; i++) {
3252 /* only look at channels that aren't us */
3256 if (conn->lastBusy[i]) {
3257 /* if this channel looked busy too recently, don't look at it */
3258 if (conn->lastBusy[i] >= call->startTime.sec) {
3261 if (call->startTime.sec - conn->lastBusy[i] < conn->secondsUntilDead) {
3266 if (conn->call[i]) {
3267 struct rx_call *tcall = conn->call[i];
3268 MUTEX_ENTER(&tcall->lock);
3269 if (tcall->state == RX_STATE_DALLY) {
3272 MUTEX_EXIT(&tcall->lock);
3278 MUTEX_ENTER(&call->lock);
3280 /* Since the call->lock has been released it is possible that the call may
3281 * no longer be busy (the call channel cannot have been reallocated as we
3282 * haven't dropped the conn_call_lock). Therefore, we must confirm
3283 * that the call state has not changed when deciding whether or not to
3284 * force this application thread to retry by forcing a Timeout error. */
3286 if (freechannel && (call->flags & RX_CALL_PEER_BUSY)) {
3287 /* Since 'freechannel' is set, there exists another channel in this
3288 * rx_conn that the application thread might be able to use. We know
3289 * that we have the correct call since callNumber is unchanged, and we
3290 * know that the call is still busy. So, set the call error state to
3291 * rxi_busyChannelError so the application can retry the request,
3292 * presumably on a less-busy call channel. */
3294 rxi_CallError(call, RX_CALL_BUSY);
3296 MUTEX_EXIT(&conn->conn_call_lock);
3300 * Abort the call if the server is over the busy threshold. This
3301 * can be used without requiring that a call structure be initialised
3302 * or connected to a particular channel
3305 rxi_AbortIfServerBusy(osi_socket socket, struct rx_connection *conn,
3306 struct rx_packet *np)
3308 if ((rx_BusyThreshold > 0) &&
3309 (rx_atomic_read(&rx_nWaiting) > rx_BusyThreshold)) {
3310 rxi_SendRawAbort(socket, conn->peer->host, conn->peer->port,
3311 rx_BusyError, np, 0);
3312 if (rx_stats_active)
3313 rx_atomic_inc(&rx_stats.nBusies);
3320 static_inline struct rx_call *
3321 rxi_ReceiveClientCall(struct rx_packet *np, struct rx_connection *conn)
3324 struct rx_call *call;
3326 channel = np->header.cid & RX_CHANNELMASK;
3327 MUTEX_ENTER(&conn->conn_call_lock);
3328 call = conn->call[channel];
3329 if (!call || conn->callNumber[channel] != np->header.callNumber) {
3330 MUTEX_EXIT(&conn->conn_call_lock);
3331 if (rx_stats_active)
3332 rx_atomic_inc(&rx_stats.spuriousPacketsRead);
3336 MUTEX_ENTER(&call->lock);
3337 MUTEX_EXIT(&conn->conn_call_lock);
3339 if ((call->state == RX_STATE_DALLY)
3340 && np->header.type == RX_PACKET_TYPE_ACK) {
3341 if (rx_stats_active)
3342 rx_atomic_inc(&rx_stats.ignorePacketDally);
3343 MUTEX_EXIT(&call->lock);
3350 static_inline struct rx_call *
3351 rxi_ReceiveServerCall(osi_socket socket, struct rx_packet *np,
3352 struct rx_connection *conn)
3355 struct rx_call *call;
3357 channel = np->header.cid & RX_CHANNELMASK;
3358 MUTEX_ENTER(&conn->conn_call_lock);
3359 call = conn->call[channel];
3362 if (rxi_AbortIfServerBusy(socket, conn, np)) {
3363 MUTEX_EXIT(&conn->conn_call_lock);
3367 call = rxi_NewCall(conn, channel); /* returns locked call */
3368 *call->callNumber = np->header.callNumber;
3369 MUTEX_EXIT(&conn->conn_call_lock);
3371 call->state = RX_STATE_PRECALL;
3372 clock_GetTime(&call->queueTime);
3373 call->app.bytesSent = 0;
3374 call->app.bytesRcvd = 0;
3375 rxi_KeepAliveOn(call);
3380 if (np->header.callNumber == conn->callNumber[channel]) {
3381 MUTEX_ENTER(&call->lock);
3382 MUTEX_EXIT(&conn->conn_call_lock);
3386 if (np->header.callNumber < conn->callNumber[channel]) {
3387 MUTEX_EXIT(&conn->conn_call_lock);
3388 if (rx_stats_active)
3389 rx_atomic_inc(&rx_stats.spuriousPacketsRead);
3393 MUTEX_ENTER(&call->lock);
3394 MUTEX_EXIT(&conn->conn_call_lock);
3396 /* Wait until the transmit queue is idle before deciding
3397 * whether to reset the current call. Chances are that the
3398 * call will be in either DALLY or HOLD state once the TQ_BUSY
3399 * flag is cleared. */
3401 #ifdef RX_ENABLE_LOCKS
3402 if (call->state == RX_STATE_ACTIVE) {
3403 int old_error = call->error;
3404 rxi_WaitforTQBusy(call);
3405 /* If we entered error state while waiting,
3406 * must call rxi_CallError to permit rxi_ResetCall
3407 * to proceed when the tqWaiter count hits zero.
3409 if (call->error && call->error != old_error) {
3410 rxi_CallError(call, call->error);
3411 MUTEX_EXIT(&call->lock);
3415 #endif /* RX_ENABLE_LOCKS */
3416 /* If the new call cannot be taken right now, send a busy and set
3417 * the error condition in this call, so that it terminates as
3418 * quickly as possible */
3419 if (call->state == RX_STATE_ACTIVE) {
3420 rxi_CallError(call, RX_CALL_DEAD);
3421 rxi_SendSpecial(call, conn, NULL, RX_PACKET_TYPE_BUSY,
3423 MUTEX_EXIT(&call->lock);
3427 if (rxi_AbortIfServerBusy(socket, conn, np)) {
3428 MUTEX_EXIT(&call->lock);
3432 rxi_ResetCall(call, 0);
3433 /* The conn_call_lock is not held but no one else should be
3434 * using this call channel while we are processing this incoming
3435 * packet. This assignment should be safe.
3437 *call->callNumber = np->header.callNumber;
3438 call->state = RX_STATE_PRECALL;
3439 clock_GetTime(&call->queueTime);
3440 call->app.bytesSent = 0;
3441 call->app.bytesRcvd = 0;
3442 rxi_KeepAliveOn(call);
3448 /* There are two packet tracing routines available for testing and monitoring
3449 * Rx. One is called just after every packet is received and the other is
3450 * called just before every packet is sent. Received packets, have had their
3451 * headers decoded, and packets to be sent have not yet had their headers
3452 * encoded. Both take two parameters: a pointer to the packet and a sockaddr
3453 * containing the network address. Both can be modified. The return value, if
3454 * non-zero, indicates that the packet should be dropped. */
3456 int (*rx_justReceived) (struct rx_packet *, struct sockaddr_in *) = 0;
3457 int (*rx_almostSent) (struct rx_packet *, struct sockaddr_in *) = 0;
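#if 0
/* A minimal sketch of a receive-side tracer installed via rx_justReceived.
 * Returning non-zero drops the packet, which makes these hooks useful for
 * fault injection in tests.  "example_blackhole" is hypothetical. */
static afs_uint32 example_blackhole;	/* host to drop, network order */

static int
example_JustReceived(struct rx_packet *p, struct sockaddr_in *addr)
{
    if (addr->sin_addr.s_addr == example_blackhole)
	return 1;		/* drop this packet */
    return 0;			/* keep it; p and addr may be modified */
}

/* installed with:  rx_justReceived = example_JustReceived;  */
#endif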
3459 /* A packet has been received off the interface. Np is the packet, socket is
3460 * the socket number it was received from (useful in determining which service
3461 * this packet corresponds to), and (host, port) reflect the host,port of the
3462 * sender. This call returns the packet to the caller if it is finished with
3463 * it, rather than de-allocating it, just as a small performance hack */
3466 rxi_ReceivePacket(struct rx_packet *np, osi_socket socket,
3467 afs_uint32 host, u_short port, int *tnop,
3468 struct rx_call **newcallp)
3470 struct rx_call *call;
3471 struct rx_connection *conn;
3476 struct rx_packet *tnp;
3479 /* We don't print out the packet until now because (1) the time may not be
3480 * accurate enough until now in the lwp implementation (rx_Listener only gets
3481 * the time after the packet is read) and (2) from a protocol point of view,
3482 * this is the first time the packet has been seen */
3483 packetType = (np->header.type > 0 && np->header.type < RX_N_PACKET_TYPES)
3484 ? rx_packetTypes[np->header.type - 1] : "*UNKNOWN*";
3485 dpf(("R %d %s: %x.%d.%d.%d.%d.%d.%d flags %d, packet %"AFS_PTR_FMT"\n",
3486 np->header.serial, packetType, ntohl(host), ntohs(port), np->header.serviceId,
3487 np->header.epoch, np->header.cid, np->header.callNumber,
3488 np->header.seq, np->header.flags, np));
3491 /* Account for connectionless packets */
3492 if (rx_stats_active &&
3493 ((np->header.type == RX_PACKET_TYPE_VERSION) ||
3494 (np->header.type == RX_PACKET_TYPE_DEBUG))) {
3495 struct rx_peer *peer;
3497 /* Try to look up the peer structure, but don't create one */
3498 peer = rxi_FindPeer(host, port, 0);
3500 /* Since this may not be associated with a connection, it may have
3501 * no refCount, meaning we could race with ReapConnections
3504 if (peer && (peer->refCount > 0)) {
3505 #ifdef AFS_RXERRQ_ENV
3506 if (rx_atomic_read(&peer->neterrs)) {
3507 rx_atomic_set(&peer->neterrs, 0);
3510 MUTEX_ENTER(&peer->peer_lock);
3511 peer->bytesReceived += np->length;
3512 MUTEX_EXIT(&peer->peer_lock);
3516 if (np->header.type == RX_PACKET_TYPE_VERSION) {
3517 return rxi_ReceiveVersionPacket(np, socket, host, port, 1);
3520 if (np->header.type == RX_PACKET_TYPE_DEBUG) {
3521 return rxi_ReceiveDebugPacket(np, socket, host, port, 1);
3524 /* If an input tracer function is defined, call it with the packet and
3525 * network address. Note this function may modify its arguments. */
3526 if (rx_justReceived) {
3527 struct sockaddr_in addr;
3529 addr.sin_family = AF_INET;
3530 addr.sin_port = port;
3531 addr.sin_addr.s_addr = host;
3532 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
3533 addr.sin_len = sizeof(addr);
3534 #endif /* STRUCT_SOCKADDR_HAS_SA_LEN */
3535 drop = (*rx_justReceived) (np, &addr);
3536 /* drop packet if return value is non-zero */
3539 port = addr.sin_port; /* in case fcn changed addr */
3540 host = addr.sin_addr.s_addr;
3544 /* If packet was not sent by the client, then *we* must be the client */
3545 type = ((np->header.flags & RX_CLIENT_INITIATED) != RX_CLIENT_INITIATED)
3546 ? RX_CLIENT_CONNECTION : RX_SERVER_CONNECTION;
3548 /* Find the connection (or fabricate one, if we're the server & if
3549 * necessary) associated with this packet */
3551 rxi_FindConnection(socket, host, port, np->header.serviceId,
3552 np->header.cid, np->header.epoch, type,
3553 np->header.securityIndex);
3555 /* To avoid having 2 connections just abort at each other,
3556 don't abort an abort. */
3558 if (np->header.type != RX_PACKET_TYPE_ABORT)
3559 rxi_SendRawAbort(socket, host, port, RX_INVALID_OPERATION,
3564 #ifdef AFS_RXERRQ_ENV
3565 if (rx_atomic_read(&conn->peer->neterrs)) {
3566 rx_atomic_set(&conn->peer->neterrs, 0);
3570 /* If we're doing statistics, then account for the incoming packet */
3571 if (rx_stats_active) {
3572 MUTEX_ENTER(&conn->peer->peer_lock);
3573 conn->peer->bytesReceived += np->length;
3574 MUTEX_EXIT(&conn->peer->peer_lock);
3577 /* If the connection is in an error state, send an abort packet and ignore
3578 * the incoming packet */
3580 /* Don't respond to an abort packet--we don't want loops! */
3581 MUTEX_ENTER(&conn->conn_data_lock);
3582 if (np->header.type != RX_PACKET_TYPE_ABORT)
3583 np = rxi_SendConnectionAbort(conn, np, 1, 0);
3584 putConnection(conn);
3585 MUTEX_EXIT(&conn->conn_data_lock);
3589 /* Check for connection-only requests (i.e. not call specific). */
3590 if (np->header.callNumber == 0) {
3591 switch (np->header.type) {
3592 case RX_PACKET_TYPE_ABORT: {
3593 /* What if the supplied error is zero? */
3594 afs_int32 errcode = ntohl(rx_GetInt32(np, 0));
3595 dpf(("rxi_ReceivePacket ABORT rx_GetInt32 = %d\n", errcode));
3596 rxi_ConnectionError(conn, errcode);
3597 putConnection(conn);
3600 case RX_PACKET_TYPE_CHALLENGE:
3601 tnp = rxi_ReceiveChallengePacket(conn, np, 1);
3602 putConnection(conn);
3604 case RX_PACKET_TYPE_RESPONSE:
3605 tnp = rxi_ReceiveResponsePacket(conn, np, 1);
3606 putConnection(conn);
3608 case RX_PACKET_TYPE_PARAMS:
3609 case RX_PACKET_TYPE_PARAMS + 1:
3610 case RX_PACKET_TYPE_PARAMS + 2:
3611 /* ignore these packet types for now */
3612 putConnection(conn);
3616 /* Should not reach here, unless the peer is broken: send an
3618 rxi_ConnectionError(conn, RX_PROTOCOL_ERROR);
3619 MUTEX_ENTER(&conn->conn_data_lock);
3620 tnp = rxi_SendConnectionAbort(conn, np, 1, 0);
3621 putConnection(conn);
3622 MUTEX_EXIT(&conn->conn_data_lock);
3627 if (type == RX_SERVER_CONNECTION)
3628 call = rxi_ReceiveServerCall(socket, np, conn);
3630 call = rxi_ReceiveClientCall(np, conn);
3633 putConnection(conn);
3637 MUTEX_ASSERT(&call->lock);
3638 /* Set remote user defined status from packet */
3639 call->remoteStatus = np->header.userStatus;
3641 /* Now do packet type-specific processing */
3642 switch (np->header.type) {
3643 case RX_PACKET_TYPE_DATA:
3644 /* If we're a client, and receiving a response, then all the packets
3645 * we transmitted are implicitly acknowledged. */
3646 if (type == RX_CLIENT_CONNECTION && !opr_queue_IsEmpty(&call->tq))
3647 rxi_AckAllInTransmitQueue(call);
3649 np = rxi_ReceiveDataPacket(call, np, 1, socket, host, port, tnop,
3652 case RX_PACKET_TYPE_ACK:
3653 /* Respond immediately to ack packets requesting acknowledgement
3655 if (np->header.flags & RX_REQUEST_ACK) {
3657 (void)rxi_SendCallAbort(call, 0, 1, 0);
3659 (void)rxi_SendAck(call, 0, np->header.serial,
3660 RX_ACK_PING_RESPONSE, 1);
3662 np = rxi_ReceiveAckPacket(call, np, 1);
3664 case RX_PACKET_TYPE_ABORT: {
3665 /* An abort packet: reset the call, passing the error up to the user. */
3666 /* What if error is zero? */
3667 /* What if the error is -1? The application will treat it as a timeout. */
3668 afs_int32 errdata = ntohl(*(afs_int32 *) rx_DataOf(np));
3669 dpf(("rxi_ReceivePacket ABORT rx_DataOf = %d\n", errdata));
3670 rxi_CallError(call, errdata);
3671 MUTEX_EXIT(&call->lock);
3672 putConnection(conn);
3673 return np; /* xmitting; drop packet */
3675 case RX_PACKET_TYPE_BUSY: {
3676 struct clock busyTime;
3678 clock_GetTime(&busyTime);
3680 MUTEX_EXIT(&call->lock);
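	/* Lock ordering: conn_call_lock must be taken before call->lock
	 * (see the ordering note in rx_EndCall above), so drop the call
	 * lock and reacquire it under the conn lock before touching
	 * lastBusy[]. */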
3682 MUTEX_ENTER(&conn->conn_call_lock);
3683 MUTEX_ENTER(&call->lock);
3684 conn->lastBusy[call->channel] = busyTime.sec;
3685 call->flags |= RX_CALL_PEER_BUSY;
3686 MUTEX_EXIT(&call->lock);
3687 MUTEX_EXIT(&conn->conn_call_lock);
3689 putConnection(conn);
3693 case RX_PACKET_TYPE_ACKALL:
3694 /* All packets acknowledged, so we can drop all packets previously
3695 * readied for sending */
3696 rxi_AckAllInTransmitQueue(call);
3699 /* Should not reach here, unless the peer is broken: send an abort
3701 rxi_CallError(call, RX_PROTOCOL_ERROR);
3702 np = rxi_SendCallAbort(call, np, 1, 0);
3705 /* Note when this last legitimate packet was received, for keep-alive
3706 * processing. Note, we delay getting the time until now in the hope that
3707 * the packet will be delivered to the user before any get time is required
3708 * (if not, then the time won't actually be re-evaluated here). */
3709 call->lastReceiveTime = clock_Sec();
3710 /* we've received a legit packet, so the channel is not busy */
3711 call->flags &= ~RX_CALL_PEER_BUSY;
3712 MUTEX_EXIT(&call->lock);
3713 putConnection(conn);
3717 /* return true if this is an "interesting" connection from the point of view
3718 of someone trying to debug the system */
3720 rxi_IsConnInteresting(struct rx_connection *aconn)
3723 struct rx_call *tcall;
3725 if (aconn->flags & (RX_CONN_MAKECALL_WAITING | RX_CONN_DESTROY_ME))
3728 for (i = 0; i < RX_MAXCALLS; i++) {
3729 tcall = aconn->call[i];
3731 if ((tcall->state == RX_STATE_PRECALL)
3732 || (tcall->state == RX_STATE_ACTIVE))
3734 if ((tcall->app.mode == RX_MODE_SENDING)
3735 || (tcall->app.mode == RX_MODE_RECEIVING))
3743 /* if this is one of the last few packets AND it wouldn't be used by the
3744 receiving call to immediately satisfy a read request, then drop it on
3745 the floor, since accepting it might prevent a lock-holding thread from
3746 making progress in its reading. If a call has been cleared while in
3747 the precall state then ignore all subsequent packets until the call
3748 is assigned to a thread. */
3751 TooLow(struct rx_packet *ap, struct rx_call *acall)
3755 MUTEX_ENTER(&rx_quota_mutex);
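    /* Two drop conditions: (1) the call was cleared while still in
     * PRECALL and this isn't the first packet, so ignore everything
     * until a thread is attached; (2) the free pool is nearly exhausted
     * (fewer than rxi_dataQuota + 2 free packets) and this packet would
     * not immediately satisfy a waiting reader. */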
3756 if (((ap->header.seq != 1) && (acall->flags & RX_CALL_CLEARED)
3757 && (acall->state == RX_STATE_PRECALL))
3758 || ((rx_nFreePackets < rxi_dataQuota + 2)
3759 && !((ap->header.seq < acall->rnext + rx_initSendWindow)
3760 && (acall->flags & RX_CALL_READER_WAIT)))) {
3763 MUTEX_EXIT(&rx_quota_mutex);
3769 * Clear the attach wait flag on a connection and proceed.
3771 * Any processing waiting for a connection to be attached should be
3772 * unblocked. We clear the flag and do any other needed tasks.
3775 * the conn to unmark waiting for attach
3777 * @pre conn's conn_data_lock must be locked before calling this function
3781 rxi_ConnClearAttachWait(struct rx_connection *conn)
3783 /* Indicate that rxi_CheckReachEvent is no longer running by
3784 * clearing the flag. Must be atomic under conn_data_lock to
3785 * avoid a new call slipping by: rxi_CheckConnReach holds
3786 * conn_data_lock while checking RX_CONN_ATTACHWAIT.
3788 conn->flags &= ~RX_CONN_ATTACHWAIT;
3789 if (conn->flags & RX_CONN_NAT_PING) {
3790 conn->flags &= ~RX_CONN_NAT_PING;
3791 rxi_ScheduleNatKeepAliveEvent(conn);
3796 rxi_CheckReachEvent(struct rxevent *event, void *arg1, void *arg2, int dummy)
3798 struct rx_connection *conn = arg1;
3799 struct rx_call *acall = arg2;
3800 struct rx_call *call = acall;
3801 struct clock when, now;
3804 MUTEX_ENTER(&conn->conn_data_lock);
3807 rxevent_Put(&conn->checkReachEvent);
3809 waiting = conn->flags & RX_CONN_ATTACHWAIT;
3811 putConnection(conn);
3813 MUTEX_EXIT(&conn->conn_data_lock);
3817 MUTEX_ENTER(&conn->conn_call_lock);
3818 MUTEX_ENTER(&conn->conn_data_lock);
3819 for (i = 0; i < RX_MAXCALLS; i++) {
3820 struct rx_call *tc = conn->call[i];
3821 if (tc && tc->state == RX_STATE_PRECALL) {
3827 rxi_ConnClearAttachWait(conn);
3828 MUTEX_EXIT(&conn->conn_data_lock);
3829 MUTEX_EXIT(&conn->conn_call_lock);
3834 MUTEX_ENTER(&call->lock);
3835 rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
3837 MUTEX_EXIT(&call->lock);
3839 clock_GetTime(&now);
3841 when.sec += RX_CHECKREACH_TIMEOUT;
3842 MUTEX_ENTER(&conn->conn_data_lock);
3843 if (!conn->checkReachEvent) {
3844 MUTEX_ENTER(&rx_refcnt_mutex);
3846 MUTEX_EXIT(&rx_refcnt_mutex);
3847 conn->checkReachEvent = rxevent_Post(&when, &now,
3848 rxi_CheckReachEvent, conn,
3851 MUTEX_EXIT(&conn->conn_data_lock);
3857 rxi_CheckConnReach(struct rx_connection *conn, struct rx_call *call)
3859 struct rx_service *service = conn->service;
3860 struct rx_peer *peer = conn->peer;
3861 afs_uint32 now, lastReach;
3863 if (service->checkReach == 0)
3867 MUTEX_ENTER(&peer->peer_lock);
3868 lastReach = peer->lastReachTime;
3869 MUTEX_EXIT(&peer->peer_lock);
3870 if (now - lastReach < RX_CHECKREACH_TTL)
3873 MUTEX_ENTER(&conn->conn_data_lock);
3874 if (conn->flags & RX_CONN_ATTACHWAIT) {
3875 MUTEX_EXIT(&conn->conn_data_lock);
3878 conn->flags |= RX_CONN_ATTACHWAIT;
3879 MUTEX_EXIT(&conn->conn_data_lock);
3880 if (!conn->checkReachEvent)
3881 rxi_CheckReachEvent(NULL, conn, call, 0);
3886 /* try to attach call, if authentication is complete */
3888 TryAttach(struct rx_call *acall, osi_socket socket,
3889 int *tnop, struct rx_call **newcallp,
3892 struct rx_connection *conn = acall->conn;
3894 if (conn->type == RX_SERVER_CONNECTION
3895 && acall->state == RX_STATE_PRECALL) {
3896 /* Don't attach until we have any req'd. authentication. */
3897 if (RXS_CheckAuthentication(conn->securityObject, conn) == 0) {
3898 if (reachOverride || rxi_CheckConnReach(conn, acall) == 0)
3899 rxi_AttachServerProc(acall, socket, tnop, newcallp);
3900 /* Note: this does not necessarily succeed; there
3901 * may not be any proc available. */
3904 rxi_ChallengeOn(acall->conn);
3909 /* A data packet has been received off the interface. This packet is
3910 * appropriate to the call (the call is in the right state, etc.). This
3911 * routine can return a packet to the caller, for re-use */
3913 static struct rx_packet *
3914 rxi_ReceiveDataPacket(struct rx_call *call,
3915 struct rx_packet *np, int istack,
3916 osi_socket socket, afs_uint32 host, u_short port,
3917 int *tnop, struct rx_call **newcallp)
3919 int ackNeeded = 0; /* 0 means no, otherwise ack_reason */
3924 afs_uint32 serial=0, flags=0;
3926 struct rx_packet *tnp;
3927 if (rx_stats_active)
3928 rx_atomic_inc(&rx_stats.dataPacketsRead);
3931 /* If there are no packet buffers, drop this new packet, unless we can find
3932 * packet buffers from inactive calls */
3934 && (rxi_OverQuota(RX_PACKET_CLASS_RECEIVE) || TooLow(np, call))) {
3935 MUTEX_ENTER(&rx_freePktQ_lock);
3936 rxi_NeedMorePackets = TRUE;
3937 MUTEX_EXIT(&rx_freePktQ_lock);
3938 if (rx_stats_active)
3939 rx_atomic_inc(&rx_stats.noPacketBuffersOnRead);
3940 rxi_calltrace(RX_TRACE_DROP, call);
3941 dpf(("packet %"AFS_PTR_FMT" dropped on receipt - quota problems\n", np));
3942 /* We used to clear the receive queue here, in an attempt to free
3943 * packets. However, this is unsafe if the queue has received a
3944 * soft ACK for the final packet */
3945 rxi_PostDelayedAckEvent(call, &rx_softAckDelay);
3951 * New in AFS 3.5, if the RX_JUMBO_PACKET flag is set then this
3952 * packet is one of several packets transmitted as a single
3953 * datagram. Do not send any soft or hard acks until all packets
3954 * in a jumbogram have been processed. Send negative acks right away.
3956 for (isFirst = 1, tnp = NULL; isFirst || tnp; isFirst = 0) {
3957 /* tnp is non-null when there are more packets in the
3958 * current jumbo gram */
3965 seq = np->header.seq;
3966 serial = np->header.serial;
3967 flags = np->header.flags;
3969 /* If the call is in an error state, send an abort message */
3971 return rxi_SendCallAbort(call, np, istack, 0);
3973 /* The RX_JUMBO_PACKET flag is set in all but the last packet in each
3974 * AFS 3.5 jumbogram. */
3975 if (flags & RX_JUMBO_PACKET) {
3976 tnp = rxi_SplitJumboPacket(np, host, port, isFirst);
3981 if (np->header.spare != 0) {
3982 MUTEX_ENTER(&call->conn->conn_data_lock);
3983 call->conn->flags |= RX_CONN_USING_PACKET_CKSUM;
3984 MUTEX_EXIT(&call->conn->conn_data_lock);
3987 /* The usual case is that this is the expected next packet */
3988 if (seq == call->rnext) {
3990 /* Check to make sure it is not a duplicate of one already queued */
3991 if (!opr_queue_IsEmpty(&call->rq)
3992 && opr_queue_First(&call->rq, struct rx_packet, entry)->header.seq == seq) {
3993 if (rx_stats_active)
3994 rx_atomic_inc(&rx_stats.dupPacketsRead);
3995 dpf(("packet %"AFS_PTR_FMT" dropped on receipt - duplicate\n", np));
3996 rxi_CancelDelayedAckEvent(call);
3997 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
4003 /* It's the next packet. Stick it on the receive queue
4004 * for this call. Set newPackets to make sure we wake
4005 * the reader once all packets have been processed */
4006 #ifdef RX_TRACK_PACKETS
4007 np->flags |= RX_PKTFLAG_RQ;
4009 opr_queue_Prepend(&call->rq, &np->entry);
4010 #ifdef RXDEBUG_PACKET
4012 #endif /* RXDEBUG_PACKET */
4014 np = NULL; /* We can't use this anymore */
4017 /* If an ack is requested then set a flag to make sure we
4018 * send an acknowledgement for this packet */
4019 if (flags & RX_REQUEST_ACK) {
4020 ackNeeded = RX_ACK_REQUESTED;
4023 /* Keep track of whether we have received the last packet */
4024 if (flags & RX_LAST_PACKET) {
4025 call->flags |= RX_CALL_HAVE_LAST;
4029 /* Check whether we have all of the packets for this call */
4030 if (call->flags & RX_CALL_HAVE_LAST) {
4031 afs_uint32 tseq; /* temporary sequence number */
4032 struct opr_queue *cursor;
4034 for (tseq = seq, opr_queue_Scan(&call->rq, cursor)) {
4035 struct rx_packet *tp;
4037 tp = opr_queue_Entry(cursor, struct rx_packet, entry);
4038 if (tseq != tp->header.seq)
4040 if (tp->header.flags & RX_LAST_PACKET) {
4041 call->flags |= RX_CALL_RECEIVE_DONE;
4048 /* Provide asynchronous notification for those who want it
4049 * (e.g. multi rx) */
4050 if (call->arrivalProc) {
4051 (*call->arrivalProc) (call, call->arrivalProcHandle,
4052 call->arrivalProcArg);
4053 call->arrivalProc = (void (*)())0;
4056 /* Update last packet received */
4059 /* If there is no server process serving this call, grab
4060 * one, if available. We only need to do this once. If a
4061 * server thread is available, this thread becomes a server
4062 * thread and the server thread becomes a listener thread. */
4064 TryAttach(call, socket, tnop, newcallp, 0);
4067 /* This is not the expected next packet. */
4069 /* Determine whether this is a new or old packet, and if it's
4070 * a new one, whether it fits into the current receive window.
4071 * Also figure out whether the packet was delivered in sequence.
4072 * We use the prev variable to determine whether the new packet
4073 * is the successor of its immediate predecessor in the
4074 * receive queue, and the missing flag to determine whether
* any of this packet's predecessors are missing. */
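/*
 * Worked example (illustrative, not from the original source):
 * with rnext = 5, rwind = 32 and {6, 7, 9} queued, a new packet
 * with seq 8 is within the window and is inserted before 9. Its
 * prev is 7, so it directly follows its predecessor, but missing
 * is set because 5 has not arrived, so an RX_ACK_OUT_OF_SEQUENCE
 * ack is requested below.
 */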
4077 afs_uint32 prev; /* "Previous packet" sequence number */
4078 struct opr_queue *cursor;
4079 int missing; /* Are any predecessors missing? */
4081 /* If the new packet's sequence number has been sent to the
4082 * application already, then this is a duplicate */
4083 if (seq < call->rnext) {
4084 if (rx_stats_active)
4085 rx_atomic_inc(&rx_stats.dupPacketsRead);
4086 rxi_CancelDelayedAckEvent(call);
4087 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE, istack);
4093 /* If the sequence number is greater than what can be
* accommodated by the current window, then send a negative
4095 * acknowledge and drop the packet */
4096 if ((call->rnext + call->rwind) <= seq) {
4097 rxi_CancelDelayedAckEvent(call);
4098 np = rxi_SendAck(call, np, serial, RX_ACK_EXCEEDS_WINDOW,
4105 /* Look for the packet in the queue of old received packets */
4106 prev = call->rnext - 1;
4108 for (opr_queue_Scan(&call->rq, cursor)) {
4109 struct rx_packet *tp
4110 = opr_queue_Entry(cursor, struct rx_packet, entry);
/* Check for duplicate packet */
4113 if (seq == tp->header.seq) {
4114 if (rx_stats_active)
4115 rx_atomic_inc(&rx_stats.dupPacketsRead);
4116 rxi_CancelDelayedAckEvent(call);
4117 np = rxi_SendAck(call, np, serial, RX_ACK_DUPLICATE,
4123 /* If we find a higher sequence packet, break out and
4124 * insert the new packet here. */
4125 if (seq < tp->header.seq)
4127 /* Check for missing packet */
4128 if (tp->header.seq != prev + 1) {
4132 prev = tp->header.seq;
4135 /* Keep track of whether we have received the last packet. */
4136 if (flags & RX_LAST_PACKET) {
4137 call->flags |= RX_CALL_HAVE_LAST;
/* It's within the window: add it to the receive queue.
* tp is left by the previous loop either pointing at the
* packet before which to insert the new packet, or at the
* queue head if the queue is empty or the packet should be
* appended. */
4145 #ifdef RX_TRACK_PACKETS
4146 np->flags |= RX_PKTFLAG_RQ;
4148 #ifdef RXDEBUG_PACKET
4150 #endif /* RXDEBUG_PACKET */
4151 opr_queue_InsertBefore(cursor, &np->entry);
4155 /* Check whether we have all of the packets for this call */
4156 if ((call->flags & RX_CALL_HAVE_LAST)
4157 && !(call->flags & RX_CALL_RECEIVE_DONE)) {
4158 afs_uint32 tseq; /* temporary sequence number */
for (tseq = call->rnext, opr_queue_Scan(&call->rq, cursor)) {
4162 struct rx_packet *tp
4163 = opr_queue_Entry(cursor, struct rx_packet, entry);
4164 if (tseq != tp->header.seq)
4166 if (tp->header.flags & RX_LAST_PACKET) {
4167 call->flags |= RX_CALL_RECEIVE_DONE;
/* We need to send an ack if the packet is out of sequence,
4175 * or if an ack was requested by the peer. */
4176 if (seq != prev + 1 || missing) {
4177 ackNeeded = RX_ACK_OUT_OF_SEQUENCE;
4178 } else if (flags & RX_REQUEST_ACK) {
4179 ackNeeded = RX_ACK_REQUESTED;
4182 /* Acknowledge the last packet for each call */
4183 if (flags & RX_LAST_PACKET) {
/* If the receiver is waiting for an iovec, fill the iovec
4195 * using the data from the receive queue */
4196 if (call->flags & RX_CALL_IOVEC_WAIT) {
4197 didHardAck = rxi_FillReadVec(call, serial);
4198 /* the call may have been aborted */
4207 /* Wakeup the reader if any */
4208 if ((call->flags & RX_CALL_READER_WAIT)
4209 && (!(call->flags & RX_CALL_IOVEC_WAIT) || !(call->iovNBytes)
4210 || (call->iovNext >= call->iovMax)
4211 || (call->flags & RX_CALL_RECEIVE_DONE))) {
4212 call->flags &= ~RX_CALL_READER_WAIT;
4213 #ifdef RX_ENABLE_LOCKS
4214 CV_BROADCAST(&call->cv_rq);
4216 osi_rxWakeup(&call->rq);
/*
 * Send an ack when requested by the peer, or once every
4223 * rxi_SoftAckRate packets until the last packet has been
4224 * received. Always send a soft ack for the last packet in
4225 * the server's reply. */
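/*
 * Summary of the ladder below (illustrative; rxi_SoftAckRate
 * defaults to RX_FAST_ACK_RATE, i.e. 1):
 *
 *   ackNeeded set           -> send an ack now with that reason
 *   nSoftAcks > SoftAckRate -> send an RX_ACK_IDLE ack now
 *   nSoftAcks > 0           -> schedule a delayed ack (using the
 *                              last-packet delay if we hold the
 *                              final packet of a server reply)
 *   RX_CALL_RECEIVE_DONE    -> cancel any pending delayed ack
 */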
4227 rxi_CancelDelayedAckEvent(call);
4228 np = rxi_SendAck(call, np, serial, ackNeeded, istack);
4229 } else if (call->nSoftAcks > (u_short) rxi_SoftAckRate) {
4230 rxi_CancelDelayedAckEvent(call);
4231 np = rxi_SendAck(call, np, serial, RX_ACK_IDLE, istack);
4232 } else if (call->nSoftAcks) {
4233 if (haveLast && !(flags & RX_CLIENT_INITIATED))
4234 rxi_PostDelayedAckEvent(call, &rx_lastAckDelay);
4236 rxi_PostDelayedAckEvent(call, &rx_softAckDelay);
4237 } else if (call->flags & RX_CALL_RECEIVE_DONE) {
4238 rxi_CancelDelayedAckEvent(call);
4245 rxi_UpdatePeerReach(struct rx_connection *conn, struct rx_call *acall)
4247 struct rx_peer *peer = conn->peer;
4249 MUTEX_ENTER(&peer->peer_lock);
4250 peer->lastReachTime = clock_Sec();
4251 MUTEX_EXIT(&peer->peer_lock);
4253 MUTEX_ENTER(&conn->conn_data_lock);
4254 if (conn->flags & RX_CONN_ATTACHWAIT) {
4257 rxi_ConnClearAttachWait(conn);
4258 MUTEX_EXIT(&conn->conn_data_lock);
4260 for (i = 0; i < RX_MAXCALLS; i++) {
4261 struct rx_call *call = conn->call[i];
4264 MUTEX_ENTER(&call->lock);
4265 /* tnop can be null if newcallp is null */
4266 TryAttach(call, (osi_socket) - 1, NULL, NULL, 1);
4268 MUTEX_EXIT(&call->lock);
4272 MUTEX_EXIT(&conn->conn_data_lock);
4275 #if defined(RXDEBUG) && defined(AFS_NT40_ENV)
4277 rx_ack_reason(int reason)
4280 case RX_ACK_REQUESTED:
4282 case RX_ACK_DUPLICATE:
4284 case RX_ACK_OUT_OF_SEQUENCE:
4286 case RX_ACK_EXCEEDS_WINDOW:
4288 case RX_ACK_NOSPACE:
4292 case RX_ACK_PING_RESPONSE:
4305 /* The real smarts of the whole thing. */
4306 static struct rx_packet *
4307 rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
4310 struct rx_ackPacket *ap;
4312 struct rx_packet *tp;
4313 struct rx_connection *conn = call->conn;
4314 struct rx_peer *peer = conn->peer;
4315 struct opr_queue *cursor;
4316 struct clock now; /* Current time, for RTT calculations */
4324 int newAckCount = 0;
4325 int maxDgramPackets = 0; /* Set if peer supports AFS 3.5 jumbo datagrams */
4326 int pktsize = 0; /* Set if we need to update the peer mtu */
4327 int conn_data_locked = 0;
4329 if (rx_stats_active)
4330 rx_atomic_inc(&rx_stats.ackPacketsRead);
4331 ap = (struct rx_ackPacket *)rx_DataOf(np);
4332 nbytes = rx_Contiguous(np) - (int)((ap->acks) - (u_char *) ap);
4334 return np; /* truncated ack packet */
4336 /* depends on ack packet struct */
4337 nAcks = MIN((unsigned)nbytes, (unsigned)ap->nAcks);
4338 first = ntohl(ap->firstPacket);
4339 prev = ntohl(ap->previousPacket);
4340 serial = ntohl(ap->serial);
/*
 * Ignore ack packets received out of order while protecting
 * against peers that set the previousPacket field to a packet
 * serial number instead of a sequence number.
 */
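/* For instance (hypothetical numbers): if tfirst has already
 * advanced to 10, an ack naming firstPacket 8 carries no new
 * information and is discarded here. */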
4347 if (first < call->tfirst ||
4348 (first == call->tfirst && prev < call->tprev && prev < call->tfirst
4355 if (np->header.flags & RX_SLOW_START_OK) {
4356 call->flags |= RX_CALL_SLOW_START_OK;
4359 if (ap->reason == RX_ACK_PING_RESPONSE)
4360 rxi_UpdatePeerReach(conn, call);
4362 if (conn->lastPacketSizeSeq) {
4363 MUTEX_ENTER(&conn->conn_data_lock);
4364 conn_data_locked = 1;
4365 if ((first > conn->lastPacketSizeSeq) && (conn->lastPacketSize)) {
4366 pktsize = conn->lastPacketSize;
4367 conn->lastPacketSize = conn->lastPacketSizeSeq = 0;
4370 if ((ap->reason == RX_ACK_PING_RESPONSE) && (conn->lastPingSizeSer)) {
4371 if (!conn_data_locked) {
4372 MUTEX_ENTER(&conn->conn_data_lock);
4373 conn_data_locked = 1;
4375 if ((conn->lastPingSizeSer == serial) && (conn->lastPingSize)) {
4376 /* process mtu ping ack */
4377 pktsize = conn->lastPingSize;
4378 conn->lastPingSizeSer = conn->lastPingSize = 0;
4382 if (conn_data_locked) {
4383 MUTEX_EXIT(&conn->conn_data_lock);
4384 conn_data_locked = 0;
4388 if (rxdebug_active) {
4392 len = _snprintf(msg, sizeof(msg),
4393 "tid[%d] RACK: reason %s serial %u previous %u seq %u first %u acks %u space %u ",
4394 GetCurrentThreadId(), rx_ack_reason(ap->reason),
4395 ntohl(ap->serial), ntohl(ap->previousPacket),
4396 (unsigned int)np->header.seq, ntohl(ap->firstPacket),
4397 ap->nAcks, ntohs(ap->bufferSpace) );
4401 for (offset = 0; offset < nAcks && len < sizeof(msg); offset++)
4402 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
4406 OutputDebugString(msg);
4408 #else /* AFS_NT40_ENV */
4411 "RACK: reason %x previous %u seq %u serial %u first %u",
4412 ap->reason, ntohl(ap->previousPacket),
4413 (unsigned int)np->header.seq, (unsigned int)serial,
4414 ntohl(ap->firstPacket));
4417 for (offset = 0; offset < nAcks; offset++)
4418 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
4423 #endif /* AFS_NT40_ENV */
4426 MUTEX_ENTER(&peer->peer_lock);
/*
 * Start somewhere. Can't assume we can send what we can receive,
 * but we are clearly receiving.
 */
4432 if (!peer->maxPacketSize)
4433 peer->maxPacketSize = RX_MIN_PACKET_SIZE+RX_IPUDP_SIZE;
4435 if (pktsize > peer->maxPacketSize) {
4436 peer->maxPacketSize = pktsize;
4437 if ((pktsize-RX_IPUDP_SIZE > peer->ifMTU)) {
4438 peer->ifMTU=pktsize-RX_IPUDP_SIZE;
4439 peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
4440 rxi_ScheduleGrowMTUEvent(call, 1);
4445 clock_GetTime(&now);
4447 /* The transmit queue splits into 4 sections.
4449 * The first section is packets which have now been acknowledged
4450 * by a window size change in the ack. These have reached the
4451 * application layer, and may be discarded. These are packets
4452 * with sequence numbers < ap->firstPacket.
4454 * The second section is packets which have sequence numbers in
4455 * the range ap->firstPacket to ap->firstPacket + ap->nAcks. The
4456 * contents of the packet's ack array determines whether these
4457 * packets are acknowledged or not.
* The third section is packets which fall above the range
* addressed in the ack packet. These have not yet been received
* by the peer.
*
* The fourth section is packets which have not yet been transmitted.
* These packets will have a header.serial of 0.
*/
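/*
 * Illustrative layout (example numbers, not from the source):
 * with tfirst = 4, ap->firstPacket = 7 and ap->nAcks = 3,
 *
 *   seq:     4  5  6 | 7  8  9 | 10 11 | 12 ...
 *   section: 1  1  1 | 2  2  2 | 3  3  | 4 (header.serial == 0)
 *
 * Section 1 is freed below, section 2 is marked per ap->acks[],
 * and sections 3 and 4 are left untouched.
 */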
/* First section - implicitly acknowledged packets that can be
* disposed of. */
4471 tp = opr_queue_First(&call->tq, struct rx_packet, entry);
4472 while(!opr_queue_IsEnd(&call->tq, &tp->entry) && tp->header.seq < first) {
4473 struct rx_packet *next;
4475 next = opr_queue_Next(&tp->entry, struct rx_packet, entry);
4476 call->tfirst = tp->header.seq + 1;
4478 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
4480 rxi_ComputeRoundTripTime(tp, ap, call, peer, &now);
4483 #ifdef RX_ENABLE_LOCKS
4484 /* XXX Hack. Because we have to release the global call lock when sending
4485 * packets (osi_NetSend) we drop all acks while we're traversing the tq
4486 * in rxi_Start sending packets out because packets may move to the
4487 * freePacketQueue as result of being here! So we drop these packets until
4488 * we're safely out of the traversing. Really ugly!
4489 * To make it even uglier, if we're using fine grain locking, we can
4490 * set the ack bits in the packets and have rxi_Start remove the packets
* when it's done transmitting. */
4493 if (call->flags & RX_CALL_TQ_BUSY) {
4494 tp->flags |= RX_PKTFLAG_ACKED;
4495 call->flags |= RX_CALL_TQ_SOME_ACKED;
4497 #endif /* RX_ENABLE_LOCKS */
4499 opr_queue_Remove(&tp->entry);
4500 #ifdef RX_TRACK_PACKETS
4501 tp->flags &= ~RX_PKTFLAG_TQ;
4503 #ifdef RXDEBUG_PACKET
4505 #endif /* RXDEBUG_PACKET */
4506 rxi_FreePacket(tp); /* rxi_FreePacket mustn't wake up anyone, preemptively. */
4511 /* N.B. we don't turn off any timers here. They'll go away by themselves, anyway */
/* Second section of the queue - packets for which we are receiving
* an ack.
*
4516 * Go through the explicit acks/nacks and record the results in
4517 * the waiting packets. These are packets that can't be released
4518 * yet, even with a positive acknowledge. This positive
4519 * acknowledge only means the packet has been received by the
4520 * peer, not that it will be retained long enough to be sent to
4521 * the peer's upper level. In addition, reset the transmit timers
4522 * of any missing packets (those packets that must be missing
4523 * because this packet was out of sequence) */
4525 call->nSoftAcked = 0;
4527 while (!opr_queue_IsEnd(&call->tq, &tp->entry)
4528 && tp->header.seq < first + nAcks) {
4529 /* Set the acknowledge flag per packet based on the
* information in the ack packet. An acknowledged packet can
* be downgraded when the server has discarded a packet it
* soft-acked previously, or when an ack packet is received
4533 * out of sequence. */
4534 if (ap->acks[tp->header.seq - first] == RX_ACK_TYPE_ACK) {
4535 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
4537 tp->flags |= RX_PKTFLAG_ACKED;
4538 rxi_ComputeRoundTripTime(tp, ap, call, peer, &now);
4545 } else /* RX_ACK_TYPE_NACK */ {
4546 tp->flags &= ~RX_PKTFLAG_ACKED;
4550 tp = opr_queue_Next(&tp->entry, struct rx_packet, entry);
4553 /* We don't need to take any action with the 3rd or 4th section in the
4554 * queue - they're not addressed by the contents of this ACK packet.
4557 /* If the window has been extended by this acknowledge packet,
4558 * then wakeup a sender waiting in alloc for window space, or try
4559 * sending packets now, if he's been sitting on packets due to
4560 * lack of window space */
4561 if (call->tnext < (call->tfirst + call->twind)) {
4562 #ifdef RX_ENABLE_LOCKS
4563 CV_SIGNAL(&call->cv_twind);
4565 if (call->flags & RX_CALL_WAIT_WINDOW_ALLOC) {
4566 call->flags &= ~RX_CALL_WAIT_WINDOW_ALLOC;
4567 osi_rxWakeup(&call->twind);
4570 if (call->flags & RX_CALL_WAIT_WINDOW_SEND) {
4571 call->flags &= ~RX_CALL_WAIT_WINDOW_SEND;
4575 /* if the ack packet has a receivelen field hanging off it,
4576 * update our state */
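/*
 * The trailing words read below, as this code interprets them
 * (a summary, assuming the AFS 3.3/3.4/3.5 ack trailer layout
 * written by rxi_SendAck):
 *
 *   word 0: largest packet the peer can receive (bounds maxMTU)
 *   word 1: peer's "natural" interface packet size (bounds natMTU)
 *   word 2: peer's receive window (AFS 3.4+, bounds twind)
 *   word 3: peer's packets-per-jumbogram (AFS 3.5+)
 *
 * Each is an afs_int32 in network byte order, located at
 * rx_AckDataSize(ap->nAcks) + n * sizeof(afs_int32).
 */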
4577 if (np->length >= rx_AckDataSize(ap->nAcks) + 2 * sizeof(afs_int32)) {
4580 /* If the ack packet has a "recommended" size that is less than
4581 * what I am using now, reduce my size to match */
4582 rx_packetread(np, rx_AckDataSize(ap->nAcks) + (int)sizeof(afs_int32),
4583 (int)sizeof(afs_int32), &tSize);
4584 tSize = (afs_uint32) ntohl(tSize);
4585 peer->natMTU = rxi_AdjustIfMTU(MIN(tSize, peer->ifMTU));
4587 /* Get the maximum packet size to send to this peer */
4588 rx_packetread(np, rx_AckDataSize(ap->nAcks), (int)sizeof(afs_int32),
4590 tSize = (afs_uint32) ntohl(tSize);
4591 tSize = (afs_uint32) MIN(tSize, rx_MyMaxSendSize);
4592 tSize = rxi_AdjustMaxMTU(peer->natMTU, tSize);
4594 /* sanity check - peer might have restarted with different params.
4595 * If peer says "send less", dammit, send less... Peer should never
4596 * be unable to accept packets of the size that prior AFS versions would
4597 * send without asking. */
4598 if (peer->maxMTU != tSize) {
4599 if (peer->maxMTU > tSize) /* possible cong., maxMTU decreased */
4601 peer->maxMTU = tSize;
4602 peer->MTU = MIN(tSize, peer->MTU);
4603 call->MTU = MIN(call->MTU, tSize);
4606 if (np->length == rx_AckDataSize(ap->nAcks) + 3 * sizeof(afs_int32)) {
4609 rx_AckDataSize(ap->nAcks) + 2 * (int)sizeof(afs_int32),
4610 (int)sizeof(afs_int32), &tSize);
4611 tSize = (afs_uint32) ntohl(tSize); /* peer's receive window, if it's */
4612 if (tSize < call->twind) { /* smaller than our send */
4613 call->twind = tSize; /* window, we must send less... */
4614 call->ssthresh = MIN(call->twind, call->ssthresh);
4615 call->conn->twind[call->channel] = call->twind;
4618 /* Only send jumbograms to 3.4a fileservers. 3.3a RX gets the
4619 * network MTU confused with the loopback MTU. Calculate the
* maximum MTU here for use in the slow start code below. */
4622 /* Did peer restart with older RX version? */
4623 if (peer->maxDgramPackets > 1) {
4624 peer->maxDgramPackets = 1;
4626 } else if (np->length >=
4627 rx_AckDataSize(ap->nAcks) + 4 * sizeof(afs_int32)) {
4630 rx_AckDataSize(ap->nAcks) + 2 * (int)sizeof(afs_int32),
4631 sizeof(afs_int32), &tSize);
4632 tSize = (afs_uint32) ntohl(tSize);
/*
 * As of AFS 3.5 we set the send window to match the receive window.
 */
4636 if (tSize < call->twind) {
4637 call->twind = tSize;
4638 call->conn->twind[call->channel] = call->twind;
4639 call->ssthresh = MIN(call->twind, call->ssthresh);
4640 } else if (tSize > call->twind) {
4641 call->twind = tSize;
4642 call->conn->twind[call->channel] = call->twind;
/*
 * As of AFS 3.5, a jumbogram is more than one fixed size
 * packet transmitted in a single UDP datagram. If the remote
 * MTU is smaller than our local MTU then never send a datagram
 * larger than the natural MTU.
 */
4652 rx_AckDataSize(ap->nAcks) + 3 * (int)sizeof(afs_int32),
4653 (int)sizeof(afs_int32), &tSize);
4654 maxDgramPackets = (afs_uint32) ntohl(tSize);
4655 maxDgramPackets = MIN(maxDgramPackets, rxi_nDgramPackets);
4657 MIN(maxDgramPackets, (int)(peer->ifDgramPackets));
4658 if (maxDgramPackets > 1) {
4659 peer->maxDgramPackets = maxDgramPackets;
4660 call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
4662 peer->maxDgramPackets = 1;
4663 call->MTU = peer->natMTU;
4665 } else if (peer->maxDgramPackets > 1) {
4666 /* Restarted with lower version of RX */
4667 peer->maxDgramPackets = 1;
4669 } else if (peer->maxDgramPackets > 1
4670 || peer->maxMTU != OLD_MAX_PACKET_SIZE) {
4671 /* Restarted with lower version of RX */
4672 peer->maxMTU = OLD_MAX_PACKET_SIZE;
4673 peer->natMTU = OLD_MAX_PACKET_SIZE;
4674 peer->MTU = OLD_MAX_PACKET_SIZE;
4675 peer->maxDgramPackets = 1;
4676 peer->nDgramPackets = 1;
4678 call->MTU = OLD_MAX_PACKET_SIZE;
/*
 * Calculate how many datagrams were successfully received after
 * the first missing packet and adjust the negative ack counter
 * accordingly.
 */
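/* The division below rounds up, so partially lost datagrams count
 * as lost; e.g. (hypothetical numbers) 5 nacked packets with
 * nDgramPackets of 4 count as 2 lost datagrams. */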
4689 nNacked = (nNacked + call->nDgramPackets - 1) / call->nDgramPackets;
4690 if (call->nNacks < nNacked) {
4691 call->nNacks = nNacked;
4694 call->nAcks += newAckCount;
4698 /* If the packet contained new acknowledgements, rather than just
* being a duplicate of one we have previously seen, then we can restart
* the RTT timer. */
4702 if (newAckCount > 0)
4703 rxi_rto_packet_acked(call, istack);
4705 if (call->flags & RX_CALL_FAST_RECOVER) {
4706 if (newAckCount == 0) {
4707 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
4709 call->flags &= ~RX_CALL_FAST_RECOVER;
4710 call->cwind = call->nextCwind;
4711 call->nextCwind = 0;
4714 call->nCwindAcks = 0;
4715 } else if (nNacked && call->nNacks >= (u_short) rx_nackThreshold) {
4716 /* Three negative acks in a row trigger congestion recovery */
4717 call->flags |= RX_CALL_FAST_RECOVER;
4718 call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
4720 MIN((int)(call->ssthresh + rx_nackThreshold), rx_maxSendWindow);
4721 call->nDgramPackets = MAX(2, (int)call->nDgramPackets) >> 1;
4722 call->nextCwind = call->ssthresh;
4725 peer->MTU = call->MTU;
4726 peer->cwind = call->nextCwind;
4727 peer->nDgramPackets = call->nDgramPackets;
4729 call->congestSeq = peer->congestSeq;
4731 /* Reset the resend times on the packets that were nacked
* so we will retransmit as soon as the window permits. */
4736 for (opr_queue_ScanBackwards(&call->tq, cursor)) {
4737 struct rx_packet *tp =
4738 opr_queue_Entry(cursor, struct rx_packet, entry);
4740 if (!(tp->flags & RX_PKTFLAG_ACKED)) {
4741 tp->flags &= ~RX_PKTFLAG_SENT;
4743 } else if (tp->flags & RX_PKTFLAG_ACKED) {
4748 /* If cwind is smaller than ssthresh, then increase
* the window one packet for each ack we receive (exponential
* growth).
* If cwind is greater than or equal to ssthresh then increase
4752 * the congestion window by one packet for each cwind acks we
4753 * receive (linear growth). */
4754 if (call->cwind < call->ssthresh) {
4756 MIN((int)call->ssthresh, (int)(call->cwind + newAckCount));
4757 call->nCwindAcks = 0;
4759 call->nCwindAcks += newAckCount;
4760 if (call->nCwindAcks >= call->cwind) {
4761 call->nCwindAcks = 0;
4762 call->cwind = MIN((int)(call->cwind + 1), rx_maxSendWindow);
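/*
 * Illustrative numbers: with ssthresh 16 and cwind 8, four newly
 * acked packets grow cwind to 12 (slow start). Once cwind reaches
 * ssthresh, a full window's worth of acks (nCwindAcks >= cwind) is
 * needed to add a single packet (congestion avoidance).
 */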
/*
 * If we have received several acknowledgements in a row then
 * it is time to increase the size of our datagrams
 */
4769 if ((int)call->nAcks > rx_nDgramThreshold) {
4770 if (peer->maxDgramPackets > 1) {
4771 if (call->nDgramPackets < peer->maxDgramPackets) {
4772 call->nDgramPackets++;
4774 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
4775 } else if (call->MTU < peer->maxMTU) {
4776 /* don't upgrade if we can't handle it */
4777 if ((call->nDgramPackets == 1) && (call->MTU >= peer->ifMTU))
4778 call->MTU = peer->ifMTU;
4780 call->MTU += peer->natMTU;
4781 call->MTU = MIN(call->MTU, peer->maxMTU);
4788 MUTEX_EXIT(&peer->peer_lock); /* rxi_Start will lock peer. */
4790 /* Servers need to hold the call until all response packets have
4791 * been acknowledged. Soft acks are good enough since clients
4792 * are not allowed to clear their receive queues. */
4793 if (call->state == RX_STATE_HOLD
4794 && call->tfirst + call->nSoftAcked >= call->tnext) {
4795 call->state = RX_STATE_DALLY;
4796 rxi_ClearTransmitQueue(call, 0);
4797 rxi_CancelKeepAliveEvent(call);
4798 } else if (!opr_queue_IsEmpty(&call->tq)) {
4799 rxi_Start(call, istack);
4804 /* Received a response to a challenge packet */
4805 static struct rx_packet *
4806 rxi_ReceiveResponsePacket(struct rx_connection *conn,
4807 struct rx_packet *np, int istack)
4811 /* Ignore the packet if we're the client */
4812 if (conn->type == RX_CLIENT_CONNECTION)
4815 /* If already authenticated, ignore the packet (it's probably a retry) */
4816 if (RXS_CheckAuthentication(conn->securityObject, conn) == 0)
4819 /* Otherwise, have the security object evaluate the response packet */
4820 error = RXS_CheckResponse(conn->securityObject, conn, np);
4822 /* If the response is invalid, reset the connection, sending
4823 * an abort to the peer */
4827 rxi_ConnectionError(conn, error);
4828 MUTEX_ENTER(&conn->conn_data_lock);
4829 np = rxi_SendConnectionAbort(conn, np, istack, 0);
4830 MUTEX_EXIT(&conn->conn_data_lock);
4833 /* If the response is valid, any calls waiting to attach
4834 * servers can now do so */
4837 for (i = 0; i < RX_MAXCALLS; i++) {
4838 struct rx_call *call = conn->call[i];
4840 MUTEX_ENTER(&call->lock);
4841 if (call->state == RX_STATE_PRECALL)
4842 rxi_AttachServerProc(call, (osi_socket) - 1, NULL, NULL);
4843 /* tnop can be null if newcallp is null */
4844 MUTEX_EXIT(&call->lock);
4848 /* Update the peer reachability information, just in case
4849 * some calls went into attach-wait while we were waiting
* for authentication. */
4852 rxi_UpdatePeerReach(conn, NULL);
4857 /* A client has received an authentication challenge: the security
4858 * object is asked to cough up a respectable response packet to send
4859 * back to the server. The server is responsible for retrying the
4860 * challenge if it fails to get a response. */
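/*
 * The exchange, roughly (an informal sketch):
 *
 *   server                             client
 *     |-- CHALLENGE -------------------->|  rxi_ReceiveChallengePacket
 *     |<-- RESPONSE ---------------------|  (RXS_GetResponse fills np)
 *   rxi_ReceiveResponsePacket
 *   (RXS_CheckResponse verifies; failure aborts the connection,
 *    success attaches any calls waiting in PRECALL)
 */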
4862 static struct rx_packet *
4863 rxi_ReceiveChallengePacket(struct rx_connection *conn,
4864 struct rx_packet *np, int istack)
4868 /* Ignore the challenge if we're the server */
4869 if (conn->type == RX_SERVER_CONNECTION)
4872 /* Ignore the challenge if the connection is otherwise idle; someone's
4873 * trying to use us as an oracle. */
4874 if (!rxi_HasActiveCalls(conn))
4877 /* Send the security object the challenge packet. It is expected to fill
4878 * in the response. */
4879 error = RXS_GetResponse(conn->securityObject, conn, np);
4881 /* If the security object is unable to return a valid response, reset the
4882 * connection and send an abort to the peer. Otherwise send the response
4883 * packet to the peer connection. */
4885 rxi_ConnectionError(conn, error);
4886 MUTEX_ENTER(&conn->conn_data_lock);
4887 np = rxi_SendConnectionAbort(conn, np, istack, 0);
4888 MUTEX_EXIT(&conn->conn_data_lock);
4890 np = rxi_SendSpecial((struct rx_call *)0, conn, np,
4891 RX_PACKET_TYPE_RESPONSE, NULL, -1, istack);
4897 /* Find an available server process to service the current request in
4898 * the given call structure. If one isn't available, queue up this
4899 * call so it eventually gets one */
4901 rxi_AttachServerProc(struct rx_call *call,
4902 osi_socket socket, int *tnop,
4903 struct rx_call **newcallp)
4905 struct rx_serverQueueEntry *sq;
4906 struct rx_service *service = call->conn->service;
4909 /* May already be attached */
4910 if (call->state == RX_STATE_ACTIVE)
4913 MUTEX_ENTER(&rx_serverPool_lock);
4915 haveQuota = QuotaOK(service);
4916 if ((!haveQuota) || opr_queue_IsEmpty(&rx_idleServerQueue)) {
4917 /* If there are no processes available to service this call,
4918 * put the call on the incoming call queue (unless it's
* already on the queue).
*/
4921 #ifdef RX_ENABLE_LOCKS
4923 ReturnToServerPool(service);
4924 #endif /* RX_ENABLE_LOCKS */
4926 if (!(call->flags & RX_CALL_WAIT_PROC)) {
4927 call->flags |= RX_CALL_WAIT_PROC;
4928 rx_atomic_inc(&rx_nWaiting);
4929 rx_atomic_inc(&rx_nWaited);
4930 rxi_calltrace(RX_CALL_ARRIVAL, call);
4931 SET_CALL_QUEUE_LOCK(call, &rx_serverPool_lock);
4932 opr_queue_Append(&rx_incomingCallQueue, &call->entry);
4935 sq = opr_queue_Last(&rx_idleServerQueue,
4936 struct rx_serverQueueEntry, entry);
4938 /* If hot threads are enabled, and both newcallp and sq->socketp
4939 * are non-null, then this thread will process the call, and the
* idle server thread will start listening on this thread's socket. */
4942 opr_queue_Remove(&sq->entry);
4944 if (rx_enable_hot_thread && newcallp && sq->socketp) {
4947 *sq->socketp = socket;
4948 clock_GetTime(&call->startTime);
4949 CALL_HOLD(call, RX_CALL_REFCOUNT_BEGIN);
4953 if (call->flags & RX_CALL_WAIT_PROC) {
4954 /* Conservative: I don't think this should happen */
4955 call->flags &= ~RX_CALL_WAIT_PROC;
4956 rx_atomic_dec(&rx_nWaiting);
4957 if (opr_queue_IsOnQueue(&call->entry)) {
4958 opr_queue_Remove(&call->entry);
4961 call->state = RX_STATE_ACTIVE;
4962 call->app.mode = RX_MODE_RECEIVING;
4963 #ifdef RX_KERNEL_TRACE
4965 int glockOwner = ISAFS_GLOCK();
4968 afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
4969 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
4975 if (call->flags & RX_CALL_CLEARED) {
4976 /* send an ack now to start the packet flow up again */
4977 call->flags &= ~RX_CALL_CLEARED;
4978 rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
4980 #ifdef RX_ENABLE_LOCKS
4983 service->nRequestsRunning++;
4984 MUTEX_ENTER(&rx_quota_mutex);
4985 if (service->nRequestsRunning <= service->minProcs)
4988 MUTEX_EXIT(&rx_quota_mutex);
4992 MUTEX_EXIT(&rx_serverPool_lock);
4995 /* Delay the sending of an acknowledge event for a short while, while
4996 * a new call is being prepared (in the case of a client) or a reply
4997 * is being prepared (in the case of a server). Rather than sending
4998 * an ack packet, an ACKALL packet is sent. */
5000 rxi_AckAll(struct rx_call *call)
5002 rxi_SendSpecial(call, call->conn, NULL, RX_PACKET_TYPE_ACKALL,
5004 call->flags |= RX_CALL_ACKALL_SENT;
5008 rxi_SendDelayedAck(struct rxevent *event, void *arg1, void *unused1,
5011 struct rx_call *call = arg1;
5012 #ifdef RX_ENABLE_LOCKS
5014 MUTEX_ENTER(&call->lock);
5015 if (event == call->delayedAckEvent)
5016 rxevent_Put(&call->delayedAckEvent);
5017 CALL_RELE(call, RX_CALL_REFCOUNT_DELAY);
5019 (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
5021 MUTEX_EXIT(&call->lock);
5022 #else /* RX_ENABLE_LOCKS */
5024 rxevent_Put(&call->delayedAckEvent);
5025 (void)rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
5026 #endif /* RX_ENABLE_LOCKS */
5029 #ifdef RX_ENABLE_LOCKS
5030 /* Set ack in all packets in transmit queue. rxi_Start will deal with
* clearing them out. */
5034 rxi_SetAcksInTransmitQueue(struct rx_call *call)
5036 struct opr_queue *cursor;
5039 for (opr_queue_Scan(&call->tq, cursor)) {
5041 = opr_queue_Entry(cursor, struct rx_packet, entry);
5043 p->flags |= RX_PKTFLAG_ACKED;
5048 call->flags |= RX_CALL_TQ_CLEARME;
5049 call->flags |= RX_CALL_TQ_SOME_ACKED;
5052 rxi_rto_cancel(call);
5054 call->tfirst = call->tnext;
5055 call->nSoftAcked = 0;
5057 if (call->flags & RX_CALL_FAST_RECOVER) {
5058 call->flags &= ~RX_CALL_FAST_RECOVER;
5059 call->cwind = call->nextCwind;
5060 call->nextCwind = 0;
5063 CV_SIGNAL(&call->cv_twind);
5065 #endif /* RX_ENABLE_LOCKS */
/*
 * Acknowledge the whole transmit queue.
 *
 * If we're running without locks, or the transmit queue isn't busy, then
 * we can just clear the queue now. Otherwise, we have to mark all of the
 * packets as acknowledged, and let rxi_Start clear it later on.
 */
5075 rxi_AckAllInTransmitQueue(struct rx_call *call)
5077 #ifdef RX_ENABLE_LOCKS
5078 if (call->flags & RX_CALL_TQ_BUSY) {
5079 rxi_SetAcksInTransmitQueue(call);
5083 rxi_ClearTransmitQueue(call, 0);
5085 /* Clear out the transmit queue for the current call (all packets have
5086 * been received by peer) */
5088 rxi_ClearTransmitQueue(struct rx_call *call, int force)
5090 #ifdef RX_ENABLE_LOCKS
5091 struct opr_queue *cursor;
5092 if (!force && (call->flags & RX_CALL_TQ_BUSY)) {
5094 for (opr_queue_Scan(&call->tq, cursor)) {
5096 = opr_queue_Entry(cursor, struct rx_packet, entry);
5098 p->flags |= RX_PKTFLAG_ACKED;
5102 call->flags |= RX_CALL_TQ_CLEARME;
5103 call->flags |= RX_CALL_TQ_SOME_ACKED;
5106 #endif /* RX_ENABLE_LOCKS */
5107 #ifdef RXDEBUG_PACKET
5109 #endif /* RXDEBUG_PACKET */
5110 rxi_FreePackets(0, &call->tq);
5111 rxi_WakeUpTransmitQueue(call);
5112 #ifdef RX_ENABLE_LOCKS
5113 call->flags &= ~RX_CALL_TQ_CLEARME;
5117 rxi_rto_cancel(call);
5118 call->tfirst = call->tnext; /* implicitly acknowledge all data already sent */
5119 call->nSoftAcked = 0;
5121 if (call->flags & RX_CALL_FAST_RECOVER) {
5122 call->flags &= ~RX_CALL_FAST_RECOVER;
5123 call->cwind = call->nextCwind;
5125 #ifdef RX_ENABLE_LOCKS
5126 CV_SIGNAL(&call->cv_twind);
5128 osi_rxWakeup(&call->twind);
5133 rxi_ClearReceiveQueue(struct rx_call *call)
5135 if (!opr_queue_IsEmpty(&call->rq)) {
5138 count = rxi_FreePackets(0, &call->rq);
5139 rx_packetReclaims += count;
5140 #ifdef RXDEBUG_PACKET
5142 if ( call->rqc != 0 )
5143 dpf(("rxi_ClearReceiveQueue call %"AFS_PTR_FMT" rqc %u != 0\n", call, call->rqc));
5145 call->flags &= ~(RX_CALL_RECEIVE_DONE | RX_CALL_HAVE_LAST);
5147 if (call->state == RX_STATE_PRECALL) {
5148 call->flags |= RX_CALL_CLEARED;
5152 /* Send an abort packet for the specified call */
5153 static struct rx_packet *
5154 rxi_SendCallAbort(struct rx_call *call, struct rx_packet *packet,
5155 int istack, int force)
5157 afs_int32 error, cerror;
5158 struct clock when, now;
5163 switch (call->error) {
5166 cerror = RX_CALL_TIMEOUT;
5169 cerror = call->error;
5172 /* Clients should never delay abort messages */
5173 if (rx_IsClientConn(call->conn))
5176 if (call->abortCode != cerror) {
5177 call->abortCode = cerror;
5178 call->abortCount = 0;
5181 if (force || rxi_callAbortThreshhold == 0
5182 || call->abortCount < rxi_callAbortThreshhold) {
5183 rxi_CancelDelayedAbortEvent(call);
5184 error = htonl(cerror);
5187 rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
5188 (char *)&error, sizeof(error), istack);
5189 } else if (!call->delayedAbortEvent) {
5190 clock_GetTime(&now);
5192 clock_Addmsec(&when, rxi_callAbortDelay);
5193 CALL_HOLD(call, RX_CALL_REFCOUNT_ABORT);
5194 call->delayedAbortEvent =
5195 rxevent_Post(&when, &now, rxi_SendDelayedCallAbort, call, 0, 0);
5201 rxi_CancelDelayedAbortEvent(struct rx_call *call)
5203 if (call->delayedAbortEvent) {
5204 rxevent_Cancel(&call->delayedAbortEvent);
5205 CALL_RELE(call, RX_CALL_REFCOUNT_ABORT);
5209 /* Send an abort packet for the specified connection. Packet is an
5210 * optional pointer to a packet that can be used to send the abort.
* Once the number of abort messages reaches the threshold, an
5212 * event is scheduled to send the abort. Setting the force flag
5213 * overrides sending delayed abort messages.
5215 * NOTE: Called with conn_data_lock held. conn_data_lock is dropped
* to send the abort packet.
*/
5219 rxi_SendConnectionAbort(struct rx_connection *conn,
5220 struct rx_packet *packet, int istack, int force)
5223 struct clock when, now;
5228 /* Clients should never delay abort messages */
5229 if (rx_IsClientConn(conn))
5232 if (force || rxi_connAbortThreshhold == 0
5233 || conn->abortCount < rxi_connAbortThreshhold) {
5235 rxevent_Cancel(&conn->delayedAbortEvent);
5236 error = htonl(conn->error);
5238 MUTEX_EXIT(&conn->conn_data_lock);
5240 rxi_SendSpecial((struct rx_call *)0, conn, packet,
5241 RX_PACKET_TYPE_ABORT, (char *)&error,
5242 sizeof(error), istack);
5243 MUTEX_ENTER(&conn->conn_data_lock);
5244 } else if (!conn->delayedAbortEvent) {
5245 clock_GetTime(&now);
5247 clock_Addmsec(&when, rxi_connAbortDelay);
5248 conn->delayedAbortEvent =
5249 rxevent_Post(&when, &now, rxi_SendDelayedConnAbort, conn, NULL, 0);
/* Associate an error with all of the calls owned by a connection. Called
* with error non-zero. This is only for really fatal things, like
* bad authentication responses. The connection itself is set in
* error at this point, so that future packets received will be
* rejected. */
5260 rxi_ConnectionError(struct rx_connection *conn,
5266 dpf(("rxi_ConnectionError conn %"AFS_PTR_FMT" error %d\n", conn, error));
5268 MUTEX_ENTER(&conn->conn_data_lock);
5269 rxevent_Cancel(&conn->challengeEvent);
5270 rxevent_Cancel(&conn->natKeepAliveEvent);
5271 if (conn->checkReachEvent) {
5272 rxevent_Cancel(&conn->checkReachEvent);
5273 conn->flags &= ~(RX_CONN_ATTACHWAIT|RX_CONN_NAT_PING);
5274 putConnection(conn);
5276 MUTEX_EXIT(&conn->conn_data_lock);
5277 for (i = 0; i < RX_MAXCALLS; i++) {
5278 struct rx_call *call = conn->call[i];
5280 MUTEX_ENTER(&call->lock);
5281 rxi_CallError(call, error);
5282 MUTEX_EXIT(&call->lock);
5285 conn->error = error;
5286 if (rx_stats_active)
5287 rx_atomic_inc(&rx_stats.fatalErrors);
/*!
 * Interrupt an in-progress call with the specified error and wakeup waiters.
 *
 * @param[in] call The call to interrupt
 * @param[in] error The error code to send to the peer
 */
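/*
 * Hypothetical caller (not from the source): a server shutting
 * down could abort each call it still holds with
 *
 *     rx_InterruptCall(call, RX_RESTARTING);
 *
 * and the peer would see the error as a call abort.
 */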
5298 rx_InterruptCall(struct rx_call *call, afs_int32 error)
5300 MUTEX_ENTER(&call->lock);
5301 rxi_CallError(call, error);
5302 rxi_SendCallAbort(call, NULL, 0, 1);
5303 MUTEX_EXIT(&call->lock);
5307 rxi_CallError(struct rx_call *call, afs_int32 error)
5309 MUTEX_ASSERT(&call->lock);
5310 dpf(("rxi_CallError call %"AFS_PTR_FMT" error %d call->error %d\n", call, error, call->error));
5312 error = call->error;
5314 #ifdef RX_ENABLE_LOCKS
5315 if (!((call->flags & RX_CALL_TQ_BUSY) || (call->tqWaiters > 0))) {
5316 rxi_ResetCall(call, 0);
5319 rxi_ResetCall(call, 0);
5321 call->error = error;
5324 /* Reset various fields in a call structure, and wakeup waiting
5325 * processes. Some fields aren't changed: state & mode are not
5326 * touched (these must be set by the caller), and bufptr, nLeft, and
5327 * nFree are not reset, since these fields are manipulated by
* unprotected macros, and may only be reset by non-interrupting code.
*/
5332 rxi_ResetCall(struct rx_call *call, int newcall)
5335 struct rx_peer *peer;
5336 struct rx_packet *packet;
5338 MUTEX_ASSERT(&call->lock);
5339 dpf(("rxi_ResetCall(call %"AFS_PTR_FMT", newcall %d)\n", call, newcall));
5341 /* Notify anyone who is waiting for asynchronous packet arrival */
5342 if (call->arrivalProc) {
5343 (*call->arrivalProc) (call, call->arrivalProcHandle,
5344 call->arrivalProcArg);
5345 call->arrivalProc = (void (*)())0;
5349 rxi_CancelGrowMTUEvent(call);
5351 if (call->delayedAbortEvent) {
5352 rxi_CancelDelayedAbortEvent(call);
5353 packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
5355 rxi_SendCallAbort(call, packet, 0, 1);
5356 rxi_FreePacket(packet);
/*
 * Update the peer with the congestion information in this call
 * so other calls on this connection can pick up where this call
 * left off. If the congestion sequence numbers don't match then
 * another call experienced a retransmission.
 */
5366 peer = call->conn->peer;
5367 MUTEX_ENTER(&peer->peer_lock);
5369 if (call->congestSeq == peer->congestSeq) {
5370 peer->cwind = MAX(peer->cwind, call->cwind);
5371 peer->MTU = MAX(peer->MTU, call->MTU);
5372 peer->nDgramPackets =
5373 MAX(peer->nDgramPackets, call->nDgramPackets);
5376 call->abortCode = 0;
5377 call->abortCount = 0;
5379 if (peer->maxDgramPackets > 1) {
5380 call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
5382 call->MTU = peer->MTU;
5384 call->cwind = MIN((int)peer->cwind, (int)peer->nDgramPackets);
5385 call->ssthresh = rx_maxSendWindow;
5386 call->nDgramPackets = peer->nDgramPackets;
5387 call->congestSeq = peer->congestSeq;
5388 call->rtt = peer->rtt;
5389 call->rtt_dev = peer->rtt_dev;
5390 clock_Zero(&call->rto);
5391 clock_Addmsec(&call->rto,
5392 MAX(((call->rtt >> 3) + call->rtt_dev), rx_minPeerTimeout) + 200);
5393 MUTEX_EXIT(&peer->peer_lock);
5395 flags = call->flags;
5396 rxi_WaitforTQBusy(call);
5398 rxi_ClearTransmitQueue(call, 1);
5399 if (call->tqWaiters || (flags & RX_CALL_TQ_WAIT)) {
5400 dpf(("rcall %"AFS_PTR_FMT" has %d waiters and flags %d\n", call, call->tqWaiters, call->flags));
5404 if (!newcall && (flags & RX_CALL_PEER_BUSY)) {
5405 /* The call channel is still busy; resetting the call doesn't change
5406 * that. However, if 'newcall' is set, we are processing a call
5407 * structure that has either been recycled from the free list, or has
5408 * been newly allocated. So, RX_CALL_PEER_BUSY is not relevant if
5409 * 'newcall' is set, since it describes a completely different call
5410 * channel which we do not care about. */
5411 call->flags |= RX_CALL_PEER_BUSY;
5414 rxi_ClearReceiveQueue(call);
5415 /* why init the queue if you just emptied it? queue_Init(&call->rq); */
5419 call->twind = call->conn->twind[call->channel];
5420 call->rwind = call->conn->rwind[call->channel];
5421 call->nSoftAcked = 0;
5422 call->nextCwind = 0;
5425 call->nCwindAcks = 0;
5426 call->nSoftAcks = 0;
5427 call->nHardAcks = 0;
5429 call->tfirst = call->rnext = call->tnext = 1;
5432 call->lastAcked = 0;
5433 call->localStatus = call->remoteStatus = 0;
5435 if (flags & RX_CALL_READER_WAIT) {
5436 #ifdef RX_ENABLE_LOCKS
5437 CV_BROADCAST(&call->cv_rq);
5439 osi_rxWakeup(&call->rq);
5442 if (flags & RX_CALL_WAIT_PACKETS) {
5443 MUTEX_ENTER(&rx_freePktQ_lock);
5444 rxi_PacketsUnWait(); /* XXX */
5445 MUTEX_EXIT(&rx_freePktQ_lock);
5447 #ifdef RX_ENABLE_LOCKS
5448 CV_SIGNAL(&call->cv_twind);
5450 if (flags & RX_CALL_WAIT_WINDOW_ALLOC)
5451 osi_rxWakeup(&call->twind);
5454 if (flags & RX_CALL_WAIT_PROC) {
5455 rx_atomic_dec(&rx_nWaiting);
5457 #ifdef RX_ENABLE_LOCKS
5458 /* The following ensures that we don't mess with any queue while some
* other thread might also be doing so. The call_queue_lock field
* is only modified under the call lock. If the call is in the process
* of being removed from a queue, the call is not locked until the
* queue lock is dropped and only then is the call_queue_lock field
* zero'd out. So it's safe to lock the queue if call_queue_lock is set.
* Note that any other routine which removes a call from a queue has to
* obtain the queue lock before examining the queue and removing the call.
*/
5467 if (call->call_queue_lock) {
5468 MUTEX_ENTER(call->call_queue_lock);
5469 if (opr_queue_IsOnQueue(&call->entry)) {
5470 opr_queue_Remove(&call->entry);
5472 MUTEX_EXIT(call->call_queue_lock);
5473 CLEAR_CALL_QUEUE_LOCK(call);
5475 #else /* RX_ENABLE_LOCKS */
5476 if (opr_queue_IsOnQueue(&call->entry)) {
5477 opr_queue_Remove(&call->entry);
5479 #endif /* RX_ENABLE_LOCKS */
5481 rxi_CancelKeepAliveEvent(call);
5482 rxi_CancelDelayedAckEvent(call);
5485 /* Send an acknowledge for the indicated packet (seq,serial) of the
5486 * indicated call, for the indicated reason (reason). This
5487 * acknowledge will specifically acknowledge receiving the packet, and
5488 * will also specify which other packets for this call have been
5489 * received. This routine returns the packet that was used to the
5490 * caller. The caller is responsible for freeing it or re-using it.
5491 * This acknowledgement also returns the highest sequence number
5492 * actually read out by the higher level to the sender; the sender
5493 * promises to keep around packets that have not been read by the
5494 * higher level yet (unless, of course, the sender decides to abort
5495 * the call altogether). Any of p, seq, serial, pflags, or reason may
5496 * be set to zero without ill effect. That is, if they are zero, they
5497 * will not convey any information.
5498 * NOW there is a trailer field, after the ack where it will safely be
5499 * ignored by mundanes, which indicates the maximum size packet this
5500 * host can swallow. */
/*
    struct rx_packet *optionalPacket;  use to send ack (or null)
    int seq;                    Sequence number of the packet we are acking
    int serial;                 Serial number of the packet
    int pflags;                 Flags field from packet header
    int reason;                 Reason an acknowledge was prompted
*/
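/*
 * On the wire, the ack assembled below looks roughly like this
 * (an informal sketch of struct rx_ackPacket plus its trailer):
 *
 *   header fields:   bufferSpace, maxSkew, firstPacket,
 *                    previousPacket, serial, reason, nAcks
 *   acks[nAcks]:     one RX_ACK_TYPE_ACK/NACK byte per packet in
 *                    [firstPacket, firstPacket + nAcks)
 *   4 trailing words: max receive size, ifMTU, rwind,
 *                    ifDgramPackets (all network byte order)
 */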
5510 rxi_SendAck(struct rx_call *call,
5511 struct rx_packet *optionalPacket, int serial, int reason,
5514 struct rx_ackPacket *ap;
5515 struct rx_packet *p;
5516 struct opr_queue *cursor;
5519 afs_uint32 padbytes = 0;
5520 #ifdef RX_ENABLE_TSFPQ
5521 struct rx_ts_info_t * rx_ts_info;
/*
 * Open the receive window once a thread starts reading packets
 */
5527 if (call->rnext > 1) {
5528 call->conn->rwind[call->channel] = call->rwind = rx_maxReceiveWindow;
5531 /* Don't attempt to grow MTU if this is a critical ping */
5532 if (reason == RX_ACK_MTU) {
5533 /* keep track of per-call attempts, if we're over max, do in small
* otherwise in larger? set a size to increment by, decrease
* on failure, here?
*/
5537 if (call->conn->peer->maxPacketSize &&
5538 (call->conn->peer->maxPacketSize < OLD_MAX_PACKET_SIZE
5540 padbytes = call->conn->peer->maxPacketSize+16;
5542 padbytes = call->conn->peer->maxMTU + 128;
5544 /* do always try a minimum size ping */
5545 padbytes = MAX(padbytes, RX_MIN_PACKET_SIZE+RX_IPUDP_SIZE+4);
5547 /* subtract the ack payload */
5548 padbytes -= (rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32));
5549 reason = RX_ACK_PING;
5552 call->nHardAcks = 0;
5553 call->nSoftAcks = 0;
5554 if (call->rnext > call->lastAcked)
5555 call->lastAcked = call->rnext;
5559 rx_computelen(p, p->length); /* reset length, you never know */
5560 } /* where that's been... */
5561 #ifdef RX_ENABLE_TSFPQ
5563 RX_TS_INFO_GET(rx_ts_info);
5564 if ((p = rx_ts_info->local_special_packet)) {
5565 rx_computelen(p, p->length);
5566 } else if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
5567 rx_ts_info->local_special_packet = p;
5568 } else { /* We won't send the ack, but don't panic. */
5569 return optionalPacket;
5573 else if (!(p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL))) {
5574 /* We won't send the ack, but don't panic. */
5575 return optionalPacket;
5580 rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32) -
5583 if (rxi_AllocDataBuf(p, templ, RX_PACKET_CLASS_SPECIAL) > 0) {
5584 #ifndef RX_ENABLE_TSFPQ
5585 if (!optionalPacket)
5588 return optionalPacket;
5590 templ = rx_AckDataSize(call->rwind) + 2 * sizeof(afs_int32);
5591 if (rx_Contiguous(p) < templ) {
5592 #ifndef RX_ENABLE_TSFPQ
5593 if (!optionalPacket)
5596 return optionalPacket;
5601 /* MTUXXX failing to send an ack is very serious. We should */
5602 /* try as hard as possible to send even a partial ack; it's */
5603 /* better than nothing. */
5604 ap = (struct rx_ackPacket *)rx_DataOf(p);
5605 ap->bufferSpace = htonl(0); /* Something should go here, sometime */
5606 ap->reason = reason;
5608 /* The skew computation used to be bogus, I think it's better now. */
5609 /* We should start paying attention to skew. XXX */
5610 ap->serial = htonl(serial);
5611 ap->maxSkew = 0; /* used to be peer->inPacketSkew */
/*
 * First packet not yet forwarded to reader. When ACKALL has been
 * sent the peer has been told that all received packets will be
 * delivered to the reader. The value 'rnext' is used internally
 * to refer to the next packet in the receive queue that must be
 * delivered to the reader. From the peer's perspective those
 * packets have already been delivered, so report the last sequence
 * number plus one if there are packets in the receive queue
 * awaiting processing.
 */
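/* Illustrative: if rnext is 3 and {3, 4, 5} are still queued after
 * an ACKALL, the peer is told firstPacket is 6 rather than 3. */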
5622 if ((call->flags & RX_CALL_ACKALL_SENT) &&
5623 !opr_queue_IsEmpty(&call->rq)) {
5624 ap->firstPacket = htonl(opr_queue_Last(&call->rq, struct rx_packet, entry)->header.seq + 1);
5626 ap->firstPacket = htonl(call->rnext);
5628 ap->previousPacket = htonl(call->rprev); /* Previous packet received */
/* No fear of running out of room in the ack packet here because there can only
5631 * be at most one window full of unacknowledged packets. The window
5632 * size must be constrained to be less than the maximum ack size,
5633 * of course. Also, an ack should always fit into a single packet
5634 * -- it should not ever be fragmented. */
5636 for (opr_queue_Scan(&call->rq, cursor)) {
5637 struct rx_packet *rqp
5638 = opr_queue_Entry(cursor, struct rx_packet, entry);
5640 if (!rqp || !call->rq.next
5641 || (rqp->header.seq > (call->rnext + call->rwind))) {
5642 #ifndef RX_ENABLE_TSFPQ
5643 if (!optionalPacket)
5646 rxi_CallError(call, RX_CALL_DEAD);
5647 return optionalPacket;
5650 while (rqp->header.seq > call->rnext + offset)
5651 ap->acks[offset++] = RX_ACK_TYPE_NACK;
5652 ap->acks[offset++] = RX_ACK_TYPE_ACK;
5654 if ((offset > (u_char) rx_maxReceiveWindow) || (offset > call->rwind)) {
5655 #ifndef RX_ENABLE_TSFPQ
5656 if (!optionalPacket)
5659 rxi_CallError(call, RX_CALL_DEAD);
5660 return optionalPacket;
5666 p->length = rx_AckDataSize(offset) + 4 * sizeof(afs_int32);
5668 /* these are new for AFS 3.3 */
5669 templ = rxi_AdjustMaxMTU(call->conn->peer->ifMTU, rx_maxReceiveSize);
5670 templ = htonl(templ);
5671 rx_packetwrite(p, rx_AckDataSize(offset), sizeof(afs_int32), &templ);
5672 templ = htonl(call->conn->peer->ifMTU);
5673 rx_packetwrite(p, rx_AckDataSize(offset) + sizeof(afs_int32),
5674 sizeof(afs_int32), &templ);
5676 /* new for AFS 3.4 */
5677 templ = htonl(call->rwind);
5678 rx_packetwrite(p, rx_AckDataSize(offset) + 2 * sizeof(afs_int32),
5679 sizeof(afs_int32), &templ);
5681 /* new for AFS 3.5 */
5682 templ = htonl(call->conn->peer->ifDgramPackets);
5683 rx_packetwrite(p, rx_AckDataSize(offset) + 3 * sizeof(afs_int32),
5684 sizeof(afs_int32), &templ);
5686 p->header.serviceId = call->conn->serviceId;
5687 p->header.cid = (call->conn->cid | call->channel);
5688 p->header.callNumber = *call->callNumber;
5690 p->header.securityIndex = call->conn->securityIndex;
5691 p->header.epoch = call->conn->epoch;
5692 p->header.type = RX_PACKET_TYPE_ACK;
5693 p->header.flags = RX_SLOW_START_OK;
5694 if (reason == RX_ACK_PING) {
5695 p->header.flags |= RX_REQUEST_ACK;
5697 p->length = padbytes +
5698 rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32);
5701 /* not fast but we can potentially use this if truncated
* fragments are delivered to figure out the mtu. */
5704 rx_packetwrite(p, rx_AckDataSize(offset) + 4 *
5705 sizeof(afs_int32), sizeof(afs_int32),
5709 if (call->conn->type == RX_CLIENT_CONNECTION)
5710 p->header.flags |= RX_CLIENT_INITIATED;
5714 if (rxdebug_active) {
5718 len = _snprintf(msg, sizeof(msg),
5719 "tid[%d] SACK: reason %s serial %u previous %u seq %u first %u acks %u space %u ",
5720 GetCurrentThreadId(), rx_ack_reason(ap->reason),
5721 ntohl(ap->serial), ntohl(ap->previousPacket),
5722 (unsigned int)p->header.seq, ntohl(ap->firstPacket),
5723 ap->nAcks, ntohs(ap->bufferSpace) );
5727 for (offset = 0; offset < ap->nAcks && len < sizeof(msg); offset++)
5728 msg[len++] = (ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*');
5732 OutputDebugString(msg);
5734 #else /* AFS_NT40_ENV */
5736 fprintf(rx_Log, "SACK: reason %x previous %u seq %u first %u ",
5737 ap->reason, ntohl(ap->previousPacket),
5738 (unsigned int)p->header.seq, ntohl(ap->firstPacket));
5740 for (offset = 0; offset < ap->nAcks; offset++)
5741 putc(ap->acks[offset] == RX_ACK_TYPE_NACK ? '-' : '*',
5746 #endif /* AFS_NT40_ENV */
5749 int i, nbytes = p->length;
5751 for (i = 1; i < p->niovecs; i++) { /* vec 0 is ALWAYS header */
5752 if (nbytes <= p->wirevec[i].iov_len) {
5755 savelen = p->wirevec[i].iov_len;
5757 p->wirevec[i].iov_len = nbytes;
5759 rxi_Send(call, p, istack);
5760 p->wirevec[i].iov_len = savelen;
5764 nbytes -= p->wirevec[i].iov_len;
5767 if (rx_stats_active)
5768 rx_atomic_inc(&rx_stats.ackPacketsSent);
5769 #ifndef RX_ENABLE_TSFPQ
5770 if (!optionalPacket)
5773 return optionalPacket; /* Return packet for re-use by caller */
5777 struct rx_packet **list;
/* Send all of the packets in the list in a single datagram */
5784 rxi_SendList(struct rx_call *call, struct xmitlist *xmit,
5785 int istack, int moreFlag)
5791 struct rx_connection *conn = call->conn;
5792 struct rx_peer *peer = conn->peer;
5794 MUTEX_ENTER(&peer->peer_lock);
5795 peer->nSent += xmit->len;
5796 if (xmit->resending)
5797 peer->reSends += xmit->len;
5798 MUTEX_EXIT(&peer->peer_lock);
5800 if (rx_stats_active) {
5801 if (xmit->resending)
5802 rx_atomic_add(&rx_stats.dataPacketsReSent, xmit->len);
5804 rx_atomic_add(&rx_stats.dataPacketsSent, xmit->len);
5807 clock_GetTime(&now);
5809 if (xmit->list[xmit->len - 1]->header.flags & RX_LAST_PACKET) {
5813 /* Set the packet flags and schedule the resend events */
5814 /* Only request an ack for the last packet in the list */
5815 for (i = 0; i < xmit->len; i++) {
5816 struct rx_packet *packet = xmit->list[i];
5818 /* Record the time sent */
5819 packet->timeSent = now;
5820 packet->flags |= RX_PKTFLAG_SENT;
5822 /* Ask for an ack on retransmitted packets, on every other packet
5823 * if the peer doesn't support slow start. Ask for an ack on every
5824 * packet until the congestion window reaches the ack rate. */
5825 if (packet->header.serial) {
5828 packet->firstSent = now;
5829 if (!lastPacket && (call->cwind <= (u_short) (conn->ackRate + 1)
5830 || (!(call->flags & RX_CALL_SLOW_START_OK)
5831 && (packet->header.seq & 1)))) {
5836 /* Tag this packet as not being the last in this group,
5837 * for the receiver's benefit */
5838 if (i < xmit->len - 1 || moreFlag) {
5839 packet->header.flags |= RX_MORE_PACKETS;
5844 xmit->list[xmit->len - 1]->header.flags |= RX_REQUEST_ACK;
5847 /* Since we're about to send a data packet to the peer, it's
5848 * safe to nuke any scheduled end-of-packets ack */
5849 rxi_CancelDelayedAckEvent(call);
5851 MUTEX_EXIT(&call->lock);
5852 CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
5853 if (xmit->len > 1) {
5854 rxi_SendPacketList(call, conn, xmit->list, xmit->len, istack);
5856 rxi_SendPacket(call, conn, xmit->list[0], istack);
5858 MUTEX_ENTER(&call->lock);
5859 CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
5861 /* Tell the RTO calculation engine that we have sent a packet, and
5862 * if it was the last one */
5863 rxi_rto_packet_sent(call, lastPacket, istack);
5865 /* Update last send time for this call (for keep-alive
5866 * processing), and for the connection (so that we can discover
5867 * idle connections) */
5868 conn->lastSendTime = call->lastSendTime = clock_Sec();
5869 /* Let a set of retransmits trigger an idle timeout */
5870 if (!xmit->resending)
5871 call->lastSendData = call->lastSendTime;
5874 /* When sending packets we need to follow these rules:
5875 * 1. Never send more than maxDgramPackets in a jumbogram.
5876 * 2. Never send a packet with more than two iovecs in a jumbogram.
5877 * 3. Never send a retransmitted packet in a jumbogram.
* 4. Never send more than cwind/4 packets in a jumbogram.
* We always keep the last list we should have sent so we
* can set the RX_MORE_PACKETS flags correctly.
*/
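/*
 * Hypothetical walk-through (glossing over the last/working
 * staging): given six full packets where packet 3 is a
 * retransmission (header.serial != 0), the loop below emits
 * {1,2}, then {3} on its own, and leaves {4,5,6} as the working
 * list for the final flush at the bottom of the function.
 */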
5884 rxi_SendXmitList(struct rx_call *call, struct rx_packet **list, int len,
5889 struct xmitlist working;
5890 struct xmitlist last;
5892 struct rx_peer *peer = call->conn->peer;
5893 int morePackets = 0;
5895 memset(&last, 0, sizeof(struct xmitlist));
5896 working.list = &list[0];
5898 working.resending = 0;
5900 recovery = call->flags & RX_CALL_FAST_RECOVER;
5902 for (i = 0; i < len; i++) {
5903 /* Does the current packet force us to flush the current list? */
5905 && (list[i]->header.serial || (list[i]->flags & RX_PKTFLAG_ACKED)
5906 || list[i]->length > RX_JUMBOBUFFERSIZE)) {
5908 /* This sends the 'last' list and then rolls the current working
5909 * set into the 'last' one, and resets the working set */
5912 rxi_SendList(call, &last, istack, 1);
5913 /* If the call enters an error state stop sending, or if
5914 * we entered congestion recovery mode, stop sending */
5916 || (!recovery && (call->flags & RX_CALL_FAST_RECOVER)))
5921 working.resending = 0;
5922 working.list = &list[i];
5924 /* Add the current packet to the list if it hasn't been acked.
5925 * Otherwise adjust the list pointer to skip the current packet. */
5926 if (!(list[i]->flags & RX_PKTFLAG_ACKED)) {
5929 if (list[i]->header.serial)
5930 working.resending = 1;
5932 /* Do we need to flush the list? */
5933 if (working.len >= (int)peer->maxDgramPackets
5934 || working.len >= (int)call->nDgramPackets
5935 || working.len >= (int)call->cwind
5936 || list[i]->header.serial
5937 || list[i]->length != RX_JUMBOBUFFERSIZE) {
5939 rxi_SendList(call, &last, istack, 1);
5940 /* If the call enters an error state stop sending, or if
5941 * we entered congestion recovery mode, stop sending */
5943 || (!recovery && (call->flags & RX_CALL_FAST_RECOVER)))
5948 working.resending = 0;
5949 working.list = &list[i + 1];
5952 if (working.len != 0) {
5953 osi_Panic("rxi_SendList error");
5955 working.list = &list[i + 1];
5959 /* Send the whole list when the call is in receive mode, when
5960 * the call is in eof mode, when we are in fast recovery mode,
5961 * and when we have the last packet */
5962 /* XXX - The accesses to app.mode aren't safe, as this may be called by
* the listener or event threads.
*/
5965 if ((list[len - 1]->header.flags & RX_LAST_PACKET)
5966 || (call->flags & RX_CALL_FLUSH)
5967 || (call->flags & RX_CALL_FAST_RECOVER)) {
5968 /* Check for the case where the current list contains
5969 * an acked packet. Since we always send retransmissions
5970 * in a separate packet, we only need to check the first
5971 * packet in the list */
5972 if (working.len > 0 && !(working.list[0]->flags & RX_PKTFLAG_ACKED)) {
5976 rxi_SendList(call, &last, istack, morePackets);
5977 /* If the call enters an error state stop sending, or if
5978 * we entered congestion recovery mode, stop sending */
5980 || (!recovery && (call->flags & RX_CALL_FAST_RECOVER)))
5984 rxi_SendList(call, &working, istack, 0);
5986 } else if (last.len > 0) {
5987 rxi_SendList(call, &last, istack, 0);
5988 /* Packets which are in 'working' are not sent by this call */
/*!
 * Check if the peer for the given call is known to be dead
5995 * If the call's peer appears dead (it has encountered fatal network errors
5996 * since the call started) the call is killed with RX_CALL_DEAD if the call
5997 * is active. Otherwise, we do nothing.
5999 * @param[in] call The call to check
6002 * @retval 0 The call is fine, and we haven't done anything to the call
6003 * @retval nonzero The call's peer appears dead, and the call has been
6004 * terminated if it was active
* @pre call->lock must be locked
*/
6009 rxi_CheckPeerDead(struct rx_call *call)
6011 #ifdef AFS_RXERRQ_ENV
6014 if (call->state == RX_STATE_DALLY) {
6018 peererrs = rx_atomic_read(&call->conn->peer->neterrs);
6019 if (call->neterr_gen < peererrs) {
/* we have received network errors since this call started; kill
* the call */
6022 if (call->state == RX_STATE_ACTIVE) {
6023 rxi_CallError(call, RX_CALL_DEAD);
6027 if (call->neterr_gen > peererrs) {
6028 /* someone has reset the number of peer errors; set the call error gen
6029 * so we can detect if more errors are encountered */
6030 call->neterr_gen = peererrs;
6037 rxi_Resend(struct rxevent *event, void *arg0, void *arg1, int istack)
6039 struct rx_call *call = arg0;
6040 struct rx_peer *peer;
6041 struct opr_queue *cursor;
6042 struct clock maxTimeout = { 60, 0 };
6044 MUTEX_ENTER(&call->lock);
6046 peer = call->conn->peer;
6048 /* Make sure that the event pointer is removed from the call
* structure, since there is no longer a per-call retransmission
* event pending. */
6051 if (event == call->resendEvent) {
6052 CALL_RELE(call, RX_CALL_REFCOUNT_RESEND);
6053 rxevent_Put(&call->resendEvent);
6056 rxi_CheckPeerDead(call);
6058 if (rxi_busyChannelError && (call->flags & RX_CALL_PEER_BUSY)) {
    rxi_CheckBusy(call);
}
6062 if (opr_queue_IsEmpty(&call->tq)) {
6063 /* Nothing to do. This means that we've been raced, and that an
6064 * ACK has come in between when we were triggered, and when we
     * actually got to run. */
    goto out;
}
6069 /* We're in loss recovery */
6070 call->flags |= RX_CALL_FAST_RECOVER;
6072 /* Mark all of the pending packets in the queue as being lost */
6073 for (opr_queue_Scan(&call->tq, cursor)) {
6074 struct rx_packet *p = opr_queue_Entry(cursor, struct rx_packet, entry);
6075 if (!(p->flags & RX_PKTFLAG_ACKED))
        p->flags &= ~RX_PKTFLAG_SENT;
}
6079 /* We're resending, so we double the timeout of the call. This will be
6080 * dropped back down by the first successful ACK that we receive.
 * We apply a maximum value here of 60 seconds
 */
6084 clock_Add(&call->rto, &call->rto);
6085 if (clock_Gt(&call->rto, &maxTimeout))
6086 call->rto = maxTimeout;
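/*
 * Illustrative backoff (example values, not from the original source):
 * an RTO of 3 seconds doubles to 6, 12, 24 and 48 seconds on successive
 * retransmission events, and is then pinned at the 60 second ceiling
 * until a successful ACK brings it back down.
 */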
6088 /* Packet loss is most likely due to congestion, so drop our window size
6089 * and start again from the beginning */
if (peer->maxDgramPackets > 1) {
    call->MTU = RX_JUMBOBUFFERSIZE + RX_HEADER_SIZE;
} else {
    call->MTU = MIN(peer->natMTU, peer->maxMTU);
}
6094 call->ssthresh = MAX(4, MIN((int)call->cwind, (int)call->twind)) >> 1;
call->nDgramPackets = 1;
call->cwind = 1;
call->nextCwind = 1;
call->nAcks = 0;
call->nNacks = 0;
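/*
 * Worked example (illustrative): with cwind = 32 and twind = 64 the
 * slow-start threshold becomes MAX(4, MIN(32, 64)) >> 1 = 16, so once
 * the window regrows from 1 it leaves slow start at 16 packets.
 */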
6100 MUTEX_ENTER(&peer->peer_lock);
6101 peer->MTU = call->MTU;
6102 peer->cwind = call->cwind;
6103 peer->nDgramPackets = 1;
6105 call->congestSeq = peer->congestSeq;
6106 MUTEX_EXIT(&peer->peer_lock);
6108 rxi_Start(call, istack);
 out:
    MUTEX_EXIT(&call->lock);
}
6114 /* This routine is called when new packets are readied for
6115 * transmission and when retransmission may be necessary, or when the
6116 * transmission window or burst count are favourable. This should be
6117 * better optimized for new packets, the usual case, now that we've
6118 * got rid of queues of send packets. XXXXXXXXXXX */
void
rxi_Start(struct rx_call *call, int istack)
{
6122 struct opr_queue *cursor;
#ifdef RX_ENABLE_LOCKS
    struct opr_queue *store;
#endif
    int nXmitPackets;
    int maxXmitPackets;

    if (call->error) {
#ifdef RX_ENABLE_LOCKS
        if (rx_stats_active)
            rx_atomic_inc(&rx_tq_debug.rxi_start_in_error);
#endif
        return;
    }
6137 if (!opr_queue_IsEmpty(&call->tq)) { /* If we have anything to send */
6138 /* Send (or resend) any packets that need it, subject to
6139 * window restrictions and congestion burst control
6140 * restrictions. Ask for an ack on the last packet sent in
6141 * this burst. For now, we're relying upon the window being
6142 * considerably bigger than the largest number of packets that
6143 * are typically sent at once by one initial call to
6144 * rxi_Start. This is probably bogus (perhaps we should ask
6145 * for an ack when we're half way through the current
6146 * window?). Also, for non file transfer applications, this
6147 * may end up asking for an ack for every packet. Bogus. XXXX
 * But check whether we're here recursively, and let the other guy
 * do the work.
 */
6153 #ifdef RX_ENABLE_LOCKS
6154 if (!(call->flags & RX_CALL_TQ_BUSY)) {
        call->flags |= RX_CALL_TQ_BUSY;
        do {
#endif /* RX_ENABLE_LOCKS */
        restart:
#ifdef RX_ENABLE_LOCKS
            call->flags &= ~RX_CALL_NEED_START;
#endif /* RX_ENABLE_LOCKS */
            nXmitPackets = 0;
            maxXmitPackets = MIN(call->twind, call->cwind);
6164 for (opr_queue_Scan(&call->tq, cursor)) {
struct rx_packet *p
    = opr_queue_Entry(cursor, struct rx_packet, entry);
6168 if (p->flags & RX_PKTFLAG_ACKED) {
6169 /* Since we may block, don't trust this */
6170 if (rx_stats_active)
6171 rx_atomic_inc(&rx_stats.ignoreAckedPacket);
    continue;   /* Ignore this packet if it has been acknowledged */
}

6175 /* Turn off all flags except these ones, which are the same
6176 * on each transmission */
6177 p->header.flags &= RX_PRESET_FLAGS;
if (p->header.seq >=
    call->tfirst + MIN((int)call->twind,
                       (int)(call->nSoftAcked +
                             call->cwind))) {
    call->flags |= RX_CALL_WAIT_WINDOW_SEND;    /* Wait for transmit window */
    /* Note: if we're waiting for more window space, we can
     * still send retransmits; hence we don't return here, but
     * break out to schedule a retransmit event */
    dpf(("call %d waiting for window (seq %d, twind %d, nSoftAcked %d, cwind %d)\n",
         *(call->callNumber), p->header.seq, call->twind, call->nSoftAcked,
         call->cwind));
    break;
}
6193 /* Transmit the packet if it needs to be sent. */
if (!(p->flags & RX_PKTFLAG_SENT)) {
    if (nXmitPackets == maxXmitPackets) {
        rxi_SendXmitList(call, call->xmitList,
                         nXmitPackets, istack);
        goto restart;
    }
    dpf(("call %d xmit packet %"AFS_PTR_FMT"\n",
         *(call->callNumber), p));
    call->xmitList[nXmitPackets++] = p;
}
} /* end of the queue_Scan */
/* xmitList now holds pointers to all of the packets that are
 * ready to send. Now we loop to send the packets */
6208 if (nXmitPackets > 0) {
    rxi_SendXmitList(call, call->xmitList, nXmitPackets,
                     istack);
}
#ifdef RX_ENABLE_LOCKS
if (call->error) {
    /* We went into the error state while sending packets. Now is
     * the time to reset the call. This will also inform the using
     * process that the call is in an error state.
     */
    if (rx_stats_active)
        rx_atomic_inc(&rx_tq_debug.rxi_start_aborted);
    call->flags &= ~RX_CALL_TQ_BUSY;
    rxi_WakeUpTransmitQueue(call);
    rxi_CallError(call, call->error);
    return;
}
if (call->flags & RX_CALL_TQ_SOME_ACKED) {
    int missing;
    call->flags &= ~RX_CALL_TQ_SOME_ACKED;
    /* Some packets have received acks. If they all have, we can clear
     * the transmit queue.
     */
    missing = 0;
    for (opr_queue_ScanSafe(&call->tq, cursor, store)) {
        struct rx_packet *p
            = opr_queue_Entry(cursor, struct rx_packet, entry);

        if (p->header.seq < call->tfirst
            && (p->flags & RX_PKTFLAG_ACKED)) {
            opr_queue_Remove(&p->entry);
#ifdef RX_TRACK_PACKETS
            p->flags &= ~RX_PKTFLAG_TQ;
#endif
#ifdef RXDEBUG_PACKET
            call->tqc--;
#endif /* RXDEBUG_PACKET */
            rxi_FreePacket(p);
        } else
            missing = 1;
    }
    if (!missing)
        call->flags |= RX_CALL_TQ_CLEARME;
}
if (call->flags & RX_CALL_TQ_CLEARME)
    rxi_ClearTransmitQueue(call, 1);
} while (call->flags & RX_CALL_NEED_START);
/*
 * TQ references no longer protected by this flag; they must remain
 * protected by the call lock.
 */
call->flags &= ~RX_CALL_TQ_BUSY;
rxi_WakeUpTransmitQueue(call);
} else {
    call->flags |= RX_CALL_NEED_START;
}
#endif /* RX_ENABLE_LOCKS */
} else {
    rxi_rto_cancel(call);
}
}
6272 /* Also adjusts the keep alive parameters for the call, to reflect
 * that we have just sent a packet (so keep alives aren't sent
 * immediately) */
void
rxi_Send(struct rx_call *call, struct rx_packet *p,
         int istack)
{
6279 struct rx_connection *conn = call->conn;
6281 /* Stamp each packet with the user supplied status */
6282 p->header.userStatus = call->localStatus;
6284 /* Allow the security object controlling this call's security to
6285 * make any last-minute changes to the packet */
6286 RXS_SendPacket(conn->securityObject, call, p);
6288 /* Since we're about to send SOME sort of packet to the peer, it's
6289 * safe to nuke any scheduled end-of-packets ack */
6290 rxi_CancelDelayedAckEvent(call);
6292 /* Actually send the packet, filling in more connection-specific fields */
6293 MUTEX_EXIT(&call->lock);
6294 CALL_HOLD(call, RX_CALL_REFCOUNT_SEND);
6295 rxi_SendPacket(call, conn, p, istack);
6296 CALL_RELE(call, RX_CALL_REFCOUNT_SEND);
6297 MUTEX_ENTER(&call->lock);
6299 /* Update last send time for this call (for keep-alive
6300 * processing), and for the connection (so that we can discover
6301 * idle connections) */
6302 if ((p->header.type != RX_PACKET_TYPE_ACK) ||
6303 (((struct rx_ackPacket *)rx_DataOf(p))->reason == RX_ACK_PING) ||
6304 (p->length <= (rx_AckDataSize(call->rwind) + 4 * sizeof(afs_int32))))
6306 conn->lastSendTime = call->lastSendTime = clock_Sec();
6307 /* Don't count keepalive ping/acks here, so idleness can be tracked. */
6308 if ((p->header.type != RX_PACKET_TYPE_ACK) ||
6309 ((((struct rx_ackPacket *)rx_DataOf(p))->reason != RX_ACK_PING) &&
6310 (((struct rx_ackPacket *)rx_DataOf(p))->reason !=
6311 RX_ACK_PING_RESPONSE)))
    call->lastSendData = call->lastSendTime;
}
6316 /* Check if a call needs to be destroyed. Called by keep-alive code to ensure
6317 * that things are fine. Also called periodically to guarantee that nothing
 * falls through the cracks (e.g. (error + dally) connections have keepalive
 * turned off). Returns 0 if conn is well, -1 otherwise. If otherwise, call
 * may be freed!
 * haveCTLock Set if calling from rxi_ReapConnections
 */
static int
rxi_CheckCall(struct rx_call *call, int haveCTLock)
{
struct rx_connection *conn = call->conn;
afs_uint32 now;
afs_uint32 deadTime, idleDeadTime = 0, hardDeadTime = 0;
afs_uint32 fudgeFactor;
int cerror = 0;
int newmtu = 0;
int idle_timeout = 0;
afs_int32 clock_diff = 0;
if (rxi_CheckPeerDead(call)) {
    return -1;
}

now = clock_Sec();
6341 /* Large swings in the clock can have a significant impact on
6342 * the performance of RX call processing. Forward clock shifts
6343 * will result in premature event triggering or timeouts.
6344 * Backward shifts can result in calls not completing until
6345 * the clock catches up with the original start clock value.
6347 * If a backward clock shift of more than five minutes is noticed,
 * just fail the call.
 */
6350 if (now < call->lastSendTime)
6351 clock_diff = call->lastSendTime - now;
6352 if (now < call->startWait)
6353 clock_diff = MAX(clock_diff, call->startWait - now);
6354 if (now < call->lastReceiveTime)
6355 clock_diff = MAX(clock_diff, call->lastReceiveTime - now);
if (clock_diff > 5 * 60) {
    if (call->state == RX_STATE_ACTIVE)
        rxi_CallError(call, RX_CALL_TIMEOUT);
    return -1;
}
6363 #ifdef RX_ENABLE_LOCKS
6364 if (call->flags & RX_CALL_TQ_BUSY) {
6365 /* Call is active and will be reset by rxi_Start if it's
     * in an error state.
     */
    return 0;
}
#endif
6371 /* RTT + 8*MDEV, rounded up to the next second. */
6372 fudgeFactor = (((afs_uint32) call->rtt >> 3) +
6373 ((afs_uint32) call->rtt_dev << 1) + 1023) >> 10;
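/*
 * Worked example (illustrative): call->rtt is kept in 1/8 ms units and
 * call->rtt_dev in 1/4 ms units, so a 100ms smoothed RTT (rtt = 800)
 * with a 25ms deviation (rtt_dev = 100) gives
 * (100 + 200 + 1023) >> 10 = 1 second of fudge.
 */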
6375 deadTime = conn->secondsUntilDead + fudgeFactor;
6376 /* These are computed to the second (+- 1 second). But that's
6377 * good enough for these values, which should be a significant
6378 * number of seconds. */
6379 if (now > (call->lastReceiveTime + deadTime)) {
if (call->state == RX_STATE_ACTIVE) {
#ifdef AFS_ADAPT_PMTU
# if defined(KERNEL) && defined(AFS_SUN5_ENV)
    ire_t *ire;
#  if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
    netstack_t *ns = netstack_find_by_stackid(GLOBAL_NETSTACKID);
    ip_stack_t *ipst = ns->netstack_ip;
#  endif
    ire = ire_cache_lookup(conn->peer->host
#  if defined(AFS_SUN510_ENV) && defined(ALL_ZONES)
                           , ALL_ZONES
#   if defined(ICL_3_ARG) || defined(GLOBAL_NETSTACKID)
                           , NULL
#    if defined(GLOBAL_NETSTACKID)
                           , ipst
#    endif
#   endif
#  endif
        );

    if (ire && ire->ire_max_frag > 0)
        rxi_SetPeerMtu(NULL, conn->peer->host, 0,
                       ire->ire_max_frag);
#  if defined(GLOBAL_NETSTACKID)
    netstack_rele(ns);
#  endif
# endif
#endif /* AFS_ADAPT_PMTU */
    cerror = RX_CALL_DEAD;
    goto mtuout;
} else {
6411 #ifdef RX_ENABLE_LOCKS
6412 /* Cancel pending events */
6413 rxi_CancelDelayedAckEvent(call);
6414 rxi_rto_cancel(call);
6415 rxi_CancelKeepAliveEvent(call);
6416 rxi_CancelGrowMTUEvent(call);
6417 MUTEX_ENTER(&rx_refcnt_mutex);
6418 /* if rxi_FreeCall returns 1 it has freed the call */
if (call->refCount == 0 &&
    rxi_FreeCall(call, haveCTLock))
{
    MUTEX_EXIT(&rx_refcnt_mutex);
    return -2;
}
MUTEX_EXIT(&rx_refcnt_mutex);
return -1;
#else /* RX_ENABLE_LOCKS */
rxi_FreeCall(call, 0);
return -2;
#endif /* RX_ENABLE_LOCKS */
}
6432 /* Non-active calls are destroyed if they are not responding
6433 * to pings; active calls are simply flagged in error, so the
6434 * attached process can die reasonably gracefully. */
6437 if (conn->idleDeadDetection) {
if (conn->idleDeadTime) {
    idleDeadTime = conn->idleDeadTime + fudgeFactor;
}

if (idleDeadTime) {
    /* see if we have a non-activity timeout */
    if (call->startWait && ((call->startWait + idleDeadTime) < now) &&
        (call->flags & RX_CALL_READER_WAIT)) {
        if (call->state == RX_STATE_ACTIVE) {
            cerror = RX_CALL_TIMEOUT;
            goto mtuout;
        }
    }

    /* see if we have an idle send timeout */
    if (call->lastSendData && ((call->lastSendData + idleDeadTime) < now)) {
        if (call->state == RX_STATE_ACTIVE) {
            cerror = conn->service ? conn->service->idleDeadErr : RX_CALL_IDLE;
            idle_timeout = 1;
            goto mtuout;
        }
    }
}
}

if (conn->hardDeadTime) {
    hardDeadTime = conn->hardDeadTime + fudgeFactor;
}

/* see if we have a hard timeout */
if (hardDeadTime
    && (now > (hardDeadTime + call->startTime.sec))) {
    if (call->state == RX_STATE_ACTIVE)
        rxi_CallError(call, RX_CALL_TIMEOUT);
    return -1;
}
return 0;
mtuout:
6475 if (conn->msgsizeRetryErr && cerror != RX_CALL_TIMEOUT && !idle_timeout &&
6476 call->lastReceiveTime) {
6477 int oldMTU = conn->peer->ifMTU;
6479 /* if we thought we could send more, perhaps things got worse */
6480 if (conn->peer->maxPacketSize > conn->lastPacketSize)
6481 /* maxpacketsize will be cleared in rxi_SetPeerMtu */
6482 newmtu = MAX(conn->peer->maxPacketSize-RX_IPUDP_SIZE,
6483 conn->lastPacketSize-(128+RX_IPUDP_SIZE));
else
    newmtu = conn->lastPacketSize - (128 + RX_IPUDP_SIZE);
6487 /* minimum capped in SetPeerMtu */
6488 rxi_SetPeerMtu(conn->peer, 0, 0, newmtu);
/* clean up */
conn->lastPacketSize = 0;
6493 /* needed so ResetCall doesn't clobber us. */
6494 call->MTU = conn->peer->ifMTU;
6496 /* if we never succeeded, let the error pass out as-is */
6497 if (conn->peer->maxPacketSize && oldMTU != conn->peer->ifMTU)
        cerror = conn->msgsizeRetryErr;
}
rxi_CallError(call, cerror);
return -1;
}
static void
rxi_NatKeepAliveEvent(struct rxevent *event, void *arg1,
                      void *dummy, int dummy2)
{
    struct rx_connection *conn = arg1;
    struct rx_header theader;
    char tbuffer[1 + sizeof(struct rx_header)];
    struct sockaddr_in taddr;
    char *tp;
    char a[1] = { 0 };
    struct iovec tmpiov[2];
    osi_socket socket =
        (conn->type ==
         RX_CLIENT_CONNECTION ? rx_socket : conn->service->socket);
6521 tp = &tbuffer[sizeof(struct rx_header)];
6522 taddr.sin_family = AF_INET;
6523 taddr.sin_port = rx_PortOf(rx_PeerOf(conn));
6524 taddr.sin_addr.s_addr = rx_HostOf(rx_PeerOf(conn));
6525 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
taddr.sin_len = sizeof(struct sockaddr_in);
#endif
6528 memset(&theader, 0, sizeof(theader));
theader.epoch = htonl(999);
theader.cid = 0;
theader.callNumber = 0;
theader.seq = 0;
theader.serial = 0;
theader.type = RX_PACKET_TYPE_VERSION;
6535 theader.flags = RX_LAST_PACKET;
6536 theader.serviceId = 0;
6538 memcpy(tbuffer, &theader, sizeof(theader));
6539 memcpy(tp, &a, sizeof(a));
6540 tmpiov[0].iov_base = tbuffer;
6541 tmpiov[0].iov_len = 1 + sizeof(struct rx_header);
6543 osi_NetSend(socket, &taddr, tmpiov, 1, 1 + sizeof(struct rx_header), 1);
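/*
 * The datagram sent above is a minimal RX VERSION packet (epoch 999,
 * no call) followed by a single zero byte. Whether or not the peer
 * answers, the outbound traffic is enough to keep a NAT or stateful
 * firewall mapping for this connection's address/port pair alive.
 */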
6545 MUTEX_ENTER(&conn->conn_data_lock);
6546 MUTEX_ENTER(&rx_refcnt_mutex);
6547 /* Only reschedule ourselves if the connection would not be destroyed */
6548 if (conn->refCount <= 1) {
6549 rxevent_Put(&conn->natKeepAliveEvent);
6550 MUTEX_EXIT(&rx_refcnt_mutex);
6551 MUTEX_EXIT(&conn->conn_data_lock);
    rx_DestroyConnection(conn); /* drop the reference for this */
} else {
    conn->refCount--;           /* drop the reference for this */
6555 MUTEX_EXIT(&rx_refcnt_mutex);
6556 rxevent_Put(&conn->natKeepAliveEvent);
6557 rxi_ScheduleNatKeepAliveEvent(conn);
    MUTEX_EXIT(&conn->conn_data_lock);
}
}
static void
rxi_ScheduleNatKeepAliveEvent(struct rx_connection *conn)
{
6565 if (!conn->natKeepAliveEvent && conn->secondsUntilNatPing) {
6566 struct clock when, now;
6567 clock_GetTime(&now);
when = now;
when.sec += conn->secondsUntilNatPing;
6570 MUTEX_ENTER(&rx_refcnt_mutex);
6571 conn->refCount++; /* hold a reference for this */
6572 MUTEX_EXIT(&rx_refcnt_mutex);
6573 conn->natKeepAliveEvent =
    rxevent_Post(&when, &now, rxi_NatKeepAliveEvent, conn, NULL, 0);
}
}
void
rx_SetConnSecondsUntilNatPing(struct rx_connection *conn, afs_int32 seconds)
{
6581 MUTEX_ENTER(&conn->conn_data_lock);
6582 conn->secondsUntilNatPing = seconds;
if (seconds != 0) {
    if (!(conn->flags & RX_CONN_ATTACHWAIT))
        rxi_ScheduleNatKeepAliveEvent(conn);
    else
        conn->flags |= RX_CONN_NAT_PING;
}
MUTEX_EXIT(&conn->conn_data_lock);
}
6592 /* When a call is in progress, this routine is called occasionally to
6593 * make sure that some traffic has arrived (or been sent to) the peer.
6594 * If nothing has arrived in a reasonable amount of time, the call is
6595 * declared dead; if nothing has been sent for a while, we send a
 * keep-alive packet (if we're actually trying to keep the call alive) */
static void
rxi_KeepAliveEvent(struct rxevent *event, void *arg1, void *dummy,
                   int dummy2)
{
6602 struct rx_call *call = arg1;
struct rx_connection *conn;
afs_uint32 now;

6606 CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
6607 MUTEX_ENTER(&call->lock);
6609 if (event == call->keepAliveEvent)
    rxevent_Put(&call->keepAliveEvent);

now = clock_Sec();

6614 if (rxi_CheckCall(call, 0)) {
    MUTEX_EXIT(&call->lock);
    return;
}
6619 /* Don't try to keep alive dallying calls */
6620 if (call->state == RX_STATE_DALLY) {
    MUTEX_EXIT(&call->lock);
    return;
}

conn = call->conn;
6626 if ((now - call->lastSendTime) > conn->secondsUntilPing) {
6627 /* Don't try to send keepalives if there is unacknowledged data */
6628 /* the rexmit code should be good enough, this little hack
6629 * doesn't quite work XXX */
    (void)rxi_SendAck(call, NULL, 0, RX_ACK_PING, 0);
}
6632 rxi_ScheduleKeepAliveEvent(call);
MUTEX_EXIT(&call->lock);
}
/* Does what's on the nameplate. */
static void
rxi_GrowMTUEvent(struct rxevent *event, void *arg1, void *dummy, int dummy2)
{
6640 struct rx_call *call = arg1;
6641 struct rx_connection *conn;
6643 CALL_RELE(call, RX_CALL_REFCOUNT_MTU);
6644 MUTEX_ENTER(&call->lock);
6646 if (event == call->growMTUEvent)
6647 rxevent_Put(&call->growMTUEvent);
6649 if (rxi_CheckCall(call, 0)) {
    MUTEX_EXIT(&call->lock);
    return;
}

6654 /* Don't bother with dallying calls */
6655 if (call->state == RX_STATE_DALLY) {
    MUTEX_EXIT(&call->lock);
    return;
}

conn = call->conn;

/*
6663 * keep being scheduled, just don't do anything if we're at peak,
 * or we're not set up to be properly handled (idle timeout required)
 */
6666 if ((conn->peer->maxPacketSize != 0) &&
6667 (conn->peer->natMTU < RX_MAX_PACKET_SIZE) &&
6668 conn->idleDeadDetection)
6669 (void)rxi_SendAck(call, NULL, 0, RX_ACK_MTU, 0);
6670 rxi_ScheduleGrowMTUEvent(call, 0);
MUTEX_EXIT(&call->lock);
}
static void
rxi_ScheduleKeepAliveEvent(struct rx_call *call)
{
6677 if (!call->keepAliveEvent) {
6678 struct clock when, now;
clock_GetTime(&now);
when = now;
6681 when.sec += call->conn->secondsUntilPing;
6682 CALL_HOLD(call, RX_CALL_REFCOUNT_ALIVE);
6683 call->keepAliveEvent =
    rxevent_Post(&when, &now, rxi_KeepAliveEvent, call, NULL, 0);
}
}
static void
rxi_CancelKeepAliveEvent(struct rx_call *call) {
6690 if (call->keepAliveEvent) {
6691 rxevent_Cancel(&call->keepAliveEvent);
    CALL_RELE(call, RX_CALL_REFCOUNT_ALIVE);
}
}
static void
rxi_ScheduleGrowMTUEvent(struct rx_call *call, int secs)
{
6699 if (!call->growMTUEvent) {
6700 struct clock when, now;
clock_GetTime(&now);
when = now;
if (!secs) {
6705 if (call->conn->secondsUntilPing)
6706 secs = (6*call->conn->secondsUntilPing)-1;
6708 if (call->conn->secondsUntilDead)
        secs = MIN(secs, (call->conn->secondsUntilDead - 1));
}

when.sec += secs;
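/*
 * Illustrative timing: a secondsUntilPing of 6 gives 6*6-1 = 35 seconds
 * between grow-MTU events, and a secondsUntilDead of 12 would clamp that
 * down to 11 seconds.
 */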
6713 CALL_HOLD(call, RX_CALL_REFCOUNT_MTU);
6714 call->growMTUEvent =
    rxevent_Post(&when, &now, rxi_GrowMTUEvent, call, NULL, 0);
}
}
static void
rxi_CancelGrowMTUEvent(struct rx_call *call)
6722 if (call->growMTUEvent) {
6723 rxevent_Cancel(&call->growMTUEvent);
    CALL_RELE(call, RX_CALL_REFCOUNT_MTU);
}
}
static void
rxi_KeepAliveOn(struct rx_call *call)
{
6731 /* Pretend last packet received was received now--i.e. if another
6732 * packet isn't received within the keep alive time, then the call
6733 * will die; Initialize last send time to the current time--even
6734 * if a packet hasn't been sent yet. This will guarantee that a
6735 * keep-alive is sent within the ping time */
6736 call->lastReceiveTime = call->lastSendTime = clock_Sec();
rxi_ScheduleKeepAliveEvent(call);
}
void
rx_KeepAliveOff(struct rx_call *call)
{
6743 MUTEX_ENTER(&call->lock);
6744 rxi_CancelKeepAliveEvent(call);
MUTEX_EXIT(&call->lock);
}
void
rx_KeepAliveOn(struct rx_call *call)
{
6751 MUTEX_ENTER(&call->lock);
6752 rxi_KeepAliveOn(call);
MUTEX_EXIT(&call->lock);
}
void
rxi_GrowMTUOn(struct rx_call *call)
{
6759 struct rx_connection *conn = call->conn;
6760 MUTEX_ENTER(&conn->conn_data_lock);
6761 conn->lastPingSizeSer = conn->lastPingSize = 0;
6762 MUTEX_EXIT(&conn->conn_data_lock);
rxi_ScheduleGrowMTUEvent(call, 1);
}
6766 /* This routine is called to send connection abort messages
6767 * that have been delayed to throttle looping clients. */
static void
rxi_SendDelayedConnAbort(struct rxevent *event, void *arg1, void *unused,
                         int unused2)
{
6772 struct rx_connection *conn = arg1;
afs_int32 error;
struct rx_packet *packet;
6777 MUTEX_ENTER(&conn->conn_data_lock);
6778 rxevent_Put(&conn->delayedAbortEvent);
error = htonl(conn->error);
conn->abortCount++;
6781 MUTEX_EXIT(&conn->conn_data_lock);
6782 packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
if (packet) {
    packet =
        rxi_SendSpecial((struct rx_call *)0, conn, packet,
                        RX_PACKET_TYPE_ABORT, (char *)&error,
                        sizeof(error), 0);
    rxi_FreePacket(packet);
}
}
6792 /* This routine is called to send call abort messages
6793 * that have been delayed to throttle looping clients. */
static void
rxi_SendDelayedCallAbort(struct rxevent *event, void *arg1, void *dummy,
                         int dummy2)
{
6798 struct rx_call *call = arg1;
afs_int32 error;
struct rx_packet *packet;
6803 MUTEX_ENTER(&call->lock);
6804 rxevent_Put(&call->delayedAbortEvent);
6805 error = htonl(call->error);
packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
if (packet) {
    packet =
        rxi_SendSpecial(call, call->conn, packet, RX_PACKET_TYPE_ABORT,
                        (char *)&error, sizeof(error), 0);
    rxi_FreePacket(packet);
}
MUTEX_EXIT(&call->lock);
CALL_RELE(call, RX_CALL_REFCOUNT_ABORT);
}
6818 /* This routine is called periodically (every RX_AUTH_REQUEST_TIMEOUT
6819 * seconds) to ask the client to authenticate itself. The routine
6820 * issues a challenge to the client, which is obtained from the
6821 * security object associated with the connection */
static void
rxi_ChallengeEvent(struct rxevent *event,
                   void *arg0, void *arg1, int tries)
{
6826 struct rx_connection *conn = arg0;
if (event)
    rxevent_Put(&conn->challengeEvent);

6831 if (RXS_CheckAuthentication(conn->securityObject, conn) != 0) {
6832 struct rx_packet *packet;
struct clock when, now;

if (tries <= 0) {
6836 /* We've failed to authenticate for too long.
6837 * Reset any calls waiting for authentication;
 * they are all in RX_STATE_PRECALL.
 */
int i;

6842 MUTEX_ENTER(&conn->conn_call_lock);
6843 for (i = 0; i < RX_MAXCALLS; i++) {
struct rx_call *call = conn->call[i];
if (call) {
6846 MUTEX_ENTER(&call->lock);
6847 if (call->state == RX_STATE_PRECALL) {
6848 rxi_CallError(call, RX_CALL_DEAD);
6849 rxi_SendCallAbort(call, NULL, 0, 0);
        MUTEX_EXIT(&call->lock);
    }
}
MUTEX_EXIT(&conn->conn_call_lock);
return;
}

packet = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL);
if (packet) {
6860 /* If there's no packet available, do this later. */
6861 RXS_GetChallenge(conn->securityObject, conn, packet);
6862 rxi_SendSpecial((struct rx_call *)0, conn, packet,
6863 RX_PACKET_TYPE_CHALLENGE, NULL, -1, 0);
    rxi_FreePacket(packet);
}
clock_GetTime(&now);
when = now;
6868 when.sec += RX_CHALLENGE_TIMEOUT;
6869 conn->challengeEvent =
    rxevent_Post(&when, &now, rxi_ChallengeEvent, conn, 0,
                 (tries - 1));
}
}
6875 /* Call this routine to start requesting the client to authenticate
6876 * itself. This will continue until authentication is established,
6877 * the call times out, or an invalid response is returned. The
6878 * security object associated with the connection is asked to create
6879 * the challenge at this time. N.B. rxi_ChallengeOff is a macro,
6880 * defined earlier. */
static void
rxi_ChallengeOn(struct rx_connection *conn)
{
6884 if (!conn->challengeEvent) {
6885 RXS_CreateChallenge(conn->securityObject, conn);
    rxi_ChallengeEvent(NULL, conn, 0, RX_CHALLENGE_MAXTRIES);
}
}
6891 /* rxi_ComputeRoundTripTime is called with peer locked. */
6892 /* peer may be null */
static void
rxi_ComputeRoundTripTime(struct rx_packet *p,
6895 struct rx_ackPacket *ack,
6896 struct rx_call *call,
                         struct rx_peer *peer,
                         struct clock *now)
{
struct clock thisRtt, *sentp;
int rtt_timeout;
int serial;

6904 /* If the ACK is delayed, then do nothing */
if (ack->reason == RX_ACK_DELAY)
    return;
6908 /* On the wire, jumbograms are a single UDP packet. We shouldn't count
 * their RTT multiple times, so only include the RTT of the last packet
 * in a jumbogram */
if (p->flags & RX_JUMBO_PACKET)
    return;
6914 /* Use the serial number to determine which transmission the ACK is for,
6915 * and set the sent time to match this. If we have no serial number, then
 * only use the ACK for RTT calculations if the packet has not been
 * retransmitted.
 */

serial = ntohl(ack->serial);
if (serial) {
    if (serial == p->header.serial) {
        sentp = &p->timeSent;
    } else if (serial == p->firstSerial) {
        sentp = &p->firstSent;
    } else if (clock_Eq(&p->timeSent, &p->firstSent)) {
        sentp = &p->firstSent;
    } else
        return;
} else {
    if (clock_Eq(&p->timeSent, &p->firstSent)) {
        sentp = &p->firstSent;
    } else
        return;
}
thisRtt = *now;

if (clock_Lt(&thisRtt, sentp))
6940 return; /* somebody set the clock back, don't count this time. */
6942 clock_Sub(&thisRtt, sentp);
6943 dpf(("rxi_ComputeRoundTripTime(call=%d packet=%"AFS_PTR_FMT" rttp=%d.%06d sec)\n",
6944 p->header.callNumber, p, thisRtt.sec, thisRtt.usec));
if (clock_IsZero(&thisRtt)) {
    /*
6948 * The actual round trip time is shorter than the
6949 * clock_GetTime resolution. It is most likely 1ms or 100ns.
6950 * Since we can't tell which at the moment we will assume 1ms.
    thisRtt.usec = 1000;
}
6955 if (rx_stats_active) {
6956 MUTEX_ENTER(&rx_stats_mutex);
6957 if (clock_Lt(&thisRtt, &rx_stats.minRtt))
6958 rx_stats.minRtt = thisRtt;
6959 if (clock_Gt(&thisRtt, &rx_stats.maxRtt)) {
6960 if (thisRtt.sec > 60) {
6961 MUTEX_EXIT(&rx_stats_mutex);
6962 return; /* somebody set the clock ahead */
        rx_stats.maxRtt = thisRtt;
    }
6966 clock_Add(&rx_stats.totalRtt, &thisRtt);
6967 rx_atomic_inc(&rx_stats.nRttSamples);
    MUTEX_EXIT(&rx_stats_mutex);
}
6971 /* better rtt calculation courtesy of UMich crew (dave,larry,peter,?) */
/* Apply VanJacobson round-trip estimations */
if (call->rtt) {
    int delta;

    /*
6978 * srtt (call->rtt) is in units of one-eighth-milliseconds.
6979 * srtt is stored as fixed point with 3 bits after the binary
6980 * point (i.e., scaled by 8). The following magic is
6981 * equivalent to the smoothing algorithm in rfc793 with an
6982 * alpha of .875 (srtt' = rtt/8 + srtt*7/8 in fixed point).
6983 * srtt'*8 = rtt + srtt*7
6984 * srtt'*8 = srtt*8 + rtt - srtt
6985 * srtt' = srtt + rtt/8 - srtt/8
 * srtt' = srtt + (rtt - srtt)/8
 */

6989 delta = _8THMSEC(&thisRtt) - call->rtt;
call->rtt += (delta >> 3);

/*
6993 * We accumulate a smoothed rtt variance (actually, a smoothed
6994 * mean difference), then set the retransmit timer to smoothed
6995 * rtt + 4 times the smoothed variance (was 2x in van's original
 * paper, but 4x works better for me, and apparently for him as
 * well.
6998 * rttvar is stored as
6999 * fixed point with 2 bits after the binary point (scaled by
7000 * 4). The following is equivalent to rfc793 smoothing with
7001 * an alpha of .75 (rttvar' = rttvar*3/4 + |delta| / 4).
7002 * rttvar'*4 = rttvar*3 + |delta|
7003 * rttvar'*4 = rttvar*4 + |delta| - rttvar
7004 * rttvar' = rttvar + |delta|/4 - rttvar/4
7005 * rttvar' = rttvar + (|delta| - rttvar)/4
7006 * This replaces rfc793's wired-in beta.
 * dev*4 = dev*4 + (|actual - expected| - dev)
 */
if (delta < 0)
    delta = -delta;

7013 delta -= (call->rtt_dev << 1);
    call->rtt_dev += (delta >> 3);
} else {
7016 /* I don't have a stored RTT so I start with this value. Since I'm
7017 * probably just starting a call, and will be pushing more data down
7018 * this, I expect congestion to increase rapidly. So I fudge a
7019 * little, and I set deviance to half the rtt. In practice,
7020 * deviance tends to approach something a little less than
7021 * half the smoothed rtt. */
7022 call->rtt = _8THMSEC(&thisRtt) + 8;
    call->rtt_dev = call->rtt >> 2;     /* rtt/2: they're scaled differently */
}
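/*
 * Scale note (worked example, illustrative): with no prior sample and a
 * measured RTT of 24ms, _8THMSEC yields 192, so call->rtt = 192 + 8 = 200
 * (25ms in 1/8 ms units) and call->rtt_dev = 200 >> 2 = 50 (12.5ms in
 * 1/4 ms units), i.e. the deviation starts at half the smoothed rtt.
 */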
7025 /* the smoothed RTT time is RTT + 4*MDEV
7027 * We allow a user specified minimum to be set for this, to allow clamping
7028 * at a minimum value in the same way as TCP. In addition, we have to allow
7029 * for the possibility that this packet is answered by a delayed ACK, so we
 * add on a fixed 200ms to account for that timer expiring.
 */

7033 rtt_timeout = MAX(((call->rtt >> 3) + call->rtt_dev),
7034 rx_minPeerTimeout) + 200;
7035 clock_Zero(&call->rto);
7036 clock_Addmsec(&call->rto, rtt_timeout);
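/*
 * Continuing the worked example above: (200 >> 3) + 50 = 75ms, so with
 * rx_minPeerTimeout below that the retransmit timeout becomes
 * 75 + 200 = 275ms, i.e. srtt + 4*MDEV plus the fixed delayed-ACK
 * allowance.
 */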
7038 /* Update the peer, so any new calls start with our values */
7039 peer->rtt_dev = call->rtt_dev;
7040 peer->rtt = call->rtt;
7042 dpf(("rxi_ComputeRoundTripTime(call=%d packet=%"AFS_PTR_FMT" rtt=%d ms, srtt=%d ms, rtt_dev=%d ms, timeout=%d.%06d sec)\n",
     p->header.callNumber, p, MSEC(&thisRtt), call->rtt >> 3, call->rtt_dev >> 2, (call->rto.sec), (call->rto.usec)));
}
/* Find all server connections that have not been active for a long time, and
 * toss them */
static void
rxi_ReapConnections(struct rxevent *unused, void *unused1, void *unused2,
                    int unused3)
{
7053 struct clock now, when;
7054 struct rxevent *event;
7055 clock_GetTime(&now);
7057 /* Find server connection structures that haven't been used for
7058 * greater than rx_idleConnectionTime */
{
    struct rx_connection **conn_ptr, **conn_end;
7061 int i, havecalls = 0;
7062 MUTEX_ENTER(&rx_connHashTable_lock);
7063 for (conn_ptr = &rx_connHashTable[0], conn_end =
     &rx_connHashTable[rx_hashTableSize]; conn_ptr < conn_end;
     conn_ptr++) {
struct rx_connection *conn, *next;
struct rx_call *call;
int result;

rereap:
for (conn = *conn_ptr; conn; conn = next) {
    /* XXX -- Shouldn't the connection be locked? */
    next = conn->next;
    havecalls = 0;
    for (i = 0; i < RX_MAXCALLS; i++) {
        call = conn->call[i];
        if (call) {
            int code;
            havecalls = 1;
            code = MUTEX_TRYENTER(&call->lock);
            if (!code)
                continue;
            result = rxi_CheckCall(call, 1);
            MUTEX_EXIT(&call->lock);
            if (result == -2) {
                /* If CheckCall freed the call, it might
                 * have destroyed the connection as well,
                 * which screws up the linked lists.
                 */
                goto rereap;
            }
        }
    }
    if (conn->type == RX_SERVER_CONNECTION) {
7095 /* This only actually destroys the connection if
7096 * there are no outstanding calls */
7097 MUTEX_ENTER(&conn->conn_data_lock);
7098 MUTEX_ENTER(&rx_refcnt_mutex);
7099 if (!havecalls && !conn->refCount
    && ((conn->lastSendTime + rx_idleConnectionTime) <
        now.sec)) {
7102 conn->refCount++; /* it will be decr in rx_DestroyConn */
7103 MUTEX_EXIT(&rx_refcnt_mutex);
7104 MUTEX_EXIT(&conn->conn_data_lock);
7105 #ifdef RX_ENABLE_LOCKS
7106 rxi_DestroyConnectionNoLock(conn);
7107 #else /* RX_ENABLE_LOCKS */
7108 rxi_DestroyConnection(conn);
7109 #endif /* RX_ENABLE_LOCKS */
}
#ifdef RX_ENABLE_LOCKS
else {
    MUTEX_EXIT(&rx_refcnt_mutex);
    MUTEX_EXIT(&conn->conn_data_lock);
}
#endif /* RX_ENABLE_LOCKS */
}
}
7120 #ifdef RX_ENABLE_LOCKS
7121 while (rx_connCleanup_list) {
7122 struct rx_connection *conn;
7123 conn = rx_connCleanup_list;
7124 rx_connCleanup_list = rx_connCleanup_list->next;
7125 MUTEX_EXIT(&rx_connHashTable_lock);
7126 rxi_CleanupConnection(conn);
    MUTEX_ENTER(&rx_connHashTable_lock);
}
7129 MUTEX_EXIT(&rx_connHashTable_lock);
7130 #endif /* RX_ENABLE_LOCKS */
7133 /* Find any peer structures that haven't been used (haven't had an
7134 * associated connection) for greater than rx_idlePeerTime */
7136 struct rx_peer **peer_ptr, **peer_end;
/*
 * Why do we need to hold the rx_peerHashTable_lock across
7141 * the incrementing of peer_ptr since the rx_peerHashTable
7142 * array is not changing? We don't.
7144 * By dropping the lock periodically we can permit other
7145 * activities to be performed while a rxi_ReapConnections
7146 * call is in progress. The goal of reap connections
7147 * is to clean up quickly without causing large amounts
7148 * of contention. Therefore, it is important that global
 * mutexes not be held for extended periods of time.
 */
7151 for (peer_ptr = &rx_peerHashTable[0], peer_end =
     &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
     peer_ptr++) {
7154 struct rx_peer *peer, *next, *prev;
7156 MUTEX_ENTER(&rx_peerHashTable_lock);
for (prev = peer = *peer_ptr; peer; peer = next) {
    next = peer->next;
    code = MUTEX_TRYENTER(&peer->peer_lock);
7160 if ((code) && (peer->refCount == 0)
7161 && ((peer->idleWhen + rx_idlePeerTime) < now.sec)) {
7162 struct opr_queue *cursor, *store;
size_t space;

/*
 * now know that this peer object is one to be
7167 * removed from the hash table. Once it is removed
7168 * it can't be referenced by other threads.
7169 * Lets remove it first and decrement the struct
 * nPeerStructs count.
 */
if (peer == *peer_ptr) {
    *peer_ptr = next;
    prev = next;
} else
    prev->next = next;

7178 if (rx_stats_active)
7179 rx_atomic_dec(&rx_stats.nPeerStructs);
/*
 * Now if we hold references on 'prev' and 'next'
7183 * we can safely drop the rx_peerHashTable_lock
 * while we destroy this 'peer' object.
 */
if (next)
    next->refCount++;
if (prev)
    prev->refCount++;
7190 MUTEX_EXIT(&rx_peerHashTable_lock);
7192 MUTEX_EXIT(&peer->peer_lock);
7193 MUTEX_DESTROY(&peer->peer_lock);
7195 for (opr_queue_ScanSafe(&peer->rpcStats, cursor, store)) {
7196 unsigned int num_funcs;
7197 struct rx_interface_stat *rpc_stat
    = opr_queue_Entry(cursor, struct rx_interface_stat,
                      entry);
if (!rpc_stat)
    break;

7203 opr_queue_Remove(&rpc_stat->entry);
7204 opr_queue_Remove(&rpc_stat->entryPeers);
num_funcs = rpc_stat->stats[0].func_total;
space =
7208 sizeof(rx_interface_stat_t) +
7209 rpc_stat->stats[0].func_total *
7210 sizeof(rx_function_entry_v1_t);
7212 rxi_Free(rpc_stat, space);
7214 MUTEX_ENTER(&rx_rpc_stats);
7215 rxi_rpc_peer_stat_cnt -= num_funcs;
        MUTEX_EXIT(&rx_rpc_stats);
    }
    rxi_FreePeer(peer);

    /*
7221 * Regain the rx_peerHashTable_lock and
 * decrement the reference count on 'prev'
 * and 'next'.
 */
    MUTEX_ENTER(&rx_peerHashTable_lock);
    if (next)
        next->refCount--;
    if (prev)
        prev->refCount--;
} else {
    if (code) {
        MUTEX_EXIT(&peer->peer_lock);
    }
}
}
MUTEX_EXIT(&rx_peerHashTable_lock);
}
}
7241 /* THIS HACK IS A TEMPORARY HACK. The idea is that the race condition in
7242 * rxi_AllocSendPacket, if it hits, will be handled at the next conn
7243 * GC, just below. Really, we shouldn't have to keep moving packets from
7244 * one place to another, but instead ought to always know if we can
7245 * afford to hold onto a packet in its particular use. */
7246 MUTEX_ENTER(&rx_freePktQ_lock);
7247 if (rx_waitingForPackets) {
7248 rx_waitingForPackets = 0;
7249 #ifdef RX_ENABLE_LOCKS
    CV_BROADCAST(&rx_waitingForPackets_cv);
#else
    osi_rxWakeup(&rx_waitingForPackets);
#endif
}
MUTEX_EXIT(&rx_freePktQ_lock);
when = now;
when.sec += RX_REAP_TIME;       /* Check every RX_REAP_TIME seconds */
7259 event = rxevent_Post(&when, &now, rxi_ReapConnections, 0, NULL, 0);
rxevent_Put(&event);
}
7264 /* rxs_Release - This isn't strictly necessary but, since the macro name from
7265 * rx.h is sort of strange this is better. This is called with a security
7266 * object before it is discarded. Each connection using a security object has
7267 * its own refcount to the object so it won't actually be freed until the last
7268 * connection is destroyed.
 * This is the only rxs module call. A hold could also be written but no one
 * needs it. */

int
rxs_Release(struct rx_securityClass *aobj)
{
    return RXS_Close(aobj);
}
7284 #define TRACE_OPTION_RX_DEBUG 16
7292 code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
7293 0, KEY_QUERY_VALUE, &parmKey);
7294 if (code != ERROR_SUCCESS)
7297 dummyLen = sizeof(TraceOption);
7298 code = RegQueryValueEx(parmKey, "TraceOption", NULL, NULL,
7299 (BYTE *) &TraceOption, &dummyLen);
7300 if (code == ERROR_SUCCESS) {
    rxdebug_active = (TraceOption & TRACE_OPTION_RX_DEBUG) ? 1 : 0;
}
7303 RegCloseKey (parmKey);
7304 #endif /* AFS_NT40_ENV */
void
rx_DebugOnOff(int on)
{
    rxdebug_active = on;
}

void
rx_StatsOnOff(int on)
{
    rx_stats_active = on;
}
7325 /* Don't call this debugging routine directly; use dpf */
static void
rxi_DebugPrint(char *format, ...)
{
    va_list ap;
#ifdef AFS_NT40_ENV
    char msg[512];
    char tformat[256];
    size_t len;

7336 va_start(ap, format);
7338 len = _snprintf(tformat, sizeof(tformat), "tid[%d] %s", GetCurrentThreadId(), format);
7341 len = _vsnprintf(msg, sizeof(msg)-2, tformat, ap);
if (len > 0)
    OutputDebugString(msg);
va_end(ap);
#else
struct clock now;

7349 va_start(ap, format);
7351 clock_GetTime(&now);
7352 fprintf(rx_Log, " %d.%06d:", (unsigned int)now.sec,
7353 (unsigned int)now.usec);
vfprintf(rx_Log, format, ap);
putc('\n', rx_Log);
fflush(rx_Log);
va_end(ap);
#endif
}
/*
 * This function is used to process the rx_stats structure that is local
7363 * to a process as well as an rx_stats structure received from a remote
 * process (via rxdebug). Therefore, it needs to do minimal version
 * checking.
 */
void
7368 rx_PrintTheseStats(FILE * file, struct rx_statistics *s, int size,
                   afs_int32 freePackets, char version)
{
    int i;

7373 if (size != sizeof(struct rx_statistics)) {
    fprintf(file,
            "Unexpected size of stats structure: was %d, expected %" AFS_SIZET_FMT "\n",
            size, sizeof(struct rx_statistics));
}
fprintf(file, "rx stats: free packets %d, allocs %d, ", (int)freePackets,
        s->packetRequests);
7382 if (version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
7383 fprintf(file, "alloc-failures(rcv %u/%u,send %u/%u,ack %u)\n",
7384 s->receivePktAllocFailures, s->receiveCbufPktAllocFailures,
7385 s->sendPktAllocFailures, s->sendCbufPktAllocFailures,
7386 s->specialPktAllocFailures);
} else {
    fprintf(file, "alloc-failures(rcv %u,send %u,ack %u)\n",
7389 s->receivePktAllocFailures, s->sendPktAllocFailures,
        s->specialPktAllocFailures);
}
fprintf(file,
        "   greedy %u, " "bogusReads %u (last from host %x), "
7395 "noPackets %u, " "noBuffers %u, " "selects %u, "
7396 "sendSelects %u\n", s->socketGreedy, s->bogusPacketOnRead,
7397 s->bogusHost, s->noPacketOnRead, s->noPacketBuffersOnRead,
7398 s->selects, s->sendSelects);
7400 fprintf(file, " packets read: ");
7401 for (i = 0; i < RX_N_PACKET_TYPES; i++) {
    fprintf(file, "%s %u ", rx_packetTypes[i], s->packetsRead[i]);
}
7404 fprintf(file, "\n");
fprintf(file,
        "   other read counters: data %u, " "ack %u, " "dup %u "
7408 "spurious %u " "dally %u\n", s->dataPacketsRead,
7409 s->ackPacketsRead, s->dupPacketsRead, s->spuriousPacketsRead,
7410 s->ignorePacketDally);
7412 fprintf(file, " packets sent: ");
7413 for (i = 0; i < RX_N_PACKET_TYPES; i++) {
    fprintf(file, "%s %u ", rx_packetTypes[i], s->packetsSent[i]);
}
7416 fprintf(file, "\n");
fprintf(file,
        "   other send counters: ack %u, " "data %u (not resends), "
7420 "resends %u, " "pushed %u, " "acked&ignored %u\n",
7421 s->ackPacketsSent, s->dataPacketsSent, s->dataPacketsReSent,
7422 s->dataPacketsPushed, s->ignoreAckedPacket);
fprintf(file,
        " \t(these should be small) sendFailed %u, " "fatalErrors %u\n",
7426 s->netSendFailures, (int)s->fatalErrors);
7428 if (s->nRttSamples) {
7429 fprintf(file, " Average rtt is %0.3f, with %d samples\n",
7430 clock_Float(&s->totalRtt) / s->nRttSamples, s->nRttSamples);
7432 fprintf(file, " Minimum rtt is %0.3f, maximum is %0.3f\n",
        clock_Float(&s->minRtt), clock_Float(&s->maxRtt));
}

fprintf(file,
        "   %d server connections, " "%d client connections, "
7438 "%d peer structs, " "%d call structs, " "%d free call structs\n",
7439 s->nServerConns, s->nClientConns, s->nPeerStructs,
7440 s->nCallStructs, s->nFreeCallStructs);
7442 #if !defined(AFS_PTHREAD_ENV) && !defined(AFS_USE_GETTIMEOFDAY)
fprintf(file, "   %d clock updates\n", clock_nUpdates);
#endif
}
7447 /* for backward compatibility */
void
rx_PrintStats(FILE * file)
{
7451 MUTEX_ENTER(&rx_stats_mutex);
7452 rx_PrintTheseStats(file, (struct rx_statistics *) &rx_stats,
                   sizeof(rx_stats), rx_nFreePackets,
                   RX_DEBUGI_VERSION);
MUTEX_EXIT(&rx_stats_mutex);
}
void
rx_PrintPeerStats(FILE * file, struct rx_peer *peer)
{
7461 fprintf(file, "Peer %x.%d.\n",
7462 ntohl(peer->host), (int)ntohs(peer->port));
fprintf(file,
        "   Rtt %d, " "total sent %d, " "resent %d\n",
7466 peer->rtt, peer->nSent, peer->reSends);
fprintf(file, "   Packet size %d\n", peer->ifMTU);
}
7472 #if defined(AFS_PTHREAD_ENV) && defined(RXDEBUG)
/*
 * This mutex protects the following static variables:
 * counter
 */

7478 #define LOCK_RX_DEBUG MUTEX_ENTER(&rx_debug_mutex)
7479 #define UNLOCK_RX_DEBUG MUTEX_EXIT(&rx_debug_mutex)
#else
#define LOCK_RX_DEBUG
7482 #define UNLOCK_RX_DEBUG
7483 #endif /* AFS_PTHREAD_ENV */
7485 #if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
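/*
 * MakeDebugCall sends a single debug request to the given host and port
 * and waits, with a short select() timeout and a bounded number of
 * retries, for a reply whose callNumber matches the request; up to
 * outputLength bytes of the reply payload are copied into outputData.
 */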
static afs_int32
MakeDebugCall(osi_socket socket, afs_uint32 remoteAddr, afs_uint16 remotePort,
7488 u_char type, void *inputData, size_t inputLength,
              void *outputData, size_t outputLength)
{
    static afs_int32 counter = 100;
    time_t waitTime, waitCount;
    struct rx_header theader;
    char tbuffer[1500];
    afs_int32 code;
    struct timeval tv_now, tv_wake, tv_delta;
    struct sockaddr_in taddr, faddr;
#ifdef AFS_NT40_ENV
    int faddrLen;
#else
    socklen_t faddrLen;
#endif
    fd_set imask;
    char *tp;

    waitTime = 1;
    waitCount = 5;
    LOCK_RX_DEBUG;
    counter++;
    UNLOCK_RX_DEBUG;
7511 tp = &tbuffer[sizeof(struct rx_header)];
7512 taddr.sin_family = AF_INET;
7513 taddr.sin_port = remotePort;
7514 taddr.sin_addr.s_addr = remoteAddr;
7515 #ifdef STRUCT_SOCKADDR_HAS_SA_LEN
taddr.sin_len = sizeof(struct sockaddr_in);
#endif
7519 memset(&theader, 0, sizeof(theader));
theader.epoch = htonl(999);
theader.cid = 0;
theader.callNumber = htonl(counter);
theader.seq = 0;
theader.serial = 0;
theader.type = type;
7526 theader.flags = RX_CLIENT_INITIATED | RX_LAST_PACKET;
7527 theader.serviceId = 0;
7529 memcpy(tbuffer, &theader, sizeof(theader));
7530 memcpy(tp, inputData, inputLength);
7532 sendto(socket, tbuffer, inputLength + sizeof(struct rx_header), 0,
7533 (struct sockaddr *)&taddr, sizeof(struct sockaddr_in));
7535 /* see if there's a packet available */
7536 gettimeofday(&tv_wake, NULL);
7537 tv_wake.tv_sec += waitTime;
7540 FD_SET(socket, &imask);
7541 tv_delta.tv_sec = tv_wake.tv_sec;
7542 tv_delta.tv_usec = tv_wake.tv_usec;
7543 gettimeofday(&tv_now, NULL);
if (tv_delta.tv_usec < tv_now.tv_usec) {
    /* borrow */
    tv_delta.tv_usec += 1000000;
    tv_delta.tv_sec--;
}
7550 tv_delta.tv_usec -= tv_now.tv_usec;
7552 if (tv_delta.tv_sec < tv_now.tv_sec) {
7556 tv_delta.tv_sec -= tv_now.tv_sec;
7559 code = select(0, &imask, 0, 0, &tv_delta);
7560 #else /* AFS_NT40_ENV */
7561 code = select(socket + 1, &imask, 0, 0, &tv_delta);
7562 #endif /* AFS_NT40_ENV */
7563 if (code == 1 && FD_ISSET(socket, &imask)) {
7564 /* now receive a packet */
7565 faddrLen = sizeof(struct sockaddr_in);
code =
    recvfrom(socket, tbuffer, sizeof(tbuffer), 0,
7568 (struct sockaddr *)&faddr, &faddrLen);
7571 memcpy(&theader, tbuffer, sizeof(struct rx_header));
7572 if (counter == ntohl(theader.callNumber))
7580 /* see if we've timed out */
7588 code -= sizeof(struct rx_header);
7589 if (code > outputLength)
7590 code = outputLength;
memcpy(outputData, tp, code);
return code;
}
7594 #endif /* RXDEBUG */
afs_int32
rx_GetServerDebug(osi_socket socket, afs_uint32 remoteAddr,
7598 afs_uint16 remotePort, struct rx_debugStats * stat,
                  afs_uint32 * supportedValues)
{
#if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
    afs_int32 rc;
    struct rx_debugIn in;

7605 *supportedValues = 0;
in.type = htonl(RX_DEBUGI_GETSTATS);
in.index = 0;
7609 rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
7610 &in, sizeof(in), stat, sizeof(*stat));
/*
 * If the call was successful, fixup the version and indicate
7614 * what contents of the stat structure are valid.
 * Also do net to host conversion of fields here.
 */
if (rc >= 0) {
    stat->version = ntohl(stat->version);
7619 if (stat->version >= RX_DEBUGI_VERSION_W_SECSTATS) {
7620 *supportedValues |= RX_SERVER_DEBUG_SEC_STATS;
7622 if (stat->version >= RX_DEBUGI_VERSION_W_GETALLCONN) {
7623 *supportedValues |= RX_SERVER_DEBUG_ALL_CONN;
7625 if (stat->version >= RX_DEBUGI_VERSION_W_RXSTATS) {
7626 *supportedValues |= RX_SERVER_DEBUG_RX_STATS;
7628 if (stat->version >= RX_DEBUGI_VERSION_W_WAITERS) {
7629 *supportedValues |= RX_SERVER_DEBUG_WAITER_CNT;
7631 if (stat->version >= RX_DEBUGI_VERSION_W_IDLETHREADS) {
7632 *supportedValues |= RX_SERVER_DEBUG_IDLE_THREADS;
7634 if (stat->version >= RX_DEBUGI_VERSION_W_NEWPACKETTYPES) {
7635 *supportedValues |= RX_SERVER_DEBUG_NEW_PACKETS;
7637 if (stat->version >= RX_DEBUGI_VERSION_W_GETPEER) {
7638 *supportedValues |= RX_SERVER_DEBUG_ALL_PEER;
7640 if (stat->version >= RX_DEBUGI_VERSION_W_WAITED) {
7641 *supportedValues |= RX_SERVER_DEBUG_WAITED_CNT;
7643 if (stat->version >= RX_DEBUGI_VERSION_W_PACKETS) {
7644 *supportedValues |= RX_SERVER_DEBUG_PACKETS_CNT;
7646 stat->nFreePackets = ntohl(stat->nFreePackets);
7647 stat->packetReclaims = ntohl(stat->packetReclaims);
7648 stat->callsExecuted = ntohl(stat->callsExecuted);
7649 stat->nWaiting = ntohl(stat->nWaiting);
7650 stat->idleThreads = ntohl(stat->idleThreads);
7651 stat->nWaited = ntohl(stat->nWaited);
    stat->nPackets = ntohl(stat->nPackets);
}

return rc;
#else
return -1;
#endif
}

afs_int32
7661 rx_GetServerStats(osi_socket socket, afs_uint32 remoteAddr,
7662 afs_uint16 remotePort, struct rx_statistics * stat,
                  afs_uint32 * supportedValues)
{
#if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
    afs_int32 rc;
    struct rx_debugIn in;
    afs_int32 *lp = (afs_int32 *) stat;
    int i;

    /*
7672 * supportedValues is currently unused, but added to allow future
 * versioning of this function.
 */
7676 *supportedValues = 0;
in.type = htonl(RX_DEBUGI_RXSTATS);
in.index = 0;
7679 memset(stat, 0, sizeof(*stat));
7681 rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
7682 &in, sizeof(in), stat, sizeof(*stat));
if (rc >= 0) {
    /*
     * Do net to host conversion here
     */
    for (i = 0; i < sizeof(*stat) / sizeof(afs_int32); i++, lp++) {
        *lp = ntohl(*lp);
    }
}

return rc;
#else
return -1;
#endif
}

afs_int32
7701 rx_GetServerVersion(osi_socket socket, afs_uint32 remoteAddr,
7702 afs_uint16 remotePort, size_t version_length,
                    char *version)
{
#if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
    char a[1] = { 0 };

    return MakeDebugCall(socket, remoteAddr, remotePort,
                         RX_PACKET_TYPE_VERSION, a, 1, version,
                         version_length);
#else
    return -1;
#endif
}

afs_int32
7716 rx_GetServerConnections(osi_socket socket, afs_uint32 remoteAddr,
7717 afs_uint16 remotePort, afs_int32 * nextConnection,
7718 int allConnections, afs_uint32 debugSupportedValues,
7719 struct rx_debugConn * conn,
7720 afs_uint32 * supportedValues)
{
#if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
    afs_int32 rc;
    struct rx_debugIn in;
    int i;

    /*
7728 * supportedValues is currently unused, but added to allow future
 * versioning of this function.
 */
7732 *supportedValues = 0;
7733 if (allConnections) {
    in.type = htonl(RX_DEBUGI_GETALLCONN);
} else {
    in.type = htonl(RX_DEBUGI_GETCONN);
}
7738 in.index = htonl(*nextConnection);
7739 memset(conn, 0, sizeof(*conn));
7741 rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
7742 &in, sizeof(in), conn, sizeof(*conn));
if (rc >= 0) {
    *nextConnection += 1;
/*
 * Convert old connection format to new structure.
 */

7751 if (debugSupportedValues & RX_SERVER_DEBUG_OLD_CONN) {
7752 struct rx_debugConn_vL *vL = (struct rx_debugConn_vL *)conn;
7753 #define MOVEvL(a) (conn->a = vL->a)
7755 /* any old or unrecognized version... */
7756 for (i = 0; i < RX_MAXCALLS; i++) {
7757 MOVEvL(callState[i]);
7758 MOVEvL(callMode[i]);
7759 MOVEvL(callFlags[i]);
    MOVEvL(callOther[i]);
}
7762 if (debugSupportedValues & RX_SERVER_DEBUG_SEC_STATS) {
7763 MOVEvL(secStats.type);
7764 MOVEvL(secStats.level);
7765 MOVEvL(secStats.flags);
7766 MOVEvL(secStats.expires);
7767 MOVEvL(secStats.packetsReceived);
7768 MOVEvL(secStats.packetsSent);
7769 MOVEvL(secStats.bytesReceived);
        MOVEvL(secStats.bytesSent);
    }
}

/*
7775 * Do net to host conversion here
7777 * I don't convert host or port since we are most likely
 * going to want these in NBO.
 */
7780 conn->cid = ntohl(conn->cid);
7781 conn->serial = ntohl(conn->serial);
7782 for (i = 0; i < RX_MAXCALLS; i++) {
7783 conn->callNumber[i] = ntohl(conn->callNumber[i]);
7785 conn->error = ntohl(conn->error);
7786 conn->secStats.flags = ntohl(conn->secStats.flags);
7787 conn->secStats.expires = ntohl(conn->secStats.expires);
7788 conn->secStats.packetsReceived =
7789 ntohl(conn->secStats.packetsReceived);
7790 conn->secStats.packetsSent = ntohl(conn->secStats.packetsSent);
7791 conn->secStats.bytesReceived = ntohl(conn->secStats.bytesReceived);
7792 conn->secStats.bytesSent = ntohl(conn->secStats.bytesSent);
7793 conn->epoch = ntohl(conn->epoch);
    conn->natMTU = ntohl(conn->natMTU);
}

return rc;
#else
return -1;
#endif
}

afs_int32
7803 rx_GetServerPeers(osi_socket socket, afs_uint32 remoteAddr,
7804 afs_uint16 remotePort, afs_int32 * nextPeer,
7805 afs_uint32 debugSupportedValues, struct rx_debugPeer * peer,
                  afs_uint32 * supportedValues)
{
#if defined(RXDEBUG) || defined(MAKEDEBUGCALL)
    afs_int32 rc;
    struct rx_debugIn in;

    /*
7813 * supportedValues is currently unused, but added to allow future
 * versioning of this function.
 */
7817 *supportedValues = 0;
7818 in.type = htonl(RX_DEBUGI_GETPEER);
7819 in.index = htonl(*nextPeer);
7820 memset(peer, 0, sizeof(*peer));
7822 rc = MakeDebugCall(socket, remoteAddr, remotePort, RX_PACKET_TYPE_DEBUG,
7823 &in, sizeof(in), peer, sizeof(*peer));
if (rc >= 0) {
    /*
     * Do net to host conversion here
7831 * I don't convert host or port since we are most likely
 * going to want these in NBO.
 */
7834 peer->ifMTU = ntohs(peer->ifMTU);
7835 peer->idleWhen = ntohl(peer->idleWhen);
7836 peer->refCount = ntohs(peer->refCount);
7837 peer->rtt = ntohl(peer->rtt);
7838 peer->rtt_dev = ntohl(peer->rtt_dev);
7839 peer->timeout.sec = 0;
7840 peer->timeout.usec = 0;
7841 peer->nSent = ntohl(peer->nSent);
7842 peer->reSends = ntohl(peer->reSends);
7843 peer->natMTU = ntohs(peer->natMTU);
7844 peer->maxMTU = ntohs(peer->maxMTU);
7845 peer->maxDgramPackets = ntohs(peer->maxDgramPackets);
7846 peer->ifDgramPackets = ntohs(peer->ifDgramPackets);
7847 peer->MTU = ntohs(peer->MTU);
7848 peer->cwind = ntohs(peer->cwind);
7849 peer->nDgramPackets = ntohs(peer->nDgramPackets);
7850 peer->congestSeq = ntohs(peer->congestSeq);
7851 peer->bytesSent.high = ntohl(peer->bytesSent.high);
7852 peer->bytesSent.low = ntohl(peer->bytesSent.low);
7853 peer->bytesReceived.high = ntohl(peer->bytesReceived.high);
    peer->bytesReceived.low = ntohl(peer->bytesReceived.low);
}

return rc;
#else
return -1;
#endif
}

afs_int32
7863 rx_GetLocalPeers(afs_uint32 peerHost, afs_uint16 peerPort,
                 struct rx_debugPeer * peerStats)
{
    struct rx_peer *tp;
7867 afs_int32 error = 1; /* default to "did not succeed" */
7868 afs_uint32 hashValue = PEER_HASH(peerHost, peerPort);
7870 MUTEX_ENTER(&rx_peerHashTable_lock);
7871 for(tp = rx_peerHashTable[hashValue];
7872 tp != NULL; tp = tp->next) {
        if (tp->host == peerHost)
            break;
}

if (tp) {
    tp->refCount++;
    MUTEX_EXIT(&rx_peerHashTable_lock);

    error = 0;

7883 MUTEX_ENTER(&tp->peer_lock);
7884 peerStats->host = tp->host;
7885 peerStats->port = tp->port;
7886 peerStats->ifMTU = tp->ifMTU;
7887 peerStats->idleWhen = tp->idleWhen;
7888 peerStats->refCount = tp->refCount;
7889 peerStats->burstSize = 0;
7890 peerStats->burst = 0;
7891 peerStats->burstWait.sec = 0;
7892 peerStats->burstWait.usec = 0;
7893 peerStats->rtt = tp->rtt;
7894 peerStats->rtt_dev = tp->rtt_dev;
7895 peerStats->timeout.sec = 0;
7896 peerStats->timeout.usec = 0;
7897 peerStats->nSent = tp->nSent;
7898 peerStats->reSends = tp->reSends;
7899 peerStats->natMTU = tp->natMTU;
7900 peerStats->maxMTU = tp->maxMTU;
7901 peerStats->maxDgramPackets = tp->maxDgramPackets;
7902 peerStats->ifDgramPackets = tp->ifDgramPackets;
7903 peerStats->MTU = tp->MTU;
7904 peerStats->cwind = tp->cwind;
7905 peerStats->nDgramPackets = tp->nDgramPackets;
7906 peerStats->congestSeq = tp->congestSeq;
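/* Split the 64-bit byte counters into the high and low 32-bit halves
 * used by the rx_debugPeer structure. */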
7907 peerStats->bytesSent.high = tp->bytesSent >> 32;
7908 peerStats->bytesSent.low = tp->bytesSent & MAX_AFS_UINT32;
7909 peerStats->bytesReceived.high = tp->bytesReceived >> 32;
7910 peerStats->bytesReceived.low
7911 = tp->bytesReceived & MAX_AFS_UINT32;
    MUTEX_EXIT(&tp->peer_lock);

    MUTEX_ENTER(&rx_peerHashTable_lock);
    tp->refCount--;
    MUTEX_EXIT(&rx_peerHashTable_lock);
}

return error;
}

void
shutdown_rx(void)
{
struct rx_serverQueueEntry *np;
int i, j;
struct rx_call *call;
struct rx_serverQueueEntry *sq;
LOCK_RX_INIT;
if (rxinit_status == 1) {
    UNLOCK_RX_INIT;
    return;     /* Already shutdown. */
}
7939 #ifndef AFS_PTHREAD_ENV
7940 FD_ZERO(&rx_selectMask);
7941 #endif /* AFS_PTHREAD_ENV */
7942 rxi_dataQuota = RX_MAX_QUOTA;
7943 #ifndef AFS_PTHREAD_ENV
7945 #endif /* AFS_PTHREAD_ENV */
7948 #ifndef AFS_PTHREAD_ENV
7949 #ifndef AFS_USE_GETTIMEOFDAY
7951 #endif /* AFS_USE_GETTIMEOFDAY */
7952 #endif /* AFS_PTHREAD_ENV */
7954 while (!opr_queue_IsEmpty(&rx_freeCallQueue)) {
7955 call = opr_queue_First(&rx_freeCallQueue, struct rx_call, entry);
7956 opr_queue_Remove(&call->entry);
7957 rxi_Free(call, sizeof(struct rx_call));
7960 while (!opr_queue_IsEmpty(&rx_idleServerQueue)) {
7961 sq = opr_queue_First(&rx_idleServerQueue, struct rx_serverQueueEntry,
    opr_queue_Remove(&sq->entry);
    rxi_Free(sq, sizeof(*sq));
}

{
7968 struct rx_peer **peer_ptr, **peer_end;
7969 for (peer_ptr = &rx_peerHashTable[0], peer_end =
7970 &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
7972 struct rx_peer *peer, *next;
7974 MUTEX_ENTER(&rx_peerHashTable_lock);
7975 for (peer = *peer_ptr; peer; peer = next) {
    struct opr_queue *cursor, *store;
    size_t space;

7979 MUTEX_ENTER(&rx_rpc_stats);
7980 MUTEX_ENTER(&peer->peer_lock);
7981 for (opr_queue_ScanSafe(&peer->rpcStats, cursor, store)) {
7982 unsigned int num_funcs;
7983 struct rx_interface_stat *rpc_stat
    = opr_queue_Entry(cursor, struct rx_interface_stat,
                      entry);
if (!rpc_stat)
    break;
7988 opr_queue_Remove(&rpc_stat->entry);
7989 opr_queue_Remove(&rpc_stat->entryPeers);
num_funcs = rpc_stat->stats[0].func_total;
space =
7992 sizeof(rx_interface_stat_t) +
7993 rpc_stat->stats[0].func_total *
7994 sizeof(rx_function_entry_v1_t);
7996 rxi_Free(rpc_stat, space);
7998 /* rx_rpc_stats must be held */
    rxi_rpc_peer_stat_cnt -= num_funcs;
}
8001 MUTEX_EXIT(&peer->peer_lock);
MUTEX_EXIT(&rx_rpc_stats);

next = peer->next;
rxi_FreePeer(peer);
if (rx_stats_active)
    rx_atomic_dec(&rx_stats.nPeerStructs);
}
MUTEX_EXIT(&rx_peerHashTable_lock);
}
}
8012 for (i = 0; i < RX_MAX_SERVICES; i++) {
    if (rx_services[i])
        rxi_Free(rx_services[i], sizeof(*rx_services[i]));
}
8016 for (i = 0; i < rx_hashTableSize; i++) {
8017 struct rx_connection *tc, *ntc;
8018 MUTEX_ENTER(&rx_connHashTable_lock);
for (tc = rx_connHashTable[i]; tc; tc = ntc) {
    ntc = tc->next;
    for (j = 0; j < RX_MAXCALLS; j++) {
        if (tc->call[j]) {
            rxi_Free(tc->call[j], sizeof(*tc->call[j]));
        }
    }
    rxi_Free(tc, sizeof(*tc));
}
    MUTEX_EXIT(&rx_connHashTable_lock);
}
8031 MUTEX_ENTER(&freeSQEList_lock);
8033 while ((np = rx_FreeSQEList)) {
8034 rx_FreeSQEList = *(struct rx_serverQueueEntry **)np;
8035 MUTEX_DESTROY(&np->lock);
    rxi_Free(np, sizeof(*np));
}
8039 MUTEX_EXIT(&freeSQEList_lock);
8040 MUTEX_DESTROY(&freeSQEList_lock);
8041 MUTEX_DESTROY(&rx_freeCallQueue_lock);
8042 MUTEX_DESTROY(&rx_connHashTable_lock);
8043 MUTEX_DESTROY(&rx_peerHashTable_lock);
8044 MUTEX_DESTROY(&rx_serverPool_lock);
#ifndef KERNEL
    osi_Free(rx_connHashTable,
             rx_hashTableSize * sizeof(struct rx_connection *));
    osi_Free(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
#else
    UNPIN(rx_connHashTable,
          rx_hashTableSize * sizeof(struct rx_connection *));
    UNPIN(rx_peerHashTable, rx_hashTableSize * sizeof(struct rx_peer *));
#endif
8054 rxi_FreeAllPackets();
8056 MUTEX_ENTER(&rx_quota_mutex);
8057 rxi_dataQuota = RX_MAX_QUOTA;
8058 rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0;
MUTEX_EXIT(&rx_quota_mutex);
rxinit_status = 1;
UNLOCK_RX_INIT;
}

#ifndef KERNEL
/*
 * Routines to implement connection specific data.
 */

int
rx_KeyCreate(rx_destructor_t rtn)
{
    int key;
8074 MUTEX_ENTER(&rxi_keyCreate_lock);
8075 key = rxi_keyCreate_counter++;
8076 rxi_keyCreate_destructor = (rx_destructor_t *)
8077 realloc((void *)rxi_keyCreate_destructor,
8078 (key + 1) * sizeof(rx_destructor_t));
8079 rxi_keyCreate_destructor[key] = rtn;
MUTEX_EXIT(&rxi_keyCreate_lock);
return key;
}
void
rx_SetSpecific(struct rx_connection *conn, int key, void *ptr)
{
    int i;
8088 MUTEX_ENTER(&conn->conn_data_lock);
8089 if (!conn->specific) {
8090 conn->specific = malloc((key + 1) * sizeof(void *));
8091 for (i = 0; i < key; i++)
8092 conn->specific[i] = NULL;
8093 conn->nSpecific = key + 1;
8094 conn->specific[key] = ptr;
8095 } else if (key >= conn->nSpecific) {
8096 conn->specific = (void **)
8097 realloc(conn->specific, (key + 1) * sizeof(void *));
8098 for (i = conn->nSpecific; i < key; i++)
8099 conn->specific[i] = NULL;
8100 conn->nSpecific = key + 1;
8101 conn->specific[key] = ptr;
} else {
    if (conn->specific[key] && rxi_keyCreate_destructor[key])
8104 (*rxi_keyCreate_destructor[key]) (conn->specific[key]);
    conn->specific[key] = ptr;
}
MUTEX_EXIT(&conn->conn_data_lock);
}
void
rx_SetServiceSpecific(struct rx_service *svc, int key, void *ptr)
{
    int i;
8114 MUTEX_ENTER(&svc->svc_data_lock);
8115 if (!svc->specific) {
8116 svc->specific = malloc((key + 1) * sizeof(void *));
8117 for (i = 0; i < key; i++)
8118 svc->specific[i] = NULL;
8119 svc->nSpecific = key + 1;
8120 svc->specific[key] = ptr;
8121 } else if (key >= svc->nSpecific) {
8122 svc->specific = (void **)
8123 realloc(svc->specific, (key + 1) * sizeof(void *));
8124 for (i = svc->nSpecific; i < key; i++)
8125 svc->specific[i] = NULL;
8126 svc->nSpecific = key + 1;
8127 svc->specific[key] = ptr;
} else {
    if (svc->specific[key] && rxi_keyCreate_destructor[key])
8130 (*rxi_keyCreate_destructor[key]) (svc->specific[key]);
    svc->specific[key] = ptr;
}
MUTEX_EXIT(&svc->svc_data_lock);
}
void *
rx_GetSpecific(struct rx_connection *conn, int key)
{
    void *ptr;
8140 MUTEX_ENTER(&conn->conn_data_lock);
if (key >= conn->nSpecific)
    ptr = NULL;
else
    ptr = conn->specific[key];
MUTEX_EXIT(&conn->conn_data_lock);
return ptr;
}
void *
rx_GetServiceSpecific(struct rx_service *svc, int key)
{
    void *ptr;
8153 MUTEX_ENTER(&svc->svc_data_lock);
if (key >= svc->nSpecific)
    ptr = NULL;
else
    ptr = svc->specific[key];
MUTEX_EXIT(&svc->svc_data_lock);
return ptr;
}
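/*
 * Example usage (an illustrative sketch only; the key variable and the
 * use of free() as the destructor are hypothetical):
 *
 *     static int myKey;
 *     ...
 *     myKey = rx_KeyCreate(free);
 *     rx_SetSpecific(conn, myKey, strdup("per-conn state"));
 *     state = (char *)rx_GetSpecific(conn, myKey);
 *
 * The destructor registered with rx_KeyCreate runs whenever a non-NULL
 * value for the key is overwritten by rx_SetSpecific.
 */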
8163 #endif /* !KERNEL */
/*
 * processStats is a queue used to store the statistics for the local
8167 * process. Its contents are similar to the contents of the rpcStats
8168 * queue on a rx_peer structure, but the actual data stored within
8169 * this queue contains totals across the lifetime of the process (assuming
8170 * the stats have not been reset) - unlike the per peer structures
 * which can come and go based upon the peer lifetime.
 */
8174 static struct opr_queue processStats = { &processStats, &processStats };
/*
 * peerStats is a queue used to store the statistics for all peer structs.
 * Its contents are the union of all the peer rpcStats queues.
 */
8181 static struct opr_queue peerStats = { &peerStats, &peerStats };
/*
 * rxi_monitor_processStats is used to turn process wide stat collection
 * on and off
 */
8188 static int rxi_monitor_processStats = 0;
/*
 * rxi_monitor_peerStats is used to turn per peer stat collection on and off
 */
8194 static int rxi_monitor_peerStats = 0;
void
rxi_ClearRPCOpStat(rx_function_entry_v1_p rpc_stat)
{
8200 rpc_stat->invocations = 0;
8201 rpc_stat->bytes_sent = 0;
8202 rpc_stat->bytes_rcvd = 0;
8203 rpc_stat->queue_time_sum.sec = 0;
8204 rpc_stat->queue_time_sum.usec = 0;
8205 rpc_stat->queue_time_sum_sqr.sec = 0;
8206 rpc_stat->queue_time_sum_sqr.usec = 0;
8207 rpc_stat->queue_time_min.sec = 9999999;
8208 rpc_stat->queue_time_min.usec = 9999999;
8209 rpc_stat->queue_time_max.sec = 0;
8210 rpc_stat->queue_time_max.usec = 0;
8211 rpc_stat->execution_time_sum.sec = 0;
8212 rpc_stat->execution_time_sum.usec = 0;
8213 rpc_stat->execution_time_sum_sqr.sec = 0;
8214 rpc_stat->execution_time_sum_sqr.usec = 0;
8215 rpc_stat->execution_time_min.sec = 9999999;
8216 rpc_stat->execution_time_min.usec = 9999999;
8217 rpc_stat->execution_time_max.sec = 0;
8218 rpc_stat->execution_time_max.usec = 0;
/*!
 * Given all of the information for a particular rpc
 * call, find or create (if requested) the stat structure for the rpc.
 *
 * @param stats
 *      the queue of stats that will be updated with the new value
 * @param rxInterface
 *      a unique number that identifies the rpc interface
 * @param totalFunc
 *      the total number of functions in this interface.  this is only
 *      required if create is true
 * @param isServer
 *      if true, this invocation was made to a server
 * @param remoteHost
 *      the ip address of the remote host.  this is only required if create
 *      and addToPeerList are true
 * @param remotePort
 *      the port of the remote host.  this is only required if create
 *      and addToPeerList are true
 * @param addToPeerList
 *      if != 0, add newly created stat to the global peer list
 * @param counter
 *      if a new stats structure is allocated, the counter will
 *      be updated with the new number of allocated stat structures.
 *      only required if create is true
 * @param create
 *      if no stats structure exists, allocate one
 */
static rx_interface_stat_p
rxi_FindRpcStat(struct opr_queue *stats, afs_uint32 rxInterface,
                afs_uint32 totalFunc, int isServer, afs_uint32 remoteHost,
                afs_uint32 remotePort, int addToPeerList,
                unsigned int *counter, int create)
{
    rx_interface_stat_p rpc_stat = NULL;
    struct opr_queue *cursor;

    /*
     * See if there's already a structure for this interface
     */

    for (opr_queue_Scan(stats, cursor)) {
        rpc_stat = opr_queue_Entry(cursor, struct rx_interface_stat, entry);

        if ((rpc_stat->stats[0].interfaceId == rxInterface)
            && (rpc_stat->stats[0].remote_is_server == isServer))
            break;
    }

    /* if they didn't ask us to create, we're done */
    if (!create) {
        if (opr_queue_IsEnd(stats, cursor))
            return NULL;
        return rpc_stat;
    }

    /* can't proceed without these */
    if (!totalFunc || !counter)
        return NULL;

    /*
     * Didn't find a match so allocate a new structure and add it to the
     * queue.
     */

    if (opr_queue_IsEnd(stats, cursor) || (rpc_stat == NULL)
        || (rpc_stat->stats[0].interfaceId != rxInterface)
        || (rpc_stat->stats[0].remote_is_server != isServer)) {
        int i;
        size_t space;

        space =
            sizeof(rx_interface_stat_t) +
            totalFunc * sizeof(rx_function_entry_v1_t);

        rpc_stat = rxi_Alloc(space);
        if (rpc_stat == NULL)
            return NULL;

        *counter += totalFunc;
        for (i = 0; i < totalFunc; i++) {
            rxi_ClearRPCOpStat(&(rpc_stat->stats[i]));
            rpc_stat->stats[i].remote_peer = remoteHost;
            rpc_stat->stats[i].remote_port = remotePort;
            rpc_stat->stats[i].remote_is_server = isServer;
            rpc_stat->stats[i].interfaceId = rxInterface;
            rpc_stat->stats[i].func_total = totalFunc;
            rpc_stat->stats[i].func_index = i;
        }
        opr_queue_Prepend(stats, &rpc_stat->entry);
        if (addToPeerList) {
            opr_queue_Prepend(&peerStats, &rpc_stat->entryPeers);
        }
    }
    return rpc_stat;
}
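/*
 * Illustrative sketch (not part of the original source): a read-only
 * lookup passes create == 0, so none of the creation parameters are
 * needed; this is how the rx_Clear*RPCStats routines below call
 * rxi_FindRpcStat.  "example_lookup" and the interface id 100 are
 * hypothetical.
 */
#if 0
static rx_interface_stat_p
example_lookup(void)
{
    return rxi_FindRpcStat(&processStats, 100, 0, 0, 0, 0, 0, NULL, 0);
}
#endif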
void
rx_ClearProcessRPCStats(afs_int32 rxInterface)
{
    rx_interface_stat_p rpc_stat;
    int totalFunc, i;

    if (rxInterface == -1)
        return;

    MUTEX_ENTER(&rx_rpc_stats);
    rpc_stat = rxi_FindRpcStat(&processStats, rxInterface, 0, 0,
                               0, 0, 0, 0, 0);
    if (rpc_stat) {
        totalFunc = rpc_stat->stats[0].func_total;
        for (i = 0; i < totalFunc; i++)
            rxi_ClearRPCOpStat(&(rpc_stat->stats[i]));
    }
    MUTEX_EXIT(&rx_rpc_stats);
}
void
rx_ClearPeerRPCStats(afs_int32 rxInterface, afs_uint32 peerHost, afs_uint16 peerPort)
{
    rx_interface_stat_p rpc_stat;
    int totalFunc, i;
    struct rx_peer * peer;

    if (rxInterface == -1)
        return;

    peer = rxi_FindPeer(peerHost, peerPort, 0);
    if (!peer)
        return;

    MUTEX_ENTER(&rx_rpc_stats);
    rpc_stat = rxi_FindRpcStat(&peer->rpcStats, rxInterface, 0, 1,
                               0, 0, 0, 0, 0);
    if (rpc_stat) {
        totalFunc = rpc_stat->stats[0].func_total;
        for (i = 0; i < totalFunc; i++)
            rxi_ClearRPCOpStat(&(rpc_stat->stats[i]));
    }
    MUTEX_EXIT(&rx_rpc_stats);
}
void *
rx_CopyProcessRPCStats(afs_uint64 op)
{
    rx_interface_stat_p rpc_stat;
    rx_function_entry_v1_p rpcop_stat =
        rxi_Alloc(sizeof(rx_function_entry_v1_t));
    int currentFunc = (op & MAX_AFS_UINT32);
    afs_int32 rxInterface = (op >> 32);

    if (rpcop_stat == NULL)
        return NULL;

    if (!rxi_monitor_processStats || rxInterface == -1) {
        /* free the copy buffer on the early-out paths to avoid a leak */
        rxi_Free(rpcop_stat, sizeof(rx_function_entry_v1_t));
        return NULL;
    }

    MUTEX_ENTER(&rx_rpc_stats);
    rpc_stat = rxi_FindRpcStat(&processStats, rxInterface, 0, 0,
                               0, 0, 0, 0, 0);
    if (rpc_stat)
        memcpy(rpcop_stat, &(rpc_stat->stats[currentFunc]),
               sizeof(rx_function_entry_v1_t));
    MUTEX_EXIT(&rx_rpc_stats);
    if (rpc_stat)
        return rpcop_stat;
    rxi_Free(rpcop_stat, sizeof(rx_function_entry_v1_t));
    return NULL;
}
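/*
 * Illustrative sketch (not part of the original source): callers pack the
 * interface id into the high 32 bits of "op" and the function index into
 * the low 32 bits, matching the decoding above.  "example_pack_op" is a
 * hypothetical helper.
 */
#if 0
static afs_uint64
example_pack_op(afs_int32 rxInterface, afs_uint32 currentFunc)
{
    return ((afs_uint64)(afs_uint32)rxInterface << 32) | currentFunc;
}
#endif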
void *
rx_CopyPeerRPCStats(afs_uint64 op, afs_uint32 peerHost, afs_uint16 peerPort)
{
    rx_interface_stat_p rpc_stat;
    rx_function_entry_v1_p rpcop_stat =
        rxi_Alloc(sizeof(rx_function_entry_v1_t));
    int currentFunc = (op & MAX_AFS_UINT32);
    afs_int32 rxInterface = (op >> 32);
    struct rx_peer *peer;

    if (rpcop_stat == NULL)
        return NULL;

    if (!rxi_monitor_peerStats || rxInterface == -1
        || (peer = rxi_FindPeer(peerHost, peerPort, 0)) == NULL) {
        /* free the copy buffer on the early-out paths to avoid a leak */
        rxi_Free(rpcop_stat, sizeof(rx_function_entry_v1_t));
        return NULL;
    }

    MUTEX_ENTER(&rx_rpc_stats);
    rpc_stat = rxi_FindRpcStat(&peer->rpcStats, rxInterface, 0, 1,
                               0, 0, 0, 0, 0);
    if (rpc_stat)
        memcpy(rpcop_stat, &(rpc_stat->stats[currentFunc]),
               sizeof(rx_function_entry_v1_t));
    MUTEX_EXIT(&rx_rpc_stats);
    if (rpc_stat)
        return rpcop_stat;
    rxi_Free(rpcop_stat, sizeof(rx_function_entry_v1_t));
    return NULL;
}
void
rx_ReleaseRPCStats(void *stats)
{
    if (stats)
        rxi_Free(stats, sizeof(rx_function_entry_v1_t));
}
/*!
 * Given all of the information for a particular rpc
 * call, create (if needed) and update the stat totals for the rpc.
 *
 * @param stats
 *      the queue of stats that will be updated with the new value
 * @param rxInterface
 *      a unique number that identifies the rpc interface
 * @param currentFunc
 *      the index of the function being invoked
 * @param totalFunc
 *      the total number of functions in this interface
 * @param queueTime
 *      the amount of time this function waited for a thread
 * @param execTime
 *      the amount of time this function invocation took to execute
 * @param bytesSent
 *      the number of bytes sent by this invocation
 * @param bytesRcvd
 *      the number of bytes received by this invocation
 * @param isServer
 *      if true, this invocation was made to a server
 * @param remoteHost
 *      the ip address of the remote host
 * @param remotePort
 *      the port of the remote host
 * @param addToPeerList
 *      if != 0, add newly created stat to the global peer list
 * @param counter
 *      if a new stats structure is allocated, the counter will
 *      be updated with the new number of allocated stat structures
 */
static int
rxi_AddRpcStat(struct opr_queue *stats, afs_uint32 rxInterface,
               afs_uint32 currentFunc, afs_uint32 totalFunc,
               struct clock *queueTime, struct clock *execTime,
               afs_uint64 bytesSent, afs_uint64 bytesRcvd, int isServer,
               afs_uint32 remoteHost, afs_uint32 remotePort,
               int addToPeerList, unsigned int *counter)
{
    rx_interface_stat_p rpc_stat;

    rpc_stat = rxi_FindRpcStat(stats, rxInterface, totalFunc, isServer,
                               remoteHost, remotePort, addToPeerList, counter,
                               1);
    if (!rpc_stat)
        return -1;

    /*
     * Increment the stats for this function
     */

    rpc_stat->stats[currentFunc].invocations++;
    rpc_stat->stats[currentFunc].bytes_sent += bytesSent;
    rpc_stat->stats[currentFunc].bytes_rcvd += bytesRcvd;
    clock_Add(&rpc_stat->stats[currentFunc].queue_time_sum, queueTime);
    clock_AddSq(&rpc_stat->stats[currentFunc].queue_time_sum_sqr, queueTime);
    if (clock_Lt(queueTime, &rpc_stat->stats[currentFunc].queue_time_min)) {
        rpc_stat->stats[currentFunc].queue_time_min = *queueTime;
    }
    if (clock_Gt(queueTime, &rpc_stat->stats[currentFunc].queue_time_max)) {
        rpc_stat->stats[currentFunc].queue_time_max = *queueTime;
    }
    clock_Add(&rpc_stat->stats[currentFunc].execution_time_sum, execTime);
    clock_AddSq(&rpc_stat->stats[currentFunc].execution_time_sum_sqr,
                execTime);
    if (clock_Lt(execTime, &rpc_stat->stats[currentFunc].execution_time_min)) {
        rpc_stat->stats[currentFunc].execution_time_min = *execTime;
    }
    if (clock_Gt(execTime, &rpc_stat->stats[currentFunc].execution_time_max)) {
        rpc_stat->stats[currentFunc].execution_time_max = *execTime;
    }
    return 0;
}
static void
rxi_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
                          afs_uint32 currentFunc, afs_uint32 totalFunc,
                          struct clock *queueTime, struct clock *execTime,
                          afs_uint64 bytesSent, afs_uint64 bytesRcvd,
                          int isServer)
{
    if (!(rxi_monitor_peerStats || rxi_monitor_processStats))
        return;

    MUTEX_ENTER(&rx_rpc_stats);

    if (rxi_monitor_peerStats) {
        MUTEX_ENTER(&peer->peer_lock);
        rxi_AddRpcStat(&peer->rpcStats, rxInterface, currentFunc, totalFunc,
                       queueTime, execTime, bytesSent, bytesRcvd, isServer,
                       peer->host, peer->port, 1, &rxi_rpc_peer_stat_cnt);
        MUTEX_EXIT(&peer->peer_lock);
    }

    if (rxi_monitor_processStats) {
        rxi_AddRpcStat(&processStats, rxInterface, currentFunc, totalFunc,
                       queueTime, execTime, bytesSent, bytesRcvd, isServer,
                       0xffffffff, 0xffffffff, 0, &rxi_rpc_process_stat_cnt);
    }

    MUTEX_EXIT(&rx_rpc_stats);
}
/*!
 * Increment the times and count for a particular rpc function.
 *
 * Traditionally this call was invoked from rxgen stubs.  Modern stubs
 * call rx_RecordCallStatistics instead, so the public version of this
 * function is left purely for legacy callers.
 *
 * @param peer
 *      The peer who invoked the rpc
 * @param rxInterface
 *      A unique number that identifies the rpc interface
 * @param currentFunc
 *      The index of the function being invoked
 * @param totalFunc
 *      The total number of functions in this interface
 * @param queueTime
 *      The amount of time this function waited for a thread
 * @param execTime
 *      The amount of time this function invocation took to execute
 * @param bytesSent
 *      The number of bytes sent by this invocation
 * @param bytesRcvd
 *      The number of bytes received by this invocation
 * @param isServer
 *      If true, this invocation was made to a server
 */
void
rx_IncrementTimeAndCount(struct rx_peer *peer, afs_uint32 rxInterface,
                         afs_uint32 currentFunc, afs_uint32 totalFunc,
                         struct clock *queueTime, struct clock *execTime,
                         afs_hyper_t * bytesSent, afs_hyper_t * bytesRcvd,
                         int isServer)
{
    afs_uint64 sent64;
    afs_uint64 rcvd64;

    sent64 = ((afs_uint64)bytesSent->high << 32) + bytesSent->low;
    rcvd64 = ((afs_uint64)bytesRcvd->high << 32) + bytesRcvd->low;

    rxi_IncrementTimeAndCount(peer, rxInterface, currentFunc, totalFunc,
                              queueTime, execTime, sent64, rcvd64,
                              isServer);
}
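/*
 * Illustrative sketch (not part of the original source): a legacy caller
 * passes its 64-bit byte counts as afs_hyper_t {high, low} pairs, which
 * the wrapper above folds back into plain 64-bit values.  The interface
 * and function numbers here are hypothetical.
 */
#if 0
static void
example_record_call(struct rx_peer *peer, struct clock *queueTime,
                    struct clock *execTime)
{
    afs_hyper_t sent = { 0, 1024 };     /* high word, low word */
    afs_hyper_t rcvd = { 0, 512 };

    rx_IncrementTimeAndCount(peer, 100, 3, 10, queueTime, execTime,
                             &sent, &rcvd, 1);
}
#endif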
/*
 * rx_MarshallProcessRPCStats - marshall an array of rpc statistics
 *
 * PARAMETERS
 *
 * IN callerVersion - the rpc stat version of the caller.
 *
 * IN count - the number of entries to marshall.
 *
 * IN stats - pointer to stats to be marshalled.
 *
 * OUT ptr - Where to store the marshalled data.
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_MarshallProcessRPCStats(afs_uint32 callerVersion, int count,
                           rx_function_entry_v1_t * stats, afs_uint32 ** ptrP)
{
    int i;
    afs_uint32 *ptr;

    /*
     * We only support the first version
     */
    for (ptr = *ptrP, i = 0; i < count; i++, stats++) {
        *(ptr++) = stats->remote_peer;
        *(ptr++) = stats->remote_port;
        *(ptr++) = stats->remote_is_server;
        *(ptr++) = stats->interfaceId;
        *(ptr++) = stats->func_total;
        *(ptr++) = stats->func_index;
        *(ptr++) = stats->invocations >> 32;
        *(ptr++) = stats->invocations & MAX_AFS_UINT32;
        *(ptr++) = stats->bytes_sent >> 32;
        *(ptr++) = stats->bytes_sent & MAX_AFS_UINT32;
        *(ptr++) = stats->bytes_rcvd >> 32;
        *(ptr++) = stats->bytes_rcvd & MAX_AFS_UINT32;
        *(ptr++) = stats->queue_time_sum.sec;
        *(ptr++) = stats->queue_time_sum.usec;
        *(ptr++) = stats->queue_time_sum_sqr.sec;
        *(ptr++) = stats->queue_time_sum_sqr.usec;
        *(ptr++) = stats->queue_time_min.sec;
        *(ptr++) = stats->queue_time_min.usec;
        *(ptr++) = stats->queue_time_max.sec;
        *(ptr++) = stats->queue_time_max.usec;
        *(ptr++) = stats->execution_time_sum.sec;
        *(ptr++) = stats->execution_time_sum.usec;
        *(ptr++) = stats->execution_time_sum_sqr.sec;
        *(ptr++) = stats->execution_time_sum_sqr.usec;
        *(ptr++) = stats->execution_time_min.sec;
        *(ptr++) = stats->execution_time_min.usec;
        *(ptr++) = stats->execution_time_max.sec;
        *(ptr++) = stats->execution_time_max.usec;
    }
    *ptrP = ptr;
}
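/*
 * Illustrative sketch (not part of the original source): each entry above
 * marshalls to a fixed 28 afs_uint32 words - six 32-bit fields, three
 * 64-bit counters split into high/low halves, and eight clock values as
 * sec/usec pairs - so a buffer for "count" entries must be sized
 * accordingly.  "example_marshalled_size" is a hypothetical helper.
 */
#if 0
static size_t
example_marshalled_size(int count)
{
    return (size_t)count * 28 * sizeof(afs_uint32);
}
#endif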
/*
 * rx_RetrieveProcessRPCStats - retrieve all of the rpc statistics for
 * this process
 *
 * PARAMETERS
 *
 * IN callerVersion - the rpc stat version of the caller
 *
 * OUT myVersion - the rpc stat version of this function
 *
 * OUT clock_sec - local time seconds
 *
 * OUT clock_usec - local time microseconds
 *
 * OUT allocSize - the number of bytes allocated to contain stats
 *
 * OUT statCount - the number of stats retrieved from this process.
 *
 * OUT stats - the actual stats retrieved from this process.
 *
 * RETURN CODES
 *
 * Returns void.  If successful, stats will != NULL.
 */
void
rx_RetrieveProcessRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
                           afs_uint32 * clock_sec, afs_uint32 * clock_usec,
                           size_t * allocSize, afs_uint32 * statCount,
                           afs_uint32 ** stats)
{
    size_t space = 0;
    afs_uint32 *ptr;
    struct clock now;

    *stats = 0;
    *allocSize = 0;
    *statCount = 0;
    *myVersion = RX_STATS_RETRIEVAL_VERSION;

    /*
     * Check to see if stats are enabled
     */

    MUTEX_ENTER(&rx_rpc_stats);
    if (!rxi_monitor_processStats) {
        MUTEX_EXIT(&rx_rpc_stats);
        return;
    }

    clock_GetTime(&now);
    *clock_sec = now.sec;
    *clock_usec = now.usec;

    /*
     * Allocate the space based upon the caller version
     *
     * If the client is at an older version than we are,
     * we return the statistic data in the older data format, but
     * we still return our version number so the client knows we
     * are maintaining more data than it can retrieve.
     */

    if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
        space = rxi_rpc_process_stat_cnt * sizeof(rx_function_entry_v1_t);
        *statCount = rxi_rpc_process_stat_cnt;
    } else {
        /*
         * This can't happen yet, but in the future version changes
         * can be handled by adding additional code here
         */
    }

    if (space > (size_t) 0) {
        ptr = *stats = rxi_Alloc(space);

        if (ptr != NULL) {
            struct opr_queue *cursor;

            *allocSize = space;
            for (opr_queue_Scan(&processStats, cursor)) {
                struct rx_interface_stat *rpc_stat =
                    opr_queue_Entry(cursor, struct rx_interface_stat, entry);
                /*
                 * Copy the data based upon the caller version
                 */
                rx_MarshallProcessRPCStats(callerVersion,
                                           rpc_stat->stats[0].func_total,
                                           rpc_stat->stats, &ptr);
            }
        }
    }
    MUTEX_EXIT(&rx_rpc_stats);
}
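/*
 * Illustrative sketch (not part of the original source): a typical caller
 * of the retrieval interface; the buffer is released with rx_FreeRPCStats,
 * defined below.  "example_dump_process_stats" is a hypothetical name.
 */
#if 0
static void
example_dump_process_stats(void)
{
    afs_uint32 myVersion, sec, usec, statCount;
    afs_uint32 *stats = NULL;
    size_t allocSize = 0;

    rx_RetrieveProcessRPCStats(RX_STATS_RETRIEVAL_VERSION, &myVersion,
                               &sec, &usec, &allocSize, &statCount, &stats);
    if (stats != NULL) {
        /* ... decode the marshalled words here ... */
        rx_FreeRPCStats(stats, allocSize);
    }
}
#endif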
/*
 * rx_RetrievePeerRPCStats - retrieve all of the rpc statistics for the peers
 * related to this process
 *
 * PARAMETERS
 *
 * IN callerVersion - the rpc stat version of the caller
 *
 * OUT myVersion - the rpc stat version of this function
 *
 * OUT clock_sec - local time seconds
 *
 * OUT clock_usec - local time microseconds
 *
 * OUT allocSize - the number of bytes allocated to contain stats
 *
 * OUT statCount - the number of stats retrieved from the individual
 * peer structures.
 *
 * OUT stats - the actual stats retrieved from the individual peer structures.
 *
 * RETURN CODES
 *
 * Returns void.  If successful, stats will != NULL.
 */
void
rx_RetrievePeerRPCStats(afs_uint32 callerVersion, afs_uint32 * myVersion,
                        afs_uint32 * clock_sec, afs_uint32 * clock_usec,
                        size_t * allocSize, afs_uint32 * statCount,
                        afs_uint32 ** stats)
{
    size_t space = 0;
    afs_uint32 *ptr;
    struct clock now;

    *stats = 0;
    *allocSize = 0;
    *statCount = 0;
    *myVersion = RX_STATS_RETRIEVAL_VERSION;

    /*
     * Check to see if stats are enabled
     */

    MUTEX_ENTER(&rx_rpc_stats);
    if (!rxi_monitor_peerStats) {
        MUTEX_EXIT(&rx_rpc_stats);
        return;
    }

    clock_GetTime(&now);
    *clock_sec = now.sec;
    *clock_usec = now.usec;

    /*
     * Allocate the space based upon the caller version
     *
     * If the client is at an older version than we are,
     * we return the statistic data in the older data format, but
     * we still return our version number so the client knows we
     * are maintaining more data than it can retrieve.
     */

    if (callerVersion >= RX_STATS_RETRIEVAL_FIRST_EDITION) {
        space = rxi_rpc_peer_stat_cnt * sizeof(rx_function_entry_v1_t);
        *statCount = rxi_rpc_peer_stat_cnt;
    } else {
        /*
         * This can't happen yet, but in the future version changes
         * can be handled by adding additional code here
         */
    }

    if (space > (size_t) 0) {
        ptr = *stats = rxi_Alloc(space);

        if (ptr != NULL) {
            struct opr_queue *cursor;

            *allocSize = space;
            for (opr_queue_Scan(&peerStats, cursor)) {
                struct rx_interface_stat *rpc_stat
                    = opr_queue_Entry(cursor, struct rx_interface_stat,
                                      entryPeers);
                /*
                 * Copy the data based upon the caller version
                 */
                rx_MarshallProcessRPCStats(callerVersion,
                                           rpc_stat->stats[0].func_total,
                                           rpc_stat->stats, &ptr);
            }
        }
    }
    MUTEX_EXIT(&rx_rpc_stats);
}
/*
 * rx_FreeRPCStats - free memory allocated by
 *                   rx_RetrieveProcessRPCStats and rx_RetrievePeerRPCStats
 *
 * PARAMETERS
 *
 * IN stats - stats previously returned by rx_RetrieveProcessRPCStats or
 * rx_RetrievePeerRPCStats
 *
 * IN allocSize - the number of bytes in stats.
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_FreeRPCStats(afs_uint32 * stats, size_t allocSize)
{
    rxi_Free(stats, allocSize);
}
/*
 * rx_queryProcessRPCStats - see if process rpc stat collection is
 * currently enabled.
 *
 * RETURN CODES
 *
 * Returns 0 if stats are not enabled != 0 otherwise
 */
int
rx_queryProcessRPCStats(void)
{
    int rc;
    MUTEX_ENTER(&rx_rpc_stats);
    rc = rxi_monitor_processStats;
    MUTEX_EXIT(&rx_rpc_stats);
    return rc;
}
/*
 * rx_queryPeerRPCStats - see if peer stat collection is currently enabled.
 *
 * RETURN CODES
 *
 * Returns 0 if stats are not enabled != 0 otherwise
 */
int
rx_queryPeerRPCStats(void)
{
    int rc;
    MUTEX_ENTER(&rx_rpc_stats);
    rc = rxi_monitor_peerStats;
    MUTEX_EXIT(&rx_rpc_stats);
    return rc;
}
/*
 * rx_enableProcessRPCStats - begin rpc stat collection for entire process
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_enableProcessRPCStats(void)
{
    MUTEX_ENTER(&rx_rpc_stats);
    rx_enable_stats = 1;
    rxi_monitor_processStats = 1;
    MUTEX_EXIT(&rx_rpc_stats);
}
/*
 * rx_enablePeerRPCStats - begin rpc stat collection per peer structure
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_enablePeerRPCStats(void)
{
    MUTEX_ENTER(&rx_rpc_stats);
    rx_enable_stats = 1;
    rxi_monitor_peerStats = 1;
    MUTEX_EXIT(&rx_rpc_stats);
}
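/*
 * Illustrative sketch (not part of the original source): a server that
 * wants both views enables each collector and can verify the switches with
 * the query routines above.  "example_enable_all_stats" is a hypothetical
 * name.
 */
#if 0
static void
example_enable_all_stats(void)
{
    rx_enableProcessRPCStats();
    rx_enablePeerRPCStats();
    if (!rx_queryProcessRPCStats() || !rx_queryPeerRPCStats())
        abort();        /* cannot happen */
}
#endif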
/*
 * rx_disableProcessRPCStats - stop rpc stat collection for entire process
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_disableProcessRPCStats(void)
{
    struct opr_queue *cursor, *store;
    size_t space;

    MUTEX_ENTER(&rx_rpc_stats);

    /*
     * Turn off process statistics and if peer stats is also off, turn
     * off everything
     */

    rxi_monitor_processStats = 0;
    if (rxi_monitor_peerStats == 0) {
        rx_enable_stats = 0;
    }

    for (opr_queue_ScanSafe(&processStats, cursor, store)) {
        unsigned int num_funcs = 0;
        struct rx_interface_stat *rpc_stat
            = opr_queue_Entry(cursor, struct rx_interface_stat, entry);

        opr_queue_Remove(&rpc_stat->entry);

        num_funcs = rpc_stat->stats[0].func_total;
        space =
            sizeof(rx_interface_stat_t) +
            rpc_stat->stats[0].func_total * sizeof(rx_function_entry_v1_t);

        rxi_Free(rpc_stat, space);
        rxi_rpc_process_stat_cnt -= num_funcs;
    }
    MUTEX_EXIT(&rx_rpc_stats);
}
/*
 * rx_disablePeerRPCStats - stop rpc stat collection for peers
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_disablePeerRPCStats(void)
{
    struct rx_peer **peer_ptr, **peer_end;
    int code;

    /*
     * Turn off peer statistics and if process stats is also off, turn
     * off everything
     */
    rxi_monitor_peerStats = 0;
    if (rxi_monitor_processStats == 0) {
        rx_enable_stats = 0;
    }

    for (peer_ptr = &rx_peerHashTable[0], peer_end =
         &rx_peerHashTable[rx_hashTableSize]; peer_ptr < peer_end;
         peer_ptr++) {
        struct rx_peer *peer, *next, *prev;

        MUTEX_ENTER(&rx_peerHashTable_lock);
        MUTEX_ENTER(&rx_rpc_stats);
        for (prev = peer = *peer_ptr; peer; peer = next) {
            next = peer->next;
            code = MUTEX_TRYENTER(&peer->peer_lock);
            if (code) {
                size_t space;
                struct opr_queue *cursor, *store;

                if (prev == *peer_ptr) {
                    /* still at the head of this hash chain */
                    prev = peer;
                }
                /* hold references while the hash table lock is dropped */
                peer->refCount++;
                if (next)
                    next->refCount++;
                MUTEX_EXIT(&rx_peerHashTable_lock);

                for (opr_queue_ScanSafe(&peer->rpcStats, cursor, store)) {
                    unsigned int num_funcs = 0;
                    struct rx_interface_stat *rpc_stat
                        = opr_queue_Entry(cursor, struct rx_interface_stat,
                                          entry);

                    opr_queue_Remove(&rpc_stat->entry);
                    opr_queue_Remove(&rpc_stat->entryPeers);
                    num_funcs = rpc_stat->stats[0].func_total;
                    space =
                        sizeof(rx_interface_stat_t) +
                        rpc_stat->stats[0].func_total *
                        sizeof(rx_function_entry_v1_t);

                    rxi_Free(rpc_stat, space);
                    rxi_rpc_peer_stat_cnt -= num_funcs;
                }
                MUTEX_EXIT(&peer->peer_lock);

                MUTEX_ENTER(&rx_peerHashTable_lock);
                peer->refCount--;
                if (next)
                    next->refCount--;
            } else {
                prev = peer;
            }
        }
        MUTEX_EXIT(&rx_rpc_stats);
        MUTEX_EXIT(&rx_peerHashTable_lock);
    }
}
/*
 * rx_clearProcessRPCStats - clear the contents of the rpc stats according
 * to the requested info
 *
 * PARAMETERS
 *
 * IN clearFlag - flag indicating which stats to clear
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_clearProcessRPCStats(afs_uint32 clearFlag)
{
    struct opr_queue *cursor;

    MUTEX_ENTER(&rx_rpc_stats);

    for (opr_queue_Scan(&processStats, cursor)) {
        unsigned int num_funcs = 0, i;
        /* the entry must be taken from the scan cursor, not from the
         * uninitialised rpc_stat pointer itself */
        struct rx_interface_stat *rpc_stat
            = opr_queue_Entry(cursor, struct rx_interface_stat, entry);

        num_funcs = rpc_stat->stats[0].func_total;
        for (i = 0; i < num_funcs; i++) {
            if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
                rpc_stat->stats[i].invocations = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
                rpc_stat->stats[i].bytes_sent = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
                rpc_stat->stats[i].bytes_rcvd = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
                rpc_stat->stats[i].queue_time_sum.sec = 0;
                rpc_stat->stats[i].queue_time_sum.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
                rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
                rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
                rpc_stat->stats[i].queue_time_min.sec = 9999999;
                rpc_stat->stats[i].queue_time_min.usec = 9999999;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
                rpc_stat->stats[i].queue_time_max.sec = 0;
                rpc_stat->stats[i].queue_time_max.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
                rpc_stat->stats[i].execution_time_sum.sec = 0;
                rpc_stat->stats[i].execution_time_sum.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
                rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
                rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
                rpc_stat->stats[i].execution_time_min.sec = 9999999;
                rpc_stat->stats[i].execution_time_min.usec = 9999999;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
                rpc_stat->stats[i].execution_time_max.sec = 0;
                rpc_stat->stats[i].execution_time_max.usec = 0;
            }
        }
    }

    MUTEX_EXIT(&rx_rpc_stats);
}
/*
 * rx_clearPeerRPCStats - clear the contents of the rpc stats according
 * to the requested info
 *
 * PARAMETERS
 *
 * IN clearFlag - flag indicating which stats to clear
 *
 * RETURN CODES
 *
 * Returns void.
 */
void
rx_clearPeerRPCStats(afs_uint32 clearFlag)
{
    struct opr_queue *cursor;

    MUTEX_ENTER(&rx_rpc_stats);

    for (opr_queue_Scan(&peerStats, cursor)) {
        unsigned int num_funcs, i;
        struct rx_interface_stat *rpc_stat
            = opr_queue_Entry(cursor, struct rx_interface_stat, entryPeers);

        num_funcs = rpc_stat->stats[0].func_total;
        for (i = 0; i < num_funcs; i++) {
            if (clearFlag & AFS_RX_STATS_CLEAR_INVOCATIONS) {
                rpc_stat->stats[i].invocations = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_SENT) {
                rpc_stat->stats[i].bytes_sent = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_BYTES_RCVD) {
                rpc_stat->stats[i].bytes_rcvd = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM) {
                rpc_stat->stats[i].queue_time_sum.sec = 0;
                rpc_stat->stats[i].queue_time_sum.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE) {
                rpc_stat->stats[i].queue_time_sum_sqr.sec = 0;
                rpc_stat->stats[i].queue_time_sum_sqr.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN) {
                rpc_stat->stats[i].queue_time_min.sec = 9999999;
                rpc_stat->stats[i].queue_time_min.usec = 9999999;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX) {
                rpc_stat->stats[i].queue_time_max.sec = 0;
                rpc_stat->stats[i].queue_time_max.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SUM) {
                rpc_stat->stats[i].execution_time_sum.sec = 0;
                rpc_stat->stats[i].execution_time_sum.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE) {
                rpc_stat->stats[i].execution_time_sum_sqr.sec = 0;
                rpc_stat->stats[i].execution_time_sum_sqr.usec = 0;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MIN) {
                rpc_stat->stats[i].execution_time_min.sec = 9999999;
                rpc_stat->stats[i].execution_time_min.usec = 9999999;
            }
            if (clearFlag & AFS_RX_STATS_CLEAR_EXEC_TIME_MAX) {
                rpc_stat->stats[i].execution_time_max.sec = 0;
                rpc_stat->stats[i].execution_time_max.usec = 0;
            }
        }
    }

    MUTEX_EXIT(&rx_rpc_stats);
}
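/*
 * Illustrative sketch (not part of the original source): the clear flags
 * can be combined, e.g. to reset only the observed timing extremes while
 * leaving the counters and sums intact.  "example_reset_extremes" is a
 * hypothetical name.
 */
#if 0
static void
example_reset_extremes(void)
{
    rx_clearPeerRPCStats(AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN |
                         AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX |
                         AFS_RX_STATS_CLEAR_EXEC_TIME_MIN |
                         AFS_RX_STATS_CLEAR_EXEC_TIME_MAX);
}
#endif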
/*
 * rxi_rxstat_userok points to a routine that returns 1 if the caller
 * is authorized to enable/disable/clear RX statistics.
 */
static int (*rxi_rxstat_userok) (struct rx_call * call) = NULL;

void
rx_SetRxStatUserOk(int (*proc) (struct rx_call * call))
{
    rxi_rxstat_userok = proc;
}

int
rx_RxStatUserOk(struct rx_call *call)
{
    if (!rxi_rxstat_userok)
        return 0;
    return rxi_rxstat_userok(call);
}
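/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * authorization hook that only permits stats control over connections with
 * an authenticated security class, registered once at startup with
 * rx_SetRxStatUserOk(example_stat_userok).
 */
#if 0
static int
example_stat_userok(struct rx_call *call)
{
    return rx_SecurityClassOf(rx_ConnectionOf(call)) != 0;
}
#endif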
#ifdef AFS_NT40_ENV
/*
 * DllMain() -- Entry-point function called by the DllMainCRTStartup()
 * function in the MSVC runtime DLL (msvcrt.dll).
 *
 * Note: the system serializes calls to this function.
 */
BOOL WINAPI
DllMain(HINSTANCE dllInstHandle,        /* instance handle for this DLL module */
        DWORD reason,                   /* reason function is being called */
        LPVOID reserved)                /* reserved for future use */
{
    switch (reason) {
    case DLL_PROCESS_ATTACH:
        /* library is being attached to a process */
        return TRUE;
    case DLL_PROCESS_DETACH:
        return TRUE;
    default:
        return FALSE;
    }
}
#endif /* AFS_NT40_ENV */
int rx_DumpCalls(FILE *outputFile, char *cookie)
{
#ifdef RXDEBUG_PACKET
#ifdef KDUMP_RX_LOCK
    struct rx_call_rx_lock *c;
#else
    struct rx_call *c;
#endif
#ifdef AFS_NT40_ENV
    int zilch;
    char output[2048];
#define RXDPRINTF sprintf
#define RXDPRINTOUT output
#else
#define RXDPRINTF fprintf
#define RXDPRINTOUT outputFile
#endif

    RXDPRINTF(RXDPRINTOUT, "%s - Start dumping all Rx Calls - count=%u\r\n", cookie, rx_stats.nCallStructs);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif

    for (c = rx_allCallsp; c; c = c->allNextp) {
        u_short rqc, tqc, iovqc;

        MUTEX_ENTER(&c->lock);
        rqc = opr_queue_Count(&c->rq);
        tqc = opr_queue_Count(&c->tq);
        iovqc = opr_queue_Count(&c->app.iovq);

        RXDPRINTF(RXDPRINTOUT, "%s - call=0x%p, id=%u, state=%u, mode=%u, conn=%p, epoch=%u, cid=%u, callNum=%u, connFlags=0x%x, flags=0x%x, "
                "rqc=%u,%u, tqc=%u,%u, iovqc=%u,%u, "
                "lstatus=%u, rstatus=%u, error=%d, timeout=%u, "
                "resendEvent=%d, keepAliveEvt=%d, delayedAckEvt=%d, delayedAbortEvt=%d, abortCode=%d, abortCount=%d, "
                "lastSendTime=%u, lastRecvTime=%u, lastSendData=%u"
#ifdef RX_ENABLE_LOCKS
                ", refCount=%u"
#endif
#ifdef RX_REFCOUNT_CHECK
                ", refCountBegin=%u, refCountResend=%u, refCountDelay=%u, "
                "refCountAlive=%u, refCountPacket=%u, refCountSend=%u, refCountAckAll=%u, refCountAbort=%u"
#endif
                "\r\n",
                cookie, c, c->call_id, (afs_uint32)c->state, (afs_uint32)c->app.mode, c->conn, c->conn?c->conn->epoch:0, c->conn?c->conn->cid:0,
                c->callNumber?*c->callNumber:0, c->conn?c->conn->flags:0, c->flags,
                (afs_uint32)c->rqc, (afs_uint32)rqc, (afs_uint32)c->tqc, (afs_uint32)tqc, (afs_uint32)c->iovqc, (afs_uint32)iovqc,
                (afs_uint32)c->localStatus, (afs_uint32)c->remoteStatus, c->error, c->timeout,
                c->resendEvent?1:0, c->keepAliveEvent?1:0, c->delayedAckEvent?1:0, c->delayedAbortEvent?1:0,
                c->abortCode, c->abortCount, c->lastSendTime, c->lastReceiveTime, c->lastSendData
#ifdef RX_ENABLE_LOCKS
                , (afs_uint32)c->refCount
#endif
#ifdef RX_REFCOUNT_CHECK
                , c->refCDebug[0],c->refCDebug[1],c->refCDebug[2],c->refCDebug[3],c->refCDebug[4],c->refCDebug[5],c->refCDebug[6],c->refCDebug[7]
#endif
                );
        MUTEX_EXIT(&c->lock);

#ifdef AFS_NT40_ENV
        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
    }
    RXDPRINTF(RXDPRINTOUT, "%s - End dumping all Rx Calls\r\n", cookie);
#ifdef AFS_NT40_ENV
    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
#endif
#endif /* RXDEBUG_PACKET */
    return 0;
}