2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 /* kdump for SGI needs MP and SP versions of rx_serverQueueEntry,
12 * rx_peer, rx_connection and rx_call structs. rx.h gets included a
13 * second time to pick up mp_ versions of those structs. Currently
14 * the affected structs have #ifdefs in them for the second pass.
15 * This should change once we start using only ANSI compilers.
16 * Actually, kdump does not use rx_serverQueueEntry, but I'm including
17 * it for completeness.
26 /* Substitute VOID (char) for void, because some compilers are confused by void
27 * in some situations */
33 #include "rx_kmutex.h"
34 #include "rx_kernel.h"
38 #include "rx_packet.h"
41 #if defined (AFS_OBSD_ENV) && !defined (MLEN)
44 #include "netinet/in.h"
45 #include "sys/socket.h"
47 # include <sys/types.h>
49 #ifdef AFS_PTHREAD_ENV
50 # include "rx_pthread.h"
58 # include "rx_clock.h"
59 # include "rx_event.h"
60 # include "rx_packet.h"
63 # include "rx_multi.h"
65 # include <netinet/in.h>
66 # include <sys/socket.h>
71 /* Configurable parameters */
72 #define RX_IDLE_DEAD_TIME 60 /* default idle dead time */
73 #define RX_MAX_SERVICES 20 /* Maximum number of services that may be installed */
74 #if defined(KERNEL) && defined(AFS_AIX51_ENV) && defined(__64__)
75 #define RX_DEFAULT_STACK_SIZE 24000 /* selected when KERNEL, AFS_AIX51_ENV and __64__ are all defined */
77 #define RX_DEFAULT_STACK_SIZE 16000 /* Default process stack size; overridden by rx_SetStackSize */
80 /* This parameter should not normally be changed */
81 #define RX_PROCESS_PRIORITY LWP_NORMAL_PRIORITY
83 /* Backoff is a fixed-point binary value, i.e. it is expressed in units of 1/4 second */
84 #define MAXBACKOFF 0x1F
86 #define ADDRSPERSITE 16
/*
 * Call-identifier layout: the bottom RX_CIDSHIFT bits of a connection id
 * give the call channel; the remaining high bits identify the connection.
 *
 * RX_MAXCALLS is derived from RX_CIDSHIFT so the two can never disagree.
 */
#define RX_CIDSHIFT 2				/* Log2(RX_MAXCALLS) */
#define RX_MAXCALLS (1 << RX_CIDSHIFT)		/* Power of 2; max async calls per connection */
#define RX_CHANNELMASK (RX_MAXCALLS - 1)	/* Keeps only the call-channel bits */
#define RX_CIDMASK (~RX_CHANNELMASK)		/* Clears the call-channel bits */
94 #endif /* !KDUMP_RX_LOCK */
97 typedef void (*rx_destructor_t)(void *);
98 int rx_KeyCreate(rx_destructor_t);
99 osi_socket rxi_GetUDPSocket(u_short port);
103 int ntoh_syserr_conv(int error);
106 #define RX_DONTWAIT 0
/*
 * Field accessors for calls, connections and peers.  Each macro expands to a
 * plain member access through its argument, so the argument must be a pointer
 * to the corresponding structure.
 */
108 #define rx_ConnectionOf(call) ((call)->conn)
109 #define rx_PeerOf(conn) ((conn)->peer)
/* host and port are kept in network byte order, per the rx_peer field comments */
110 #define rx_HostOf(peer) ((peer)->host)
111 #define rx_PortOf(peer) ((peer)->port)
112 #define rx_SetLocalStatus(call, status) ((call)->localStatus = (status))
/* NOTE: the status argument of rx_GetLocalStatus is not used by the expansion */
113 #define rx_GetLocalStatus(call, status) ((call)->localStatus)
114 #define rx_GetRemoteStatus(call) ((call)->remoteStatus)
115 #define rx_Error(call) ((call)->error)
116 #define rx_ConnError(conn) ((conn)->error)
117 #define rx_IsServerConn(conn) ((conn)->type == RX_SERVER_CONNECTION)
118 #define rx_IsClientConn(conn) ((conn)->type == RX_CLIENT_CONNECTION)
119 /* Don't use these two; use rx_IsServerConn / rx_IsClientConn, which expand identically */
120 #define rx_ServerConn(conn) ((conn)->type == RX_SERVER_CONNECTION)
121 #define rx_ClientConn(conn) ((conn)->type == RX_CLIENT_CONNECTION)
/* Yields the masked flag bit itself (RX_CONN_USING_PACKET_CKSUM or 0), not 1 or 0 */
122 #define rx_IsUsingPktCksum(conn) ((conn)->flags & RX_CONN_USING_PACKET_CKSUM)
123 #define rx_ServiceIdOf(conn) ((conn)->serviceId)
/* Returns the connection's securityIndex, i.e. an index, not the object */
124 #define rx_SecurityClassOf(conn) ((conn)->securityIndex)
125 #define rx_SecurityObjectOf(conn) ((conn)->securityObject)
128 * Macros callable by the user to further define attributes of a
129 * service. Must be called before rx_StartServer
/*
 * Set the service stack size.  This currently just raises the global
 * rx_stackSize to the maximum value seen so far; a smaller request leaves it
 * unchanged.  The service argument is not used by the expansion.
 *
 * The whole expansion and every use of stackSize are parenthesized so the
 * macro behaves as a single expression wherever it is used.
 * NOTE: stackSize may be evaluated twice; pass an expression without side
 * effects.
 */
#define rx_SetStackSize(service, stackSize) \
    (rx_stackSize = (((stackSize) > rx_stackSize) ? (stackSize) : rx_stackSize))
137 /* Set minimum number of processes guaranteed to be available for this
138 * service at all times */
139 #define rx_SetMinProcs(service, min) ((service)->minProcs = (min))
141 /* Set maximum number of processes that will be made available to this
142 * service (also a guarantee that this number will be made available
143 * if there is no competition) */
144 #define rx_SetMaxProcs(service, max) ((service)->maxProcs = (max))
146 /* Define a procedure to be called just before a server connection is destroyed */
147 #define rx_SetDestroyConnProc(service,proc) ((service)->destroyConnProc = (proc))
149 /* Set the idle dead time for a service */
150 #define rx_SetIdleDeadTime(service,time) ((service)->idleDeadTime = (time))
152 /* Define procedures for getting and setting before and after execute-request procs */
153 #define rx_SetAfterProc(service,proc) ((service)->afterProc = (proc))
154 #define rx_SetBeforeProc(service,proc) ((service)->beforeProc = (proc))
155 #define rx_GetAfterProc(service) ((service)->afterProc)
156 #define rx_GetBeforeProc(service) ((service)->beforeProc)
158 /* Define a procedure to be called when a server connection is created */
159 #define rx_SetNewConnProc(service, proc) ((service)->newConnProc = (proc))
161 /* NOTE: We'll probably redefine the following three routines, again, sometime. */
163 /* Set the connection dead time for any connections created for this service (server only) */
164 #define rx_SetServiceDeadTime(service, seconds) ((service)->secondsUntilDead = (seconds))
166 /* Enable or disable asymmetric client checking for a service */
167 #define rx_SetCheckReach(service, x) ((service)->checkReach = (x))
169 /* Set connection hard and idle timeouts for a connection */
170 #define rx_SetConnHardDeadTime(conn, seconds) ((conn)->hardDeadTime = (seconds))
171 #define rx_SetConnIdleDeadTime(conn, seconds) ((conn)->idleDeadTime = (seconds))
173 /* Set the overload threshold and the overload error */
174 #define rx_SetBusyThreshold(threshold, code) (rx_BusyThreshold=(threshold),rx_BusyError=(code))
176 /* If this flag is set, no new requests are processed by rx; all new requests are
177 returned with an error code of RX_CALL_DEAD (a transient error) */
178 #define rx_SetRxTranquil() (rx_tranquil = 1)
179 #define rx_ClearRxTranquil() (rx_tranquil = 0)
181 /* Set the threshold and time to delay aborts for consecutive errors */
182 #define rx_SetCallAbortThreshold(A) (rxi_callAbortThreshhold = (A))
183 #define rx_SetCallAbortDelay(A) (rxi_callAbortDelay = (A))
184 #define rx_SetConnAbortThreshold(A) (rxi_connAbortThreshhold = (A))
185 #define rx_SetConnAbortDelay(A) (rxi_connAbortDelay = (A))
187 #define rx_GetCallAbortCode(call) ((call)->abortCode)
188 #define rx_SetCallAbortCode(call, code) ((call)->abortCode = (code))
190 #define cpspace(call) ((call)->curlen)
191 #define cppos(call) ((call)->curpos)
193 #define rx_Read(call, buf, nbytes) rx_ReadProc(call, buf, nbytes)
194 #define rx_Read32(call, value) rx_ReadProc32(call, value)
195 #define rx_Readv(call, iov, nio, maxio, nbytes) \
196 rx_ReadvProc(call, iov, nio, maxio, nbytes)
197 #define rx_Write(call, buf, nbytes) rx_WriteProc(call, buf, nbytes)
198 #define rx_Write32(call, value) rx_WriteProc32(call, value)
199 #define rx_Writev(call, iov, nio, nbytes) \
200 rx_WritevProc(call, iov, nio, nbytes)
202 /* This is the maximum size data packet that can be sent on this connection, accounting for security module-specific overheads. */
203 #define rx_MaxUserDataSize(call) ((call)->MTU - RX_HEADER_SIZE - (call)->conn->securityHeaderSize - (call)->conn->securityMaxTrailerSize)
205 /* Macros to turn the hot thread feature on and off. Enabling hot threads
206 * allows the listener thread to trade places with an idle worker thread,
207 * which moves the context switch from listener to worker out of the
210 #define rx_EnableHotThread() (rx_enable_hot_thread = 1)
211 #define rx_DisableHotThread() (rx_enable_hot_thread = 0)
213 /* A connection is an authenticated communication path, allowing
214 limited multiple asynchronous conversations. */
216 struct rx_connection_rx_lock {
217 struct rx_connection_rx_lock *next; /* on hash chain _or_ free list */
218 struct rx_peer_rx_lock *peer;
220 struct rx_connection {
221 struct rx_connection *next; /* on hash chain _or_ free list */
222 struct rx_peer *peer;
224 #ifdef RX_ENABLE_LOCKS
225 afs_kmutex_t conn_call_lock; /* locks conn_call_cv */
226 afs_kcondvar_t conn_call_cv;
227 afs_kmutex_t conn_data_lock; /* locks packet data */
229 afs_uint32 epoch; /* Process start time of client side of connection */
230 afs_uint32 cid; /* Connection id (call channel is bottom bits) */
231 afs_int32 error; /* If this connection is in error, this is it */
233 struct rx_call_rx_lock *call[RX_MAXCALLS];
235 struct rx_call *call[RX_MAXCALLS];
237 afs_uint32 callNumber[RX_MAXCALLS]; /* Current call numbers */
238 afs_uint32 serial; /* Next outgoing packet serial number */
239 afs_uint32 lastSerial; /* # of last packet received, for computing skew */
240 afs_int32 maxSerial; /* largest serial number seen on incoming packets */
241 /* afs_int32 maxPacketSize; max packet size should be per-connection since */
242 /* peer process could be restarted on us. Includes RX Header. */
243 struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */
244 struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */
245 struct rxevent *checkReachEvent; /* Scheduled when checking reachability */
246 int abortCount; /* count of abort messages sent */
247 /* client-- to retransmit the challenge */
248 struct rx_service *service; /* used by servers only */
249 u_short serviceId; /* To stamp on requests (clients only) */
250 u_short refCount; /* Reference count */
251 u_char flags; /* Defined below */
252 u_char type; /* Type of connection, defined below */
253 u_char secondsUntilPing; /* how often to ping for each active call */
254 u_char securityIndex; /* corresponds to the security class of the */
255 /* securityObject for this conn */
256 struct rx_securityClass *securityObject; /* Security object for this connection */
257 VOID *securityData; /* Private data for this conn's security class */
258 u_short securityHeaderSize; /* Length of security module's packet header data */
259 u_short securityMaxTrailerSize; /* Length of security module's packet trailer data */
261 int timeout; /* Overall timeout per call (seconds) for this conn */
262 int lastSendTime; /* Last send time for this connection */
263 u_short secondsUntilDead; /* Maximum silence from peer before RX_CALL_DEAD */
264 u_short hardDeadTime; /* hard max for call execution */
265 u_short idleDeadTime; /* max time a call can be idle (no data) */
266 u_char ackRate; /* how many packets between ack requests */
267 u_char makeCallWaiters; /* how many rx_NewCalls are waiting */
268 int nSpecific; /* number entries in specific data */
269 void **specific; /* pointer to connection specific data */
273 /* A service is installed by rx_NewService, and specifies a service type that
274 * is exported by this process. Incoming calls are stamped with the service
275 * type, and must match an installed service for the call to be accepted.
276 * Each service exported has a (port,serviceId) pair to uniquely identify it.
277 * It is also named: this is intended to allow a remote statistics gathering
278 * program to retrieve per service statistics without having to know the local
279 * service id's. Each service has a number of
282 /* security objects (instances of security classes) which implement
283 * various types of end-to-end security protocols for connections made
284 * to this service. Finally, there are two parameters controlling the
285 * number of requests which may be executed in parallel by this
286 * service: minProcs is the number of requests to this service which
287 * are guaranteed to be able to run in parallel at any time; maxProcs
288 * has two meanings: it limits the total number of requests which may
289 * execute in parallel and it also guarantees that that many requests
290 * may be handled in parallel if no other service is handling any
294 u_short serviceId; /* Service number */
295 u_short servicePort; /* UDP port for this service */
296 char *serviceName; /* Name of the service */
297 osi_socket socket; /* socket structure or file descriptor */
298 u_short nRequestsRunning; /* Number of requests currently in progress */
299 u_short nSecurityObjects; /* Number of entries in security objects array */
300 struct rx_securityClass **securityObjects; /* Array of security class objects */
301 afs_int32 (*executeRequestProc)(struct rx_call *acall); /* Routine to call when an rpc request is received */
302 void (*destroyConnProc)(struct rx_connection *tcon); /* Routine to call when a server connection is destroyed */
303 void (*newConnProc)(struct rx_connection *tcon); /* Routine to call when a server connection is created */
304 void (*beforeProc)(struct rx_call *acall); /* routine to call before a call is executed */
305 void (*afterProc)(struct rx_call *acall, afs_int32 code); /* routine to call after a call is executed */
306 u_short maxProcs; /* Maximum procs to be used for this service */
307 u_short minProcs; /* Minimum # of requests guaranteed executable simultaneously */
308 u_short connDeadTime; /* Seconds until a client of this service will be declared dead, if it is not responding */
309 u_short idleDeadTime; /* Time a server will wait for I/O to start up again */
310 u_char checkReach; /* Check for asymmetric clients? */
313 #endif /* KDUMP_RX_LOCK */
315 /* A server puts itself on an idle queue for a service using an
316 * instance of the following structure. When a call arrives, the call
317 * structure pointer is placed in "newcall", the routine to execute to
318 * service the request is placed in executeRequestProc, and the
319 * process is woken up. The queue entry's address is used for the
320 * sleep/wakeup. If socketp is non-null, then this thread is willing
321 * to become a listener thread. A thread sets *socketp to -1 before
322 * sleeping. If *socketp is not -1 when the thread awakes, it is now
323 * the listener thread for *socketp. When socketp is non-null, tno
324 * contains the server's threadID, which is used to make decisions in GetCall.
327 struct rx_serverQueueEntry_rx_lock {
329 struct rx_serverQueueEntry {
331 struct rx_queue queueItemHeader;
333 struct rx_call_rx_lock *newcall;
335 struct rx_call *newcall;
337 #ifdef RX_ENABLE_LOCKS
346 /* A peer refers to a peer process, specified by a (host,port) pair. There may be more than one peer on a given host. */
348 struct rx_peer_rx_lock {
349 struct rx_peer_rx_lock *next; /* Next in hash conflict or free list */
352 struct rx_peer *next; /* Next in hash conflict or free list */
354 #ifdef RX_ENABLE_LOCKS
355 afs_kmutex_t peer_lock; /* Lock peer */
356 #endif /* RX_ENABLE_LOCKS */
357 afs_uint32 host; /* Remote IP address, in net byte order */
358 u_short port; /* Remote UDP port, in net byte order */
360 /* interface mtu probably used for this host - includes RX Header */
361 u_short ifMTU; /* doesn't include IP header */
363 /* For garbage collection */
364 afs_uint32 idleWhen; /* When the refcount went to zero */
365 short refCount; /* Reference count for this structure */
367 /* Congestion control parameters */
368 u_char burstSize; /* Reinitialization size for the burst parameter */
369 u_char burst; /* Number of packets that can be transmitted right now, without pausing */
370 struct clock burstWait; /* Delay until new burst is allowed */
371 struct rx_queue congestionQueue; /* Calls that are waiting for non-zero burst value */
372 int rtt; /* Round trip time, measured in milliseconds/8 */
373 int rtt_dev; /* rtt smoothed error, in milliseconds/4 */
374 struct clock timeout; /* Current retransmission delay */
375 int nSent; /* Total number of distinct data packets sent, not including retransmissions */
376 int reSends; /* Total number of retransmissions for this peer, since this structure was created */
378 /* Skew: if a packet is received N packets later than expected (based
379 * on packet serial numbers), then we define it to have a skew of N.
380 * The maximum skew values allow us to decide when a packet hasn't
381 * been received yet because it is out-of-order, as opposed to when it
382 * is likely to have been dropped. */
383 afs_uint32 inPacketSkew; /* Maximum skew on incoming packets */
384 afs_uint32 outPacketSkew; /* Peer-reported max skew on our sent packets */
385 int rateFlag; /* Flag for rate testing (-no 0yes +decrement) */
387 /* the "natural" MTU, excluding IP,UDP headers, is negotiated by the endpoints */
390 /* negotiated maximum number of packets to send in a single datagram. */
391 u_short maxDgramPackets;
392 /* local maximum number of packets to send in a single datagram. */
393 u_short ifDgramPackets;
395 * MTU, cwind, and nDgramPackets are used to initialize
396 * slow start parameters for new calls. These values are set whenever a
397 * call sends a retransmission and at the end of each call.
398 * congestSeq is incremented each time the congestion parameters are
399 * changed by a call recovering from a dropped packet. A call used
400 * MAX when updating congestion parameters if it started with the
401 * current congestion sequence number, otherwise it uses MIN.
403 u_short MTU; /* MTU for AFS 3.4a jumboGrams */
404 u_short cwind; /* congestion window */
405 u_short nDgramPackets; /* number packets per AFS 3.5 jumbogram */
406 u_short congestSeq; /* Changed when a call retransmits */
407 afs_hyper_t bytesSent; /* Number of bytes sent to this peer */
408 afs_hyper_t bytesReceived; /* Number of bytes received from this peer */
409 struct rx_queue rpcStats; /* rpc statistic list */
410 int lastReachTime; /* Last time we verified reachability */
414 #ifndef KDUMP_RX_LOCK
415 /* Flag bits for connection structure */
416 #define RX_CONN_MAKECALL_WAITING 1 /* rx_MakeCall is waiting for a channel */
417 #define RX_CONN_DESTROY_ME 2 /* Destroy *client* connection after last call */
418 #define RX_CONN_USING_PACKET_CKSUM 4 /* non-zero header.spare field seen */
419 #define RX_CONN_KNOW_WINDOW 8 /* window size negotiation works */
420 #define RX_CONN_RESET 16 /* connection is reset, remove */
421 #define RX_CONN_BUSY 32 /* connection is busy; don't delete */
422 #define RX_CONN_ATTACHWAIT 64 /* attach waiting for peer->lastReach */
424 /* Type of connection, client or server */
425 #define RX_CLIENT_CONNECTION 0
426 #define RX_SERVER_CONNECTION 1
427 #endif /* !KDUMP_RX_LOCK */
429 /* Call structure: only instantiated for active calls and dallying server calls. The permanent call state (i.e. the call number as well as state shared with other calls associated with this connection) is maintained in the connection structure. */
431 struct rx_call_rx_lock {
435 struct rx_queue queue_item_header; /* Call can be on various queues (one-at-a-time) */
436 struct rx_queue tq; /* Transmit packet queue */
437 struct rx_queue rq; /* Receive packet queue */
439 * The following fields are accessed while the call is unlocked.
440 * These fields are used by the caller/server thread to marshall
441 * and unmarshall RPC data. The only time they may be changed by
442 * other threads is when the RX_CALL_IOVEC_WAIT flag is set.
444 * NOTE: Be sure that these fields start and end on a double
445 * word boundary. Otherwise threads that are changing
446 * adjacent fields will cause problems.
448 struct rx_queue iovq; /* readv/writev packet queue */
449 u_short nLeft; /* Number bytes left in first receive packet */
450 u_short curvec; /* current iovec in currentPacket */
451 u_short curlen; /* bytes remaining in curvec */
452 u_short nFree; /* Number bytes free in last send packet */
453 struct rx_packet *currentPacket;/* Current packet being assembled or being read */
454 char *curpos; /* current position in curvec */
456 * End of fields accessed with call unlocked
458 u_char channel; /* Index of call, within connection */
459 u_char state; /* Current call state as defined below */
460 u_char mode; /* Current mode of a call in ACTIVE state */
461 #ifdef RX_ENABLE_LOCKS
462 afs_kmutex_t lock; /* lock covers data as well as mutexes. */
463 afs_kmutex_t *call_queue_lock; /* points to lock for queue we're on,
465 afs_kcondvar_t cv_twind;
466 afs_kcondvar_t cv_rq;
467 afs_kcondvar_t cv_tq;
470 struct rx_connection_rx_lock *conn; /* Parent connection for call */
472 struct rx_connection *conn; /* Parent connection for this call */
474 afs_uint32 *callNumber; /* Pointer to call number field within connection */
475 afs_uint32 flags; /* Some random flags */
476 u_char localStatus; /* Local user status sent out of band */
477 u_char remoteStatus; /* Remote user status received out of band */
478 afs_int32 error; /* Error condition for this call */
479 afs_uint32 timeout; /* High level timeout for this call */
480 afs_uint32 rnext; /* Next sequence number expected to be read by rx_ReadData */
481 afs_uint32 rprev; /* Previous packet received; used for deciding what the next packet to be received should be, in order to decide whether a negative acknowledge should be sent */
482 afs_uint32 rwind; /* The receive window: the peer must not send packets with sequence numbers >= rnext+rwind */
483 afs_uint32 tfirst; /* First unacknowledged transmit packet number */
484 afs_uint32 tnext; /* Next transmit sequence number to use */
485 u_short twind; /* The transmit window: we cannot assign a sequence number to a packet >= tfirst + twind */
486 u_short cwind; /* The congestion window */
487 u_short nSoftAcked; /* Number soft acked transmit packets */
488 u_short nextCwind; /* The congestion window after recovery */
489 u_short nCwindAcks; /* Number acks received at current cwind */
490 u_short ssthresh; /* The slow start threshold */
491 u_short nDgramPackets; /* Packets per AFS 3.5 jumbogram */
492 u_short nAcks; /* The number of consecutive acks */
493 u_short nNacks; /* Number packets acked that follow the
494 * first negatively acked packet */
495 u_short nSoftAcks; /* The number of delayed soft acks */
496 u_short nHardAcks; /* The number of delayed hard acks */
497 u_short congestSeq; /* Peer's congestion sequence counter */
498 struct rxevent *resendEvent; /* If this is non-Null, there is a retransmission event pending */
499 struct rxevent *timeoutEvent; /* If this is non-Null, then there is an overall timeout for this call */
500 struct rxevent *keepAliveEvent; /* Scheduled periodically in active calls to keep call alive */
501 struct rxevent *delayedAckEvent; /* Scheduled after all packets are received to send an ack if a reply or new call is not generated soon */
502 struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */
503 int abortCode; /* error code from last RPC */
504 int abortCount; /* number of times last error was sent */
505 u_int lastSendTime; /* Last time a packet was sent on this call */
506 u_int lastReceiveTime; /* Last time a packet was received for this call */
507 VOID (*arrivalProc)(register struct rx_call *call,
508 register struct multi_handle *mh, register int index); /* Procedure to call when reply is received */
509 VOID *arrivalProcHandle; /* Handle to pass to replyFunc */
510 VOID *arrivalProcArg; /* Additional arg to pass to reply Proc */
511 afs_uint32 lastAcked; /* last packet "hard" acked by receiver */
512 afs_uint32 startWait; /* time server began waiting for input data/send quota */
513 struct clock traceWait; /* time server began waiting for input data/send quota */
514 struct clock traceStart; /* time the call started running */
515 u_short MTU; /* size of packets currently sending */
516 #ifdef RX_ENABLE_LOCKS
517 short refCount; /* Used to keep calls from disappearing when we get them from a queue. */
519 #endif /* RX_ENABLE_LOCKS */
520 /* Call refcount modifiers */
521 #define RX_CALL_REFCOUNT_BEGIN 0 /* GetCall/NewCall/EndCall */
522 #define RX_CALL_REFCOUNT_RESEND 1 /* resend event */
523 #define RX_CALL_REFCOUNT_DELAY 2 /* delayed ack */
524 #define RX_CALL_REFCOUNT_ALIVE 3 /* keep alive event */
525 #define RX_CALL_REFCOUNT_PACKET 4 /* waiting for packets. */
526 #define RX_CALL_REFCOUNT_SEND 5 /* rxi_Send */
527 #define RX_CALL_REFCOUNT_ACKALL 6 /* rxi_AckAll */
528 #define RX_CALL_REFCOUNT_ABORT 7 /* delayed abort */
529 #define RX_CALL_REFCOUNT_MAX 8 /* array size. */
530 #ifdef RX_REFCOUNT_CHECK
531 short refCDebug[RX_CALL_REFCOUNT_MAX];
532 #endif /* RX_REFCOUNT_CHECK */
533 int iovNBytes; /* byte count for current iovec */
534 int iovMax; /* number elements in current iovec */
535 int iovNext; /* next entry in current iovec */
536 struct iovec *iov; /* current iovec */
537 struct clock queueTime; /* time call was queued */
538 struct clock startTime; /* time call was started */
539 afs_hyper_t bytesSent; /* Number bytes sent */
540 afs_hyper_t bytesRcvd; /* Number bytes received */
543 #ifndef KDUMP_RX_LOCK
544 /* Major call states */
545 #define RX_STATE_NOTINIT 0 /* Call structure has never been initialized */
546 #define RX_STATE_PRECALL 1 /* Server-only: call is not in progress, but packets have arrived */
547 #define RX_STATE_ACTIVE 2 /* An active call; a process is dealing with this call */
548 #define RX_STATE_DALLY 3 /* Dallying after process is done with call */
549 #define RX_STATE_HOLD 4 /* Waiting for acks on reply data packets */
551 /* Call modes: the modes of a call in RX_STATE_ACTIVE state (process attached) */
552 #define RX_MODE_SENDING 1 /* Sending or ready to send */
553 #define RX_MODE_RECEIVING 2 /* Receiving or ready to receive */
554 #define RX_MODE_ERROR 3 /* Something in error for current conversation */
555 #define RX_MODE_EOF 4 /* Server has flushed (or client has read) last reply packet */
/*
 * Call flag bits.  Every value is a distinct single bit; all are written in
 * hex so the bit positions can be checked at a glance.
 */
#define RX_CALL_READER_WAIT		0x00001	/* Reader is waiting for next packet */
#define RX_CALL_WAIT_WINDOW_ALLOC	0x00002	/* Sender is waiting for window to allocate buffers */
#define RX_CALL_WAIT_WINDOW_SEND	0x00004	/* Sender is waiting for window to send buffers */
#define RX_CALL_WAIT_PACKETS		0x00008	/* Sender is waiting for packet buffers */
#define RX_CALL_WAIT_PROC		0x00010	/* Waiting for a process to be assigned */
#define RX_CALL_RECEIVE_DONE		0x00020	/* All packets received on this call */
#define RX_CALL_CLEARED			0x00040	/* Receive queue cleared in precall state */
#define RX_CALL_TQ_BUSY			0x00080	/* Call's Xmit Queue is busy; don't modify */
#define RX_CALL_TQ_CLEARME		0x00100	/* Need to clear this call's TQ later */
#define RX_CALL_TQ_SOME_ACKED		0x00200	/* rxi_Start needs to discard ack'd packets. */
#define RX_CALL_TQ_WAIT			0x00400	/* Reader is waiting for TQ_BUSY to be reset */
#define RX_CALL_FAST_RECOVER		0x00800	/* call is doing congestion recovery */
#define RX_CALL_FAST_RECOVER_WAIT	0x01000	/* thread is waiting to start recovery */
#define RX_CALL_SLOW_START_OK		0x02000	/* receiver acks every other packet */
#define RX_CALL_IOVEC_WAIT		0x04000	/* waiting thread is using an iovec */
#define RX_CALL_HAVE_LAST		0x08000	/* Last packet has been received */
#define RX_CALL_NEED_START		0x10000	/* tells rxi_Start to start again */
576 /* Maximum number of acknowledgements in an acknowledge packet */
577 #define RX_MAXACKS 255
579 /* The structure of the data portion of an acknowledge packet: An acknowledge
580 * packet is in network byte order at all times. An acknowledgement is always
581 * prompted for a specific reason by a specific incoming packet. This reason
582 * is reported in "reason" and the packet's sequence number in the packet
583 * header.seq. In addition to this information, all of the current
584 * acknowledgement information about this call is placed in the packet.
585 * "FirstPacket" is the sequence number of the first packet represented in an
586 * array of bytes, "acks", containing acknowledgement information for a number
587 * of consecutive packets. All packets prior to FirstPacket are implicitly
588 * acknowledged: the sender need no longer be concerned about them. Packets
589 * from firstPacket+nAcks and on are not acknowledged. Packets in the range
590 * [firstPacket,firstPacket+nAcks) are each acknowledged explicitly. The
591 * acknowledgement may be RX_NACK if the packet is not (currently) at the
592 * receiver (it may have never been received, or received and then later
593 * dropped), or it may be RX_ACK if the packet is queued up waiting to be read
594 * by the upper level software. RX_ACK does not imply that the packet may not
595 * be dropped before it is read; it does imply that the sender should stop
596 * retransmitting the packet until notified otherwise. The field
597 * previousPacket identifies the previous packet received by the peer. This
598 * was used in a previous version of this software, and could be used in the
599 * future. The serial number in the data part of the ack packet corresponds to
600 * the serial number of the packet which prompted the acknowledge. Any
601 * packets which are explicitly not acknowledged, and which were last
602 * transmitted with a serial number less than the provided serial number,
603 * should be retransmitted immediately. Actually, this is slightly inaccurate:
604 * packets are not necessarily received in order. When packets are habitually
605 * transmitted out of order, this is allowed for in the retransmission
606 * algorithm by introducing the notion of maximum packet skew: the degree of
607 * out-of-orderness of the packets received on the wire. This number is
608 * communicated from the receiver to the sender in ack packets. */
610 struct rx_ackPacket {
611 u_short bufferSpace; /* Number of packet buffers available. That is: the number of buffers that the sender of the ack packet is willing to provide for data, on this or subsequent calls. Lying is permissible. */
612 u_short maxSkew; /* Maximum difference between serial# of packet acknowledged and highest packet yet received */
613 afs_uint32 firstPacket; /* The first packet in the list of acknowledged packets */
614 afs_uint32 previousPacket; /* The previous packet number received (obsolete?) */
615 afs_uint32 serial; /* Serial number of the packet which prompted the acknowledge */
616 u_char reason; /* Reason for the acknowledge of ackPacket, defined below */
617 u_char nAcks; /* Number of acknowledgements */
618 u_char acks[RX_MAXACKS]; /* Up to RX_MAXACKS packet acknowledgements, defined below */
619 /* Packets < firstPacket are implicitly acknowledged and may be discarded by the sender. Packets >= firstPacket+nAcks are implicitly NOT acknowledged. No packets with sequence numbers >= firstPacket should be discarded by the sender (they may be thrown out at any time by the receiver) */
622 #define FIRSTACKOFFSET 4
624 /* Reason for acknowledge message */
625 #define RX_ACK_REQUESTED 1 /* Peer requested an ack on this packet */
626 #define RX_ACK_DUPLICATE 2 /* Duplicate packet */
627 #define RX_ACK_OUT_OF_SEQUENCE 3 /* Packet out of sequence */
628 #define RX_ACK_EXCEEDS_WINDOW 4 /* Packet sequence number higher than window; discarded */
629 #define RX_ACK_NOSPACE 5 /* No buffer space at all */
630 #define RX_ACK_PING 6 /* This is a keep-alive ack */
631 #define RX_ACK_PING_RESPONSE 7 /* Ack'ing because we were pinged */
632 #define RX_ACK_DELAY 8 /* Ack generated since nothing has happened since receiving packet */
633 #define RX_ACK_IDLE 9 /* Similar to RX_ACK_DELAY, but can
636 /* Packet acknowledgement type */
637 #define RX_ACK_TYPE_NACK 0 /* I Don't have this packet */
638 #define RX_ACK_TYPE_ACK 1 /* I have this packet, although I may discard it later */
/*
 * The packet size transmitted for an acknowledge is adjusted to reflect the
 * actual size of the acks array.  rx_AckDataSize(nAcks) gives that size: the
 * offset of acks[0] within struct rx_ackPacket, plus nAcks, plus 3 further
 * bytes.  The argument is parenthesized so that any expression (for example a
 * conditional expression) may be passed safely.
 */
#define rx_AckDataSize(nAcks) (3 + (nAcks) + offsetof(struct rx_ackPacket, acks[0]))
/* Retry parameters for authentication challenges and peer reachability checks */
643 #define RX_CHALLENGE_TIMEOUT 2 /* Number of seconds before another authentication request packet is generated */
644 #define RX_CHALLENGE_MAXTRIES 50 /* Max # of times we resend challenge */
645 #define RX_CHECKREACH_TIMEOUT 2 /* Number of seconds before another ping is generated */
646 #define RX_CHECKREACH_TTL 60 /* Re-check reachability this often (presumably seconds, like the timeouts above -- confirm) */
648 /* RX error codes. RX uses error codes from -1 to -64. Rxgen may use other error codes < -64; user programs are expected to return positive error codes.
 * Note: RX_RESTARTING at the end of this list is -100, i.e. outside the -1..-64 range, because it has to match VRESTARTING. */
650 /* Something bad happened to the connection; temporary loss of communication */
651 #define RX_CALL_DEAD (-1)
653 /* An invalid operation, such as a client attempting to send data after having received the beginning of a reply from the server */
654 #define RX_INVALID_OPERATION (-2)
656 /* An optional timeout per call may be specified */
657 #define RX_CALL_TIMEOUT (-3)
659 /* End of data on a read */
662 /* Some sort of low-level protocol error */
663 #define RX_PROTOCOL_ERROR (-5)
665 /* Generic user abort code; used when no more specific error code needs to be communicated. For example, multi rx clients use this code to abort a multi rx call */
666 #define RX_USER_ABORT (-6)
668 /* Port already in use (from rx_Init) */
669 #define RX_ADDRINUSE (-7)
671 /* EMSGSIZE returned from network. Packet too big, must fragment */
672 #define RX_MSGSIZE (-8)
674 /* Transient failure detected (possibly the server is restarting) */
675 /* This should be equal to VRESTARTING (util/errors.h) for old clients to work */
676 #define RX_RESTARTING (-100)
678 struct rx_securityObjectStats {
679 char type; /* 0:unk 1:null,2:vab 3:kad */
681 char sparec[10]; /* force correct alignment */
682 afs_int32 flags; /* 1=>unalloc, 2=>auth, 4=>expired */
684 afs_uint32 packetsReceived;
685 afs_uint32 packetsSent;
686 afs_uint32 bytesReceived;
687 afs_uint32 bytesSent;
692 /* XXXX (rewrite this description) A security class object contains a set of
693 * procedures and some private data to implement a security model for rx
694 * connections. These routines are called by rx as appropriate. Rx knows
695 * nothing about the internal details of any particular security model, or
696 * about security state. Rx does maintain state per connection on behalf of
697 * the security class. Each security class implementation is also expected to
698 * provide routines to create these objects. Rx provides a basic routine to
699 * allocate one of these objects; this routine must be called by the class. */
700 struct rx_securityClass {
701 struct rx_securityOps {
702 int (*op_Close)(struct rx_securityClass *aobj);
703 int (*op_NewConnection)(struct rx_securityClass *aobj, struct rx_connection *aconn);
704 int (*op_PreparePacket)(struct rx_securityClass *aobj, struct rx_call *acall, struct rx_packet *apacket);
705 int (*op_SendPacket)(struct rx_securityClass *aobj, struct rx_call *acall, struct rx_packet *apacket);
706 int (*op_CheckAuthentication)(struct rx_securityClass *aobj, struct rx_connection *aconn);
707 int (*op_CreateChallenge)(struct rx_securityClass *aobj, struct rx_connection *aconn);
708 int (*op_GetChallenge)(struct rx_securityClass *aobj, struct rx_connection *aconn, struct rx_packet *apacket);
709 int (*op_GetResponse)(struct rx_securityClass *aobj, struct rx_connection *aconn, struct rx_packet *apacket);
710 int (*op_CheckResponse)(struct rx_securityClass *aobj, struct rx_connection *aconn, struct rx_packet *apacket);
711 int (*op_CheckPacket) (struct rx_securityClass *aobj, struct rx_call *acall, struct rx_packet *apacket);
712 int (*op_DestroyConnection)(struct rx_securityClass *aobj, struct rx_connection *aconn);
713 int (*op_GetStats)(struct rx_securityClass *aobj, struct rx_connection *aconn, struct rx_securityObjectStats *astats);
714 int (*op_Spare1)(void);
715 int (*op_Spare2)(void);
716 int (*op_Spare3)(void);
/*
 * Invoke security-class operation op_<op> of obj, passing the parenthesized
 * argument list `args`.  Evaluates to the operation's result, or to 0 when
 * obj is NULL or obj->ops does not provide op_<op>.
 *
 * obj is parenthesized at every use so that pointer expressions such as
 * `p + 1` expand correctly.  obj is still evaluated more than once, so it
 * must be free of side effects.
 */
#define RXS_OP(obj,op,args) (((obj) && ((obj)->ops->op_ ## op)) ? (*(obj)->ops->op_ ## op)args : 0)
/* Convenience wrappers: each one invokes the like-named op_* entry through RXS_OP with obj as the first argument, and therefore evaluates to 0 when obj is NULL or the operation is not provided. */
724 #define RXS_Close(obj) RXS_OP(obj,Close,(obj))
725 #define RXS_NewConnection(obj,conn) RXS_OP(obj,NewConnection,(obj,conn))
726 #define RXS_PreparePacket(obj,call,packet) RXS_OP(obj,PreparePacket,(obj,call,packet))
727 #define RXS_SendPacket(obj,call,packet) RXS_OP(obj,SendPacket,(obj,call,packet))
728 #define RXS_CheckAuthentication(obj,conn) RXS_OP(obj,CheckAuthentication,(obj,conn))
729 #define RXS_CreateChallenge(obj,conn) RXS_OP(obj,CreateChallenge,(obj,conn))
730 #define RXS_GetChallenge(obj,conn,packet) RXS_OP(obj,GetChallenge,(obj,conn,packet))
731 #define RXS_GetResponse(obj,conn,packet) RXS_OP(obj,GetResponse,(obj,conn,packet))
732 #define RXS_CheckResponse(obj,conn,packet) RXS_OP(obj,CheckResponse,(obj,conn,packet))
733 #define RXS_CheckPacket(obj,call,packet) RXS_OP(obj,CheckPacket,(obj,call,packet))
734 #define RXS_DestroyConnection(obj,conn) RXS_OP(obj,DestroyConnection,(obj,conn))
735 #define RXS_GetStats(obj,conn,stats) RXS_OP(obj,GetStats,(obj,conn,stats))
739 /* Structure for keeping rx statistics. Note that this structure is returned
740 * by rxdebug, so, for compatibility reasons, new fields should be appended (or
741 * spares used), the rxdebug protocol checked, if necessary, and the PrintStats
742 * code should be updated as well.
744 * Clearly we assume that ntohl will work on these structures so sizeof(int)
745 * must equal sizeof(afs_int32). */
747 struct rx_stats { /* General rx statistics */
748 int packetRequests; /* Number of packet allocation requests */
749 int receivePktAllocFailures;
750 int sendPktAllocFailures;
751 int specialPktAllocFailures;
752 int socketGreedy; /* Whether SO_GREEDY succeeded */
753 int bogusPacketOnRead; /* Number of inappropriately short packets received */
754 int bogusHost; /* Host address from bogus packets */
755 int noPacketOnRead; /* Number of read packets attempted when there was actually no packet to read off the wire */
756 int noPacketBuffersOnRead; /* Number of dropped data packets due to lack of packet buffers */
757 int selects; /* Number of selects waiting for packet or timeout */
758 int sendSelects; /* Number of selects forced when sending packet */
759 int packetsRead[RX_N_PACKET_TYPES]; /* Total number of packets read, per type */
760 int dataPacketsRead; /* Number of unique data packets read off the wire */
761 int ackPacketsRead; /* Number of ack packets read */
762 int dupPacketsRead; /* Number of duplicate data packets read */
763 int spuriousPacketsRead;/* Number of inappropriate data packets */
764 int packetsSent[RX_N_PACKET_TYPES]; /* Number of rxi_Sends: packets sent over the wire, per type */
765 int ackPacketsSent; /* Number of acks sent */
766 int pingPacketsSent; /* Total number of ping packets sent */
767 int abortPacketsSent; /* Total number of aborts */
768 int busyPacketsSent; /* Total number of busy packets sent */
769 int dataPacketsSent; /* Number of unique data packets sent */
770 int dataPacketsReSent; /* Number of retransmissions */
771 int dataPacketsPushed; /* Number of retransmissions pushed early by a NACK */
772 int ignoreAckedPacket; /* Number of packets with acked flag, on rxi_Start */
773 struct clock totalRtt; /* Total round trip time measured (use to compute average) */
774 struct clock minRtt; /* Minimum round trip time measured */
775 struct clock maxRtt; /* Maximum round trip time measured */
776 int nRttSamples; /* Total number of round trip samples */
777 int nServerConns; /* Total number of server connections */
778 int nClientConns; /* Total number of client connections */
779 int nPeerStructs; /* Total number of peer structures */
780 int nCallStructs; /* Total number of call structures allocated */
781 int nFreeCallStructs; /* Total number of previously allocated free call structures */
783 afs_int32 fatalErrors;
784 int ignorePacketDally; /* packets dropped because call is in dally state */
785 int receiveCbufPktAllocFailures;
786 int sendCbufPktAllocFailures;
790 /* structures for debug input and output packets */
792 /* debug input types */
798 /* Invalid rx debug package type */
799 #define RX_DEBUGI_BADTYPE (-8)
/* Debug protocol versions are identified by consecutive capital letters, from 'L' (earliest) to 'Q' (latest). Each RX_DEBUGI_VERSION_W_* constant names the version letter associated with one feature. */
801 #define RX_DEBUGI_VERSION_MINIMUM ('L') /* earliest real version */
802 #define RX_DEBUGI_VERSION ('Q') /* Latest version */
803 /* first version w/ secStats */
804 #define RX_DEBUGI_VERSION_W_SECSTATS ('L')
805 /* version M is first supporting GETALLCONN and RXSTATS type */
806 #define RX_DEBUGI_VERSION_W_GETALLCONN ('M')
807 #define RX_DEBUGI_VERSION_W_RXSTATS ('M')
808 /* last version with unaligned debugConn */
809 #define RX_DEBUGI_VERSION_W_UNALIGNED_CONN ('L')
810 #define RX_DEBUGI_VERSION_W_WAITERS ('N')
811 #define RX_DEBUGI_VERSION_W_IDLETHREADS ('O')
812 #define RX_DEBUGI_VERSION_W_NEWPACKETTYPES ('P')
813 #define RX_DEBUGI_VERSION_W_GETPEER ('Q')
/* Debug request types */
815 #define RX_DEBUGI_GETSTATS 1 /* get basic rx stats */
816 #define RX_DEBUGI_GETCONN 2 /* get connection info */
817 #define RX_DEBUGI_GETALLCONN 3 /* get even uninteresting conns */
818 #define RX_DEBUGI_RXSTATS 4 /* get all rx stats */
819 #define RX_DEBUGI_GETPEER 5 /* get all peer structs */
821 struct rx_debugStats {
822 afs_int32 nFreePackets;
823 afs_int32 packetReclaims;
824 afs_int32 callsExecuted;
825 char waitingForPackets;
830 afs_int32 idleThreads; /* Number of server threads that are idle */
834 struct rx_debugConn_vL {
838 afs_int32 callNumber[RX_MAXCALLS];
844 char callState[RX_MAXCALLS];
845 char callMode[RX_MAXCALLS];
846 char callFlags[RX_MAXCALLS];
847 char callOther[RX_MAXCALLS];
848 /* old style getconn stops here */
849 struct rx_securityObjectStats secStats;
850 afs_int32 sparel[10];
853 struct rx_debugConn {
857 afs_int32 callNumber[RX_MAXCALLS];
863 char sparec[3]; /* force correct alignment */
864 char callState[RX_MAXCALLS];
865 char callMode[RX_MAXCALLS];
866 char callFlags[RX_MAXCALLS];
867 char callOther[RX_MAXCALLS];
868 /* old style getconn stops here */
869 struct rx_securityObjectStats secStats;
875 struct rx_debugPeer {
883 struct clock burstWait;
886 struct clock timeout;
889 afs_int32 inPacketSkew;
890 afs_int32 outPacketSkew;
894 u_short maxDgramPackets;
895 u_short ifDgramPackets;
898 u_short nDgramPackets;
900 afs_hyper_t bytesSent;
901 afs_hyper_t bytesReceived;
902 afs_int32 sparel[10];
/* NOTE(review): these look like bit flags, presumably reported in the callOther[] entries of the debug connection structs -- confirm */
905 #define RX_OTHER_IN 1 /* packets avail in in queue */
906 #define RX_OTHER_OUT 2 /* packets avail in out queue */
910 /* Only include this once, even when re-loading for kdump. */
911 #ifndef _CALL_REF_DEFINED_
912 #define _CALL_REF_DEFINED_
914 #ifdef RX_ENABLE_LOCKS
915 #ifdef RX_REFCOUNT_CHECK
916 /* RX_REFCOUNT_CHECK is used to test for call refcount leaks by event
919 extern int rx_callHoldType;
920 #define CALL_HOLD(call, type) do { \
922 call->refCDebug[type]++; \
923 if (call->refCDebug[type] > 50) {\
924 rx_callHoldType = type; \
925 osi_Panic("Huge call refCount"); \
928 #define CALL_RELE(call, type) do { \
930 call->refCDebug[type]--; \
931 if (call->refCDebug[type] > 50) {\
932 rx_callHoldType = type; \
933 osi_Panic("Negative call refCount"); \
936 #else /* RX_REFCOUNT_CHECK */
/*
 * Variants used when RX_REFCOUNT_CHECK is off: only call->refCount is
 * adjusted; `type` is accepted for interface parity and otherwise unused.
 * The argument and the whole expansion are parenthesized so the macros work
 * with any pointer expression (e.g. `calls + i`) and in any expression context.
 */
#define CALL_HOLD(call, type) ((call)->refCount++)
#define CALL_RELE(call, type) ((call)->refCount--)
939 #endif /* RX_REFCOUNT_CHECK */
941 #else /* RX_ENABLE_LOCKS */
/* Without RX_ENABLE_LOCKS both macros expand to nothing, so they do not touch the call's reference count. */
942 #define CALL_HOLD(call, type)
943 #define CALL_RELE(call, type)
944 #endif /* RX_ENABLE_LOCKS */
946 #endif /* _CALL_REF_DEFINED_ */
/* Single-bit flags (0x1 .. 0x80) that can be OR-ed together. NOTE(review): presumably each bit advertises one debug feature a server supports -- confirm against the rxdebug code. */
948 #define RX_SERVER_DEBUG_SEC_STATS 0x1
949 #define RX_SERVER_DEBUG_ALL_CONN 0x2
950 #define RX_SERVER_DEBUG_RX_STATS 0x4
951 #define RX_SERVER_DEBUG_WAITER_CNT 0x8
952 #define RX_SERVER_DEBUG_IDLE_THREADS 0x10
953 #define RX_SERVER_DEBUG_OLD_CONN 0x20
954 #define RX_SERVER_DEBUG_NEW_PACKETS 0x40
955 #define RX_SERVER_DEBUG_ALL_PEER 0x80
/* Mask of statistics to clear. AFS_RX_STATS_CLEAR_ALL sets every bit; the remaining constants are single-bit flags whose names parallel the counter fields of rx_function_entry_v1 (invocations, bytes sent/received, queue-time and execution-time sum, square, min, max). */
957 #define AFS_RX_STATS_CLEAR_ALL 0xffffffff
958 #define AFS_RX_STATS_CLEAR_INVOCATIONS 0x1
959 #define AFS_RX_STATS_CLEAR_BYTES_SENT 0x2
960 #define AFS_RX_STATS_CLEAR_BYTES_RCVD 0x4
961 #define AFS_RX_STATS_CLEAR_QUEUE_TIME_SUM 0x8
962 #define AFS_RX_STATS_CLEAR_QUEUE_TIME_SQUARE 0x10
963 #define AFS_RX_STATS_CLEAR_QUEUE_TIME_MIN 0x20
964 #define AFS_RX_STATS_CLEAR_QUEUE_TIME_MAX 0x40
965 #define AFS_RX_STATS_CLEAR_EXEC_TIME_SUM 0x80
966 #define AFS_RX_STATS_CLEAR_EXEC_TIME_SQUARE 0x100
967 #define AFS_RX_STATS_CLEAR_EXEC_TIME_MIN 0x200
968 #define AFS_RX_STATS_CLEAR_EXEC_TIME_MAX 0x400
/* Version 1 of the per-function statistics record. Its size and format are tied to RX_STATS_RETRIEVAL_VERSION, so fields must not be reordered, resized or removed; create a new structure instead. */
970 typedef struct rx_function_entry_v1 {
/* Identification of what the counters below refer to (exact encodings are not visible here) */
971 afs_uint32 remote_peer;
972 afs_uint32 remote_port;
973 afs_uint32 remote_is_server;
974 afs_uint32 interfaceId;
975 afs_uint32 func_total;
976 afs_uint32 func_index;
/* Counters; their names parallel the AFS_RX_STATS_CLEAR_* flag bits */
977 afs_hyper_t invocations;
978 afs_hyper_t bytes_sent;
979 afs_hyper_t bytes_rcvd;
/* Queue-time aggregates: sum, presumably sum of squares (sum_sqr), minimum and maximum */
980 struct clock queue_time_sum;
981 struct clock queue_time_sum_sqr;
982 struct clock queue_time_min;
983 struct clock queue_time_max;
/* Execution-time aggregates, same four quantities */
984 struct clock execution_time_sum;
985 struct clock execution_time_sum_sqr;
986 struct clock execution_time_min;
987 struct clock execution_time_max;
988 } rx_function_entry_v1_t, *rx_function_entry_v1_p;
991 * If you need to change rx_function_entry, you should probably create a brand
992 * new structure. Keeping the old structure will allow backwards compatibility
993 * with old clients (even if it is only used to calculate allocation size).
994 * If you do change the size or the format, you'll need to bump
995 * RX_STATS_RETRIEVAL_VERSION. This allows some primitive form
996 * of versioning a la rxdebug.
/* Both constants are currently 1: only one retrieval format exists so far */
999 #define RX_STATS_RETRIEVAL_VERSION 1 /* latest version */
1000 #define RX_STATS_RETRIEVAL_FIRST_EDITION 1 /* first implementation */
/* Statistics container for one interface: two queue links followed by the per-function statistics entries. */
1002 typedef struct rx_interface_stat {
1003 struct rx_queue queue_header; /* NOTE(review): presumably links this entry into a per-owner list -- confirm */
1004 struct rx_queue all_peers; /* NOTE(review): presumably links this entry into a list spanning all peers -- confirm */
1005 rx_function_entry_v1_t stats[1]; /* make sure this is aligned correctly; NOTE(review): declared with one element, presumably over-allocated to hold one entry per function -- confirm against the allocation code */
1006 } rx_interface_stat_t, *rx_interface_stat_p;
/* NOTE(review): presumably the Rx service id under which statistics retrieval is exported -- confirm */
1008 #define RX_STATS_SERVICE_ID 409
1012 #endif /* _RX_ End of rx.h */
1015 #include "rx/rx_prototypes.h"
1017 #include "rx_prototypes.h"
1020 #endif /* !KDUMP_RX_LOCK */