2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include "afs/param.h"
16 #include "rx/rx_kcommon.h"
17 #include "h/tcp-param.h"
18 /* This must be loaded after proc.h to avoid a macro collision with a variable. */
19 #include "netinet/udp_var.h"
24 #ifdef RXK_LISTENER_ENV
26 * OS dependent part of kernel RX listener thread.
29 * so socket to receive on, typically rx_socket
30 * from pointer to a sockaddr_in.
31 * iov array of iovecs to fill in.
32 * iovcnt how many iovecs there are.
33 * lengthp IN/OUT in: total space available in iovecs. out: size of read.
37 * error code (such as EINTR) if not
40 * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
41 * so we have a little space to look for packets larger than
44 int rxk_lastSocketError = 0;
45 int rxk_nSocketErrors = 0;
46 int rxk_nSignalsCleared = 0;
49 osi_NetReceive(osi_socket so, struct sockaddr_in *addr, struct iovec *dvec,
50 int nvecs, int *alength)
54 struct mbuf *maddr = NULL;
55 struct sockaddr_in *taddr;
56 struct iovec tmpvec[RX_MAXWVECS + 2];
59 BHV_PDATA(&bhv) = (void *)so;
62 tuio.uio_iov = tmpvec;
63 tuio.uio_iovcnt = nvecs;
65 tuio.uio_segflg = AFS_UIOSYS;
67 tuio.uio_resid = *alength;
71 if (nvecs > RX_MAXWVECS + 2) {
72 osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", nvecs);
74 memcpy(tmpvec, (char *)dvec, (RX_MAXWVECS + 1) * sizeof(struct iovec));
76 code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
78 code = soreceive(so, &maddr, &tuio, NULL, NULL);
83 /* Clear the error before using the socket again. I've tried being nice
84 * and blocking SIGKILL and SIGSTOP from the kernel, but they get
85 * delivered anyway. So, time to be crude and just clear the signals
86 * pending on this thread.
89 uthread_t *ut = curuthread;
92 sigemptyset(&ut->ut_sig);
94 thread_interrupt_clear(UT_TO_KT(ut), 1);
96 rxk_nSignalsCleared++;
99 /* Clear the error before using the socket again. */
101 rxk_lastSocketError = code;
106 *alength = *alength - tuio.uio_resid;
108 memcpy((char *)addr, (char *)mtod(maddr, struct sockaddr_in *),
109 sizeof(struct sockaddr_in));
117 #else /* RXK_LISTENER_ENV */
119 static struct protosw parent_proto; /* udp proto switch */
122 * RX input, fast timer and initialization routines.
127 rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec *spec)
130 rxk_input(struct mbuf *am, struct ifnet *aif)
134 register unsigned short *tsp;
137 register struct ip *ti;
138 struct udpiphdr *tvu;
141 struct sockaddr_in taddr;
146 /* make sure we have base ip and udp headers in first mbuf */
147 if (am->m_off > MMAXOFF || am->m_len < 28) {
148 am = m_pullup(am, 28);
153 hdr = (mtod(am, struct ip *))->ip_hl;
155 /* pull up more, the IP hdr is bigger than usual */
156 if (am->m_len < (8 + (hdr << 2))) {
157 am = m_pullup(am, 8 + (hdr << 2));
161 ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
162 tu = (struct udphdr *)(((char *)ti) + (hdr << 2)); /* skip ip hdr */
164 ti = mtod(am, struct ip *);
165 tu = (struct udphdr *)(((char *)ti) + 20); /* skip basic ip hdr */
167 /* now read the port out */
171 for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
172 if (*tsp++ == port) {
173 /* checksum the packet */
175 ip_stripoptions(am, (struct mbuf *)0); /* get rid of anything we don't need */
176 tu = (struct udphdr *)(((char *)ti) + 20);
179 * Make mbuf data length reflect UDP length.
180 * If not enough data to reflect UDP length, drop.
182 tvu = (struct udpiphdr *)ti;
183 tlen = ntohs((u_short) tvu->ui_ulen);
184 if ((int)ti->ip_len != tlen) {
185 if (tlen > (int)ti->ip_len) {
189 m_adj(am, tlen - (int)ti->ip_len);
191 /* deliver packet to rx */
192 taddr.sin_family = AF_INET; /* compute source address */
193 taddr.sin_port = tu->uh_sport;
194 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
195 /* handle the checksum. Note that this code damages the actual ip
196 * header (replacing it with the virtual one, which is the same size),
197 * so we must ensure we get everything out we need, first */
198 if (tu->uh_sum != 0) {
199 /* if the checksum is there, always check it. It's crazy not
200 * to, unless you can really be sure that your
201 * underlying network (and interfaces and drivers and
202 * DMA hardware, etc!) is error-free. First, fill
203 * in entire virtual ip header. */
207 tvu->ui_len = tvu->ui_ulen;
208 tlen = ntohs((unsigned short)(tvu->ui_ulen));
209 if ((!(am->m_flags & M_CKSUMMED))
210 && in_cksum(am, sizeof(struct ip) + tlen)) {
211 /* checksum, including cksum field, doesn't come out 0, so
212 * this packet is bad */
219 * 28 is IP (20) + UDP (8) header. ulen includes
220 * udp header, and we *don't* tell RX about udp
221 * header either. So, we remove those 8 as well.
223 data_len = ntohs(tu->uh_ulen);
225 if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
226 if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
227 /* XXX should just increment counter here.. */
228 printf("rx: truncated UDP packet\n");
229 rxi_FreePacket(phandle);
231 (*rxk_PacketArrivalProc) (phandle, &taddr,
232 rxk_portRocks[i], data_len);
240 /* if we get here, try to deliver packet to udp */
241 if (tproc = parent_proto.pr_input)
247 * UDP fast timer to raise events for all but Solaris and NCR.
248 * Called about 5 times per second (at unknown priority?). Must go to
249 * splnet or obtain global lock before touching anything significant.
257 /* do rx fasttimo processing here */
258 rxevent_RaiseEvents(&temp);
259 if (tproc = parent_proto.pr_fasttimo)
264 /* start intercepting basic calls */
268 register struct protosw *tpro, *last;
272 last = inetdomain.dom_protoswNPROTOSW;
273 for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
274 if (tpro->pr_protocol == IPPROTO_UDP) {
275 memcpy(&parent_proto, tpro, sizeof(parent_proto));
276 tpro->pr_input = rxk_input;
277 tpro->pr_fasttimo = rxk_fasttimo;
282 osi_Panic("inet:no udp");
284 #endif /* RXK_LISTENER_ENV */
287 * RX IP address routines.
290 static afs_uint32 myNetAddrs[ADDRSPERSITE];
291 static int myNetMTUs[ADDRSPERSITE];
292 static int myNetFlags[ADDRSPERSITE];
293 static int numMyNetAddrs = 0;
295 /* This version doesn't even begin to handle iterative requests, but then
296 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
297 * to find a true maximum.
300 rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1, caddr_t arg2)
302 afs_uint32 ppaddr = *(afs_uint32 *) key;
303 int match_value = *(int *)arg1;
304 struct in_ifaddr *ifa = (struct in_ifaddr *)h;
305 struct sockaddr_in *sin;
307 if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
308 if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
310 if (sin->sin_addr.s_addr == ppaddr) { /* ie, ME!!! */
312 *(struct in_ifaddr **)arg2 = ifa;
314 if (match_value < 3) {
315 *(struct in_ifaddr **)arg2 = ifa;
319 if (match_value < 2) {
320 *(struct in_ifaddr **)arg2 = ifa;
325 *(int *)arg1 = match_value;
331 rxi_FindIfnet(afs_uint32 addr, afs_uint32 * maskp)
335 struct in_ifaddr *ifad;
337 if (numMyNetAddrs == 0)
338 (void)rxi_GetIFInfo();
340 ppaddr = ntohl(addr);
341 ifad = (struct in_ifaddr *)&hashinfo_inaddr;
343 (void)hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
344 (caddr_t) & ppaddr, (caddr_t) & match_value,
349 *maskp = ifad->ia_subnetmask;
356 rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
359 int different = *(int *)arg1;
360 int i = *(int *)arg2;
361 struct in_ifaddr *iap = (struct in_ifaddr *)h;
366 if (i >= ADDRSPERSITE)
370 rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
371 ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
372 if (myNetAddrs[i] != ifinaddr) {
373 myNetAddrs[i] = ifinaddr;
374 myNetMTUs[i] = rxmtu;
376 *(int *)arg1 = different;
378 rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
379 if ((ifinaddr != 0x7f000001) && (rxmtu > rx_maxReceiveSize)) {
380 rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, rxmtu);
381 rx_maxReceiveSize = MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
384 *(int *)arg2 = i + 1;
394 /* SGI 6.2 does not have a pointer from the ifnet to the list of
395 * addresses (if_addrlist). So it's more efficient to run the
396 * in_ifaddr list and use the back pointers to the ifnet structs.
398 (void)hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET, NULL,
399 (caddr_t) & different, (caddr_t) & i);
401 rx_maxJumboRecvSize =
402 RX_HEADER_SIZE + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE +
403 (rxi_nDgramPackets - 1) * RX_JUMBOHEADERSIZE;
404 rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
409 /* osi_NetSend - from the now defunct afs_osinet.c */
418 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
419 register struct osi_socket *asocket;
422 register afs_int32 asize;
423 struct sockaddr_in *addr;
427 struct iovec tvecs[RX_MAXWVECS + 1];
434 if (nvec > RX_MAXWVECS + 1) {
435 osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
437 memcpy((char *)tvecs, (char *)dvec, nvec * sizeof(struct iovec));
439 tuio.uio_iov = tvecs;
440 tuio.uio_iovcnt = nvec;
441 tuio.uio_segflg = UIO_SYSSPACE;
443 tuio.uio_sigpipe = 0;
448 for (i = 0, iovp = tvecs; i < nvec; i++, iovp++)
449 tuio.uio_resid += iovp->iov_len;
452 to = m_get(M_WAIT, MT_SONAME);
453 to->m_len = sizeof(struct sockaddr_in);
454 memcpy(mtod(to, caddr_t), (char *)addr, to->m_len);
456 BHV_PDATA(&bhv) = (void *)asocket;
457 code = sosend(&bhv, to, &tuio, 0, NULL);
462 #else /* AFS_SGI65_ENV */
465 dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
468 ("sblock was called before it was installed. Install proper afsd.\n");
472 dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
475 ("sbunlock was called before it was installed. Install proper afsd.\n");
478 int (*afs_sblockp) (struct sockbuf *, int, struct socket *, int *, int) =
480 void (*afs_sbunlockp) (struct sockbuf *, int, struct socket *, int) =
482 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
484 /* osi_NetSend - send asize bytes, gathered from the nvec iovecs in dvec, from asocket to host at addr.
486 * Now, why do we allocate new buffers when we could theoretically use the ones
487 * the iovecs point to? Because PRU_SEND returns after queueing the message,
488 * not after sending it. If the sender changes the data after queueing it,
489 * we'd see the already-queued data change. One attempt to fix this without
490 * adding a copy would be to have this function wait until the datagram is
491 * sent; however this doesn't work well. In particular, if a host is down, and
492 * an ARP fails to that host, this packet will be queued until the ARP request
493 * comes back, which could be hours later. We can't block in this routine that
494 * long, since it prevents RPC timeouts from happening.
496 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
497 * and just queue those. XXX
500 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
501 register struct socket *asocket;
504 register afs_int32 asize;
505 struct sockaddr_in *addr;
508 register struct mbuf *tm, *um;
509 register afs_int32 code;
511 struct mbuf *top = 0;
512 register struct mbuf *m, **mp;
519 AFS_STATCNT(osi_NetSend);
521 (*afs_sblockp) (&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
526 tdata = dvec[i].iov_base;
527 tl = dvec[i].iov_len;
529 if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
533 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
536 len = MIN(m->m_len, asize);
538 tpa = mtod(m, caddr_t);
541 memcpy(tpa, tdata, rlen);
551 /* shouldn't come here! */
552 asize = 0; /* so we make progress toward completion */
555 tdata = dvec[i].iov_base;
556 tl = dvec[i].iov_len;
568 /* setup mbuf corresponding to destination address */
569 um = m_get(M_DONTWAIT, MT_SONAME);
572 m_freem(top); /* free mbuf chain */
573 /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
574 * we don't do the locking at all for vfs40 systems */
576 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
579 memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
580 um->m_len = sizeof(*addr);
581 /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
582 * around it is gone. we free address ourselves. */
583 code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
586 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
590 #endif /* AFS_SGI65_ENV */