2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include "afs/param.h"
14 #include "rx/rx_kcommon.h"
15 #include "h/tcp-param.h"
16 /* This must be loaded after proc.h to avoid a macro collision with a variable. */
17 #include "netinet/udp_var.h"
22 #ifdef RXK_LISTENER_ENV
24 * OS dependent part of kernel RX listener thread.
27 * so socket to receive on, typically rx_socket
28 * from pointer to a sockaddr_in.
29 * iov array of iovecs to fill in.
30 * iovcnt how many iovecs there are.
31 * lengthp IN/OUT in: total space available in iovecs. out: size of read.
35 * error code (such as EINTR) if not
38 * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
39 * so we have a little space to look for packets larger than
42 int rxk_lastSocketError = 0;
43 int rxk_nSocketErrors = 0;
44 int rxk_nSignalsCleared = 0;
/*
 * osi_NetReceive: read one datagram from socket so through soreceive().
 *
 * The caller iovecs (dvec) are copied into the local tmpvec array, which is
 * what the uio handed to soreceive() points at. On the visible path,
 * *alength is then reduced by the uio residual and a sockaddr_in is copied
 * from the data area of the mbuf maddr into *addr.
 *
 * Parameters:
 *   so      - socket to receive on
 *   addr    - out: filled from the mbuf that soreceive() stores into maddr
 *   dvec    - iovec array describing the receive buffers
 *   nvecs   - number of iovecs; more than RX_MAXWVECS + 2 panics
 *   alength - in: stored into uio_resid before the call;
 *             out: reduced by the uio_resid left afterwards
 *
 * Two soreceive() call forms appear below (one through a behavior
 * descriptor, one on the socket directly); presumably they are alternatives
 * selected at build time - TODO confirm.
 */
47 osi_NetReceive(osi_socket so, struct sockaddr_in *addr, struct iovec *dvec,
48 int nvecs, int *alength)
52 struct mbuf *maddr = NULL;
53 struct sockaddr_in *taddr;
54 struct iovec tmpvec[RX_MAXWVECS + 2];
57 BHV_PDATA(&bhv) = (void *)so;
60 tuio.uio_iov = tmpvec;
61 tuio.uio_iovcnt = nvecs;
63 tuio.uio_segflg = AFS_UIOSYS;
65 tuio.uio_resid = *alength;
69 if (nvecs > RX_MAXWVECS + 2) {
70 osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", nvecs);
/* NOTE(review): the guard above admits nvecs up to RX_MAXWVECS + 2 (the size
 * of tmpvec), but this copy is a fixed RX_MAXWVECS + 1 entries regardless of
 * nvecs. With nvecs == RX_MAXWVECS + 2 the last tmpvec slot is not filled
 * here, and with fewer iovecs the copy still reads RX_MAXWVECS + 1 entries
 * from dvec. Confirm that callers always pass an array at least that large. */
72 memcpy(tmpvec, (char *)dvec, (RX_MAXWVECS + 1) * sizeof(struct iovec));
74 code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
76 code = soreceive(so, &maddr, &tuio, NULL, NULL);
81 /* Clear the error before using the socket again. I've tried being nice
82 * and blocking SIGKILL and SIGSTOP from the kernel, but they get
83 * delivered anyway. So, time to be crude and just clear the signals
84 * pending on this thread.
87 uthread_t *ut = curuthread;
90 sigemptyset(&ut->ut_sig);
92 thread_interrupt_clear(UT_TO_KT(ut), 1);
94 rxk_nSignalsCleared++;
97 /* Clear the error before using the socket again. */
99 rxk_lastSocketError = code;
/* uio_resid is the space left unused, so this yields the size of the read. */
104 *alength = *alength - tuio.uio_resid;
/* The sender address is taken from the start of the data area of maddr. */
106 memcpy((char *)addr, (char *)mtod(maddr, struct sockaddr_in *),
107 sizeof(struct sockaddr_in));
115 #else /* RXK_LISTENER_ENV */
117 static struct protosw parent_proto; /* udp proto switch */
120 * RX input, fast timer and initialization routines.
/*
 * rxk_input: UDP input hook (stored into pr_input of the UDP protosw entry
 * elsewhere in this file).
 *
 * Visible flow:
 *   1. Pull the basic IP + UDP headers (28 bytes) into the first mbuf, and
 *      more when the IP header length field says the header is longer.
 *   2. Compare the port against each of the MAXRXPORTS entries in rxk_ports.
 *   3. On a match: strip IP options, reconcile the mbuf length with the UDP
 *      length, record the source address in taddr, verify the UDP checksum
 *      when uh_sum is nonzero (skipped for mbufs flagged M_CKSUMMED), then
 *      hand the payload to RX through rxk_GetPacketProc, rx_mb_to_packet and
 *      rxk_PacketArrivalProc.
 *   4. Otherwise pick up the saved UDP handler from parent_proto.pr_input.
 *
 * Two signatures appear (with and without a struct ipsec argument);
 * presumably they are build-time alternatives - TODO confirm.
 */
125 rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec *spec)
128 rxk_input(struct mbuf *am, struct ifnet *aif)
136 struct udpiphdr *tvu;
139 struct sockaddr_in taddr;
144 /* make sure we have base ip and udp headers in first mbuf */
145 if (am->m_off > MMAXOFF || am->m_len < 28) {
146 am = m_pullup(am, 28);
151 hdr = (mtod(am, struct ip *))->ip_hl;
153 /* pull up more, the IP hdr is bigger than usual */
154 if (am->m_len < (8 + (hdr << 2))) {
155 am = m_pullup(am, 8 + (hdr << 2));
159 ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
160 tu = (struct udphdr *)(((char *)ti) + (hdr << 2)); /* skip ip hdr */
162 ti = mtod(am, struct ip *);
163 tu = (struct udphdr *)(((char *)ti) + 20); /* skip basic ip hdr */
165 /* now read the port out */
/* i is the slot of the matching port; it later selects rxk_portRocks[i]. */
169 for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
170 if (*tsp++ == port) {
171 /* checksum the packet */
173 ip_stripoptions(am, (struct mbuf *)0); /* get rid of anything we don't need */
174 tu = (struct udphdr *)(((char *)ti) + 20);
177 * Make mbuf data length reflect UDP length.
178 * If not enough data to reflect UDP length, drop.
180 tvu = (struct udpiphdr *)ti;
181 tlen = ntohs((u_short) tvu->ui_ulen);
182 if ((int)ti->ip_len != tlen) {
183 if (tlen > (int)ti->ip_len) {
/* tlen differs from ip_len and the tlen > ip_len case is tested just above
 * (presumably dropping the packet), so this count is negative.
 * NOTE(review): in BSD m_adj a negative count trims from the tail of the
 * chain - confirm for this platform. */
187 m_adj(am, tlen - (int)ti->ip_len);
189 /* deliver packet to rx */
190 taddr.sin_family = AF_INET; /* compute source address */
191 taddr.sin_port = tu->uh_sport;
192 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
193 /* handle the checksum. Note that this code damages the actual ip
194 * header (replacing it with the virtual one, which is the same size),
195 * so we must ensure we get everything out we need, first */
196 if (tu->uh_sum != 0) {
197 /* if the checksum is there, always check it. It's crazy not
198 * to, unless you can really be sure that your
199 * underlying network (and interfaces and drivers and
200 * DMA hardware, etc!) is error-free. First, fill
201 * in entire virtual ip header. */
205 tvu->ui_len = tvu->ui_ulen;
206 tlen = ntohs((unsigned short)(tvu->ui_ulen));
207 if ((!(am->m_flags & M_CKSUMMED))
208 && in_cksum(am, sizeof(struct ip) + tlen)) {
209 /* checksum, including cksum field, doesn't come out 0, so
210 * this packet is bad */
217 * 28 is IP (20) + UDP (8) header. ulen includes
218 * udp header, and we *don't* tell RX about udp
219 * header either. So, we remove those 8 as well.
221 data_len = ntohs(tu->uh_ulen);
223 if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
224 if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
225 /* XXX should just increment counter here.. */
226 printf("rx: truncated UDP packet\n");
227 rxi_FreePacket(phandle);
229 (*rxk_PacketArrivalProc) (phandle, &taddr,
230 rxk_portRocks[i], data_len);
238 /* if we get here, try to deliver packet to udp */
239 if (tproc = parent_proto.pr_input)
/* Fast-timer hook: raises pending RX events through rxevent_RaiseEvents()
 * and then loads the pr_fasttimo handler saved in parent_proto into tproc,
 * presumably so the original UDP fast timer can be chained - TODO confirm. */
245 * UDP fast timer to raise events for all but Solaris and NCR.
246 * Called about 5 times per second (at unknown priority?). Must go to
247 * splnet or obtain global lock before touching anything significant.
255 /* do rx fasttimo processing here */
256 rxevent_RaiseEvents(&temp);
257 if (tproc = parent_proto.pr_fasttimo)
/* Hook RX into the UDP protocol switch: walk the inet domain protosw table
 * from dom_protosw up to dom_protoswNPROTOSW, and for the IPPROTO_UDP entry
 * save a full copy in parent_proto before redirecting its pr_input and
 * pr_fasttimo to rxk_input and rxk_fasttimo. The panic at the end is
 * presumably reached only when no UDP entry was found - TODO confirm. */
262 /* start intercepting basic calls */
266 struct protosw *tpro, *last;
270 last = inetdomain.dom_protoswNPROTOSW;
271 for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
272 if (tpro->pr_protocol == IPPROTO_UDP) {
/* Keep the original entry so the stock handlers can still be reached. */
273 memcpy(&parent_proto, tpro, sizeof(parent_proto));
274 tpro->pr_input = rxk_input;
275 tpro->pr_fasttimo = rxk_fasttimo;
280 osi_Panic("inet:no udp");
282 #endif /* RXK_LISTENER_ENV */
285 * RX IP address routines.
288 static afs_uint32 myNetAddrs[ADDRSPERSITE];
289 static int myNetMTUs[ADDRSPERSITE];
290 static int myNetFlags[ADDRSPERSITE];
291 static int numMyNetAddrs = 0;
/*
 * rxi_MatchIfnet: hash_enum() callback that ranks one in_ifaddr entry
 * against a peer address.
 *
 *   h    - hash bucket, treated as a struct in_ifaddr
 *   key  - points at the peer address (afs_uint32) to compare
 *   arg1 - in/out: best match rank seen so far (int); written back at the end
 *   arg2 - out: receives the in_ifaddr pointer of the best entry
 *
 * Three nested tests are visible: same network, same subnet, and identical
 * address. The thresholds (match_value < 3, match_value < 2) mean an entry
 * replaces the stored one only when the stored rank is lower. The
 * statements that raise match_value are not visible here.
 */
293 /* This version doesn't even begin to handle iterative requests, but then
294 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
295 * to find a true maximum.
298 rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1, caddr_t arg2)
300 afs_uint32 ppaddr = *(afs_uint32 *) key;
301 int match_value = *(int *)arg1;
302 struct in_ifaddr *ifa = (struct in_ifaddr *)h;
303 struct sockaddr_in *sin;
305 if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
306 if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
308 if (sin->sin_addr.s_addr == ppaddr) { /* ie, ME!!! */
310 *(struct in_ifaddr **)arg2 = ifa;
312 if (match_value < 3) {
313 *(struct in_ifaddr **)arg2 = ifa;
317 if (match_value < 2) {
318 *(struct in_ifaddr **)arg2 = ifa;
/* Publish the (possibly updated) rank for the next enumerated entry. */
323 *(int *)arg1 = match_value;
/*
 * rxi_FindIfnet: find the local interface address entry that best matches
 * addr by enumerating the in_ifaddr hash with rxi_MatchIfnet.
 *
 *   addr  - peer address; converted with ntohl() before matching
 *   maskp - out: receives ia_subnetmask of the selected entry
 *
 * The interface table is loaded first through rxi_GetIFInfo() when
 * numMyNetAddrs is still zero. The return statement is not visible here.
 */
329 rxi_FindIfnet(afs_uint32 addr, afs_uint32 * maskp)
333 struct in_ifaddr *ifad;
335 if (numMyNetAddrs == 0)
336 (void)rxi_GetIFInfo();
338 ppaddr = ntohl(addr);
/* ifad starts out pointing at the hash table itself, presumably as a
 * "no match yet" sentinel - TODO confirm against the missing lines. */
339 ifad = (struct in_ifaddr *)&hashinfo_inaddr;
341 (void)hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
342 (caddr_t) & ppaddr, (caddr_t) & match_value,
347 *maskp = ifad->ia_subnetmask;
/*
 * rxi_EnumGetIfInfo: hash_enum() callback that records one interface
 * address in the myNetAddrs / myNetMTUs tables.
 *
 *   h    - hash bucket, treated as a struct in_ifaddr
 *   arg1 - in/out: "different" flag (int), written back inside the branch
 *          taken when the stored address changes
 *   arg2 - in/out: next free table index (int); advanced by one at the end
 *
 * Besides filling the tables, the callback may raise rx_maxReceiveSize for
 * non-loopback interfaces, capped by RX_MAX_PACKET_SIZE and
 * rx_maxReceiveSizeUser.
 */
354 rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
357 int different = *(int *)arg1;
358 int i = *(int *)arg2;
359 struct in_ifaddr *iap = (struct in_ifaddr *)h;
/* Guard against running past the fixed-size tables. */
364 if (i >= ADDRSPERSITE)
/* Usable RX payload per datagram: interface MTU less RX_IPUDP_SIZE. */
368 rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
369 ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
370 if (myNetAddrs[i] != ifinaddr) {
371 myNetAddrs[i] = ifinaddr;
372 myNetMTUs[i] = rxmtu;
374 *(int *)arg1 = different;
/* Scale to rxi_nRecvFrags fragments, adding UDP_HDR_SIZE for each fragment
 * after the first. */
376 rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
377 if (!rx_IsLoopbackAddr(ifinaddr) && (rxmtu > rx_maxReceiveSize)) {
378 rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, rxmtu);
379 rx_maxReceiveSize = MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
382 *(int *)arg2 = i + 1;
/* Refresh the cached interface information: enumerate the in_ifaddr hash
 * with rxi_EnumGetIfInfo (which fills myNetAddrs and myNetMTUs and may raise
 * rx_maxReceiveSize), then recompute rx_maxJumboRecvSize from
 * rxi_nDgramPackets, never letting it fall below rx_maxReceiveSize. */
392 /* SGI 6.2 does not have a pointer from the ifnet to the list of
393 * of addresses (if_addrlist). So it's more efficient to run the
394 * in_ifaddr list and use the back pointers to the ifnet struct's.
396 (void)hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET, NULL,
397 (caddr_t) & different, (caddr_t) & i);
399 rx_maxJumboRecvSize =
400 RX_HEADER_SIZE + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE +
401 (rxi_nDgramPackets - 1) * RX_JUMBOHEADERSIZE;
402 rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
407 /* osi_NetSend - from the now defunct afs_osinet.c */
/*
 * osi_NetSend (variant preceding the AFS_SGI65_ENV #else): send the data
 * described by dvec/nvec to addr through sosend().
 *
 * The caller iovecs are copied into the local tvecs array, a system-space
 * uio is built over that copy with uio_resid accumulated from the iovec
 * lengths, and the destination address is placed in an MT_SONAME mbuf
 * obtained with M_WAIT. More than RX_MAXWVECS + 1 iovecs panics.
 * The use made of asize and istack, and the return value, are not visible
 * here.
 */
416 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
421 struct sockaddr_in *addr;
425 struct iovec tvecs[RX_MAXWVECS + 1];
432 if (nvec > RX_MAXWVECS + 1) {
433 osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
/* Only nvec entries are copied, matching the bound checked above. */
435 memcpy((char *)tvecs, (char *)dvec, nvec * sizeof(struct iovec));
437 tuio.uio_iov = tvecs;
438 tuio.uio_iovcnt = nvec;
439 tuio.uio_segflg = UIO_SYSSPACE;
441 tuio.uio_sigpipe = 0;
/* NOTE(review): uio_resid is accumulated with +=; its initialization is not
 * visible here - confirm it starts at zero. */
446 for (i = 0, iovp = tvecs; i < nvec; i++, iovp++)
447 tuio.uio_resid += iovp->iov_len;
/* Destination address travels in its own mbuf. */
450 to = m_get(M_WAIT, MT_SONAME);
451 to->m_len = sizeof(struct sockaddr_in);
452 memcpy(mtod(to, caddr_t), (char *)addr, to->m_len);
454 BHV_PDATA(&bhv) = (void *)asocket;
455 code = sosend(&bhv, to, &tuio, 0, NULL);
460 #else /* AFS_SGI65_ENV */
/* Placeholder with the same parameter list as the afs_sblockp hook. Its
 * visible body is only the diagnostic string below; the call that consumes
 * the string is not visible here (presumably a panic - TODO confirm). */
463 dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
466 ("sblock was called before it was installed. Install proper afsd.\n");
/* Placeholder with the same parameter list as the afs_sbunlockp hook. Its
 * visible body is only the diagnostic string below; the call that consumes
 * the string is not visible here (presumably a panic - TODO confirm). */
470 dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
473 ("sbunlock was called before it was installed. Install proper afsd.\n");
/* Indirect hooks for the socket-buffer lock and unlock routines. The
 * initializers are not visible here; presumably they are the dummy_*
 * placeholders until the real routines are installed - TODO confirm. */
476 int (*afs_sblockp) (struct sockbuf *, int, struct socket *, int *, int) =
478 void (*afs_sbunlockp) (struct sockbuf *, int, struct socket *, int) =
/* Convenience wrapper: unlock through the afs_sbunlockp hook. */
480 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
/*
 * osi_NetSend (mbuf-copy variant): takes the send-buffer lock through the
 * afs_sblockp hook, copies the caller iovecs (dvec) into mbufs obtained
 * with m_vget(M_DONTWAIT), builds an MT_SONAME mbuf holding the destination
 * address, and submits both to the socket protocol through
 * pr_usrreq(PRU_SEND). AFS_SBUNLOCK is issued after each of the two visible
 * allocation calls (m_vget, m_get) and again after the send; the mbuf chain
 * held in top is freed before the unlock that follows m_get. The return
 * statements are not visible here.
 *
 * NOTE(review): the older description below speaks of "adata"; the data is
 * supplied through the dvec/nvec parameters of this definition.
 */
482 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
484 * Now, why do we allocate a new buffer when we could theoretically use the one
485 * pointed to by adata? Because PRU_SEND returns after queueing the message,
486 * not after sending it. If the sender changes the data after queueing it,
487 * we'd see the already-queued data change. One attempt to fix this without
488 * adding a copy would be to have this function wait until the datagram is
489 * sent; however this doesn't work well. In particular, if a host is down, and
490 * an ARP fails to that host, this packet will be queued until the ARP request
491 * comes back, which could be hours later. We can't block in this routine that
492 * long, since it prevents RPC timeouts from happening.
494 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
495 * and just queue those. XXX
498 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
499 struct socket *asocket;
503 struct sockaddr_in *addr;
506 struct mbuf *tm, *um;
509 struct mbuf *top = 0;
510 struct mbuf *m, **mp;
517 AFS_STATCNT(osi_NetSend);
519 (*afs_sblockp) (&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
524 tdata = dvec[i].iov_base;
525 tl = dvec[i].iov_len;
527 if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
531 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
534 len = MIN(m->m_len, asize);
536 tpa = mtod(m, caddr_t);
539 memcpy(tpa, tdata, rlen);
549 /* shouldn't come here! */
550 asize = 0; /* so we make progress toward completion */
553 tdata = dvec[i].iov_base;
554 tl = dvec[i].iov_len;
566 /* setup mbuf corresponding to destination address */
567 um = m_get(M_DONTWAIT, MT_SONAME);
570 m_freem(top); /* free mbuf chain */
571 /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
572 * we don't do the locking at all for vfs40 systems */
574 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
577 memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
578 um->m_len = sizeof(*addr);
579 /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
580 * around it is gone. we free address ourselves. */
581 code = (*asocket->so_proto->pr_usrreq) (asocket, PRU_SEND, tm, um, 0);
584 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
588 #endif /* AFS_SGI65_ENV */