2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include "../afs/param.h"
15 #include "../rx/rx_kcommon.h"
16 #include "../h/tcp-param.h"
17 /* This must be loaded after proc.h to avoid a macro collision with a variable. */
18 #include "../netinet/udp_var.h"
23 #ifdef RXK_LISTENER_ENV
25 * OS dependent part of kernel RX listener thread.
28 * so socket to receive on, typically rx_socket
29 * from pointer to a sockaddr_in.
30 * iov array of iovecs to fill in.
31 * iovcnt how many iovecs there are.
32 * lengthp IN/OUT in: total space available in iovecs. out: size of read.
36 * error code (such as EINTR) if not
39 * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
40 * so we have a little space to look for packets larger than
/* Receive-path diagnostics updated by osi_NetReceive(): the most recent
 * error code returned by soreceive(), the number of receive errors seen,
 * and the number of times pending signals were cleared on the receiving
 * thread. */
43 int rxk_lastSocketError = 0;
44 int rxk_nSocketErrors = 0;
45 int rxk_nSignalsCleared = 0;
/*
 * osi_NetReceive - receive one datagram from socket so using soreceive().
 *
 * The caller's iovecs are copied into the local tmpvec array and the uio
 * handed to soreceive() points at that copy.  Two alternative soreceive()
 * calls are shown, one through a behavior descriptor (bhv) and one with the
 * socket itself; the preprocessor lines selecting between them are not
 * visible in this listing.
 *
 * On the error path the code is recorded in rxk_lastSocketError and
 * rxk_nSocketErrors is incremented; one variant also clears the signals
 * pending on the current uthread and counts that in rxk_nSignalsCleared.
 * On the success path *lengthp is reduced by the uio residual (so it ends
 * up as the number of bytes read) and the sender's sockaddr_in is copied
 * from the address mbuf into *from.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * conditionals around the error handling, mbuf frees, the return) are not
 * visible here; the summary covers only the statements shown.
 */
46 int osi_NetReceive(osi_socket so, struct sockaddr_in *from,
47 struct iovec *iov, int iovcnt, int *lengthp)
51 struct mbuf *maddr = NULL;
52 struct sockaddr_in *taddr;
53 struct iovec tmpvec[RX_MAXWVECS+2];
56 BHV_PDATA(&bhv) = (void*)so;
59 tuio.uio_iov = tmpvec;
60 tuio.uio_iovcnt = iovcnt;
62 tuio.uio_segflg = AFS_UIOSYS;
64 tuio.uio_resid = *lengthp;
/* NOTE(review): the guard below allows up to RX_MAXWVECS+2 iovecs, which
 * matches the size of tmpvec, but the memcpy that follows always copies
 * exactly RX_MAXWVECS+1 entries regardless of iovcnt.  With
 * iovcnt == RX_MAXWVECS+2 the last tmpvec entry is left uninitialized, and
 * with a smaller iovcnt the copy may read past the end of the caller's
 * array.  Copying iovcnt entries looks like the intent - confirm and fix. */
68 if (iovcnt > RX_MAXWVECS+2) {
69 osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
71 memcpy(tmpvec, (char*)iov, (RX_MAXWVECS+1) * sizeof(struct iovec));
73 code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
75 code = soreceive(so, &maddr, &tuio, NULL, NULL);
80 /* Clear the error before using the socket again. I've tried being nice
81 * and blocking SIGKILL and SIGSTOP from the kernel, but they get
82 * delivered anyway. So, time to be crude and just clear the signals
83 * pending on this thread.
86 uthread_t *ut = curuthread;
89 sigemptyset(&ut->ut_sig);
91 thread_interrupt_clear(UT_TO_KT(ut), 1);
93 rxk_nSignalsCleared++;
96 /* Clear the error before using the socket again. */
98 rxk_lastSocketError = code;
99 rxk_nSocketErrors ++ ;
104 *lengthp = *lengthp - tuio.uio_resid;
106 memcpy((char*)from, (char*)mtod(maddr, struct sockaddr_in *),
107 sizeof(struct sockaddr_in));
116 #else /* RXK_LISTENER_ENV */
/* Saved copy of the original UDP protosw entry.  rxk_init() copies the
 * entry here before replacing its pr_input and pr_fasttimo handlers, and
 * rxk_input() / rxk_fasttimo() chain to the handlers saved in this copy. */
118 static struct protosw parent_proto; /* udp proto switch */
121 * RX input, fast timer and initialization routines.
/*
 * rxk_input - replacement UDP pr_input handler (installed by rxk_init()).
 *
 * Two alternative signatures are shown (with and without the ipsec
 * argument); the preprocessor lines that choose between them are not
 * visible in this listing.
 *
 * Visible flow:
 *   - make sure the IP and UDP headers (20 + 8 = 28 bytes, more when the
 *     IP header length field says the header is longer) are contiguous in
 *     the first mbuf, using m_pullup();
 *   - compare the port read from the UDP header (presumably the
 *     destination port; the line that reads it is not visible) against
 *     the MAXRXPORTS entries of rxk_ports;
 *   - on a match: strip IP options, adjust the mbuf chain to the UDP
 *     length, record the sender address in taddr, verify the UDP checksum
 *     when the packet carries one and the mbuf is not flagged M_CKSUMMED,
 *     obtain an rx packet through rxk_GetPacketProc, copy the data into it
 *     with rx_mb_to_packet(), and deliver it through rxk_PacketArrivalProc
 *     together with rxk_portRocks[i];
 *   - otherwise pass the mbuf to the original UDP input routine saved in
 *     parent_proto.
 *
 * NOTE(review): many lines (declarations, drop paths, frees, returns) are
 * missing from this listing, so mbuf ownership on the error paths cannot
 * be checked from here.
 */
125 static void rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec * spec)
127 static void rxk_input(struct mbuf *am, struct ifnet *aif)
131 register unsigned short *tsp;
134 register struct ip *ti;
135 struct udpiphdr *tvu;
138 struct sockaddr_in taddr;
143 /* make sure we have base ip and udp headers in first mbuf */
144 if (am->m_off > MMAXOFF || am->m_len < 28) {
145 am = m_pullup(am, 28);
149 hdr = (mtod(am, struct ip *))->ip_hl;
151 /* pull up more, the IP hdr is bigger than usual */
152 if (am->m_len < (8 + (hdr<<2))) {
153 am = m_pullup(am, 8+(hdr<<2));
156 ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
157 tu = (struct udphdr *)(((char *)ti) + (hdr<<2)); /* skip ip hdr */
160 ti = mtod(am, struct ip *);
161 tu = (struct udphdr *)(((char *)ti) + 20); /* skip basic ip hdr */
163 /* now read the port out */
167 for(tsp=rxk_ports, i=0; i<MAXRXPORTS;i++) {
168 if (*tsp++ == port) {
169 /* checksum the packet */
171 ip_stripoptions(am, (struct mbuf *) 0); /* get rid of anything we don't need */
172 tu = (struct udphdr *)(((char *)ti) + 20);
175 * Make mbuf data length reflect UDP length.
176 * If not enough data to reflect UDP length, drop.
178 tvu = (struct udpiphdr *)ti;
179 tlen = ntohs((u_short)tvu->ui_ulen);
/* NOTE(review): when tlen is smaller than ip_len the count passed to
 * m_adj() below is negative; presumably that trims from the tail of the
 * chain as in BSD m_adj - confirm. */
180 if ((int)ti->ip_len != tlen) {
181 if (tlen > (int)ti->ip_len) {
185 m_adj(am, tlen - (int)ti->ip_len);
187 /* deliver packet to rx */
188 taddr.sin_family = AF_INET; /* compute source address */
189 taddr.sin_port = tu->uh_sport;
190 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
191 /* handle the checksum. Note that this code damages the actual ip
192 header (replacing it with the virtual one, which is the same size),
193 so we must ensure we get everything out we need, first */
194 if ( tu->uh_sum != 0) {
195 /* if the checksum is there, always check it. It's crazy not
196 * to, unless you can really be sure that your
197 * underlying network (and interfaces and drivers and
198 * DMA hardware, etc!) is error-free. First, fill
199 * in entire virtual ip header. */
203 tvu->ui_len = tvu->ui_ulen;
204 tlen = ntohs((unsigned short)(tvu->ui_ulen));
205 if ((!(am->m_flags & M_CKSUMMED)) &&
206 in_cksum(am, sizeof(struct ip) + tlen)){
207 /* checksum, including cksum field, doesn't come out 0, so
208 this packet is bad */
215 * 28 is IP (20) + UDP (8) header. ulen includes
216 * udp header, and we *don't* tell RX about udp
217 * header either. So, we remove those 8 as well.
219 data_len = ntohs(tu->uh_ulen);
221 if (!(*rxk_GetPacketProc)(&phandle, data_len)) {
222 if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
223 /* XXX should just increment counter here.. */
224 printf("rx: truncated UDP packet\n");
225 rxi_FreePacket(phandle);
228 (*rxk_PacketArrivalProc)(phandle, &taddr,
229 rxk_portRocks[i], data_len);
236 /* if we get here, try to deliver packet to udp */
237 if (tproc = parent_proto.pr_input) (*tproc)(am, aif);
242 * UDP fast timer to raise events for all but Solaris and NCR.
243 * Called about 5 times per second (at unknown priority?). Must go to
244 * splnet or obtain global lock before touching anything significant.
/* Replacement pr_fasttimo handler installed by rxk_init(): raises pending
 * rx events through rxevent_RaiseEvents() and then calls the original UDP
 * fast-timeout handler saved in parent_proto, if there is one.  The
 * declarations of temp and tproc are on lines not visible in this listing. */
246 static void rxk_fasttimo (void)
251 /* do rx fasttimo processing here */
252 rxevent_RaiseEvents(&temp);
253 if (tproc = parent_proto.pr_fasttimo) (*tproc)();
/*
 * rxk_init - hook rx into the kernel UDP protocol switch.
 *
 * Returns immediately when rxk_initDone is already set.  Otherwise scans
 * the inetdomain protosw table for the IPPROTO_UDP entry, saves a copy of
 * it in parent_proto, and replaces its pr_input and pr_fasttimo handlers
 * with rxk_input and rxk_fasttimo.  The osi_Panic() call is presumably
 * reached only when no UDP entry is found; the lines between the
 * assignments and the panic (and the one that sets rxk_initDone) are not
 * visible in this listing.
 */
257 /* start intercepting basic calls */
258 void rxk_init(void) {
259 register struct protosw *tpro, *last;
260 if (rxk_initDone) return;
262 last = inetdomain.dom_protoswNPROTOSW;
263 for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
264 if (tpro->pr_protocol == IPPROTO_UDP) {
265 memcpy(&parent_proto, tpro, sizeof(parent_proto));
266 tpro->pr_input = rxk_input;
267 tpro->pr_fasttimo = rxk_fasttimo;
272 osi_Panic("inet:no udp");
274 #endif /* RXK_LISTENER_ENV */
277 * RX IP address routines.
/* Cached per-interface IPv4 information, at most ADDRSPERSITE entries.
 * rxi_EnumGetIfInfo() stores each interface address (converted with
 * ntohl) in myNetAddrs and its rx MTU (if_mtu - RX_IPUDP_SIZE) in
 * myNetMTUs; rxi_FindIfnet() triggers a load while numMyNetAddrs is
 * still 0.  myNetFlags is not referenced in the visible code. */
280 static afs_uint32 myNetAddrs[ADDRSPERSITE];
281 static int myNetMTUs[ADDRSPERSITE];
282 static int myNetFlags[ADDRSPERSITE];
283 static int numMyNetAddrs = 0;
285 /* This version doesn't even begin to handle iterative requests, but then
286 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
287 * to find a true maximum.
/*
 * rxi_MatchIfnet - hash_enum() callback used by rxi_FindIfnet().
 *
 *   h    - hash bucket, treated as a struct in_ifaddr
 *   key  - points at the afs_uint32 address being looked up
 *   arg1 - points at an int holding the best match rank so far (in/out)
 *   arg2 - points at a struct in_ifaddr * that receives the chosen entry
 *
 * An interface is considered only when the address lies on its network
 * (ia_netmask / ia_net).  From the visible tests the ranking appears to
 * be: exact address match first, then same subnet (taken while the rank is
 * below 3), then same network only (taken while the rank is below 2).  The
 * rank is written back through arg1.
 *
 * NOTE(review): the lines that assign sin, update match_value and return
 * are not visible in this listing - confirm the exact rank values there.
 */
289 static int rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1,
292 afs_uint32 ppaddr = *(afs_uint32*)key;
293 int match_value = *(int*)arg1;
294 struct in_ifaddr *ifa = (struct in_ifaddr*)h;
295 struct sockaddr_in *sin;
297 if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
298 if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
300 if ( sin->sin_addr.s_addr == ppaddr) { /* ie, ME!!! */
302 *(struct in_ifaddr**)arg2 = ifa;
304 if (match_value < 3) {
305 *(struct in_ifaddr**)arg2 = ifa;
310 if (match_value < 2) {
311 *(struct in_ifaddr**)arg2 = ifa;
316 *(int*)arg1 = match_value;
/*
 * rxi_FindIfnet - find the interface that best matches addr.
 *
 *   addr  - address to look up; it is passed through ntohl() before the
 *           search
 *   pifad - out: receives the matching struct in_ifaddr
 *
 * Loads the interface table first when numMyNetAddrs is still 0, then
 * walks the in_ifaddr hash with rxi_MatchIfnet() and returns the ia_ifp of
 * the entry left in *pifad.
 *
 * NOTE(review): *pifad is seeded with the address of hashinfo_inaddr cast
 * to an in_ifaddr pointer.  Lines between the hash_enum() call and the
 * return are missing from this listing; presumably they handle the
 * no-match case before *pifad is dereferenced - confirm.
 */
321 struct ifnet * rxi_FindIfnet(addr, pifad)
323 struct in_ifaddr **pifad;
328 if (numMyNetAddrs == 0)
329 (void) rxi_GetIFInfo();
331 ppaddr = ntohl(addr);
332 *pifad = (struct in_ifaddr*)&hashinfo_inaddr;
334 (void) hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
335 (caddr_t)&ppaddr, (caddr_t)&match_value, (caddr_t)pifad);
338 return (*pifad)->ia_ifp;
/*
 * rxi_EnumGetIfInfo - hash_enum() callback that refreshes the cached
 * interface address and MTU tables.
 *
 *   h    - hash bucket, treated as a struct in_ifaddr
 *   arg1 - points at an int "different" flag (in/out)
 *   arg2 - declared on a line not visible here; the caller passes the
 *          address of its index variable, so presumably it carries the
 *          slot index i - confirm
 *
 * For the interface behind h the rx MTU is if_mtu - RX_IPUDP_SIZE.  When
 * the cached address in slot i differs from the interface address, the
 * address and MTU slots are overwritten, and the flag is written back
 * through arg1.  The MTU is then scaled for rxi_nRecvFrags fragments and,
 * for any address other than 0x7f000001 (127.0.0.1), used to raise
 * rx_maxReceiveSize, capped by RX_MAX_PACKET_SIZE and
 * rx_maxReceiveSizeUser.
 *
 * NOTE(review): the lines that derive ifnp and i, set different, advance
 * the index and return are not visible in this listing.
 */
343 static int rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
346 int different = *(int*)arg1;
348 struct in_ifaddr *iap = (struct in_ifaddr*)h;
357 rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
358 ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
359 if (myNetAddrs[i] != ifinaddr) {
360 myNetAddrs[i] = ifinaddr;
361 myNetMTUs[i] = rxmtu;
363 *(int*)arg1 = different;
365 rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
366 if ( ( ifinaddr != 0x7f000001 ) &&
367 (rxmtu > rx_maxReceiveSize) ) {
368 rx_maxReceiveSize = MIN( RX_MAX_PACKET_SIZE, rxmtu);
369 rx_maxReceiveSize = MIN( rx_maxReceiveSize, rx_maxReceiveSizeUser);
381 /* SGI 6.2 does not have a pointer from the ifnet to the list
382 * of addresses (if_addrlist). So it's more efficient to run the
383 * in_ifaddr list and use the back pointers to the ifnet structs.
/* NOTE(review): the header of the enclosing function is not visible in
 * this listing; this is presumably the tail of rxi_GetIFInfo().  The code
 * walks the in_ifaddr hash with rxi_EnumGetIfInfo(), passing the addresses
 * of different and i, then recomputes rx_maxJumboRecvSize from
 * rxi_nDgramPackets and makes sure it is at least rx_maxReceiveSize. */
385 (void) hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET,
386 NULL, (caddr_t)&different, (caddr_t)&i);
388 rx_maxJumboRecvSize = RX_HEADER_SIZE
389 + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE
390 + (rxi_nDgramPackets-1) * RX_JUMBOHEADERSIZE;
391 rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
396 /* osi_NetSend - from the now defunct afs_osinet.c */
/*
 * osi_NetSend - send the data described by dvec[0..nvec-1] to addr using
 * sosend().
 *
 *   asocket - socket to send on; handed to sosend() through a behavior
 *             descriptor (BHV_PDATA)
 *   addr    - destination, copied into an MT_SONAME mbuf
 *   dvec    - iovecs to send; copied into the local tvecs array first
 *   nvec    - number of iovecs; more than RX_MAXWVECS+1 panics
 *   asize, istack - not referenced in the visible lines
 *
 * The uio is set up for system space and uio_resid accumulates the sum of
 * the iovec lengths; its initialization is presumably on a line not
 * visible here.  The address mbuf is allocated with M_WAIT.
 *
 * NOTE(review): the lines after sosend() (any freeing of the address mbuf
 * and the return value) are not visible in this listing.
 */
405 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
406 register struct osi_socket *asocket;
409 register afs_int32 asize;
410 struct sockaddr_in *addr;
414 struct iovec tvecs[RX_MAXWVECS+1];
421 if (nvec > RX_MAXWVECS+1) {
422 osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
424 memcpy((char*)tvecs, (char*)dvec, nvec * sizeof(struct iovec));
426 tuio.uio_iov = tvecs;
427 tuio.uio_iovcnt = nvec;
428 tuio.uio_segflg = UIO_SYSSPACE;
430 tuio.uio_sigpipe = 0;
435 for (i=0, iovp = tvecs; i<nvec; i++, iovp++)
436 tuio.uio_resid += iovp->iov_len;
439 to = m_get(M_WAIT, MT_SONAME);
440 to->m_len = sizeof(struct sockaddr_in);
441 memcpy(mtod(to, caddr_t), (char*)addr, to->m_len);
443 BHV_PDATA(&bhv) = (void*)asocket;
444 code = sosend(&bhv, to, &tuio, 0, NULL);
449 #else /* AFS_SGI65_ENV */
/* Stub with the same signature as the function afs_sblockp points to.  It
 * only warns that sblock was called before the real routine was installed;
 * all arguments are ignored in the visible code.  The return statement is
 * on a line not visible in this listing. */
451 int dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
453 afs_warn("sblock was called before it was installed. Install proper afsd.\n");
/* Stub with the same signature as the function afs_sbunlockp points to.
 * It only warns that sbunlock was called before the real routine was
 * installed; all arguments are ignored in the visible code. */
455 void dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
457 afs_warn("sbunlock was called before it was installed. Install proper afsd.\n");
/* Function pointers through which osi_NetSend() locks and unlocks the
 * socket send buffer, plus a call-through macro for the unlock side.  The
 * initializers are on lines not visible in this listing (presumably the
 * dummy_sblock / dummy_sbunlock stubs); the warning text in those stubs
 * suggests the real routines are installed later - confirm where. */
460 int (*afs_sblockp)(struct sockbuf*, int, struct socket*, int*, int) =
462 void (*afs_sbunlockp)(struct sockbuf*, int, struct socket*, int) =
464 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
466 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
468 * Now, why do we allocate a new buffer when we could theoretically use the one
469 * pointed to by adata? Because PRU_SEND returns after queueing the message,
470 * not after sending it. If the sender changes the data after queueing it,
471 * we'd see the already-queued data change. One attempt to fix this without
472 * adding a copy would be to have this function wait until the datagram is
473 * sent; however this doesn't work well. In particular, if a host is down, and
474 * an ARP fails to that host, this packet will be queued until the ARP request
475 * comes back, which could be hours later. We can't block in this routine that
476 * long, since it prevents RPC timeouts from happening.
478 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
479 * and just queue those. XXX
/*
 * osi_NetSend - copy the caller's iovec data into a fresh mbuf chain and
 * send it to addr through the socket protocol's pr_usrreq(PRU_SEND).
 *
 *   asocket - socket to send on; its send buffer is locked through
 *             afs_sblockp for the duration and released with AFS_SBUNLOCK
 *   addr    - destination, copied into an MT_SONAME mbuf
 *   dvec    - iovecs holding the data; walked one entry at a time
 *   nvec    - number of iovecs (its use is on lines not visible here)
 *   asize   - total bytes to send; bounds each mbuf request and copy
 *   istack  - passed through to the lock routine
 *
 * Both the data mbufs (m_vget) and the address mbuf (m_get) are requested
 * with M_DONTWAIT.  When either allocation fails, any chain built so far is
 * freed and the send buffer is unlocked before leaving.
 *
 * NOTE(review): the loop headers, the failure tests, the chain linking and
 * the return statements are on lines not visible in this listing, so the
 * exact return codes and the freeing of um after PRU_SEND cannot be
 * checked from here.
 */
482 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
483 register struct socket *asocket;
486 register afs_int32 asize;
487 struct sockaddr_in *addr;
490 register struct mbuf *tm, *um;
491 register afs_int32 code;
493 struct mbuf *top = 0;
494 register struct mbuf *m, **mp;
501 AFS_STATCNT(osi_NetSend);
503 (*afs_sblockp)(&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
508 tdata = dvec[i].iov_base;
509 tl = dvec[i].iov_len;
511 if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
512 if (top) m_freem(top);
514 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
517 len = MIN(m->m_len, asize);
519 tpa = mtod(m, caddr_t);
522 memcpy(tpa, tdata, rlen);
532 /* shouldn't come here! */
533 asize = 0; /* so we make progress toward completion */
536 tdata = dvec[i].iov_base;
537 tl = dvec[i].iov_len;
547 tm->m_act = (struct mbuf *) 0;
549 /* setup mbuf corresponding to destination address */
550 um = m_get(M_DONTWAIT, MT_SONAME);
552 if (top) m_freem(top); /* free mbuf chain */
553 /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
554 we don't do the locking at all for vfs40 systems */
556 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
559 memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
560 um->m_len = sizeof(*addr);
561 /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
562 * around it is gone. we free address ourselves. */
563 code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0);
566 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
570 #endif /* AFS_SGI65_ENV */