2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include "../afs/param.h"
11 #include "../rx/rx_kcommon.h"
12 #include "../h/tcp-param.h"
13 /* This must be loaded after proc.h to avoid macro collision with a variable*/
14 #include "../netinet/udp_var.h"
19 #ifdef RXK_LISTENER_ENV
21 * OS dependent part of kernel RX listener thread.
24 * so socket to receive on, typically rx_socket
25 * from pointer to a sockaddr_in.
26 * iov array of iovecs to fill in.
27 * iovcnt how many iovecs there are.
28 * lengthp IN/OUT in: total space available in iovecs. out: size of read.
32 * error code (such as EINTR) if not
35 * Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
36 * so we have a little space to look for packets larger than
/* Receive-path diagnostics maintained by osi_NetReceive in this file. */
/* Last soreceive result recorded next to the error-clearing code. */
39 int rxk_lastSocketError = 0;
/* Bumped each time rxk_lastSocketError is recorded. */
40 int rxk_nSocketErrors = 0;
/* Bumped after pending signals on the current thread have been cleared. */
41 int rxk_nSignalsCleared = 0;
/*
 * osi_NetReceive - receive one datagram from a socket into caller iovecs.
 *
 * Parameters:
 *   so       socket handed to soreceive.
 *   from     receives a struct sockaddr_in copied out of the address mbuf
 *            (maddr) that soreceive returns.
 *   iov      caller iovec array; its first iovcnt entries are copied into
 *            the local tmpvec array, and the uio points at that copy.
 *   iovcnt   number of valid entries in iov. Values above RX_MAXWVECS+2
 *            (the size of tmpvec) go to osi_Panic. Assumed non-negative;
 *            callers are not visible here - TODO confirm.
 *   lengthp  in: total space described by iov, used as the initial uio
 *            residual; out: input value minus the residual left by
 *            soreceive.
 *
 * The soreceive result is kept in code, and is also stored in
 * rxk_lastSocketError with rxk_nSocketErrors incremented next to the
 * error-clearing comments. The return statements are on lines that are not
 * part of this listing.
 */
42 int osi_NetReceive(osi_socket so, struct sockaddr_in *from,
43 struct iovec *iov, int iovcnt, int *lengthp)
47 struct mbuf *maddr = NULL;
48 struct sockaddr_in *taddr;
49 struct iovec tmpvec[RX_MAXWVECS+2];
52 BHV_PDATA(&bhv) = (void*)so;
/* The uio walks the local copy of the iovecs, not the caller array. */
55 tuio.uio_iov = tmpvec;
56 tuio.uio_iovcnt = iovcnt;
58 tuio.uio_segflg = AFS_UIOSYS;
60 tuio.uio_resid = *lengthp;
64 if (iovcnt > RX_MAXWVECS+2) {
65 osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
/* Copy exactly iovcnt entries. The guard above keeps iovcnt within the size
 * of tmpvec (osi_Panic is expected not to return). The previous fixed count
 * of RX_MAXWVECS+1 could read past a shorter caller array, and left the last
 * tmpvec slot uninitialized when iovcnt was RX_MAXWVECS+2. */
67 bcopy((char*)iov, tmpvec, iovcnt * sizeof(struct iovec));
/* Two soreceive variants follow: one through the bhv descriptor set up with
 * BHV_PDATA, one directly on so. Presumably a preprocessor conditional on
 * lines not shown selects between them. */
69 code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
71 code = soreceive(so, &maddr, &tuio, NULL, NULL);
76 /* Clear the error before using the socket again. I've tried being nice
77 * and blocking SIGKILL and SIGSTOP from the kernel, but they get
78 * delivered anyway. So, time to be crude and just clear the signals
79 * pending on this thread.
82 uthread_t *ut = curuthread;
85 sigemptyset(&ut->ut_sig);
87 thread_interrupt_clear(UT_TO_KT(ut), 1);
89 rxk_nSignalsCleared++;
92 /* Clear the error before using the socket again. */
94 rxk_lastSocketError = code;
95 rxk_nSocketErrors ++ ;
/* Report the number of bytes received: space offered minus what is left. */
100 *lengthp = *lengthp - tuio.uio_resid;
102 bcopy((char*)mtod(maddr, struct sockaddr_in *), (char*)from,
103 sizeof(struct sockaddr_in));
112 #else /* RXK_LISTENER_ENV */
/* Saved copy of the UDP protocol switch entry. rxk_init fills it with bcopy
 * before overriding pr_input and pr_fasttimo; rxk_input and rxk_fasttimo call
 * through the saved pr_input and pr_fasttimo members. */
114 static struct protosw parent_proto; /* udp proto switch */
117 * RX input, fast timer and initialization routines.
/*
 * rxk_input - packet input hook that rxk_init installs as pr_input of the
 * UDP protocol switch entry.
 *
 * Parameters:
 *   am    mbuf chain holding the IP datagram.
 *   aif   interface argument; only forwarded to the saved pr_input.
 *   spec  present only in the first of the two signatures below and not used
 *         in the visible lines. Presumably a preprocessor conditional on
 *         lines not shown selects one signature.
 *
 * Flow, as far as the visible lines show:
 *   - pull the IP and UDP headers into the first mbuf (m_pullup);
 *   - read a port from the UDP header (the assignment is on a line not
 *     shown) and compare it with each of the MAXRXPORTS entries of rxk_ports;
 *   - on a match: strip IP options, compare the UDP length with ip_len and
 *     adjust the mbuf with m_adj, record the source address and port in
 *     taddr, verify the checksum when uh_sum is non-zero and the mbuf is not
 *     flagged M_CKSUMMED, obtain a packet through rxk_GetPacketProc, fill it
 *     with rx_mb_to_packet and deliver it through rxk_PacketArrivalProc
 *     together with the rxk_portRocks entry of the matching index;
 *   - otherwise hand the packet to the saved parent_proto.pr_input, when set.
 */
121 static void rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec * spec)
123 static void rxk_input(struct mbuf *am, struct ifnet *aif)
127 register unsigned short *tsp;
130 register struct ip *ti;
131 struct udpiphdr *tvu;
134 struct sockaddr_in taddr;
/* 28 bytes = 20-byte basic IP header + 8-byte UDP header. */
139 /* make sure we have base ip and udp headers in first mbuf */
140 if (am->m_off > MMAXOFF || am->m_len < 28) {
141 am = m_pullup(am, 28);
145 hdr = (mtod(am, struct ip *))->ip_hl;
147 /* pull up more, the IP hdr is bigger than usual */
/* hdr<<2 is used as the IP header length in bytes: it is the byte offset of
 * the UDP header in the assignment to tu below. */
148 if (am->m_len < (8 + (hdr<<2))) {
149 am = m_pullup(am, 8+(hdr<<2));
152 ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
153 tu = (struct udphdr *)(((char *)ti) + (hdr<<2)); /* skip ip hdr */
156 ti = mtod(am, struct ip *);
157 tu = (struct udphdr *)(((char *)ti) + 20); /* skip basic ip hdr */
159 /* now read the port out */
163 for(tsp=rxk_ports, i=0; i<MAXRXPORTS;i++) {
164 if (*tsp++ == port) {
165 /* checksum the packet */
167 ip_stripoptions(am, (struct mbuf *) 0); /* get rid of anything we don't need */
168 tu = (struct udphdr *)(((char *)ti) + 20);
171 * Make mbuf data length reflect UDP length.
172 * If not enough data to reflect UDP length, drop.
174 tvu = (struct udpiphdr *)ti;
175 tlen = ntohs((u_short)tvu->ui_ulen);
176 if ((int)ti->ip_len != tlen) {
177 if (tlen > (int)ti->ip_len) {
181 m_adj(am, tlen - (int)ti->ip_len);
183 /* deliver packet to rx */
184 taddr.sin_family = AF_INET; /* compute source address */
185 taddr.sin_port = tu->uh_sport;
186 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
187 /* handle the checksum. Note that this code damages the actual ip
188 header (replacing it with the virtual one, which is the same size),
189 so we must ensure we get everything out we need, first */
190 if ( tu->uh_sum != 0) {
191 /* if the checksum is there, always check it. It's crazy not
192 * to, unless you can really be sure that your
193 * underlying network (and interfaces and drivers and
194 * DMA hardware, etc!) is error-free. First, fill
195 * in entire virtual ip header. */
199 tvu->ui_len = tvu->ui_ulen;
200 tlen = ntohs((unsigned short)(tvu->ui_ulen));
201 if ((!(am->m_flags & M_CKSUMMED)) &&
202 in_cksum(am, sizeof(struct ip) + tlen)){
203 /* checksum, including cksum field, doesn't come out 0, so
204 this packet is bad */
211 * 28 is IP (20) + UDP (8) header. ulen includes
212 * udp header, and we *don't* tell RX about udp
213 * header either. So, we remove those 8 as well.
215 data_len = ntohs(tu->uh_ulen);
217 if (!(*rxk_GetPacketProc)(&phandle, data_len)) {
218 if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
219 /* XXX should just increment counter here.. */
220 printf("rx: truncated UDP packet\n");
221 rxi_FreePacket(phandle);
224 (*rxk_PacketArrivalProc)(phandle, &taddr,
225 rxk_portRocks[i], data_len);
232 /* if we get here, try to deliver packet to udp */
233 if (tproc = parent_proto.pr_input) (*tproc)(am, aif);
238 * UDP fast timer to raise events for all but Solaris and NCR.
239 * Called about 5 times per second (at unknown priority?). Must go to
240 * splnet or obtain global lock before touching anything significant.
/*
 * rxk_fasttimo - fast timeout hook that rxk_init installs as pr_fasttimo of
 * the UDP protocol switch entry. It raises rx events through
 * rxevent_RaiseEvents and then calls the saved parent_proto.pr_fasttimo when
 * that pointer is non-null. Takes no arguments and returns nothing.
 */
242 static void rxk_fasttimo (void)
247 /* do rx fasttimo processing here */
248 rxevent_RaiseEvents(&temp);
/* The assignment inside the condition is deliberate: tproc takes the saved
 * hook and is called only when it is non-null. */
249 if (tproc = parent_proto.pr_fasttimo) (*tproc)();
253 /* start intercepting basic calls */
/*
 * rxk_init - hook rx into the UDP entry of the inet protocol switch.
 *
 * Returns at once when rxk_initDone is already set. Otherwise it walks the
 * entries from inetdomain.dom_protosw up to inetdomain.dom_protoswNPROTOSW,
 * and for the entry whose pr_protocol is IPPROTO_UDP it saves a copy in
 * parent_proto and replaces pr_input and pr_fasttimo with rxk_input and
 * rxk_fasttimo.
 *
 * NOTE(review): the osi_Panic call is presumably reached only when no UDP
 * entry exists; the lines that end the successful path are not shown.
 */
254 void rxk_init(void) {
255 register struct protosw *tpro, *last;
256 if (rxk_initDone) return;
258 last = inetdomain.dom_protoswNPROTOSW;
259 for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
260 if (tpro->pr_protocol == IPPROTO_UDP) {
/* Save the whole entry first so the replaced hooks can still be chained. */
261 bcopy(tpro, &parent_proto, sizeof(parent_proto));
262 tpro->pr_input = rxk_input;
263 tpro->pr_fasttimo = rxk_fasttimo;
268 osi_Panic("inet:no udp");
270 #endif /* RXK_LISTENER_ENV */
273 * RX IP address routines.
/* Cached interface data, ADDRSPERSITE slots each. rxi_EnumGetIfInfo stores
 * an address (converted with ntohl) in myNetAddrs and the matching MTU in
 * myNetMTUs at the same index. rxi_FindIfnet calls rxi_GetIFInfo first while
 * numMyNetAddrs is zero. No use of myNetFlags appears in the visible lines. */
276 static afs_uint32 myNetAddrs[ADDRSPERSITE];
277 static int myNetMTUs[ADDRSPERSITE];
278 static int myNetFlags[ADDRSPERSITE];
279 static int numMyNetAddrs = 0;
/*
 * rxi_MatchIfnet - callback that rxi_FindIfnet passes to hash_enum in order
 * to pick the interface address that best matches a peer address.
 *
 * Parameters:
 *   h     enumerated entry, used as a struct in_ifaddr.
 *   key   points to the peer address (afs_uint32) to match.
 *   arg1  points to an int with the best match rank so far; the local rank
 *         is written back through it at the end.
 *   arg2  points to a struct in_ifaddr pointer that receives the chosen
 *         candidate.
 *
 * The visible tests are: peer on the same network (ia_netmask and ia_net),
 * peer on the same subnet (ia_subnetmask and ia_subnet), peer address equal
 * to the interface address, rank below 3, and rank below 2. Each of the last
 * three stores the candidate through arg2.
 *
 * NOTE(review): how these tests nest, the rank assigned on each path and
 * the return value are on lines not shown - confirm against the full file.
 */
281 /* This version doesn't even begin to handle iterative requests, but then
282 * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
283 * to find a true maximum.
285 static int rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1,
288 afs_uint32 ppaddr = *(afs_uint32*)key;
289 int match_value = *(int*)arg1;
290 struct in_ifaddr *ifa = (struct in_ifaddr*)h;
291 struct sockaddr_in *sin;
293 if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
294 if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
296 if ( sin->sin_addr.s_addr == ppaddr) { /* ie, ME!!! */
298 *(struct in_ifaddr**)arg2 = ifa;
300 if (match_value < 3) {
301 *(struct in_ifaddr**)arg2 = ifa;
306 if (match_value < 2) {
307 *(struct in_ifaddr**)arg2 = ifa;
312 *(int*)arg1 = match_value;
/*
 * rxi_FindIfnet - find the interface to use for a peer address.
 *
 * Parameters:
 *   addr   peer address; converted with ntohl before matching, so presumably
 *          supplied in network byte order - TODO confirm with callers.
 *   pifad  out: pointer that rxi_MatchIfnet updates with the chosen
 *          struct in_ifaddr.
 *
 * When numMyNetAddrs is zero the interface cache is filled first through
 * rxi_GetIFInfo. *pifad is preset to the address of hashinfo_inaddr before
 * the enumeration.
 * NOTE(review): that preset looks like a marker for no match found - confirm.
 *
 * The one visible return yields the ia_ifp member of the entry returned by
 * hash_enum; any other return paths are on lines not shown.
 */
317 struct ifnet * rxi_FindIfnet(addr, pifad)
319 struct in_ifaddr **pifad;
323 struct hashbucket *slop;
325 if (numMyNetAddrs == 0)
326 (void) rxi_GetIFInfo();
328 ppaddr = ntohl(addr);
329 *pifad = (struct in_ifaddr*)&hashinfo_inaddr;
331 slop = hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
332 (caddr_t)&ppaddr, (caddr_t)&match_value, (caddr_t)pifad);
335 return ((struct in_ifaddr*)slop)->ia_ifp;
/*
 * rxi_EnumGetIfInfo - hash_enum callback that records one interface address
 * in the local cache and updates the rx receive size limit.
 *
 * Parameters:
 *   h     enumerated entry, used as a struct in_ifaddr.
 *   key   not used in the visible lines.
 *   arg1  points to an int change flag; read on entry into the local
 *         variable different and written back through the same pointer.
 *
 * Steps shown here:
 *   - rxmtu is the interface MTU minus RX_IPUDP_SIZE;
 *   - when the cached address at index i differs, myNetAddrs[i] and
 *     myNetMTUs[i] are replaced;
 *   - rxmtu is then scaled to rxi_nRecvFrags fragments plus one UDP_HDR_SIZE
 *     for every fragment after the first;
 *   - for addresses other than 0x7f000001, a larger rxmtu raises
 *     rx_maxReceiveSize, limited by RX_MAX_PACKET_SIZE and by
 *     rx_maxReceiveSizeUser.
 *
 * NOTE(review): the origin of index i, the interface pointer ifnp and the
 * return value are on lines not shown.
 */
342 static int rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
345 int different = *(int*)arg1;
347 struct in_ifaddr *iap = (struct in_ifaddr*)h;
356 rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
357 ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
358 if (myNetAddrs[i] != ifinaddr) {
359 myNetAddrs[i] = ifinaddr;
360 myNetMTUs[i] = rxmtu;
362 *(int*)arg1 = different;
364 rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
/* 0x7f000001 is 127.0.0.1 after ntohl; that address never raises the limit. */
365 if ( ( ifinaddr != 0x7f000001 ) &&
366 (rxmtu > rx_maxReceiveSize) ) {
367 rx_maxReceiveSize = MIN( RX_MAX_PACKET_SIZE, rxmtu);
368 rx_maxReceiveSize = MIN( rx_maxReceiveSize, rx_maxReceiveSizeUser);
/* Interface refresh: run rxi_EnumGetIfInfo over every HTF_INET entry of
 * hashinfo_inaddr, then derive rx_maxJumboRecvSize from RX_HEADER_SIZE,
 * rxi_nDgramPackets jumbo buffers and one jumbo header per packet after the
 * first, keeping it at least rx_maxReceiveSize.
 * NOTE(review): the header of the enclosing function is not among the
 * visible lines; presumably this is the body of rxi_GetIFInfo - confirm. */
380 /* SGI 6.2 does not have a pointer from the ifnet to the list
381 * of addresses (if_addrlist). So it's more efficient to run the
382 * in_ifaddr list and use the back pointers to the ifnet struct's.
384 (void) hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET,
385 NULL, (caddr_t)&different, (caddr_t)&i);
387 rx_maxJumboRecvSize = RX_HEADER_SIZE
388 + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE
389 + (rxi_nDgramPackets-1) * RX_JUMBOHEADERSIZE;
390 rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
395 /* osi_NetSend - from the now defunct afs_osinet.c */
/*
 * osi_NetSend (variant that precedes the #else for AFS_SGI65_ENV) - send the
 * data described by an iovec array to addr through sosend.
 *
 * Parameters:
 *   asocket  socket to send on; stored in the bhv descriptor given to sosend.
 *   addr     destination; copied into an MT_SONAME mbuf of
 *            sizeof(struct sockaddr_in) bytes.
 *   dvec     iovec array describing the data; nvec entries are copied into
 *            the local tvecs array, which the uio points at.
 *   nvec     number of entries in dvec; values above RX_MAXWVECS+1 go to
 *            osi_Panic.
 *   asize    not used in the visible lines.
 *   istack   not used in the visible lines.
 *
 * The uio residual accumulates the iov_len of every copied entry.
 * NOTE(review): its starting value and the return statement are on lines
 * not shown.
 */
404 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
405 register struct osi_socket *asocket;
408 register afs_int32 asize;
409 struct sockaddr_in *addr;
413 struct iovec tvecs[RX_MAXWVECS+1];
420 if (nvec > RX_MAXWVECS+1) {
421 osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
423 bcopy((char*)dvec, (char*)tvecs, nvec * sizeof(struct iovec));
425 tuio.uio_iov = tvecs;
426 tuio.uio_iovcnt = nvec;
427 tuio.uio_segflg = UIO_SYSSPACE;
429 tuio.uio_sigpipe = 0;
434 for (i=0, iovp = tvecs; i<nvec; i++, iovp++)
435 tuio.uio_resid += iovp->iov_len;
/* Destination address travels in its own mbuf. */
438 to = m_get(M_WAIT, MT_SONAME);
439 to->m_len = sizeof(struct sockaddr_in);
440 bcopy((char*)addr, mtod(to, caddr_t), to->m_len);
442 BHV_PDATA(&bhv) = (void*)asocket;
443 code = sosend(&bhv, to, &tuio, 0, NULL);
448 #else /* AFS_SGI65_ENV */
/* Placeholder with the signature of the sblock hook: it only warns that
 * sblock was called before the real routine was installed. Its arguments are
 * not used in the visible lines and its return statement is not shown. */
450 int dummy_sblock(struct sockbuf *a, int b, struct socket *c, int *d, int e)
452 afs_warn("sblock was called before it was installed. Install proper afsd.\n");
/* Placeholder with the signature of the sbunlock hook: it only warns that
 * sbunlock was called before the real routine was installed. Its arguments
 * are not used in the visible lines. */
454 void dummy_sbunlock(struct sockbuf *a, int b, struct socket *c, int d)
456 afs_warn("sbunlock was called before it was installed. Install proper afsd.\n");
/* Function pointer hooks for locking and unlocking a socket buffer. The
 * second osi_NetSend variant in this file calls afs_sblockp directly and
 * afs_sbunlockp through the AFS_SBUNLOCK macro.
 * NOTE(review): the initializers are on lines not shown; presumably they are
 * dummy_sblock and dummy_sbunlock - confirm. */
459 int (*afs_sblockp)(struct sockbuf*, int, struct socket*, int*, int) =
461 void (*afs_sbunlockp)(struct sockbuf*, int, struct socket*, int) =
/* AFS_SBUNLOCK forwards its four arguments to the afs_sbunlockp hook. */
463 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
/*
 * osi_NetSend (variant that follows the #else for AFS_SGI65_ENV) - copy the
 * caller data into newly allocated mbufs and queue it with PRU_SEND.
 *
 * Parameters:
 *   asocket  socket to send on; its so_snd buffer is locked through
 *            afs_sblockp and its protocol pr_usrreq performs the send.
 *   addr     destination; copied into an MT_SONAME mbuf.
 *   dvec     iovec array describing the data to copy.
 *   nvec     not referenced in the visible lines; presumably the number of
 *            entries in dvec - TODO confirm.
 *   asize    number of bytes still to copy; it bounds each m_vget request
 *            and each copy length.
 *   istack   forwarded to afs_sblockp.
 *
 * Visible behaviour:
 *   - data mbufs come from m_vget with M_DONTWAIT; when that fails, any
 *     chain built so far (top) is freed and the send buffer is unlocked;
 *   - the address mbuf comes from m_get with M_DONTWAIT; the lines after it
 *     free the chain and unlock the send buffer, presumably on failure;
 *   - the send itself is pr_usrreq(asocket, PRU_SEND, tm, um, 0), followed
 *     by AFS_SBUNLOCK.
 *
 * NOTE(review): loop structure, return values and the handling of um after
 * the send are on lines not shown.
 */
465 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
467 * Now, why do we allocate a new buffer when we could theoretically use the one
468 * pointed to by adata? Because PRU_SEND returns after queueing the message,
469 * not after sending it. If the sender changes the data after queueing it,
470 * we'd see the already-queued data change. One attempt to fix this without
471 * adding a copy would be to have this function wait until the datagram is
472 * sent; however this doesn't work well. In particular, if a host is down, and
473 * an ARP fails to that host, this packet will be queued until the ARP request
474 * comes back, which could be hours later. We can't block in this routine that
475 * long, since it prevents RPC timeouts from happening.
477 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
478 * and just queue those. XXX
481 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
482 register struct socket *asocket;
485 register afs_int32 asize;
486 struct sockaddr_in *addr;
489 register struct mbuf *tm, *um;
490 register afs_int32 code;
492 struct mbuf *top = 0;
493 register struct mbuf *m, **mp;
500 AFS_STATCNT(osi_NetSend);
502 (*afs_sblockp)(&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
507 tdata = dvec[i].iov_base;
508 tl = dvec[i].iov_len;
510 if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
511 if (top) m_freem(top);
513 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
516 len = MIN(m->m_len, asize);
518 tpa = mtod(m, caddr_t);
521 bcopy(tdata, tpa, rlen);
531 /* shouldn't come here! */
532 asize = 0; /* so we make progress toward completion */
535 tdata = dvec[i].iov_base;
536 tl = dvec[i].iov_len;
546 tm->m_act = (struct mbuf *) 0;
548 /* setup mbuf corresponding to destination address */
549 um = m_get(M_DONTWAIT, MT_SONAME);
551 if (top) m_freem(top); /* free mbuf chain */
552 /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
553 we don't do the locking at all for vfs40 systems */
555 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
558 bcopy(addr, mtod(um, caddr_t), sizeof(*addr));
559 um->m_len = sizeof(*addr);
560 /* note that udp_usrreq frees funny mbuf. We hold onto data, but mbuf
561 * around it is gone. we free address ourselves. */
562 code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0);
565 AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
569 #endif /* AFS_SGI65_ENV */