src/rx/FBSD/rx_knet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include "afs/param.h"
  12
  13 RCSID
  14     ("$Header$");
  15
  16 #ifdef AFS_FBSD40_ENV
  17 #include <sys/malloc.h>
  18 #include "rx/rx_kcommon.h"
  19
  20 #ifdef RXK_LISTENER_ENV
  21 int
  22 osi_NetReceive(osi_socket asocket, struct sockaddr_in *addr,
  23                struct iovec *dvec, int nvecs, int *alength)
  24 {
  25     struct uio u;
  26     int i;
  27     struct iovec iov[RX_MAXIOVECS];
  28     struct sockaddr *sa = NULL;
  29     int code;
  30
  31     int haveGlock = ISAFS_GLOCK();
  32     /*AFS_STATCNT(osi_NetReceive); */
  33
  34     if (nvecs > RX_MAXIOVECS)
  35         osi_Panic("osi_NetReceive: %d: Too many iovecs.\n", nvecs);
  36
  37     for (i = 0; i < nvecs; i++)
  38         iov[i] = dvec[i];
  39
  40     u.uio_iov = &iov[0];
  41     u.uio_iovcnt = nvecs;
  42     u.uio_offset = 0;
  43     u.uio_resid = *alength;
  44     u.uio_segflg = UIO_SYSSPACE;
  45     u.uio_rw = UIO_READ;
  46 #ifdef AFS_FBSD50_ENV
  47     u.uio_td = NULL;
  48 #else
  49     u.uio_procp = NULL;
  50 #endif
  51
  52     if (haveGlock)
  53         AFS_GUNLOCK();
  54     code = soreceive(asocket, &sa, &u, NULL, NULL, NULL);
  55     if (haveGlock)
  56         AFS_GLOCK();
  57
  58     if (code) {
  59 #if KNET_DEBUG
  60         if (code == EINVAL)
  61             Debugger("afs NetReceive busted");
  62         else
  63             printf("y");
  64 #else
  65         return code;
  66 #endif
  67     }
  68     *alength -= u.uio_resid;
  69     if (sa) {
  70         if (sa->sa_family == AF_INET) {
  71             if (addr)
  72                 *addr = *(struct sockaddr_in *)sa;
  73         } else
  74             printf("Unknown socket family %d in NetReceive\n", sa->sa_family);
  75         FREE(sa, M_SONAME);
  76     }
  77     return code;
  78 }
  79
  80 extern int rxk_ListenerPid;
  81 void
  82 osi_StopListener(void)
  83 {
  84     struct proc *p;
  85
  86     /*
  87      * Have to drop global lock to safely do this.
  88      * soclose() is currently protected by Giant,
  89      * but pfind and psignal are MPSAFE.
  90      */
  91     AFS_GUNLOCK();
  92     soclose(rx_socket);
  93     p = pfind(rxk_ListenerPid);
  94     if (p)
  95         psignal(p, SIGUSR1);
  96 #ifdef AFS_FBSD50_ENV
  97     PROC_UNLOCK(p);
  98 #endif
  99     AFS_GLOCK();
 100 }
 101
 102 int
 103 osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
 104             int nvecs, afs_int32 alength, int istack)
 105 {
 106     register afs_int32 code;
 107     int i;
 108     struct iovec iov[RX_MAXIOVECS];
 109     struct uio u;
 110     int haveGlock = ISAFS_GLOCK();
 111
 112     AFS_STATCNT(osi_NetSend);
 113     if (nvecs > RX_MAXIOVECS)
 114         osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvecs);
 115
 116     for (i = 0; i < nvecs; i++)
 117         iov[i] = dvec[i];
 118
 119     u.uio_iov = &iov[0];
 120     u.uio_iovcnt = nvecs;
 121     u.uio_offset = 0;
 122     u.uio_resid = alength;
 123     u.uio_segflg = UIO_SYSSPACE;
 124     u.uio_rw = UIO_WRITE;
 125 #ifdef AFS_FBSD50_ENV
 126     u.uio_td = NULL;
 127 #else
 128     u.uio_procp = NULL;
 129 #endif
 130
 131     addr->sin_len = sizeof(struct sockaddr_in);
 132
 133     if (haveGlock)
 134         AFS_GUNLOCK();
 135 #if KNET_DEBUG
 136     printf("+");
 137 #endif
 138 #ifdef AFS_FBSD50_ENV
 139     code =
 140         sosend(asocket, (struct sockaddr *)addr, &u, NULL, NULL, 0,
 141                curthread);
 142 #else
 143     code =
 144         sosend(asocket, (struct sockaddr *)addr, &u, NULL, NULL, 0, curproc);
 145 #endif
 146 #if KNET_DEBUG
 147     if (code) {
 148         if (code == EINVAL)
 149             Debugger("afs NetSend busted");
 150         else
 151             printf("z");
 152     }
 153 #endif
 154     if (haveGlock)
 155         AFS_GLOCK();
 156     return code;
 157 }
 158 #else
 159 /* This code *almost* works :( */
 160 static struct protosw parent_proto;     /* udp proto switch */
 161 static void rxk_input(struct mbuf *am, int iphlen);
 162 static void rxk_fasttimo(void);
 163
 164 /* start intercepting basic calls */
 165 rxk_init()
 166 {
 167     register struct protosw *tpro, *last;
 168     if (rxk_initDone)
 169         return 0;
 170
 171     last = inetdomain.dom_protoswNPROTOSW;
 172     for (tpro = inetdomain.dom_protosw; tpro < last; tpro++)
 173         if (tpro->pr_protocol == IPPROTO_UDP) {
 174 #if 0                           /* not exported */
 175             /* force UDP checksumming on for AFS    */
 176             extern int udpcksum;
 177             udpcksum = 1;
 178 #endif
 179             memcpy(&parent_proto, tpro, sizeof(parent_proto));
 180             tpro->pr_input = rxk_input;
 181             tpro->pr_fasttimo = rxk_fasttimo;
 182             /*
 183              * don't bother with pr_drain and pr_ctlinput
 184              * until we have something to do
 185              */
 186             rxk_initDone = 1;
 187             return 0;
 188         }
 189     osi_Panic("inet:no udp");
 190 }
 191
 192
 193 static void
 194 rxk_input(struct mbuf *am, int iphlen)
 195 {
 196     void (*tproc) ();
 197     register unsigned short *tsp;
 198     int hdr;
 199     struct udphdr *tu;
 200     register struct ip *ti;
 201     struct udpiphdr *tvu;
 202     register int i;
 203     char *phandle;
 204     afs_int32 code;
 205     struct sockaddr_in taddr;
 206     int tlen;
 207     short port;
 208     int data_len, comp_sum;
 209
 210     SPLVAR;
 211     NETPRI;
 212
 213     /* make sure we have base ip and udp headers in first mbuf */
 214     if (iphlen > sizeof(struct ip)) {
 215         ip_stripoptions(am, NULL);
 216         iphlen = sizeof(struct ip);
 217     }
 218
 219     if (am->m_len < sizeof(struct udpiphdr)) {
 220         am = m_pullup(am, sizeof(struct udpiphdr));
 221         if (!am) {
 222             USERPRI;
 223             return;
 224         }
 225     }
 226
 227     ti = mtod(am, struct ip *);
 228     /* skip basic ip hdr */
 229     tu = (struct udphdr *)(((char *)ti) + sizeof(struct ip));
 230
 231     /* now read the port out */
 232     port = tu->uh_dport;
 233
 234     if (port) {
 235         for (tsp = rxk_ports, i = 0; i < MAXRXPORTS; i++) {
 236             if (*tsp++ == port) {
 237                 /* checksum the packet */
 238                 /*
 239                  * Make mbuf data length reflect UDP length.
 240                  * If not enough data to reflect UDP length, drop.
 241                  */
 242                 tvu = (struct udpiphdr *)ti;
 243                 tlen = ntohs((u_short) tvu->ui_ulen);
 244                 if ((int)ti->ip_len != tlen) {
 245                     if (tlen > (int)ti->ip_len) {
 246                         m_free(am);
 247                         USERPRI;
 248                         return;
 249                     }
 250                     m_adj(am, tlen - (int)ti->ip_len);
 251                 }
 252                 /* deliver packet to rx */
 253                 taddr.sin_family = AF_INET;     /* compute source address */
 254                 taddr.sin_port = tu->uh_sport;
 255                 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
 256                 taddr.sin_len = sizeof(taddr);
 257                 tvu = (struct udpiphdr *)ti;    /* virtual udp structure, for cksum */
 258                 /* handle the checksum.  Note that this code damages the actual ip
 259                  * header (replacing it with the virtual one, which is the same size),
 260                  * so we must ensure we get everything out we need, first */
 261                 if (tu->uh_sum != 0) {
 262                     /* if the checksum is there, always check it. It's crazy not
 263                      * to, unless you can really be sure that your
 264                      * underlying network (and interfaces and drivers and
 265                      * DMA hardware, etc!) is error-free. First, fill
 266                      * in entire virtual ip header. */
 267                     memset(tvu->ui_i.ih_x1, 0, 9);
 268                     tvu->ui_len = tvu->ui_ulen;
 269                     tlen = ntohs((unsigned short)(tvu->ui_ulen));
 270                     if (in_cksum(am, sizeof(struct ip) + tlen)) {
 271                         /* checksum, including cksum field, doesn't come out 0, so
 272                          * this packet is bad */
 273                         m_freem(am);
 274                         USERPRI;
 275                         return;
 276                     }
 277                 }
 278
 279                 /*
 280                  * 28 is IP (20) + UDP (8) header.  ulen includes
 281                  * udp header, and we *don't* tell RX about udp
 282                  * header either.  So, we remove those 8 as well.
 283                  */
 284                 data_len = ntohs(tu->uh_ulen);
 285                 data_len -= 8;
 286                 AFS_RXGLOCK();
 287                 if (!(*rxk_GetPacketProc) (&phandle, data_len)) {
 288                     if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
 289                         /* XXX should just increment counter here.. */
 290                         printf("rx: truncated UDP packet\n");
 291                         rxi_FreePacket(phandle);
 292                     } else
 293                         (*rxk_PacketArrivalProc) (phandle, &taddr,
 294                                                   rxk_portRocks[i], data_len);
 295                 } else
 296                     m_freem(am);
 297                 AFS_RXGUNLOCK();
 298                 USERPRI;
 299                 return;
 300             }
 301         }
 302     }
 303
 304     /* if we get here, try to deliver packet to udp */
 305     if (tproc = parent_proto.pr_input)
 306         (*tproc) (am, iphlen);
 307     USERPRI;
 308     return;
 309 }
 310
 311
 312 /*
 313  * UDP fast timer to raise events for all but Solaris and NCR.
 314  * Called about 5 times per second (at unknown priority?).  Must go to
 315  * splnet or obtain global lock before touching anything significant.
 316  */
 317 static void
 318 rxk_fasttimo(void)
 319 {
 320     void (*tproc) ();
 321     struct clock temp;
 322
 323     /* do rx fasttimo processing here */
 324     rxevent_RaiseEvents(&temp);
 325     if (tproc = parent_proto.pr_fasttimo)
 326         (*tproc) ();
 327 }
 328
 329 /* rx_NetSend - send asize bytes at adata from asocket to host at addr.
 330  *
 331  * Now, why do we allocate a new buffer when we could theoretically use the one
 332  * pointed to by adata?  Because PRU_SEND returns after queueing the message,
 333  * not after sending it.  If the sender changes the data after queueing it,
 334  * we'd see the already-queued data change.  One attempt to fix this without
 335  * adding a copy would be to have this function wait until the datagram is
 336  * sent; however this doesn't work well.  In particular, if a host is down, and
 337  * an ARP fails to that host, this packet will be queued until the ARP request
 338  * comes back, which could be hours later.  We can't block in this routine that
 339  * long, since it prevents RPC timeouts from happening.
 340  */
 341 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 342  * and just queue those.  XXX
 343  */
 344
 345 /* set lock on sockbuf sb; can't call sblock since we're at interrupt level
 346  * sometimes */
 347 static
 348 trysblock(sb)
 349      register struct sockbuf *sb;
 350 {
 351     AFS_STATCNT(trysblock);
 352     if (sb->sb_flags & SB_LOCK) {
 353         return -1;              /* can't lock socket */
 354     }
 355     sb->sb_flags |= SB_LOCK;
 356     return 0;
 357 }
 358
 359 /* We only have to do all the mbuf management ourselves if we can be called at
 360    interrupt time. in RXK_LISTENER_ENV, we can just call sosend() */
 361 int
 362 osi_NetSend(osi_socket asocket, struct sockaddr_in *addr, struct iovec *dvec,
 363             int nvec, afs_int32 asize, int istack)
 364 {
 365     register struct mbuf *tm, *um;
 366     register afs_int32 code;
 367     int s;
 368     struct mbuf *top = 0;
 369     register struct mbuf *m, **mp;
 370     int len;
 371     char *tdata;
 372     caddr_t tpa;
 373     int i, tl, rlen;
 374     int mlen;
 375     int haveGlock;
 376 #if KNET_DEBUG
 377     static int before = 0;
 378 #endif
 379
 380     AFS_STATCNT(osi_NetSend);
 381 /* Actually, the Ultrix way is as good as any for us, so we don't bother with
 382  * special mbufs any more.  Used to think we could get away with not copying
 383  * the data to the interface, but there's no way to tell the caller not to
 384  * reuse the buffers after sending, so we lost out on that trick anyway */
 385     s = splnet();
 386     if (trysblock(&asocket->so_snd)) {
 387         splx(s);
 388         return 1;
 389     }
 390     mp = &top;
 391     i = 0;
 392     tdata = dvec[i].iov_base;
 393     tl = dvec[i].iov_len;
 394     while (1) {
 395         mlen = MLEN;
 396         if (top == 0) {
 397             MGETHDR(m, M_DONTWAIT, MT_DATA);
 398             if (!m) {
 399                 sbunlock(&asocket->so_snd);
 400                 splx(s);
 401                 return 1;
 402             }
 403             mlen = MHLEN;
 404             m->m_pkthdr.len = 0;
 405             m->m_pkthdr.rcvif = NULL;
 406         } else
 407             MGET(m, M_DONTWAIT, MT_DATA);
 408         if (!m) {
 409             /* can't get an mbuf, give up */
 410             if (top)
 411                 m_freem(top);   /* free mbuf list we're building */
 412             sbunlock(&asocket->so_snd);
 413             splx(s);
 414             return 1;
 415         }
 416         /*
 417          * WARNING: the `4 * MLEN' is somewhat dubious.  It is better than
 418          * `NBPG', which may have no relation to `CLBYTES'.  Also, `CLBYTES'
 419          * may be so large that we never use clusters, resulting in far
 420          * too many mbufs being used.  It is often better to briefly use
 421          * a cluster, even if we are only using a portion of it.  Since
 422          * we are on the xmit side, it shouldn't end up sitting on a queue
 423          * for a potentially unbounded time (except perhaps if we are talking
 424          * to ourself).
 425          */
 426         if (asize >= 4 * MLEN) {        /* try to get cluster mbuf */
 427             /* different algorithms for getting cluster mbuf */
 428             MCLGET(m, M_DONTWAIT);
 429             if ((m->m_flags & M_EXT) == 0)
 430                 goto nopages;
 431             mlen = MCLBYTES;
 432
 433             /* now compute usable size */
 434             len = MIN(mlen, asize);
 435 /* Should I look at MAPPED_MBUFS??? */
 436         } else {
 437           nopages:
 438             len = MIN(mlen, asize);
 439         }
 440         m->m_len = 0;
 441         *mp = m;                /* XXXX */
 442         top->m_pkthdr.len += len;
 443         tpa = mtod(m, caddr_t);
 444         while (len) {
 445             rlen = MIN(len, tl);
 446             memcpy(tpa, tdata, rlen);
 447             asize -= rlen;
 448             len -= rlen;
 449             tpa += rlen;
 450             m->m_len += rlen;
 451             tdata += rlen;
 452             tl -= rlen;
 453             if (tl <= 0) {
 454                 i++;
 455                 if (i > nvec) {
 456                     /* shouldn't come here! */
 457                     asize = 0;  /* so we make progress toward completion */
 458                     break;
 459                 }
 460                 tdata = dvec[i].iov_base;
 461                 tl = dvec[i].iov_len;
 462             }
 463         }
 464         *mp = m;
 465         mp = &m->m_next;
 466         if (asize <= 0)
 467             break;
 468     }
 469     tm = top;
 470
 471     tm->m_act = NULL;
 472
 473     /* setup mbuf corresponding to destination address */
 474     um = m_get(M_DONTWAIT, MT_SONAME);
 475     if (!um) {
 476         if (top)
 477             m_freem(top);       /* free mbuf chain */
 478         sbunlock(&asocket->so_snd);
 479         splx(s);
 480         return 1;
 481     }
 482     memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
 483     addr->sin_len = um->m_len = sizeof(*addr);
 484     /* note that udp_usrreq frees funny mbuf.  We hold onto data, but mbuf
 485      * around it is gone. */
 486     /*    haveGlock = ISAFS_GLOCK();
 487      * if (haveGlock) {
 488      * AFS_GUNLOCK();
 489      * }  */
 490     /* SOCKET_LOCK(asocket); */
 491     /* code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0); */
 492 #if KNET_DEBUG
 493     if (before)
 494         Debugger("afs NetSend before");
 495 #endif
 496     code =
 497         (*asocket->so_proto->pr_usrreqs->pru_send) (asocket, 0, tm,
 498                                                     (struct sockaddr *)
 499                                                     addr, um, &proc0);
 500     /* SOCKET_UNLOCK(asocket); */
 501     /* if (haveGlock) {
 502      * AFS_GLOCK();
 503      * } */
 504     sbunlock(&asocket->so_snd);
 505     splx(s);
 506 #if KNET_DEBUG
 507     if (code) {
 508         if (code == EINVAL)
 509             Debugger("afs NetSend busted");
 510         else
 511             printf("z");
 512     }
 513 #endif
 514     return code;
 515 }
 516 #endif
 517
 518 #endif /* AFS_FBSD40_ENV */