src/rx/IRIX/rx_knet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include "../afs/param.h"
  11 #include "../rx/rx_kcommon.h"
  12 #include "../h/tcp-param.h"
  13 /* This must be loaded after proc.h to avoid macro collision with a variable*/
  14 #include "../netinet/udp_var.h"
  15
  16
  17
  18
  19 #ifdef RXK_LISTENER_ENV
  20 /* osi_NetReceive
  21  * OS dependent part of kernel RX listener thread.
  22  *
  23  * Arguments:
  24  *      so      socket to receive on, typically rx_socket
  25  *      from    pointer to a sockaddr_in.
  26  *      iov     array of iovecs to fill in.
  27  *      iovcnt  how many iovecs there are.
  28  *      lengthp IN/OUT in: total space available in iovecs. out: size of read.
  29  *
  30  * Return
  31  * 0 if successful
  32  * error code (such as EINTR) if not
  33  *
  34  * Environment
  35  *      Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
  36  *      so we have a little space to look for packets larger than
  37  *      rx_maxReceiveSize.
  38  */
  39 int rxk_lastSocketError = 0;
  40 int rxk_nSocketErrors = 0;
  41 int rxk_nSignalsCleared = 0;
  42 int osi_NetReceive(osi_socket so, struct sockaddr_in *from,
  43                    struct iovec *iov, int iovcnt, int *lengthp)
  44 {
  45     struct uio tuio;
  46     int code;
  47     struct mbuf *maddr = NULL;
  48     struct sockaddr_in *taddr;
  49     struct iovec tmpvec[RX_MAXWVECS+2];
  50 #ifdef AFS_SGI65_ENV
  51     bhv_desc_t bhv;
  52     BHV_PDATA(&bhv) = (void*)so;
  53 #endif
  54
  55     tuio.uio_iov = tmpvec;
  56     tuio.uio_iovcnt = iovcnt;
  57     tuio.uio_offset = 0;
  58     tuio.uio_segflg = AFS_UIOSYS;
  59     tuio.uio_fmode = 0;
  60     tuio.uio_resid = *lengthp;
  61     tuio.uio_pio = 0;
  62     tuio.uio_pbuf = 0;
  63
  64     if (iovcnt > RX_MAXWVECS+2) {
  65         osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
  66     }
  67     bcopy((char*)iov, tmpvec, (RX_MAXWVECS+1) * sizeof(struct iovec));
  68 #ifdef AFS_SGI65_ENV
  69     code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
  70 #else
  71     code = soreceive(so, &maddr, &tuio, NULL, NULL);
  72 #endif
  73
  74     if (code) {
  75 #ifdef AFS_SGI65_ENV
  76         /* Clear the error before using the socket again. I've tried being nice
  77          * and blocking SIGKILL and SIGSTOP from the kernel, but they get
  78          * delivered anyway. So, time to be crude and just clear the signals
  79          * pending on this thread.
  80          */
  81         if (code == EINTR) {
  82             uthread_t *ut = curuthread;
  83             int s;
  84             s = ut_lock(ut);
  85             sigemptyset(&ut->ut_sig);
  86             ut->ut_cursig = 0;
  87             thread_interrupt_clear(UT_TO_KT(ut), 1);
  88             ut_unlock(ut, s);
  89             rxk_nSignalsCleared++;
  90         }
  91 #endif
  92         /* Clear the error before using the socket again. */
  93         so->so_error = 0;
  94         rxk_lastSocketError = code;
  95         rxk_nSocketErrors ++ ;
  96         if (maddr)
  97             m_freem(maddr);
  98     }
  99     else {
 100         *lengthp = *lengthp - tuio.uio_resid;
 101         if (maddr) {
 102             bcopy((char*)mtod(maddr, struct sockaddr_in *), (char*)from,
 103                   sizeof(struct sockaddr_in));
 104             m_freem(maddr);
 105         }
 106         else {
 107             return -1;
 108         }
 109     }
 110     return code;
 111 }
 112 #else /* RXK_LISTENER_ENV */
 113
 114 static struct protosw parent_proto;     /* udp proto switch */
 115
 116 /*
 117  * RX input, fast timer and initialization routines.
 118  */
 119
 120 #ifdef AFS_SGI64_ENV
 121 static void rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec * spec)
 122 #else
 123 static void rxk_input(struct mbuf *am, struct ifnet *aif)
 124 #endif
 125 {
 126     void (*tproc)();
 127     register unsigned short *tsp;
 128     int hdr;
 129     struct udphdr *tu;
 130     register struct ip *ti;
 131     struct udpiphdr *tvu;
 132     register int i;
 133     char *phandle;
 134     struct sockaddr_in taddr;
 135     int tlen;
 136     short port;
 137     int data_len;
 138
 139     /* make sure we have base ip and udp headers in first mbuf */
 140     if (am->m_off > MMAXOFF || am->m_len < 28) {
 141         am = m_pullup(am, 28);
 142         if (!am) return;
 143     }
 144
 145     hdr = (mtod(am, struct ip *))->ip_hl;
 146     if (hdr > 5) {
 147         /* pull up more, the IP hdr is bigger than usual */
 148         if (am->m_len < (8 + (hdr<<2))) {
 149             am = m_pullup(am, 8+(hdr<<2));
 150             if (!am) return;
 151         }
 152         ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
 153         tu = (struct udphdr *)(((char *)ti) + (hdr<<2)); /* skip ip hdr */
 154     }
 155     else {
 156         ti = mtod(am, struct ip *);
 157         tu = (struct udphdr *)(((char *)ti) + 20);      /* skip basic ip hdr */
 158     }
 159     /* now read the port out */
 160     port = tu->uh_dport;
 161
 162     if (port) {
 163         for(tsp=rxk_ports, i=0; i<MAXRXPORTS;i++) {
 164             if (*tsp++ == port) {
 165                 /* checksum the packet */
 166                 if (hdr > 5) {
 167                     ip_stripoptions(am, (struct mbuf *) 0); /* get rid of anything we don't need */
 168                     tu = (struct udphdr *)(((char *)ti) + 20);
 169                 }
 170                 /*
 171                  * Make mbuf data length reflect UDP length.
 172                  * If not enough data to reflect UDP length, drop.
 173                  */
 174                 tvu = (struct udpiphdr *)ti;
 175                 tlen = ntohs((u_short)tvu->ui_ulen);
 176                 if ((int)ti->ip_len != tlen) {
 177                     if (tlen > (int)ti->ip_len) {
 178                         m_free(am);
 179                         return;
 180                     }
 181                     m_adj(am, tlen - (int)ti->ip_len);
 182                 }
 183                 /* deliver packet to rx */
 184                 taddr.sin_family = AF_INET;         /* compute source address */
 185                 taddr.sin_port = tu->uh_sport;
 186                 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
 187                 /* handle the checksum.  Note that this code damages the actual ip
 188                    header (replacing it with the virtual one, which is the same size),
 189                    so we must ensure we get everything out we need, first */
 190                 if ( tu->uh_sum != 0) {
 191                         /* if the checksum is there, always check it. It's crazy not
 192                          * to, unless you can really be sure that your
 193                          * underlying network (and interfaces and drivers and
 194                          * DMA hardware, etc!) is error-free. First, fill
 195                          * in entire virtual ip header. */
 196                         tvu->ui_next = 0;
 197                         tvu->ui_prev = 0;
 198                         tvu->ui_x1 = 0;
 199                         tvu->ui_len = tvu->ui_ulen;
 200                         tlen = ntohs((unsigned short)(tvu->ui_ulen));
 201                 if ((!(am->m_flags & M_CKSUMMED)) &&
 202                         in_cksum(am, sizeof(struct ip) + tlen)){
 203                             /* checksum, including cksum field, doesn't come out 0, so
 204                                this packet is bad */
 205                             m_freem(am);
 206                             return;
 207                         }
 208                       }
 209
 210                 /*
 211                  * 28 is IP (20) + UDP (8) header.  ulen includes
 212                  * udp header, and we *don't* tell RX about udp
 213                  * header either.  So, we remove those 8 as well.
 214                  */
 215                 data_len = ntohs(tu->uh_ulen);
 216                 data_len -= 8;
 217                 if (!(*rxk_GetPacketProc)(&phandle, data_len)) {
 218                   if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
 219                     /* XXX should just increment counter here.. */
 220                     printf("rx: truncated UDP packet\n");
 221                     rxi_FreePacket(phandle);
 222                   }
 223                   else
 224                     (*rxk_PacketArrivalProc)(phandle, &taddr,
 225                                              rxk_portRocks[i], data_len);
 226                 }else m_freem(am);
 227                 return;
 228                 }
 229             }
 230         }
 231
 232     /* if we get here, try to deliver packet to udp */
 233     if (tproc = parent_proto.pr_input) (*tproc)(am, aif);
 234     return;
 235 }
 236
 237 /*
 238  * UDP fast timer to raise events for all but Solaris and NCR.
 239  * Called about 5 times per second (at unknown priority?).  Must go to
 240  * splnet or obtain global lock before touching anything significant.
 241  */
 242 static void rxk_fasttimo (void)
 243 {
 244     int (*tproc)();
 245     struct clock temp;
 246
 247     /* do rx fasttimo processing here */
 248     rxevent_RaiseEvents(&temp);
 249     if (tproc = parent_proto.pr_fasttimo) (*tproc)();
 250 }
 251
 252
 253 /* start intercepting basic calls */
 254 void rxk_init(void) {
 255     register struct protosw *tpro, *last;
 256     if (rxk_initDone) return;
 257
 258     last = inetdomain.dom_protoswNPROTOSW;
 259     for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
 260         if (tpro->pr_protocol == IPPROTO_UDP) {
 261             bcopy(tpro, &parent_proto, sizeof(parent_proto));
 262             tpro->pr_input = rxk_input;
 263             tpro->pr_fasttimo = rxk_fasttimo;
 264             rxk_initDone = 1;
 265             return;
 266         }
 267     }
 268     osi_Panic("inet:no udp");
 269 }
 270 #endif /* RXK_LISTENER_ENV */
 271
 272 /*
 273  * RX IP address routines.
 274  */
 275
 276 static afs_uint32 myNetAddrs[ADDRSPERSITE];
 277 static int myNetMTUs[ADDRSPERSITE];
 278 static int myNetFlags[ADDRSPERSITE];
 279 static int numMyNetAddrs = 0;
 280
 281 /* This version doesn't even begin to handle iterative requests, but then
 282  * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
 283  * to find a true maximum.
 284  */
 285 static int rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1,
 286               caddr_t arg2)
 287 {
 288     afs_uint32 ppaddr = *(afs_uint32*)key;
 289     int match_value = *(int*)arg1;
 290     struct in_ifaddr *ifa = (struct in_ifaddr*)h;
 291     struct sockaddr_in *sin;
 292
 293     if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
 294         if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
 295             sin=IA_SIN(ifa);
 296             if ( sin->sin_addr.s_addr == ppaddr) {   /* ie, ME!!!  */
 297                 match_value = 4;
 298                 *(struct in_ifaddr**)arg2 = ifa;
 299             }
 300             if (match_value < 3) {
 301                 *(struct in_ifaddr**)arg2 = ifa;
 302                 match_value = 3;
 303             }
 304         }
 305         else {
 306             if (match_value < 2) {
 307                 *(struct in_ifaddr**)arg2 = ifa;
 308                 match_value = 2;
 309             }
 310         }
 311     }
 312     *(int*)arg1 = match_value;
 313     return 0;
 314 }
 315
 316
 317 struct ifnet * rxi_FindIfnet(addr, pifad)
 318      afs_uint32 addr;
 319      struct in_ifaddr **pifad;
 320 {
 321   afs_uint32 ppaddr;
 322   int match_value = 0;
 323   struct hashbucket *slop;
 324
 325   if (numMyNetAddrs == 0)
 326     (void) rxi_GetIFInfo();
 327
 328   ppaddr = ntohl(addr);
 329   *pifad = (struct in_ifaddr*)&hashinfo_inaddr;
 330
 331   slop = hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
 332                    (caddr_t)&ppaddr, (caddr_t)&match_value, (caddr_t)pifad);
 333
 334   if (slop)
 335       return ((struct in_ifaddr*)slop)->ia_ifp;
 336   else
 337       return NULL;
 338
 339
 340 }
 341
 342 static int rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
 343                   caddr_t arg2)
 344 {
 345     int different = *(int*)arg1;
 346     int i = *(int*)arg2;
 347     struct in_ifaddr *iap = (struct in_ifaddr*)h;
 348     struct ifnet *ifnp;
 349     afs_uint32 ifinaddr;
 350     afs_uint32 rxmtu;
 351
 352     if (i>=ADDRSPERSITE)
 353         return 0;
 354
 355     ifnp = iap->ia_ifp;
 356     rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
 357     ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
 358     if (myNetAddrs[i] != ifinaddr) {
 359         myNetAddrs[i] = ifinaddr;
 360         myNetMTUs[i] = rxmtu;
 361         different++;
 362         *(int*)arg1 = different;
 363     }
 364     rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
 365     if ( ( ifinaddr != 0x7f000001 ) &&
 366         (rxmtu > rx_maxReceiveSize) ) {
 367         rx_maxReceiveSize = MIN( RX_MAX_PACKET_SIZE, rxmtu);
 368         rx_maxReceiveSize = MIN( rx_maxReceiveSize, rx_maxReceiveSizeUser);
 369     }
 370
 371     *(int*)arg2 = i + 1;
 372     return 0;
 373 }
 374
 375 int rxi_GetIFInfo()
 376 {
 377     int i = 0;
 378     int different = 0;
 379
 380     /* SGI 6.2 does not have a pointer from the ifnet to the list of
 381      * of addresses (if_addrlist). So it's more efficient to run the
 382      * in_ifaddr list and use the back pointers to the ifnet struct's.
 383      */
 384     (void) hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET,
 385                      NULL, (caddr_t)&different, (caddr_t)&i);
 386
 387     rx_maxJumboRecvSize = RX_HEADER_SIZE
 388                           + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE
 389                           + (rxi_nDgramPackets-1) * RX_JUMBOHEADERSIZE;
 390     rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
 391
 392     return different;
 393 }
 394
 395 /* osi_NetSend - from the now defunct afs_osinet.c */
 396 #ifdef DEBUG
 397 #undef DEBUG
 398 #endif
 399 #ifdef MP
 400 #define _MP_NETLOCKS
 401 #endif
 402
 403 #ifdef AFS_SGI65_ENV
 404 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
 405      register struct osi_socket *asocket;
 406      struct iovec *dvec;
 407      int nvec;
 408      register afs_int32 asize;
 409      struct sockaddr_in *addr;
 410      int istack;
 411 {
 412     int code;
 413     struct iovec tvecs[RX_MAXWVECS+1];
 414     struct iovec *iovp;
 415     struct uio tuio;
 416     struct mbuf *to;
 417     int i;
 418     bhv_desc_t bhv;
 419
 420     if (nvec > RX_MAXWVECS+1) {
 421         osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
 422     }
 423     bcopy((char*)dvec, (char*)tvecs, nvec * sizeof(struct iovec));
 424
 425     tuio.uio_iov = tvecs;
 426     tuio.uio_iovcnt = nvec;
 427     tuio.uio_segflg = UIO_SYSSPACE;
 428     tuio.uio_offset = 0;
 429     tuio.uio_sigpipe = 0;
 430     tuio.uio_pio = 0;
 431     tuio.uio_pbuf = 0;
 432
 433     tuio.uio_resid = 0;
 434     for (i=0, iovp = tvecs; i<nvec; i++, iovp++)
 435         tuio.uio_resid += iovp->iov_len;
 436
 437
 438     to = m_get(M_WAIT, MT_SONAME);
 439     to->m_len = sizeof(struct sockaddr_in);
 440     bcopy((char*)addr, mtod(to, caddr_t), to->m_len);
 441
 442     BHV_PDATA(&bhv) = (void*)asocket;
 443     code = sosend(&bhv, to, &tuio, 0, NULL);
 444
 445     m_freem(to);
 446     return code;
 447 }
 448 #else /* AFS_SGI65_ENV */
 449
 450 int dummy_sblock(struct sockbuf *a, int b,  struct socket *c, int *d, int e)
 451 {
 452     afs_warn("sblock was called before it was installed. Install proper afsd.\n");
 453 }
 454 void dummy_sbunlock(struct sockbuf *a, int b,  struct socket *c, int d)
 455 {
 456     afs_warn("sbunlock was called before it was installed. Install proper afsd.\n");
 457 }
 458
 459 int (*afs_sblockp)(struct sockbuf*, int, struct socket*, int*, int) =
 460      dummy_sblock;
 461 void (*afs_sbunlockp)(struct sockbuf*, int, struct socket*, int) =
 462      dummy_sbunlock;
 463 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
 464
 465 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
 466  *
 467  * Now, why do we allocate a new buffer when we could theoretically use the one
 468  * pointed to by adata?  Because PRU_SEND returns after queueing the message,
 469  * not after sending it.  If the sender changes the data after queueing it,
 470  * we'd see the already-queued data change.  One attempt to fix this without
 471  * adding a copy would be to have this function wait until the datagram is
 472  * sent; however this doesn't work well.  In particular, if a host is down, and
 473  * an ARP fails to that host, this packet will be queued until the ARP request
 474  * comes back, which could be hours later.  We can't block in this routine that
 475  * long, since it prevents RPC timeouts from happening.
 476  */
 477 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 478  * and just queue those.  XXX
 479  */
 480 int
 481 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
 482      register struct socket *asocket;
 483      struct iovec *dvec;
 484      int nvec;
 485      register afs_int32 asize;
 486      struct sockaddr_in *addr;
 487      int istack;
 488 {
 489     register struct mbuf *tm, *um;
 490     register afs_int32 code;
 491     int s;
 492     struct mbuf *top = 0;
 493     register struct mbuf *m, **mp;
 494     int len;
 495     char *tdata;
 496     caddr_t tpa;
 497     int i,tl,rlen;
 498
 499     NETSPL_DECL(s1)
 500     AFS_STATCNT(osi_NetSend);
 501
 502     (*afs_sblockp)(&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
 503
 504     s = splnet();
 505     mp = &top;
 506     i = 0;
 507     tdata = dvec[i].iov_base;
 508     tl = dvec[i].iov_len;
 509     while (1) {
 510         if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
 511             if (top) m_freem(top);
 512             splx(s);
 513             AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 514             return 1;
 515         }
 516         len = MIN(m->m_len, asize);
 517         m->m_len = 0;
 518         tpa = mtod(m, caddr_t);
 519         while (len) {
 520           rlen = MIN(len, tl);
 521           bcopy(tdata, tpa, rlen);
 522           asize -= rlen;
 523           len -= rlen;
 524           tpa += rlen;
 525           m->m_len += rlen;
 526           tdata += rlen;
 527           tl -= rlen;
 528           if (tl <= 0) {
 529             i++;
 530             if (i > nvec) {
 531               /* shouldn't come here! */
 532               asize = 0;   /* so we make progress toward completion */
 533               break;
 534             }
 535             tdata = dvec[i].iov_base;
 536             tl = dvec[i].iov_len;
 537           }
 538         }
 539         *mp = m;
 540         mp = &m->m_next;
 541         if (asize <= 0)
 542           break;
 543     }
 544     tm = top;
 545
 546     tm->m_act = (struct mbuf *) 0;
 547
 548     /* setup mbuf corresponding to destination address */
 549     um = m_get(M_DONTWAIT, MT_SONAME);
 550     if (!um) {
 551         if (top) m_freem(top);  /* free mbuf chain */
 552         /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
 553            we don't do the locking at all for vfs40 systems */
 554         splx(s);
 555         AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 556         return 1;
 557     }
 558     bcopy(addr, mtod(um, caddr_t), sizeof(*addr));
 559     um->m_len = sizeof(*addr);
 560     /* note that udp_usrreq frees funny mbuf.  We hold onto data, but mbuf
 561      * around it is gone.  we free address ourselves.  */
 562     code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0);
 563     splx(s);
 564     m_free(um);
 565     AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 566
 567     return code;
 568 }
 569 #endif /* AFS_SGI65_ENV */
 570