src/rx/IRIX/rx_knet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include <afsconfig.h>
  11 #include "../afs/param.h"
  12
  13 RCSID("$Header$");
  14
  15 #include "../rx/rx_kcommon.h"
  16 #include "../h/tcp-param.h"
  17 /* This must be loaded after proc.h to avoid macro collision with a variable*/
  18 #include "../netinet/udp_var.h"
  19
  20
  21
  22
  23 #ifdef RXK_LISTENER_ENV
  24 /* osi_NetReceive
  25  * OS dependent part of kernel RX listener thread.
  26  *
  27  * Arguments:
  28  *      so      socket to receive on, typically rx_socket
  29  *      from    pointer to a sockaddr_in.
  30  *      iov     array of iovecs to fill in.
  31  *      iovcnt  how many iovecs there are.
  32  *      lengthp IN/OUT in: total space available in iovecs. out: size of read.
  33  *
  34  * Return
  35  * 0 if successful
  36  * error code (such as EINTR) if not
  37  *
  38  * Environment
  39  *      Note that the maximum number of iovecs is 2 + RX_MAXWVECS. This is
  40  *      so we have a little space to look for packets larger than
  41  *      rx_maxReceiveSize.
  42  */
  43 int rxk_lastSocketError = 0;
  44 int rxk_nSocketErrors = 0;
  45 int rxk_nSignalsCleared = 0;
  46 int osi_NetReceive(osi_socket so, struct sockaddr_in *from,
  47                    struct iovec *iov, int iovcnt, int *lengthp)
  48 {
  49     struct uio tuio;
  50     int code;
  51     struct mbuf *maddr = NULL;
  52     struct sockaddr_in *taddr;
  53     struct iovec tmpvec[RX_MAXWVECS+2];
  54 #ifdef AFS_SGI65_ENV
  55     bhv_desc_t bhv;
  56     BHV_PDATA(&bhv) = (void*)so;
  57 #endif
  58
  59     tuio.uio_iov = tmpvec;
  60     tuio.uio_iovcnt = iovcnt;
  61     tuio.uio_offset = 0;
  62     tuio.uio_segflg = AFS_UIOSYS;
  63     tuio.uio_fmode = 0;
  64     tuio.uio_resid = *lengthp;
  65     tuio.uio_pio = 0;
  66     tuio.uio_pbuf = 0;
  67
  68     if (iovcnt > RX_MAXWVECS+2) {
  69         osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
  70     }
  71     memcpy(tmpvec, (char*)iov, (RX_MAXWVECS+1) * sizeof(struct iovec));
  72 #ifdef AFS_SGI65_ENV
  73     code = soreceive(&bhv, &maddr, &tuio, NULL, NULL);
  74 #else
  75     code = soreceive(so, &maddr, &tuio, NULL, NULL);
  76 #endif
  77
  78     if (code) {
  79 #ifdef AFS_SGI65_ENV
  80         /* Clear the error before using the socket again. I've tried being nice
  81          * and blocking SIGKILL and SIGSTOP from the kernel, but they get
  82          * delivered anyway. So, time to be crude and just clear the signals
  83          * pending on this thread.
  84          */
  85         if (code == EINTR) {
  86             uthread_t *ut = curuthread;
  87             int s;
  88             s = ut_lock(ut);
  89             sigemptyset(&ut->ut_sig);
  90             ut->ut_cursig = 0;
  91             thread_interrupt_clear(UT_TO_KT(ut), 1);
  92             ut_unlock(ut, s);
  93             rxk_nSignalsCleared++;
  94         }
  95 #endif
  96         /* Clear the error before using the socket again. */
  97         so->so_error = 0;
  98         rxk_lastSocketError = code;
  99         rxk_nSocketErrors ++ ;
 100         if (maddr)
 101             m_freem(maddr);
 102     }
 103     else {
 104         *lengthp = *lengthp - tuio.uio_resid;
 105         if (maddr) {
 106             memcpy((char*)from, (char*)mtod(maddr, struct sockaddr_in *),
 107                   sizeof(struct sockaddr_in));
 108             m_freem(maddr);
 109         }
 110         else {
 111             return -1;
 112         }
 113     }
 114     return code;
 115 }
 116 #else /* RXK_LISTENER_ENV */
 117
 118 static struct protosw parent_proto;     /* udp proto switch */
 119
 120 /*
 121  * RX input, fast timer and initialization routines.
 122  */
 123
 124 #ifdef AFS_SGI64_ENV
 125 static void rxk_input(struct mbuf *am, struct ifnet *aif, struct ipsec * spec)
 126 #else
 127 static void rxk_input(struct mbuf *am, struct ifnet *aif)
 128 #endif
 129 {
 130     void (*tproc)();
 131     register unsigned short *tsp;
 132     int hdr;
 133     struct udphdr *tu;
 134     register struct ip *ti;
 135     struct udpiphdr *tvu;
 136     register int i;
 137     char *phandle;
 138     struct sockaddr_in taddr;
 139     int tlen;
 140     short port;
 141     int data_len;
 142
 143     /* make sure we have base ip and udp headers in first mbuf */
 144     if (am->m_off > MMAXOFF || am->m_len < 28) {
 145         am = m_pullup(am, 28);
 146         if (!am) return;
 147     }
 148
 149     hdr = (mtod(am, struct ip *))->ip_hl;
 150     if (hdr > 5) {
 151         /* pull up more, the IP hdr is bigger than usual */
 152         if (am->m_len < (8 + (hdr<<2))) {
 153             am = m_pullup(am, 8+(hdr<<2));
 154             if (!am) return;
 155         }
 156         ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
 157         tu = (struct udphdr *)(((char *)ti) + (hdr<<2)); /* skip ip hdr */
 158     }
 159     else {
 160         ti = mtod(am, struct ip *);
 161         tu = (struct udphdr *)(((char *)ti) + 20);      /* skip basic ip hdr */
 162     }
 163     /* now read the port out */
 164     port = tu->uh_dport;
 165
 166     if (port) {
 167         for(tsp=rxk_ports, i=0; i<MAXRXPORTS;i++) {
 168             if (*tsp++ == port) {
 169                 /* checksum the packet */
 170                 if (hdr > 5) {
 171                     ip_stripoptions(am, (struct mbuf *) 0); /* get rid of anything we don't need */
 172                     tu = (struct udphdr *)(((char *)ti) + 20);
 173                 }
 174                 /*
 175                  * Make mbuf data length reflect UDP length.
 176                  * If not enough data to reflect UDP length, drop.
 177                  */
 178                 tvu = (struct udpiphdr *)ti;
 179                 tlen = ntohs((u_short)tvu->ui_ulen);
 180                 if ((int)ti->ip_len != tlen) {
 181                     if (tlen > (int)ti->ip_len) {
 182                         m_free(am);
 183                         return;
 184                     }
 185                     m_adj(am, tlen - (int)ti->ip_len);
 186                 }
 187                 /* deliver packet to rx */
 188                 taddr.sin_family = AF_INET;         /* compute source address */
 189                 taddr.sin_port = tu->uh_sport;
 190                 taddr.sin_addr.s_addr = ti->ip_src.s_addr;
 191                 /* handle the checksum.  Note that this code damages the actual ip
 192                    header (replacing it with the virtual one, which is the same size),
 193                    so we must ensure we get everything out we need, first */
 194                 if ( tu->uh_sum != 0) {
 195                         /* if the checksum is there, always check it. It's crazy not
 196                          * to, unless you can really be sure that your
 197                          * underlying network (and interfaces and drivers and
 198                          * DMA hardware, etc!) is error-free. First, fill
 199                          * in entire virtual ip header. */
 200                         tvu->ui_next = 0;
 201                         tvu->ui_prev = 0;
 202                         tvu->ui_x1 = 0;
 203                         tvu->ui_len = tvu->ui_ulen;
 204                         tlen = ntohs((unsigned short)(tvu->ui_ulen));
 205                 if ((!(am->m_flags & M_CKSUMMED)) &&
 206                         in_cksum(am, sizeof(struct ip) + tlen)){
 207                             /* checksum, including cksum field, doesn't come out 0, so
 208                                this packet is bad */
 209                             m_freem(am);
 210                             return;
 211                         }
 212                       }
 213
 214                 /*
 215                  * 28 is IP (20) + UDP (8) header.  ulen includes
 216                  * udp header, and we *don't* tell RX about udp
 217                  * header either.  So, we remove those 8 as well.
 218                  */
 219                 data_len = ntohs(tu->uh_ulen);
 220                 data_len -= 8;
 221                 if (!(*rxk_GetPacketProc)(&phandle, data_len)) {
 222                   if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
 223                     /* XXX should just increment counter here.. */
 224                     printf("rx: truncated UDP packet\n");
 225                     rxi_FreePacket(phandle);
 226                   }
 227                   else
 228                     (*rxk_PacketArrivalProc)(phandle, &taddr,
 229                                              rxk_portRocks[i], data_len);
 230                 }else m_freem(am);
 231                 return;
 232                 }
 233             }
 234         }
 235
 236     /* if we get here, try to deliver packet to udp */
 237     if (tproc = parent_proto.pr_input) (*tproc)(am, aif);
 238     return;
 239 }
 240
 241 /*
 242  * UDP fast timer to raise events for all but Solaris and NCR.
 243  * Called about 5 times per second (at unknown priority?).  Must go to
 244  * splnet or obtain global lock before touching anything significant.
 245  */
 246 static void rxk_fasttimo (void)
 247 {
 248     int (*tproc)();
 249     struct clock temp;
 250
 251     /* do rx fasttimo processing here */
 252     rxevent_RaiseEvents(&temp);
 253     if (tproc = parent_proto.pr_fasttimo) (*tproc)();
 254 }
 255
 256
 257 /* start intercepting basic calls */
 258 void rxk_init(void) {
 259     register struct protosw *tpro, *last;
 260     if (rxk_initDone) return;
 261
 262     last = inetdomain.dom_protoswNPROTOSW;
 263     for (tpro = inetdomain.dom_protosw; tpro < last; tpro++) {
 264         if (tpro->pr_protocol == IPPROTO_UDP) {
 265             memcpy(&parent_proto, tpro, sizeof(parent_proto));
 266             tpro->pr_input = rxk_input;
 267             tpro->pr_fasttimo = rxk_fasttimo;
 268             rxk_initDone = 1;
 269             return;
 270         }
 271     }
 272     osi_Panic("inet:no udp");
 273 }
 274 #endif /* RXK_LISTENER_ENV */
 275
 276 /*
 277  * RX IP address routines.
 278  */
 279
/* Per-interface tables filled in by rxi_EnumGetIfInfo(), one slot per
 * in_ifaddr visited, up to ADDRSPERSITE entries. */
/* interface addresses, stored after ntohl() (host byte order) */
static afs_uint32 myNetAddrs[ADDRSPERSITE];
/* interface if_mtu minus RX_IPUDP_SIZE */
static int myNetMTUs[ADDRSPERSITE];
/* not referenced by the code visible in this file */
static int myNetFlags[ADDRSPERSITE];
/* NOTE(review): tested by rxi_FindIfnet() but never assigned in the code
 * visible here, so that test appears to always be true - confirm. */
static int numMyNetAddrs = 0;
 284
 285 /* This version doesn't even begin to handle iterative requests, but then
 286  * we don't yet use them anyway. Fix this when rxi_InitPeerParams is changed
 287  * to find a true maximum.
 288  */
 289 static int rxi_MatchIfnet(struct hashbucket *h, caddr_t key, caddr_t arg1,
 290               caddr_t arg2)
 291 {
 292     afs_uint32 ppaddr = *(afs_uint32*)key;
 293     int match_value = *(int*)arg1;
 294     struct in_ifaddr *ifa = (struct in_ifaddr*)h;
 295     struct sockaddr_in *sin;
 296
 297     if ((ppaddr & ifa->ia_netmask) == ifa->ia_net) {
 298         if ((ppaddr & ifa->ia_subnetmask) == ifa->ia_subnet) {
 299             sin=IA_SIN(ifa);
 300             if ( sin->sin_addr.s_addr == ppaddr) {   /* ie, ME!!!  */
 301                 match_value = 4;
 302                 *(struct in_ifaddr**)arg2 = ifa;
 303             }
 304             if (match_value < 3) {
 305                 *(struct in_ifaddr**)arg2 = ifa;
 306                 match_value = 3;
 307             }
 308         }
 309         else {
 310             if (match_value < 2) {
 311                 *(struct in_ifaddr**)arg2 = ifa;
 312                 match_value = 2;
 313             }
 314         }
 315     }
 316     *(int*)arg1 = match_value;
 317     return 0;
 318 }
 319
 320
 321 struct ifnet * rxi_FindIfnet(addr, pifad)
 322      afs_uint32 addr;
 323      struct in_ifaddr **pifad;
 324 {
 325   afs_uint32 ppaddr;
 326   int match_value = 0;
 327   struct hashbucket *slop;
 328
 329   if (numMyNetAddrs == 0)
 330     (void) rxi_GetIFInfo();
 331
 332   ppaddr = ntohl(addr);
 333   *pifad = (struct in_ifaddr*)&hashinfo_inaddr;
 334
 335   slop = hash_enum(&hashinfo_inaddr, rxi_MatchIfnet, HTF_INET,
 336                    (caddr_t)&ppaddr, (caddr_t)&match_value, (caddr_t)pifad);
 337
 338   if (slop)
 339       return ((struct in_ifaddr*)slop)->ia_ifp;
 340   else
 341       return NULL;
 342
 343
 344 }
 345
 346 static int rxi_EnumGetIfInfo(struct hashbucket *h, caddr_t key, caddr_t arg1,
 347                   caddr_t arg2)
 348 {
 349     int different = *(int*)arg1;
 350     int i = *(int*)arg2;
 351     struct in_ifaddr *iap = (struct in_ifaddr*)h;
 352     struct ifnet *ifnp;
 353     afs_uint32 ifinaddr;
 354     afs_uint32 rxmtu;
 355
 356     if (i>=ADDRSPERSITE)
 357         return 0;
 358
 359     ifnp = iap->ia_ifp;
 360     rxmtu = (ifnp->if_mtu - RX_IPUDP_SIZE);
 361     ifinaddr = ntohl(iap->ia_addr.sin_addr.s_addr);
 362     if (myNetAddrs[i] != ifinaddr) {
 363         myNetAddrs[i] = ifinaddr;
 364         myNetMTUs[i] = rxmtu;
 365         different++;
 366         *(int*)arg1 = different;
 367     }
 368     rxmtu = rxmtu * rxi_nRecvFrags + ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
 369     if ( ( ifinaddr != 0x7f000001 ) &&
 370         (rxmtu > rx_maxReceiveSize) ) {
 371         rx_maxReceiveSize = MIN( RX_MAX_PACKET_SIZE, rxmtu);
 372         rx_maxReceiveSize = MIN( rx_maxReceiveSize, rx_maxReceiveSizeUser);
 373     }
 374
 375     *(int*)arg2 = i + 1;
 376     return 0;
 377 }
 378
 379 int rxi_GetIFInfo()
 380 {
 381     int i = 0;
 382     int different = 0;
 383
 384     /* SGI 6.2 does not have a pointer from the ifnet to the list of
 385      * of addresses (if_addrlist). So it's more efficient to run the
 386      * in_ifaddr list and use the back pointers to the ifnet struct's.
 387      */
 388     (void) hash_enum(&hashinfo_inaddr, rxi_EnumGetIfInfo, HTF_INET,
 389                      NULL, (caddr_t)&different, (caddr_t)&i);
 390
 391     rx_maxJumboRecvSize = RX_HEADER_SIZE
 392                           + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE
 393                           + (rxi_nDgramPackets-1) * RX_JUMBOHEADERSIZE;
 394     rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);
 395
 396     return different;
 397 }
 398
 399 /* osi_NetSend - from the now defunct afs_osinet.c */
 400 #ifdef DEBUG
 401 #undef DEBUG
 402 #endif
 403 #ifdef MP
 404 #define _MP_NETLOCKS
 405 #endif
 406
 407 #ifdef AFS_SGI65_ENV
 408 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
 409      register struct osi_socket *asocket;
 410      struct iovec *dvec;
 411      int nvec;
 412      register afs_int32 asize;
 413      struct sockaddr_in *addr;
 414      int istack;
 415 {
 416     int code;
 417     struct iovec tvecs[RX_MAXWVECS+1];
 418     struct iovec *iovp;
 419     struct uio tuio;
 420     struct mbuf *to;
 421     int i;
 422     bhv_desc_t bhv;
 423
 424     if (nvec > RX_MAXWVECS+1) {
 425         osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvec);
 426     }
 427     memcpy((char*)tvecs, (char*)dvec, nvec * sizeof(struct iovec));
 428
 429     tuio.uio_iov = tvecs;
 430     tuio.uio_iovcnt = nvec;
 431     tuio.uio_segflg = UIO_SYSSPACE;
 432     tuio.uio_offset = 0;
 433     tuio.uio_sigpipe = 0;
 434     tuio.uio_pio = 0;
 435     tuio.uio_pbuf = 0;
 436
 437     tuio.uio_resid = 0;
 438     for (i=0, iovp = tvecs; i<nvec; i++, iovp++)
 439         tuio.uio_resid += iovp->iov_len;
 440
 441
 442     to = m_get(M_WAIT, MT_SONAME);
 443     to->m_len = sizeof(struct sockaddr_in);
 444     memcpy(mtod(to, caddr_t), (char*)addr, to->m_len);
 445
 446     BHV_PDATA(&bhv) = (void*)asocket;
 447     code = sosend(&bhv, to, &tuio, 0, NULL);
 448
 449     m_freem(to);
 450     return code;
 451 }
 452 #else /* AFS_SGI65_ENV */
 453
 454 int dummy_sblock(struct sockbuf *a, int b,  struct socket *c, int *d, int e)
 455 {
 456     afs_warn("sblock was called before it was installed. Install proper afsd.\n");
 457 }
 458 void dummy_sbunlock(struct sockbuf *a, int b,  struct socket *c, int d)
 459 {
 460     afs_warn("sbunlock was called before it was installed. Install proper afsd.\n");
 461 }
 462
 463 int (*afs_sblockp)(struct sockbuf*, int, struct socket*, int*, int) =
 464      dummy_sblock;
 465 void (*afs_sbunlockp)(struct sockbuf*, int, struct socket*, int) =
 466      dummy_sbunlock;
 467 #define AFS_SBUNLOCK(SB, EV, SO, O) (*afs_sbunlockp)(SB, EV, SO, O)
 468
 469 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
 470  *
 471  * Now, why do we allocate a new buffer when we could theoretically use the one
 472  * pointed to by adata?  Because PRU_SEND returns after queueing the message,
 473  * not after sending it.  If the sender changes the data after queueing it,
 474  * we'd see the already-queued data change.  One attempt to fix this without
 475  * adding a copy would be to have this function wait until the datagram is
 476  * sent; however this doesn't work well.  In particular, if a host is down, and
 477  * an ARP fails to that host, this packet will be queued until the ARP request
 478  * comes back, which could be hours later.  We can't block in this routine that
 479  * long, since it prevents RPC timeouts from happening.
 480  */
 481 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 482  * and just queue those.  XXX
 483  */
 484 int
 485 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
 486      register struct socket *asocket;
 487      struct iovec *dvec;
 488      int nvec;
 489      register afs_int32 asize;
 490      struct sockaddr_in *addr;
 491      int istack;
 492 {
 493     register struct mbuf *tm, *um;
 494     register afs_int32 code;
 495     int s;
 496     struct mbuf *top = 0;
 497     register struct mbuf *m, **mp;
 498     int len;
 499     char *tdata;
 500     caddr_t tpa;
 501     int i,tl,rlen;
 502
 503     NETSPL_DECL(s1)
 504     AFS_STATCNT(osi_NetSend);
 505
 506     (*afs_sblockp)(&asocket->so_snd, NETEVENT_SODOWN, asocket, &s1, istack);
 507
 508     s = splnet();
 509     mp = &top;
 510     i = 0;
 511     tdata = dvec[i].iov_base;
 512     tl = dvec[i].iov_len;
 513     while (1) {
 514         if ((m = m_vget(M_DONTWAIT, MIN(asize, VCL_MAX), MT_DATA)) == NULL) {
 515             if (top) m_freem(top);
 516             splx(s);
 517             AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 518             return 1;
 519         }
 520         len = MIN(m->m_len, asize);
 521         m->m_len = 0;
 522         tpa = mtod(m, caddr_t);
 523         while (len) {
 524           rlen = MIN(len, tl);
 525           memcpy(tpa, tdata, rlen);
 526           asize -= rlen;
 527           len -= rlen;
 528           tpa += rlen;
 529           m->m_len += rlen;
 530           tdata += rlen;
 531           tl -= rlen;
 532           if (tl <= 0) {
 533             i++;
 534             if (i > nvec) {
 535               /* shouldn't come here! */
 536               asize = 0;   /* so we make progress toward completion */
 537               break;
 538             }
 539             tdata = dvec[i].iov_base;
 540             tl = dvec[i].iov_len;
 541           }
 542         }
 543         *mp = m;
 544         mp = &m->m_next;
 545         if (asize <= 0)
 546           break;
 547     }
 548     tm = top;
 549
 550     tm->m_act = (struct mbuf *) 0;
 551
 552     /* setup mbuf corresponding to destination address */
 553     um = m_get(M_DONTWAIT, MT_SONAME);
 554     if (!um) {
 555         if (top) m_freem(top);  /* free mbuf chain */
 556         /* if this were vfs40, we'd do sbunlock(asocket, &asocket->so_snd), but
 557            we don't do the locking at all for vfs40 systems */
 558         splx(s);
 559         AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 560         return 1;
 561     }
 562     memcpy(mtod(um, caddr_t), addr, sizeof(*addr));
 563     um->m_len = sizeof(*addr);
 564     /* note that udp_usrreq frees funny mbuf.  We hold onto data, but mbuf
 565      * around it is gone.  we free address ourselves.  */
 566     code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0);
 567     splx(s);
 568     m_free(um);
 569     AFS_SBUNLOCK(&asocket->so_snd, NETEVENT_SODOWN, asocket, s1);
 570
 571     return code;
 572 }
 573 #endif /* AFS_SGI65_ENV */
 574