src/rx/AIX/rx_knet.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 #include "../afs/param.h"
  11 #ifdef AFS_AIX41_ENV
  12 #include "../rx/rx_kcommon.h"
  13
  14 static struct protosw parent_proto;     /* copy of the UDP protosw entry, saved by rxk_init() before it installs the rx hooks and written back by shutdown_rxkernel() */
  15
  16 static void rxk_input(am, hlen)
  17 register struct mbuf *am; {
  18     register unsigned short *tsp;
  19     int hdr;
  20     struct udphdr *tu;
  21     register struct ip *ti;
  22     struct udpiphdr *tvu;
  23     register int i;
  24     char *phandle;
  25     long code;
  26     struct sockaddr_in taddr;
  27     int tlen;
  28     short port;
  29     int data_len, comp_sum;
  30     /* make sure we have base ip and udp headers in first mbuf */
  31     if (M_HASCL(am) || am->m_len < 28) {
  32         am = m_pullup(am, 28);
  33         if (!am) return;
  34     }
  35     hdr = (mtod(am, struct ip *))->ip_hl;
  36     if (hdr > 5) {
  37         /* pull up more, the IP hdr is bigger than usual */
  38         if (am->m_len < (8 + (hdr<<2))) {
  39             am = m_pullup(am, 8+(hdr<<2));
  40             if (!am) return;
  41         }
  42         ti = mtod(am, struct ip *); /* recompute, since m_pullup allocates new mbuf */
  43         tu = (struct udphdr *)(((char *)ti) + (hdr<<2)); /* skip ip hdr */
  44     } else {
  45         ti = mtod(am, struct ip *);
  46         tu = (struct udphdr *)(((char *)ti) + 20);      /* skip basic ip hdr */
  47     }
  48
  49     /* now read the port out */
  50     port = tu->uh_dport;
  51     if (port) {
  52         for(tsp=rxk_ports, i=0; i<MAXRXPORTS;i++) {
  53             if (*tsp++ == port) {
  54                 rxk_kpork(am);
  55                 return;
  56             }
  57         }
  58     }
  59     /* if we get here, try to deliver packet to udp */
  60     if (parent_proto.pr_input)
  61         udp_input(am, hlen);
  62 }
  63
  64 /*
  65  * the AIX version is complicated by the fact that the internet protocols
  66  * are in a separate kernel extension, and they are unwilling to export their
  67  * symbols to us.  We can get there indirectly, however.
  68  */
  69 #include <net/netisr.h>
  70 static struct ifqueue rxk_q;                    /* RXKluge queue: registered with add_input_type() in rxk_init(), drained by rxk_isr() */
  71 static struct arpcom rxk_bogosity;              /* placeholder arpcom handed to find_input_type() by rxk_kpork() */
  72
  73 /* rxk_kpork -  send pkt over to netwerk kporc for processing */
  74 rxk_kpork(m)
  75 register struct mbuf *m;
  76 {
  77     find_input_type(0xdead, m, &rxk_bogosity, 0);
  78 }
  79
  80 /*
  81  * AIX 4.3.3 changed the type of the second argument to
  82  * ip_stripoptions().  The ip_stripoptions() prototype is in
  83  * <netinet/proto_inet.h>.  This header file also acquired a guard
  84  * macro, _PROTO_INET_H_, at the same time.  So we test for the guard
  85  * macro to see which type we need to use for the second argument to
  86  * ip_stripoptions().
  87  *
  88  * This way we don't have to introduce a port just to compile AFS on AIX
  89  * 4.3.3.
  90  */
  91
  92 #if defined(_PROTO_INET_H_)     /* AIX 4.3.3 and presumably later */
  93 #define STRIP_ARG2_TYPE unsigned long
  94 #else                           /* AIX 4.3.2 and earlier */
  95 #define STRIP_ARG2_TYPE struct mbuf *
  96 #endif
  97
  98 void ip_stripoptions(struct mbuf *m, STRIP_ARG2_TYPE mopt)
  99 {
 100         struct ip *ip = mtod(m, struct ip *);
 101         register int i;
 102         register caddr_t opts;
 103         int olen;
 104
 105         olen = (ip->ip_hl<<2) - sizeof (struct ip);
 106         opts = (caddr_t)(ip + 1);
 107         i = m->m_len - (sizeof (struct ip) + olen);
 108         bcopy(opts  + olen, opts, (unsigned)i);
 109         m->m_len -= olen;
 110         if (m->m_flags & M_PKTHDR)
 111                 m->m_pkthdr.len -= olen;
 112         ip->ip_hl = sizeof(struct ip) >> 2;
 113 }
 114
 115 /* rxk_RX_input -       RX pkt input process */
 116 rxk_RX_input(am)
 117 register struct mbuf *am; {
 118     register unsigned short *tsp;
 119     int hdr;
 120     struct udphdr *tu;
 121     register struct ip *ti;
 122     struct udpiphdr *tvu;
 123     register int i;
 124     char *phandle;
 125     long code;
 126     struct sockaddr_in taddr;
 127     int tlen;
 128     short port;
 129     int data_len, comp_sum;
 130
 131     hdr = (ti = mtod(am, struct ip *))->ip_hl;
 132     if (hdr > 5) {
 133         ip_stripoptions(am, 0); /* get rid of anything we don't need */
 134     }
 135     tu = (struct udphdr *)(((char *)ti) + 20);
 136     /*
 137      * Make mbuf data length reflect UDP length.
 138      * If not enough data to reflect UDP length, drop.
 139      */
 140     tvu = (struct udpiphdr *)ti;
 141     tlen = ntohs((u_short)tvu->ui_ulen);
 142     if ((int)ti->ip_len != tlen) {
 143         if (tlen > (int)ti->ip_len) {
 144             m_free(am);
 145             return;
 146         }
 147         m_adj(am, tlen - (int)ti->ip_len);
 148     }
 149     /* deliver packet to rx */
 150     taddr.sin_family = AF_INET;     /* compute source address */
 151     taddr.sin_port = tu->uh_sport;
 152     taddr.sin_addr.s_addr = ti->ip_src.s_addr;
 153     /* handle the checksum.  Note that this code damages the actual ip
 154        header (replacing it with the virtual one, which is the same size),
 155        so we must ensure we get everything out we need, first */
 156     if ( tu->uh_sum != 0) {
 157         /* if the checksum is there, always check it. It's crazy not
 158          * to, unless you can really be sure that your
 159          * underlying network (and interfaces and drivers and
 160          * DMA hardware, etc!) is error-free. First, fill
 161          * in entire virtual ip header. */
 162         tvu->ui_next = 0;
 163         tvu->ui_prev = 0;
 164         tvu->ui_x1 = 0;
 165         tvu->ui_len = tvu->ui_ulen;
 166         am->m_flags |= M_PKTHDR;
 167         am->m_pkthdr.len = tlen;
 168         if (in_cksum(am, sizeof(struct ip) + tlen)) {
 169             /* checksum, including cksum field, doesn't come out 0, so
 170                this packet is bad */
 171             m_freem(am);
 172             return;
 173         }
 174     }
 175
 176     /*
 177      * 28 is IP (20) + UDP (8) header.  ulen includes
 178      * udp header, and we *don't* tell RX about udp
 179      * header either.  So, we remove those 8 as well.
 180      */
 181     data_len = ntohs(tu->uh_ulen);
 182     data_len -= 8;
 183     if (!(*rxk_GetPacketProc)(&phandle, data_len)) {
 184         if (rx_mb_to_packet(am, m_freem, 28, data_len, phandle)) {
 185             /* XXX should just increment counter here.. */
 186             printf("rx: truncated UDP packet\n");
 187             rxi_FreePacket(phandle);
 188         } else
 189             (*rxk_PacketArrivalProc)(phandle, &taddr, rx_socket, data_len);
 190     } else
 191         m_freem(am);
 192 }
 193
 194 /* rxk_isr - RX Kluge Input Service Routine */
 195 static rxk_isr() {
 196     register struct mbuf *m;
 197     IFQ_LOCK_DECL();    /* silly macro has trailing ';'.  Sigh. */
 198     while (1) {
 199         IF_DEQUEUE(&rxk_q, m);
 200         if (!m) return;
 201         rxk_RX_input(m);
 202     }
 203 }
 204
 205 /*
 206  * UDP fast timer to raise events for all but Solaris and NCR.
 207  * Called about 5 times per second (at unknown priority?).  Must go to
 208  * splnet or obtain global lock before touching anything significant.
 209  */
 210 static void rxk_fasttimo (void)
 211 {
 212     int (*tproc)();
 213     struct clock temp;
 214
 215     /* do rx fasttimo processing here */
 216     rxevent_RaiseEvents(&temp);
 217     if (tproc = parent_proto.pr_fasttimo) (*tproc)();
 218 }
 219
 220
 221 void rxk_init(void)
 222 {
 223     register struct protosw *pr;
 224     extern struct protosw *pffindproto();
 225
 226     if (!rxk_initDone &&
 227         (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
 228         parent_proto = *pr;
 229
 230         pr->pr_input    = rxk_input;
 231         pr->pr_fasttimo = rxk_fasttimo;
 232
 233
 234         /*
 235          * don't bother with pr_drain and pr_ctlinput
 236          * until we have something to do
 237          */
 238         rxk_q.ifq_maxlen = 128;         /* obligatory XXX       */
 239         /* add pseudo pkt types as haque to get back onto net kproc */
 240         if (!add_input_type(0xdead, NET_KPROC, rxk_isr, &rxk_q, NETISR_MAX-1))
 241                 rxk_initDone = 1;
 242     }
 243
 244     if (!rxk_initDone) {
 245         printf("\nAFS: no INTERNET protocol support found\n");
 246     }
 247 }
 248
 249
 250
 251 void shutdown_rxkernel(void)
 252 {
 253     register struct protosw *pr;
 254     register int i;
 255     extern struct protosw *pffindproto();
 256
 257     if (rxk_initDone && (pr = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM))) {
 258         *pr = parent_proto;
 259
 260         rxk_initDone = 0;
 261         for (i=0; i<MAXRXPORTS;i++) {
 262             if (rxk_ports[i]) {
 263                 rxk_ports[i] = 0;
 264                 soclose((struct socket *)rxk_portRocks[i]);
 265                 rxk_portRocks[i] = (char *)0;
 266             }
 267         }
 268     }
 269 }
 270
 271
 272 /* osi_NetSend - send asize bytes at adata from asocket to host at addr.
 273  *
 274  * Now, why do we allocate a new buffer when we could theoretically use the one
 275  * pointed to by adata?  Because PRU_SEND returns after queueing the message,
 276  * not after sending it.  If the sender changes the data after queueing it,
 277  * we'd see the already-queued data change.  One attempt to fix this without
 278  * adding a copy would be to have this function wait until the datagram is
 279  * sent; however this doesn't work well.  In particular, if a host is down, and
 280  * an ARP fails to that host, this packet will be queued until the ARP request
 281  * comes back, which could be hours later.  We can't block in this routine that
 282  * long, since it prevents RPC timeouts from happening.
 283  */
 284 /* XXX In the brave new world, steal the data bufs out of the rx_packet iovec,
 285  * and just queue those.  XXX
 286  */
 287
 288 osi_NetSend(asocket, addr, dvec, nvec, asize, istack)
 289 register struct socket *asocket;
 290 struct iovec *dvec;
 291 int nvec;
 292 register afs_int32 asize;
 293 struct sockaddr_in *addr;
 294 int istack;
 295 {
 296     register struct mbuf *tm, *um;
 297     register afs_int32 code;
 298     struct mbuf *top = 0;
 299     register struct mbuf *m, **mp;
 300     int len, mlen;
 301     char *tdata;
 302     caddr_t tpa;
 303     int i,tl,rlen;
 304
 305     AFS_STATCNT(osi_NetSend);
 306 #ifndef AFS_AIX41_ENV
 307     /*
 308      * VRMIX has a version of sun's mclgetx() that works correctly with
 309      * respect to mcopy(), so we can just dummy up the entire packet as
 310      * an mbuf cluster, and pass it to the IP output routine (which will
 311      * most likely have to frag it, but since mclgetx() has been fixed,
 312      * will work ok).  The only problem is that we have to wait until
 313      * m_free() has been called on the cluster, to guarantee that we
 314      * do not muck with it until it has gone out.  We also must refrain
 315      * from inadvertantly touching a piece of data that falls within the
 316      * same cache line as any portion of the packet, if we have been lucky
 317      * enough to be DMA-ing directly out from it.
 318      * Certain IBM architects assure me that the rios is fast enough
 319      * that the cost of the extra copy, as opposed to trying to
 320      * DMA directly from the packet is barely worth my while,
 321      * but I have a hard time accepting this.
 322      *
 323      * We can only use this code once we are passed in an indication of
 324      * whether we are being called `process-synchronously' or not.
 325      *
 326      * of course, the packet must be pinned, which is currently true,
 327      * but in future may not be.
 328      */
 329 #endif
 330     mp = &top;
 331     i = 0;
 332     tdata = dvec[0].iov_base;
 333     tl = dvec[0].iov_len;
 334
 335     while (1) {
 336         if (!top) {
 337             MGETHDR(m, M_DONTWAIT, MT_DATA);
 338             mlen = MHLEN;
 339         } else {
 340             MGET(m, M_DONTWAIT, MT_DATA);
 341             mlen = MLEN;
 342         }
 343         if (!m) {
 344            /* can't get an mbuf, give up */
 345            if (top)
 346               m_freem(top);     /* free mbuf list we're building */
 347            return 1;
 348         }
 349         if (!top) {
 350            m->m_flags |= M_PKTHDR; /* XXX - temp */
 351            m->m_pkthdr.len = 0;
 352            m->m_pkthdr.rcvif = (struct ifnet *)0;
 353         }
 354
 355             /*
 356              * WARNING: the `4 * MLEN' is somewhat dubious.  It is better than
 357              * `NBPG', which may have no relation to `CLBYTES'.  Also,
 358              * `CLBYTES' may be so large that we never use clusters,
 359              * resulting in far too many mbufs being used.  It is often
 360              * better to briefly use a cluster, even if we are only using a
 361              * portion of it.  Since we are on the xmit side, it shouldn't
 362              * end up sitting on a queue for a potentially unbounded time
 363              * (except perhaps if we are talking to ourself).
 364              */
 365             if (asize >= (MHLEN + 3*MLEN)) {
 366                 MCLGET(m,M_DONTWAIT);
 367             }
 368             /* now compute usable size */
 369             if (M_HASCL(m)) {
 370                 len = MIN(m->m_ext.ext_size, asize);
 371             } else {
 372                 len = MIN(mlen, asize);
 373             }
 374
 375         tpa = mtod(m, caddr_t);
 376         *mp = m;
 377         mp = &m->m_next;
 378         m->m_len = 0;
 379         while (len) {
 380           rlen = MIN(len, tl);
 381           bcopy(tdata, tpa, rlen);
 382           asize -= rlen;
 383           len -= rlen;
 384           tpa += rlen;
 385           m->m_len += rlen;
 386           top->m_pkthdr.len += rlen;
 387           tdata += rlen;
 388           tl -= rlen;
 389           if (tl <= 0) {
 390             i++;
 391             if (i > nvec) {
 392               /* shouldn't come here! */
 393               asize = 0;   /* so we make progress toward completion */
 394               break;
 395             }
 396             tdata = dvec[i].iov_base;
 397             tl = dvec[i].iov_len;
 398           }
 399         }
 400
 401         if (asize <= 0)
 402           break;
 403         }
 404         tm = top;
 405
 406         tm->m_act = (struct mbuf *) 0;
 407
 408         /* setup mbuf corresponding to destination address */
 409         MGETHDR(um, M_DONTWAIT, MT_SONAME);
 410         if (!um) {
 411             if (top)
 412                 m_freem(top);   /* free mbuf chain */
 413             return 1;
 414         }
 415         bcopy(addr, mtod(um, caddr_t), sizeof(*addr));
 416         um->m_len = sizeof(*addr);
 417         um->m_pkthdr.len = sizeof(*addr);
 418         um->m_flags |= M_PKTHDR;
 419
 420     SOCKET_LOCK(asocket);
 421     code = (*asocket->so_proto->pr_usrreq)(asocket, PRU_SEND, tm, um, 0);
 422     SOCKET_UNLOCK(asocket);
 423     m_free(um);
 424
 425     return code;
 426 }
 427
 428
 429
 430 #endif /* AFS_AIX41_ENV */