2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Implementation of the AFS FileServer probe facility.
14 *------------------------------------------------------------------------*/
16 #include <afsconfig.h>
17 #include <afs/param.h>
21 #include <fsprobe.h> /*Interface for this module*/
22 #include <lwp.h> /*Lightweight process package*/
23 #include <afs/cellconfig.h>
25 #define LWP_STACK_SIZE (16 * 1024)
28 * Routines we need that don't have explicit include file definitions.
30 extern int RXAFSCB_ExecuteRequest(); /*AFS callback dispatcher; handed to rx_NewService() by fsprobe_Init()*/
31 extern char *hostutil_GetNameByINet(); /*Host parsing utility; fsprobe_Init() uses it to turn a server address into a printable name (null result means no mapping)*/
34 * Help out the linker by explicitly importing the callback routines.
36 extern afs_int32 SRXAFSCB_CallBack(); /*Callback stub; called with null arguments by fsprobe_CleanupInit()*/
37 extern afs_int32 SRXAFSCB_InitCallBackState2(); /*Callback stub; called with null arguments by fsprobe_CleanupInit()*/
38 extern afs_int32 SRXAFSCB_Probe(); /*Callback stub; called with a null rx_call by fsprobe_CleanupInit()*/
43 int fsprobe_numServers; /*Num servers connected; set from a_numServers by fsprobe_Init()*/
44 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /*Ptr to connection array; allocated by fsprobe_Init() with one entry per server*/
45 struct fsprobe_ProbeResults fsprobe_Results; /*Latest probe results; stats and probeOK arrays are refilled by the probe LWP on each sweep*/
46 int fsprobe_ProbeFreqInSecs; /*Probe freq. in seconds; used as the probe LWP sleep timeout between sweeps*/
51 static int fsprobe_initflag = 0; /*Was init routine called? Set to 1 by fsprobe_Init()*/
52 static int fsprobe_debug = 0; /*Debugging output enabled? Copied from a_debug by fsprobe_Init()*/
53 static int (*fsprobe_Handler)(); /*Probe handler routine; called by the probe LWP after each sweep*/
54 static PROCESS probeLWP_ID; /*Probe LWP process ID; filled in by LWP_CreateProcess(), used by fsprobe_ForceProbeNow()*/
55 static int fsprobe_statsBytes; /*Num bytes in stats block (fsprobe_Results.stats)*/
56 static int fsprobe_probeOKBytes; /*Num bytes in probeOK block (fsprobe_Results.probeOK)*/
59 * We have to pass a port to Rx to start up our callback listener
60 * service, but 7001 is already taken up by the Cache Manager. So,
63 #define FSPROBE_CBPORT 7101
66 /*------------------------------------------------------------------------
67 * [private] fsprobe_CleanupInit
70 * Set up for recovery after an error in initialization (i.e.,
71 * during a call to fsprobe_Init.
78 * Error value otherwise.
81 * This routine is private to the module.
84 * Zeros out basic data structures.
85 *------------------------------------------------------------------------*/
87 static int fsprobe_CleanupInit()
89 { /*fsprobe_CleanupInit*/
91 afs_int32 code; /*Return code from callback stubs*/
92 struct rx_call *rxcall; /*Bogus param*/
93 AFSCBFids *Fids_Array; /*Bogus param*/
94 AFSCBs *CallBack_Array; /*Bogus param*/
95 struct interfaceAddr *interfaceAddr; /*Bogus param*/
97 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
98 bzero(fsprobe_Results, sizeof(struct fsprobe_ProbeResults));
100 rxcall = (struct rx_call *)0;
101 Fids_Array = (AFSCBFids *)0;
102 CallBack_Array = (AFSCBs *)0;
103 interfaceAddr = (struct interfaceAddr *)0;
105 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
108 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
111 code = SRXAFSCB_Probe(rxcall);
114 } /*fsprobe_CleanupInit*/
117 /*------------------------------------------------------------------------
118 * [exported] fsprobe_Cleanup
121 * Clean up our memory and connection state.
124 * int a_releaseMem : Should we free up malloc'ed areas?
127 * 0 on total success,
128 * -1 if the module was never initialized, or there was a problem
129 * with the fsprobe connection array.
132 * fsprobe_numServers should be properly set. We don't do anything
133 * unless fsprobe_Init() has already been called.
136 * Shuts down Rx connections gracefully, frees allocated space
138 *------------------------------------------------------------------------*/
140 int fsprobe_Cleanup(a_releaseMem)
143 { /*fsprobe_Cleanup*/
145 static char rn[] = "fsprobe_Cleanup"; /*Routine name*/
146 int code; /*Return code*/
147 int conn_idx; /*Current connection index*/
148 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to fsprobe connection*/
151 * Assume the best, but check the worst.
153 if (!fsprobe_initflag) {
154 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
161 * Take care of all Rx connections first. Check to see that the
162 * server count is a legal value.
164 if (fsprobe_numServers <= 0) {
166 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
167 rn, fsprobe_numServers);
171 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
173 * The fsprobe connection structure array exists. Go through it
174 * and close up any Rx connections it holds.
176 curr_conn = fsprobe_ConnInfo;
177 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
178 if (curr_conn->rxconn != (struct rx_connection *)0) {
179 rx_DestroyConnection(curr_conn->rxconn);
180 curr_conn->rxconn = (struct rx_connection *)0;
182 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
183 rx_DestroyConnection(curr_conn->rxVolconn);
184 curr_conn->rxVolconn = (struct rx_connection *)0;
187 } /*for each fsprobe connection*/
188 } /*fsprobe connection structure exists*/
189 } /*Legal number of servers*/
192 * Now, release all the space we've allocated, if asked to.
195 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
196 free(fsprobe_ConnInfo);
197 if (fsprobe_Results.stats != (struct ProbeViceStatistics *)0)
198 free(fsprobe_Results.stats);
199 if (fsprobe_Results.probeOK != (int *)0)
200 free(fsprobe_Results.probeOK);
204 * Return the news, whatever it is.
208 } /*fsprobe_Cleanup*/
210 /*------------------------------------------------------------------------
 * [private] fsprobe_LWP
 *
 * Description:
 *	This LWP iterates over the server connections and gathers up
 *	the desired statistics from each one on a regular basis.  When
 *	the sweep is done, the associated handler function is called
 *	to process the new data.
 *
 *	Each pass of the service loop:
 *	  1. bumps fsprobe_Results.probeNum and zeroes the stats and
 *	     probeOK arrays (fsprobe_statsBytes and fsprobe_probeOKBytes
 *	     bytes respectively);
 *	  2. for a connection whose rxconn is non-null, calls
 *	     RXAFS_GetStatistics() on the current stats slot and stores
 *	     the call result in the current probeOK slot;
 *	  3. for a connection whose rxVolconn is non-null, walks the
 *	     first partCnt entries of partList and, for entries flagged
 *	     PARTVALID, maps the partition id to a name and calls
 *	     AFSVolPartitionInfo(); Disk[i] of the current stats slot
 *	     receives partition.free as BlocksAvailable,
 *	     partition.minFree as TotalBlocks and the partition name;
 *	  4. calls fsprobe_Handler(), which takes no arguments and reads
 *	     the results through the objects exported by this module;
 *	  5. sleeps in IOMGR_Select() with a timeout whose tv_sec is
 *	     fsprobe_ProbeFreqInSecs.
 *	Progress and error messages go to stderr.
 *
 * Arguments:
 *	None.
 *
 * Returns:
 *	Nothing; the service loop is a while (1).
 *
 * Environment:
 *	Started by fsprobe_Init(), uses global structures.
 *
 * NOTE(review): TotalBlocks is filled from partition.minFree rather
 *	than from a total-size field - confirm this is intended.
 * NOTE(review): the partition name is copied with strcpy() and Disk[]
 *	is indexed by the partition index; the sizes of Name and Disk[]
 *	are not visible here - confirm both are large enough.
 * NOTE(review): the message printed before the volume server call
 *	still names RXAFS_GetStatistics.
 *------------------------------------------------------------------------*/
231 static void fsprobe_LWP()
235 static char rn[] = "fsprobe_LWP"; /*Routine name*/
236 register afs_int32 code; /*Results of calls*/
237 struct timeval tv; /*Time structure*/
238 int conn_idx; /*Connection index*/
239 struct fsprobe_ConnectionInfo *curr_conn; /*Current connection*/
240 struct ProbeViceStatistics *curr_stats; /*Current stats region*/
241 int *curr_probeOK; /*Current probeOK field*/
243 while (1) { /*Service loop*/
245 * Iterate through the server connections, gathering data.
246 * Don't forget to bump the probe count and zero the statistics
247 * areas before calling the servers.
250 fprintf(stderr, "[%s] Waking up, collecting data from %d connected servers\n",
251 rn, fsprobe_numServers);
252 curr_conn = fsprobe_ConnInfo;
253 curr_stats = fsprobe_Results.stats;
254 curr_probeOK = fsprobe_Results.probeOK;
255 fsprobe_Results.probeNum++;
256 bzero(fsprobe_Results.stats, fsprobe_statsBytes);
257 bzero(fsprobe_Results.probeOK, fsprobe_probeOKBytes);
259 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
261 * Grab the statistics for the current FileServer, if the
262 * connection is valid.
265 fprintf(stderr, "[%s] Contacting server %s\n", rn, curr_conn->hostName);
266 if (curr_conn->rxconn != (struct rx_connection *)0) {
268 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
269 *curr_probeOK = RXAFS_GetStatistics(curr_conn->rxconn, curr_stats);
271 } /*Valid Rx connection*/
274 * Call the Volume Server too to get additional stats
277 fprintf(stderr, "[%s] Contacting volume server %s\n", rn, curr_conn->hostName);
278 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
281 struct diskPartition partition;
284 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
285 for (i = 0 ; i < curr_conn->partCnt; i++) {
286 if (curr_conn->partList.partFlags[i] & PARTVALID) {
287 MapPartIdIntoName(curr_conn->partList.partId[i], pname);
288 code = AFSVolPartitionInfo(curr_conn->rxVolconn, pname, &partition);
290 fprintf(stderr, "Could not get information on server %s partition %s\n", curr_conn->hostName, pname);
292 curr_stats->Disk[i].BlocksAvailable = partition.free;
293 curr_stats->Disk[i].TotalBlocks = partition.minFree;
294 strcpy(curr_stats->Disk[i].Name, pname);
303 * Advance the fsprobe connection pointer & stats pointer.
309 } /*For each fsprobe connection*/
312 * All (valid) connections have been probed. Now, call the
313 * associated handler function. The handler does not take
314 * any explicit parameters, rather gets to the goodies via
315 * some of the objects exported by this module.
318 fprintf(stderr, "[%s] Polling complete, calling associated handler routine.\n",
320 code = fsprobe_Handler();
322 fprintf(stderr, "[%s] Handler routine returned error code %d\n", rn, code);
325 * Fall asleep for the prescribed number of seconds.
327 tv.tv_sec = fsprobe_ProbeFreqInSecs;
330 fprintf(stderr, "[%s] Falling asleep for %d seconds\n", rn, fsprobe_ProbeFreqInSecs);
331 code = IOMGR_Select(0, /*Num fids*/
332 0, /*Descriptors ready for reading*/
333 0, /*Descriptors ready for writing*/
334 0, /*Descriptors w/exceptional conditions*/
335 &tv); /*Ptr to timeout structure*/
337 fprintf(stderr, "[%s] IOMGR_Select returned code %d\n", rn, code);
342 /*
 * XListPartitions
 *
 * Description:
 *	List all the partitions on the volume server reached through
 *	aconn, recording their ids and flags in *ptrPartList.
 *
 *	Two RPC interfaces are supported, selected by the file-scope
 *	flag newvolserver (initially 0, kept across calls):
 *	  - flag clear: AFSVolXListPartitions() is called with an empty
 *	    partEntries.  A result of RXGEN_OPCODE sets the flag to 1.
 *	    Otherwise *cntp is set to the returned entry count, a
 *	    warning is printed when it exceeds VOLMAXPARTS, each entry
 *	    is copied into partId[] and flagged PARTVALID, and the
 *	    returned array is freed.
 *	  - flag set: the 26 slots of partIds are preset to -1,
 *	    AFSVolListPartitions() is called, and every slot that is
 *	    not -1 afterwards is stored at partId[j] and flagged
 *	    PARTVALID.
 *	A message is printed on stderr when the list cannot be fetched.
 *
 * Arguments:
 *	struct rx_connection *aconn  : Rx connection to the volume server.
 *	struct partList *ptrPartList : Receives partition ids and flags.
 *	cntp                         : Receives the partition count.
 *
 * Returns:
 *	NOTE(review): the return statements are not visible here;
 *	fsprobe_Init() stores the result in a local before setting
 *	partCnt - presumably 0 means success, confirm.
 *
 * NOTE(review): despite its name, newvolserver == 1 selects the older
 *	AFSVolListPartitions() call (see the comment where it is set).
 */
343 static int newvolserver=0;
344 XListPartitions(aconn, ptrPartList, cntp)
345 struct rx_connection *aconn;
346 struct partList *ptrPartList;
350 struct partEntries partEnts;
351 register int i, j=0, code;
354 if (newvolserver == 1) {
355 for(i = 0; i < 26; i++)
356 partIds.partIds[i] = -1;
358 code = AFSVolListPartitions(aconn, &partIds);
360 for (i = 0;i < 26; i++) {
361 if((partIds.partIds[i]) != -1) {
362 ptrPartList->partId[j] = partIds.partIds[i];
363 ptrPartList->partFlags[j] = PARTVALID;
366 ptrPartList->partFlags[i] = 0;
372 partEnts.partEntries_len = 0;
373 partEnts.partEntries_val = (afs_int32 *)0;
374 code = AFSVolXListPartitions(aconn, &partEnts);
376 if (code == RXGEN_OPCODE) {
377 newvolserver = 1; /* Doesn't support new interface */
384 *cntp = partEnts.partEntries_len;
385 if (*cntp > VOLMAXPARTS) {
386 fprintf(stderr,"Warning: number of partitions on the server too high %d (process only %d)\n",
390 for (i = 0;i < *cntp; i++) {
391 ptrPartList->partId[i] = partEnts.partEntries_val[i];
392 ptrPartList->partFlags[i] = PARTVALID;
394 free(partEnts.partEntries_val);
398 fprintf(stderr,"Could not fetch the list of partitions from the server\n");
403 /*------------------------------------------------------------------------
 * [exported] fsprobe_Init
 *
 * Description:
 *	Initialize the fsprobe module: set up Rx connections to the
 *	given set of servers, start up the probe and callback LWPs,
 *	and associate the routine to be called when a probe completes.
 *
 *	In order, the routine:
 *	  - complains if it has already been called (fsprobe_initflag),
 *	    otherwise sets that flag;
 *	  - checks its arguments and records them in the module globals;
 *	  - calls fsprobe_CleanupInit(), then allocates fsprobe_ConnInfo,
 *	    fsprobe_Results.stats and fsprobe_Results.probeOK, each sized
 *	    for a_numServers entries, and zeroes probeNum, probeTime and
 *	    the stats area;
 *	  - initializes Rx with rx_Init(), starting at FSPROBE_CBPORT;
 *	  - creates a null server security object (callback listener)
 *	    and a null client security object (probe connections);
 *	  - for each server: copies its socket info, resolves a
 *	    printable host name with hostutil_GetNameByINet() (empty
 *	    name when that fails), opens an Rx connection to the given
 *	    address and port (service 1) and another to the volume
 *	    server (AFSCONF_VOLUMEPORT, VOLSERVICE_ID), then clears
 *	    partList and asks XListPartitions() for the partitions;
 *	  - creates the "afs" callback service with
 *	    RXAFSCB_ExecuteRequest as dispatcher and calls
 *	    rx_StartServer(0);
 *	  - creates the probe LWP running fsprobe_LWP (stack size
 *	    LWP_STACK_SIZE, id stored in probeLWP_ID) and calls
 *	    osi_Wakeup().
 *	After an allocation, security object, service or LWP creation
 *	failure, fsprobe_Cleanup(1) is called to release what was
 *	allocated.
 *
 * Arguments:
 *	int a_numServers		  : Num. servers to connect to.
 *	struct sockaddr_in *a_socketArray : Array of server sockets.
 *	int a_ProbeFreqInSecs		  : Probe frequency in seconds.
 *	int (*a_ProbeHandler)()		  : Ptr to probe handler fcn.
 *	int a_debug;			  : Turn debugging output on?
 *
 * Returns:
 *	-2 for (at least one) connection error,
 *	LWP process creation code, if it failed,
 *	-1 for other fatal errors.
 *	NOTE(review): the success value is not visible here -
 *	presumably 0; confirm.
 *
 * Environment:
 *	*** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
 *	Also, the server security object CBsecobj MUST be a static,
 *	since it has to stick around after this routine exits.
 *
 * Side Effects:
 *	Sets up just about everything.
 *
 * NOTE(review): fsprobe_initflag is set before the arguments are
 *	checked and before any allocation, so as far as is visible a
 *	failed call still leaves the module marked as initialized -
 *	confirm this is intended.
 * NOTE(review): the host name is copied with strcpy(); the size of
 *	hostName is not visible here - confirm it cannot overflow.
 * NOTE(review): the message printed before the volume server
 *	connection reports htons(7005) and service 1, not the
 *	AFSCONF_VOLUMEPORT and VOLSERVICE_ID values actually passed.
 * NOTE(review): several messages print sizeof-based byte counts with
 *	%d and connection pointers with %lx - confirm the conversions
 *	match the argument types on the target platforms.
 *------------------------------------------------------------------------*/
433 int fsprobe_Init(a_numServers, a_socketArray, a_ProbeFreqInSecs, a_ProbeHandler, a_debug)
435 struct sockaddr_in *a_socketArray;
436 int a_ProbeFreqInSecs;
437 int (*a_ProbeHandler)();
442 static char rn[] = "fsprobe_Init"; /*Routine name*/
443 register afs_int32 code; /*Return value*/
444 static struct rx_securityClass *CBsecobj; /*Callback security object*/
445 struct rx_securityClass *secobj; /*Client security object*/
446 struct rx_service *rxsrv_afsserver; /*Server for AFS*/
447 int arg_errfound; /*Argument error found?*/
448 int curr_srv; /*Current server idx*/
449 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to current conn*/
450 char *hostNameFound; /*Ptr to returned host name*/
451 int conn_err; /*Connection error?*/
452 int PortToUse; /*Callback port to use*/
455 * If we've already been called, snicker at the bozo, gently
456 * remind him of his doubtful heritage, and return success.
458 if (fsprobe_initflag) {
459 fprintf(stderr, "[%s] Called multiple times!\n", rn);
463 fsprobe_initflag = 1;
466 * Check the parameters for bogosities.
469 if (a_numServers <= 0) {
470 fprintf(stderr, "[%s] Illegal number of servers: %d\n",
474 if (a_socketArray == (struct sockaddr_in *)0) {
475 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
478 if (a_ProbeFreqInSecs <= 0) {
479 fprintf(stderr, "[%s] Illegal probe frequency: %d\n",
480 rn, a_ProbeFreqInSecs);
483 if (a_ProbeHandler == (int (*)())0) {
484 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
491 * Record our passed-in info.
493 fsprobe_debug = a_debug;
494 fsprobe_numServers = a_numServers;
495 fsprobe_Handler = a_ProbeHandler;
496 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
499 * Get ready in case we have to do a cleanup - basically, zero
502 fsprobe_CleanupInit();
505 * Allocate the necessary data structures and initialize everything
509 (struct fsprobe_ConnectionInfo *)
510 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
511 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
513 "[%s] Can't allocate %d connection info structs (%d bytes)\n",
515 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
516 return(-1); /*No cleanup needs to be done yet*/
520 fprintf(stderr, "[%s] fsprobe_ConnInfo allocated (%d bytes)\n",
521 rn, a_numServers * sizeof(struct fsprobe_ConnectionInfo));
524 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
525 fsprobe_Results.stats = (struct ProbeViceStatistics *)
526 malloc(fsprobe_statsBytes);
527 if (fsprobe_Results.stats == (struct ProbeViceStatistics *)0) {
529 "[%s] Can't allocate %d statistics structs (%d bytes)\n",
530 rn, a_numServers, fsprobe_statsBytes);
531 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
536 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
537 rn, fsprobe_statsBytes);
539 fsprobe_probeOKBytes = a_numServers * sizeof(int);
540 fsprobe_Results.probeOK = (int *) malloc(fsprobe_probeOKBytes);
541 if (fsprobe_Results.probeOK == (int *)0) {
543 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
544 rn, a_numServers, fsprobe_probeOKBytes);
545 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
551 "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
552 rn, fsprobe_probeOKBytes);
554 fsprobe_Results.probeNum = 0;
555 fsprobe_Results.probeTime = 0;
556 bzero(fsprobe_Results.stats,
557 (a_numServers * sizeof(struct ProbeViceStatistics)));
560 * Initialize the Rx subsystem, just in case nobody's done it.
563 fprintf(stderr, "[%s] Initializing Rx\n", rn);
564 PortToUse = FSPROBE_CBPORT;
566 code = rx_Init(htons(PortToUse));
568 if (code == RX_ADDRINUSE) {
570 fprintf(stderr, "[%s] Callback port %d in use, advancing\n",
575 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
581 fprintf(stderr, "[%s] Rx initialized on port %d\n", rn, PortToUse);
584 * Create a null Rx server security object, to be used by the
587 CBsecobj = (struct rx_securityClass *) rxnull_NewServerSecurityObject();
588 if (CBsecobj == (struct rx_securityClass *)0) {
590 "[%s] Can't create null security object for the callback listener.\n",
592 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
596 fprintf(stderr, "[%s] Callback server security object created\n", rn);
599 * Create a null Rx client security object, to be used by the
602 secobj = (struct rx_securityClass *) rxnull_NewClientSecurityObject();
603 if (secobj == (struct rx_securityClass *)0) {
605 "[%s] Can't create client security object for probe LWP.\n",
607 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
611 fprintf(stderr, "[%s] Probe LWP client security object created\n",
614 curr_conn = fsprobe_ConnInfo;
616 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
618 * Copy in the socket info for the current server, resolve its
619 * printable name if possible.
622 fprintf(stderr, "[%s] Copying in the following socket info:\n", rn);
623 fprintf(stderr, "[%s] IP addr 0x%lx, port %d\n", rn,
624 (a_socketArray + curr_srv)->sin_addr.s_addr,
625 (a_socketArray + curr_srv)->sin_port);
627 bcopy(a_socketArray + curr_srv,
629 sizeof(struct sockaddr_in));
631 hostNameFound = hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
632 if (hostNameFound == (char *)0) {
634 "[%s] Can't map Internet address %lu to a string name\n",
635 rn, curr_conn->skt.sin_addr.s_addr);
636 curr_conn->hostName[0] = '\0';
639 strcpy(curr_conn->hostName, hostNameFound);
641 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
642 rn, curr_srv, curr_conn->hostName);
646 * Make an Rx connection to the current server.
650 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
651 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
652 curr_conn->skt.sin_port);
654 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
655 curr_conn->skt.sin_port, /*Server port*/
656 1, /*AFS service num*/
657 secobj, /*Security object*/
658 0); /*Number of above*/
659 if (curr_conn->rxconn == (struct rx_connection *)0) {
661 "[%s] Can't create Rx connection to server %s (%lu)\n",
662 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
666 fprintf(stderr, "[%s] New connection at 0x%lx\n",
667 rn, curr_conn->rxconn);
670 * Make an Rx connection to the current volume server.
674 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
675 rn, curr_srv, curr_conn->skt.sin_addr.s_addr, htons(7005));
676 curr_conn->rxVolconn =
677 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
678 htons(AFSCONF_VOLUMEPORT), /*Volume Server port*/
679 VOLSERVICE_ID, /*AFS service num*/
680 secobj, /*Security object*/
681 0); /*Number of above*/
682 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
684 "[%s] Can't create Rx connection to volume server %s (%lu)\n",
685 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
690 bzero(&curr_conn->partList, sizeof(struct partList));
691 curr_conn->partCnt = 0;
692 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList, &cnt);
694 curr_conn->partCnt = cnt;
698 fprintf(stderr, "[%s] New connection at 0x%lx\n",
699 rn, curr_conn->rxVolconn);
703 * Bump the current fsprobe connection to set up.
710 * Create the AFS callback service (listener).
713 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
715 rx_NewService(0, /*Use default port*/
717 "afs", /*Service name*/
718 &CBsecobj, /*Ptr to security object(s)*/
719 1, /*Number of security objects*/
720 RXAFSCB_ExecuteRequest); /*Dispatcher*/
721 if (rxsrv_afsserver == (struct rx_service *)0) {
722 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n", rn);
723 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
727 fprintf(stderr, "[%s] Callback listener created\n", rn);
730 * Start up the AFS callback service.
733 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
734 rx_StartServer(0 /*Don't donate yourself to LWP pool*/);
737 * Start up the probe LWP.
740 fprintf(stderr, "[%s] Creating the probe LWP\n", rn);
742 LWP_CreateProcess(fsprobe_LWP, /*Function to start up*/
743 LWP_STACK_SIZE, /*Stack size in bytes*/
746 "fsprobe Worker", /*Name to use*/
747 &probeLWP_ID); /*Returned LWP process ID*/
749 fprintf(stderr, "[%s] Can't create fsprobe LWP! Error is %d\n", rn, code);
750 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
755 "[%s] Probe LWP process structure located at 0x%x\n",
760 * Do I need to do this?
763 fprintf(stderr, "[%s] Calling osi_Wakeup()\n", rn);
764 osi_Wakeup(&rxsrv_afsserver); /*Wake up anyone waiting for it*/
768 * Return the final results.
778 /*------------------------------------------------------------------------
 * [exported] fsprobe_ForceProbeNow
 *
 * Description:
 *	Wake up the probe LWP, forcing it to execute a probe immediately.
 *	This is done by calling IOMGR_Cancel() on probeLWP_ID, the
 *	process ID filled in when fsprobe_Init() created the probe LWP.
 *
 * Arguments:
 *	None.
 *
 * Returns:
 *	Error value if the request could not be carried out; a message
 *	is printed on stderr when fsprobe_Init() has not been called.
 *	NOTE(review): the return statements are not visible here -
 *	presumably 0 on success; confirm.
 *
 * Environment:
 *	The module must have been initialized.
 *
 * Side Effects:
 *	Cancels the pending IOMGR wait of the probe LWP.
 *------------------------------------------------------------------------*/
798 int fsprobe_ForceProbeNow()
800 { /*fsprobe_ForceProbeNow*/
802 static char rn[] = "fsprobe_ForceProbeNow"; /*Routine name*/
805 * There isn't a prayer unless we've been initialized.
807 if (!fsprobe_initflag) {
808 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
813 * Kick the sucker in the side.
815 IOMGR_Cancel(probeLWP_ID);
818 * We did it, so report the happy news.
822 } /*fsprobe_ForceProbeNow*/