2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Implementation of the AFS FileServer probe facility.
14 *------------------------------------------------------------------------*/
16 #include <afsconfig.h>
17 #include <afs/param.h>
31 #include <fsprobe.h> /*Interface for this module */
32 #include <lwp.h> /*Lightweight process package */
33 #include <afs/cellconfig.h>
35 #define LWP_STACK_SIZE (16 * 1024)
38 * Routines we need that don't have explicit include file definitions.
/*
 * File-scope declarations for the fsprobe module.
 * The extern routines below are declared with empty parameter lists, so
 * the compiler performs no argument checking on calls to them.
 */
40 extern int RXAFSCB_ExecuteRequest(); /* AFS callback dispatcher, handed to rx_NewService in fsprobe_Init */
41 extern char *hostutil_GetNameByINet(); /* Host parsing utility: maps an address to a printable name */
44 * Help out the linker by explicitly importing the callback routines.
46 extern afs_int32 SRXAFSCB_CallBack();
47 extern afs_int32 SRXAFSCB_InitCallBackState2();
48 extern afs_int32 SRXAFSCB_Probe();
/* Exported (non-static) state; the probe handler reads results through these. */
53 int fsprobe_numServers; /* Number of servers connected */
54 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /* Pointer to the connection array */
55 struct fsprobe_ProbeResults fsprobe_Results; /* Latest probe results */
56 int fsprobe_ProbeFreqInSecs; /* Probe frequency in seconds */
/* Private (static) state, visible only inside this file. */
61 static int fsprobe_initflag = 0; /* Set to 1 once fsprobe_Init has been entered */
62 static int fsprobe_debug = 0; /* Debugging output enabled? */
63 static int (*fsprobe_Handler) (); /* Probe handler routine, called after each sweep */
64 static PROCESS probeLWP_ID; /* Probe LWP process ID, filled in by LWP_CreateProcess */
65 static int fsprobe_statsBytes; /* Number of bytes in the stats block */
66 static int fsprobe_probeOKBytes; /* Number of bytes in the probeOK block */
69 * We have to pass a port to Rx to start up our callback listener
70 * service, but 7001 is already taken up by the Cache Manager. So,
/* First callback port that fsprobe_Init passes to rx_Init. */
73 #define FSPROBE_CBPORT 7101
76 /*------------------------------------------------------------------------
77 * [private] fsprobe_CleanupInit
80 * Set up for recovery after an error in initialization (i.e.,
81 * during a call to fsprobe_Init).
88 * Error value otherwise.
91 * This routine is private to the module.
94 * Zeros out basic data structures.
95 *------------------------------------------------------------------------*/
/*
 * Body of fsprobe_CleanupInit: clears the exported connection pointer and
 * the probe results block, then invokes each SRXAFSCB_* stub once with
 * null arguments.
 * NOTE(review): the stub calls presumably exist only so the linker pulls
 * the callback routines in - confirm.
 * NOTE(review): the embedded numbering skips after each stub call (115,
 * 118, 121, 124), so any test of code and the return statement are not
 * visible here.
 */
99 { /*fsprobe_CleanupInit */
101 afs_int32 code; /* Latest return code from a callback stub */
102 struct rx_call *rxcall; /* Placeholder argument, always null */
103 AFSCBFids *Fids_Array; /* Placeholder argument, always null */
104 AFSCBs *CallBack_Array; /* Placeholder argument, always null */
105 struct interfaceAddr *interfaceAddr; /* Placeholder argument, always null */
/* Drop the connection array pointer without freeing it, and zero the results. */
107 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
108 memset(&fsprobe_Results, 0, sizeof(struct fsprobe_ProbeResults));
/* Null out the placeholder arguments handed to the stubs below. */
110 rxcall = (struct rx_call *)0;
111 Fids_Array = (AFSCBFids *) 0;
112 CallBack_Array = (AFSCBs *) 0;
113 interfaceAddr = NULL;
/* Each stub is called exactly once; code holds only the most recent result. */
115 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
118 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
121 code = SRXAFSCB_Probe(rxcall);
124 } /*fsprobe_CleanupInit */
127 /*------------------------------------------------------------------------
128 * [exported] fsprobe_Cleanup
131 * Clean up our memory and connection state.
134 * int a_releaseMem : Should we free up malloc'ed areas?
137 * 0 on total success,
138 * -1 if the module was never initialized, or there was a problem
139 * with the fsprobe connection array.
142 * fsprobe_numServers should be properly set. We don't do anything
143 * unless fsprobe_Init() has already been called.
146 * Shuts down Rx connections gracefully, frees allocated space
148 *------------------------------------------------------------------------*/
/*
 * fsprobe_Cleanup: destroys every Rx connection recorded in the
 * fsprobe_ConnInfo array and frees the module-level arrays.
 * Parameter a_releaseMem is declared K&R-style; its declaration line is
 * not visible here.
 * NOTE(review): the embedded numbering has gaps (for example 165 to 171
 * and 209 to 213), so the return statements, the branch joining the two
 * server-count cases, the a_releaseMem test and the curr_conn advance
 * are not visible - confirm against the full file.
 */
151 fsprobe_Cleanup(a_releaseMem)
154 { /*fsprobe_Cleanup */
156 static char rn[] = "fsprobe_Cleanup"; /* Routine name used in messages */
157 int code; /* Return code */
158 int conn_idx; /* Index of the connection being closed */
159 struct fsprobe_ConnectionInfo *curr_conn; /* Connection entry being closed */
162 * Assume the best, but check the worst.
/* Nothing can be cleaned up unless fsprobe_Init set the init flag. */
164 if (!fsprobe_initflag) {
165 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
171 * Take care of all Rx connections first. Check to see that the
172 * server count is a legal value.
174 if (fsprobe_numServers <= 0) {
176 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
177 rn, fsprobe_numServers);
180 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
182 * The fsprobe connection structure array exists. Go through it
183 * and close up any Rx connections it holds.
185 curr_conn = fsprobe_ConnInfo;
186 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
/* Each entry may hold a file server and a volume server connection; */
/* every non-null one is destroyed and then nulled so it is not reused. */
187 if (curr_conn->rxconn != (struct rx_connection *)0) {
188 rx_DestroyConnection(curr_conn->rxconn);
189 curr_conn->rxconn = (struct rx_connection *)0;
191 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
192 rx_DestroyConnection(curr_conn->rxVolconn);
193 curr_conn->rxVolconn = (struct rx_connection *)0;
196 } /*for each fsprobe connection */
197 } /*fsprobe connection structure exists */
198 } /*Legal number of servers */
201 * Now, release all the space we've allocated, if asked to.
/* NOTE(review): the three pointers are not reset after free in the lines */
/* visible here - confirm whether a second call could free them again. */
204 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
205 free(fsprobe_ConnInfo);
206 if (fsprobe_Results.stats != NULL)
207 free(fsprobe_Results.stats);
208 if (fsprobe_Results.probeOK != (int *)0)
209 free(fsprobe_Results.probeOK);
213 * Return the news, whatever it is.
217 } /*fsprobe_Cleanup */
219 /*------------------------------------------------------------------------
220 * [private] fsprobe_LWP
223 * This LWP iterates over the server connections and gathers up
224 * the desired statistics from each one on a regular basis. When
225 * the sweep is done, the associated handler function is called
226 * to process the new data.
235 * Started by fsprobe_Init(), uses global structures.
239 *------------------------------------------------------------------------*/
/*
 * Body of fsprobe_LWP, the probe worker started by fsprobe_Init.
 * Each pass of the endless loop bumps the probe count, zeroes the stats
 * and probeOK blocks, queries every connected server, calls the
 * registered handler, and then sleeps for fsprobe_ProbeFreqInSecs
 * seconds via IOMGR_Select.
 * NOTE(review): the embedded numbering has many gaps in this routine, so
 * the debug guards, the assignment targets of the RPC results and the
 * pointer advances at the end of the per-server loop are not visible.
 */
244 static char rn[] = "fsprobe_LWP"; /* Routine name used in messages */
245 register afs_int32 code; /* Results of calls */
246 struct timeval tv; /* Timeout handed to IOMGR_Select */
247 int conn_idx; /* Connection index */
248 struct fsprobe_ConnectionInfo *curr_conn; /* Current connection */
249 struct ProbeViceStatistics *curr_stats; /* Current stats region */
250 int *curr_probeOK; /* Current probeOK field */
252 while (1) { /*Service loop */
254 * Iterate through the server connections, gathering data.
255 * Don't forget to bump the probe count and zero the statistics
256 * areas before calling the servers.
260 "[%s] Waking up, collecting data from %d connected servers\n",
261 rn, fsprobe_numServers);
/* Start each sweep at the first entry of the three parallel arrays. */
262 curr_conn = fsprobe_ConnInfo;
263 curr_stats = fsprobe_Results.stats;
264 curr_probeOK = fsprobe_Results.probeOK;
265 fsprobe_Results.probeNum++;
/* Sizes were computed once in fsprobe_Init from the server count. */
266 memset(fsprobe_Results.stats, 0, fsprobe_statsBytes);
267 memset(fsprobe_Results.probeOK, 0, fsprobe_probeOKBytes);
269 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
271 * Grab the statistics for the current FileServer, if the
272 * connection is valid.
275 fprintf(stderr, "[%s] Contacting server %s\n", rn,
276 curr_conn->hostName);
277 if (curr_conn->rxconn != (struct rx_connection *)0) {
280 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
/* NOTE(review): presumably the result is stored through curr_probeOK; */
/* the assignment target is not visible here. */
283 RXAFS_GetStatistics(curr_conn->rxconn, curr_stats);
287 /*Valid Rx connection */
289 * Call the Volume Server too to get additional stats
292 fprintf(stderr, "[%s] Contacting volume server %s\n", rn,
293 curr_conn->hostName);
294 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
297 struct diskPartition partition;
/* NOTE(review): the message below names RXAFS_GetStatistics, but the */
/* call made on this path is AFSVolPartitionInfo - looks copy-pasted. */
301 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
/* Walk the partition list cached by fsprobe_Init for this server. */
303 for (i = 0; i < curr_conn->partCnt; i++) {
304 if (curr_conn->partList.partFlags[i] & PARTVALID) {
305 MapPartIdIntoName(curr_conn->partList.partId[i],
308 AFSVolPartitionInfo(curr_conn->rxVolconn, pname,
312 "Could not get information on server %s partition %s\n",
313 curr_conn->hostName, pname);
/* NOTE(review): Disk is indexed by the partition index i; the size of */
/* the Disk array is not visible here - confirm it covers partCnt. */
315 curr_stats->Disk[i].BlocksAvailable =
317 curr_stats->Disk[i].TotalBlocks =
/* NOTE(review): unbounded copy; confirm Name is large enough for pname. */
319 strcpy(curr_stats->Disk[i].Name, pname);
328 * Advance the fsprobe connection pointer & stats pointer.
334 } /*For each fsprobe connection */
337 * All (valid) connections have been probed. Now, call the
338 * associated handler function. The handler does not take
339 * any explicit parameters, rather gets to the goodies via
340 * some of the objects exported by this module.
344 "[%s] Polling complete, calling associated handler routine.\n",
346 code = fsprobe_Handler();
348 fprintf(stderr, "[%s] Handler routine returned error code %d\n",
352 * Fall asleep for the prescribed number of seconds.
354 tv.tv_sec = fsprobe_ProbeFreqInSecs;
357 fprintf(stderr, "[%s] Falling asleep for %d seconds\n", rn,
358 fsprobe_ProbeFreqInSecs);
/* No descriptors are passed, so only the timeout argument matters. */
359 code = IOMGR_Select(0, /*Num fids */
360 0, /*Descriptors ready for reading */
361 0, /*Descriptors ready for writing */
362 0, /*Descriptors w/exceptional conditions */
363 &tv); /*Ptr to timeout structure */
365 fprintf(stderr, "[%s] IOMGR_Select returned code %d\n", rn, code);
370 /*list all the partitions on <aserver> */
/*
 * XListPartitions fills ptrPartList with the partition ids reported by
 * the volume server reached through aconn and stores the count through
 * cntp.
 * Two RPCs are used: AFSVolXListPartitions is tried first, and once a
 * server answers RXGEN_OPCODE the routine switches to
 * AFSVolListPartitions, which works on a fixed table of 26 ids.
 * NOTE(review): the flag name reads inverted - newvolserver is set to 1
 * when the server does NOT support the new interface.
 * NOTE(review): the embedded numbering has gaps here, so the local
 * partIds declaration, the increment of j, the retry after RXGEN_OPCODE,
 * the clamp to VOLMAXPARTS and the return statements are not visible.
 */
371 static int newvolserver = 0;
372 XListPartitions(aconn, ptrPartList, cntp)
373 struct rx_connection *aconn;
374 struct partList *ptrPartList;
378 struct partEntries partEnts;
379 register int i, j = 0, code;
/* Old interface: prefill all 26 slots with -1 so unused ones can be told apart. */
382 if (newvolserver == 1) {
383 for (i = 0; i < 26; i++)
384 partIds.partIds[i] = -1;
386 code = AFSVolListPartitions(aconn, &partIds);
/* Valid ids are packed at index j; slot i is cleared otherwise. */
388 for (i = 0; i < 26; i++) {
389 if ((partIds.partIds[i]) != -1) {
390 ptrPartList->partId[j] = partIds.partIds[i];
391 ptrPartList->partFlags[j] = PARTVALID;
394 ptrPartList->partFlags[i] = 0;
/* New interface: start from an empty result for the RPC to fill in. */
400 partEnts.partEntries_len = 0;
401 partEnts.partEntries_val = NULL;
402 code = AFSVolXListPartitions(aconn, &partEnts);
404 if (code == RXGEN_OPCODE) {
405 newvolserver = 1; /* Doesn't support new interface */
412 *cntp = partEnts.partEntries_len;
413 if (*cntp > VOLMAXPARTS) {
415 "Warning: number of partitions on the server too high %d (process only %d)\n",
419 for (i = 0; i < *cntp; i++) {
420 ptrPartList->partId[i] = partEnts.partEntries_val[i];
421 ptrPartList->partFlags[i] = PARTVALID;
/* NOTE(review): presumably the RPC stub allocated this array - confirm. */
423 free(partEnts.partEntries_val);
428 "Could not fetch the list of partitions from the server\n");
433 /*------------------------------------------------------------------------
434 * [exported] fsprobe_Init
437 * Initialize the fsprobe module: set up Rx connections to the
438 * given set of servers, start up the probe and callback LWPs,
439 * and associate the routine to be called when a probe completes.
442 * int a_numServers : Num. servers to connect to.
443 * struct sockaddr_in *a_socketArray : Array of server sockets.
444 * int a_ProbeFreqInSecs : Probe frequency in seconds.
445 * int (*a_ProbeHandler)() : Ptr to probe handler fcn.
446 * int a_debug; : Turn debugging output on?
450 * -2 for (at least one) connection error,
451 * LWP process creation code, if it failed,
452 * -1 for other fatal errors.
455 * *** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
456 * Also, the server security object CBsecobj MUST be a static,
457 * since it has to stick around after this routine exits.
460 * Sets up just about everything.
461 *------------------------------------------------------------------------*/
/*
 * fsprobe_Init: records the caller parameters in the module globals,
 * allocates the connection, stats and probeOK arrays, initializes Rx,
 * opens a file server and a volume server connection per entry of
 * a_socketArray, creates and starts the callback service, and finally
 * creates the probe LWP.
 * NOTE(review): the embedded numbering has many gaps in this routine, so
 * the debug guards, most return statements, the rx_Init retry loop and
 * the declarations of a_numServers, a_debug, i and cnt are not visible -
 * confirm against the full file.
 */
464 fsprobe_Init(a_numServers, a_socketArray, a_ProbeFreqInSecs, a_ProbeHandler,
467 struct sockaddr_in *a_socketArray;
468 int a_ProbeFreqInSecs;
469 int (*a_ProbeHandler) ();
474 static char rn[] = "fsprobe_Init"; /* Routine name used in messages */
475 register afs_int32 code; /* Return value */
476 static struct rx_securityClass *CBsecobj; /* Callback security object; static so it outlives this call */
477 struct rx_securityClass *secobj; /* Client security object */
478 struct rx_service *rxsrv_afsserver; /* Server for AFS */
479 int arg_errfound; /* Argument error found? */
480 int curr_srv; /* Current server index */
481 struct fsprobe_ConnectionInfo *curr_conn; /* Pointer to current connection */
482 char *hostNameFound; /* Pointer to returned host name */
483 int conn_err; /* Connection error? */
484 int PortToUse; /* Callback port to use */
487 * If we've already been called, snicker at the bozo, gently
488 * remind him of his doubtful heritage, and return success.
490 if (fsprobe_initflag) {
491 fprintf(stderr, "[%s] Called multiple times!\n", rn);
/* NOTE(review): the flag is set before the arguments are validated, so a */
/* call rejected below would still leave the module marked initialized - */
/* confirm this is intended. */
494 fsprobe_initflag = 1;
497 * Check the parameters for bogosities.
500 if (a_numServers <= 0) {
501 fprintf(stderr, "[%s] Illegal number of servers: %d\n", rn,
505 if (a_socketArray == (struct sockaddr_in *)0) {
506 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
509 if (a_ProbeFreqInSecs <= 0) {
510 fprintf(stderr, "[%s] Illegal probe frequency: %d\n", rn,
514 if (a_ProbeHandler == (int (*)())0) {
515 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
522 * Record our passed-in info.
524 fsprobe_debug = a_debug;
525 fsprobe_numServers = a_numServers;
526 fsprobe_Handler = a_ProbeHandler;
527 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
530 * Get ready in case we have to do a cleanup - basically, zero
533 fsprobe_CleanupInit();
536 * Allocate the necessary data structures and initialize everything
/* One fsprobe_ConnectionInfo entry per server. */
539 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)
540 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
541 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
/* NOTE(review): the byte count is a sizeof product printed with %d here */
/* and in the debug message further down - format and type may not match. */
543 "[%s] Can't allocate %d connection info structs (%d bytes)\n",
545 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
546 return (-1); /*No cleanup needs to be done yet */
550 fprintf(stderr, "[%s] fsprobe_ConnInfo allocated (%d bytes)\n", rn,
551 a_numServers * sizeof(struct fsprobe_ConnectionInfo));
/* The byte counts are kept in statics so fsprobe_LWP can zero the blocks. */
554 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
555 fsprobe_Results.stats = (struct ProbeViceStatistics *)
556 malloc(fsprobe_statsBytes);
557 if (fsprobe_Results.stats == NULL) {
559 "[%s] Can't allocate %d statistics structs (%d bytes)\n", rn,
560 a_numServers, fsprobe_statsBytes);
561 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
563 } else if (fsprobe_debug)
564 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
565 rn, fsprobe_statsBytes);
567 fsprobe_probeOKBytes = a_numServers * sizeof(int);
568 fsprobe_Results.probeOK = (int *)malloc(fsprobe_probeOKBytes);
569 if (fsprobe_Results.probeOK == (int *)0) {
571 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
572 rn, a_numServers, fsprobe_probeOKBytes);
573 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
575 } else if (fsprobe_debug)
576 fprintf(stderr, "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
577 rn, fsprobe_probeOKBytes);
/* Reset the probe counters and zero the freshly allocated stats block. */
579 fsprobe_Results.probeNum = 0;
580 fsprobe_Results.probeTime = 0;
581 memset(fsprobe_Results.stats, 0,
582 (a_numServers * sizeof(struct ProbeViceStatistics)));
585 * Initialize the Rx subsystem, just in case nobody's done it.
588 fprintf(stderr, "[%s] Initializing Rx\n", rn);
/* NOTE(review): the message below speaks of advancing the port when it */
/* is in use; the retry loop and the increment are not visible here. */
589 PortToUse = FSPROBE_CBPORT;
591 code = rx_Init(htons(PortToUse));
593 if (code == RX_ADDRINUSE) {
596 "[%s] Callback port %d in use, advancing\n", rn,
600 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
606 fprintf(stderr, "[%s] Rx initialized on port %d\n", rn, PortToUse);
609 * Create a null Rx server security object, to be used by the
612 CBsecobj = rxnull_NewServerSecurityObject();
613 if (CBsecobj == (struct rx_securityClass *)0) {
615 "[%s] Can't create null security object for the callback listener.\n",
617 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
621 fprintf(stderr, "[%s] Callback server security object created\n", rn);
624 * Create a null Rx client security object, to be used by the
627 secobj = rxnull_NewClientSecurityObject();
628 if (secobj == (struct rx_securityClass *)0) {
630 "[%s] Can't create client security object for probe LWP.\n",
632 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
636 fprintf(stderr, "[%s] Probe LWP client security object created\n",
/* Fill in one connection entry per server, in a_socketArray order. */
639 curr_conn = fsprobe_ConnInfo;
641 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
643 * Copy in the socket info for the current server, resolve its
644 * printable name if possible.
647 fprintf(stderr, "[%s] Copying in the following socket info:\n",
649 fprintf(stderr, "[%s] IP addr 0x%lx, port %d\n", rn,
650 (a_socketArray + curr_srv)->sin_addr.s_addr,
651 (a_socketArray + curr_srv)->sin_port);
653 memcpy(&(curr_conn->skt), a_socketArray + curr_srv,
654 sizeof(struct sockaddr_in));
657 hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
/* An unresolved address leaves hostName as the empty string. */
658 if (hostNameFound == NULL) {
660 "[%s] Can't map Internet address %lu to a string name\n",
661 rn, curr_conn->skt.sin_addr.s_addr);
662 curr_conn->hostName[0] = '\0';
/* NOTE(review): unbounded copy; confirm hostName can hold any returned name. */
664 strcpy(curr_conn->hostName, hostNameFound);
666 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
667 rn, curr_srv, curr_conn->hostName);
671 * Make an Rx connection to the current server.
675 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
676 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
677 curr_conn->skt.sin_port);
/* The port is passed exactly as stored in the copied socket entry. */
678 curr_conn->rxconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
679 curr_conn->skt.sin_port, /*Server port */
680 1, /*AFS service num */
681 secobj, /*Security object */
682 0); /*Number of above */
683 if (curr_conn->rxconn == (struct rx_connection *)0) {
685 "[%s] Can't create Rx connection to server %s (%lu)\n",
686 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
690 fprintf(stderr, "[%s] New connection at 0x%lx\n", rn,
694 * Make an Rx connection to the current volume server.
/* NOTE(review): the message below still says service 1, while the call */
/* that follows uses VOLSERVICE_ID on AFSCONF_VOLUMEPORT. */
698 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
699 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
701 curr_conn->rxVolconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
702 htons(AFSCONF_VOLUMEPORT), /*Volume Server port */
703 VOLSERVICE_ID, /*AFS service num */
704 secobj, /*Security object */
705 0); /*Number of above */
706 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
708 "[%s] Can't create Rx connection to volume server %s (%lu)\n",
709 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
/* Cache the partition list now; fsprobe_LWP walks it on every sweep. */
714 memset(&curr_conn->partList, 0, sizeof(struct partList));
715 curr_conn->partCnt = 0;
716 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList,
719 curr_conn->partCnt = cnt;
/* NOTE(review): a pointer is printed with %lx here - format and type may not match. */
723 fprintf(stderr, "[%s] New connection at 0x%lx\n", rn,
724 curr_conn->rxVolconn);
728 * Bump the current fsprobe connection to set up.
735 * Create the AFS callback service (listener).
738 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
/* The service gets one security object and RXAFSCB_ExecuteRequest as dispatcher. */
739 rxsrv_afsserver = rx_NewService(0, /*Use default port */
741 "afs", /*Service name */
742 &CBsecobj, /*Ptr to security object(s) */
743 1, /*Number of security objects */
744 RXAFSCB_ExecuteRequest); /*Dispatcher */
745 if (rxsrv_afsserver == (struct rx_service *)0) {
746 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n",
748 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
752 fprintf(stderr, "[%s] Callback listener created\n", rn);
755 * Start up the AFS callback service.
758 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
759 rx_StartServer(0 /*Don't donate yourself to LWP pool */ );
762 * Start up the probe LWP.
765 fprintf(stderr, "[%s] Creating the probe LWP\n", rn);
/* The new LWP runs fsprobe_LWP; its id is kept for fsprobe_ForceProbeNow. */
766 code = LWP_CreateProcess(fsprobe_LWP, /*Function to start up */
767 LWP_STACK_SIZE, /*Stack size in bytes */
769 (void *)0, /*Parameters */
770 "fsprobe Worker", /*Name to use */
771 &probeLWP_ID); /*Returned LWP process ID */
773 fprintf(stderr, "[%s] Can't create fsprobe LWP! Error is %d\n", rn,
775 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
779 fprintf(stderr, "[%s] Probe LWP process structure located at 0x%x\n",
784 * Do I need to do this?
787 fprintf(stderr, "[%s] Calling osi_Wakeup()\n", rn);
788 osi_Wakeup(&rxsrv_afsserver); /*Wake up anyone waiting for it */
792 * Return the final results.
802 /*------------------------------------------------------------------------
803 * [exported] fsprobe_ForceProbeNow
806 * Wake up the probe LWP, forcing it to execute a probe immediately.
813 * Error value otherwise.
816 * The module must have been initialized.
820 *------------------------------------------------------------------------*/
/*
 * fsprobe_ForceProbeNow: refuses to act unless the module has been
 * initialized, then calls IOMGR_Cancel on the probe LWP.
 * NOTE(review): presumably the cancel interrupts the IOMGR_Select sleep
 * in fsprobe_LWP so the next sweep starts at once - confirm.
 * NOTE(review): the embedded numbering has gaps here, so the return
 * statements are not visible.
 */
823 fsprobe_ForceProbeNow()
824 { /*fsprobe_ForceProbeNow */
826 static char rn[] = "fsprobe_ForceProbeNow"; /* Routine name used in messages */
829 * There isn't a prayer unless we've been initialized.
831 if (!fsprobe_initflag) {
832 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
837 * Kick the sucker in the side.
/* probeLWP_ID was filled in by LWP_CreateProcess during fsprobe_Init. */
839 IOMGR_Cancel(probeLWP_ID);
842 * We did it, so report the happy news.
846 } /*fsprobe_ForceProbeNow */