2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Implementation of the AFS FileServer probe facility.
14 *------------------------------------------------------------------------*/
16 #include <afsconfig.h>
17 #include <afs/param.h>
25 #include <fsprobe.h> /*Interface for this module */
26 #include <lwp.h> /*Lightweight process package */
27 #include <afs/cellconfig.h>
/* Stack size, in bytes, that fsprobe_Init passes to LWP_CreateProcess() for the probe LWP. */
29 #define LWP_STACK_SIZE (16 * 1024)
32 * Routines we need that don't have explicit include file definitions.
34 extern int RXAFSCB_ExecuteRequest(); /*AFS callback dispatcher */
35 extern char *hostutil_GetNameByINet(); /*Host parsing utility */
38 * Help out the linker by explicitly importing the callback routines.
40 extern afs_int32 SRXAFSCB_CallBack();
41 extern afs_int32 SRXAFSCB_InitCallBackState2();
42 extern afs_int32 SRXAFSCB_Probe();
/* Non-static objects: filled in by fsprobe_Init and then used by fsprobe_LWP on every sweep. */
47 int fsprobe_numServers; /*Num servers connected */
48 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /*Ptr to connection array */
49 struct fsprobe_ProbeResults fsprobe_Results; /*Latest probe results */
50 int fsprobe_ProbeFreqInSecs; /*Probe freq. in seconds */
/*
 * File-private state. The two byte counts are computed once in fsprobe_Init
 * and reused by fsprobe_LWP to memset the stats and probeOK arrays.
 */
55 static int fsprobe_initflag = 0; /*Was init routine called? */
56 static int fsprobe_debug = 0; /*Debugging output enabled? */
57 static int (*fsprobe_Handler) (); /*Probe handler routine */
58 static PROCESS probeLWP_ID; /*Probe LWP process ID */
59 static int fsprobe_statsBytes; /*Num bytes in stats block */
60 static int fsprobe_probeOKBytes; /*Num bytes in probeOK block */
63 * We have to pass a port to Rx to start up our callback listener
64 * service, but 7001 is already taken up by the Cache Manager. So,
/* First port that fsprobe_Init hands to rx_Init() for the callback listener. */
67 #define FSPROBE_CBPORT 7101
70 /*------------------------------------------------------------------------
71 * [private] fsprobe_CleanupInit
74 * Set up for recovery after an error in initialization (i.e.,
75 * during a call to fsprobe_Init).
82 * Error value otherwise.
85 * This routine is private to the module.
88 * Zeros out basic data structures.
89 *------------------------------------------------------------------------*/
93 { /*fsprobe_CleanupInit */
95 afs_int32 code; /*Return code from callback stubs */
96 struct rx_call *rxcall; /*Bogus param */
97 AFSCBFids *Fids_Array; /*Bogus param */
98 AFSCBs *CallBack_Array; /*Bogus param */
99 struct interfaceAddr *interfaceAddr; /*Bogus param */
/* Reset the connection-array pointer (nothing is freed here) and zero the whole results block. */
101 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
102 memset(&fsprobe_Results, 0, sizeof(struct fsprobe_ProbeResults));
/* Null placeholders used as arguments for the stub calls below. */
104 rxcall = (struct rx_call *)0;
105 Fids_Array = (AFSCBFids *) 0;
106 CallBack_Array = (AFSCBs *) 0;
107 interfaceAddr = NULL;
/*
 * Call each callback stub once with the null placeholders; every result is
 * stored in code, overwriting the previous one.
 * NOTE(review): presumably these calls exist only so the linker pulls in the
 * stubs - confirm.
 */
109 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
112 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
115 code = SRXAFSCB_Probe(rxcall);
118 } /*fsprobe_CleanupInit */
121 /*------------------------------------------------------------------------
122 * [exported] fsprobe_Cleanup
125 * Clean up our memory and connection state.
128 * int a_releaseMem : Should we free up malloc'ed areas?
131 * 0 on total success,
132 * -1 if the module was never initialized, or there was a problem
133 * with the fsprobe connection array.
136 * fsprobe_numServers should be properly set. We don't do anything
137 * unless fsprobe_Init() has already been called.
140 * Shuts down Rx connections gracefully, frees allocated space
142 *------------------------------------------------------------------------*/
145 fsprobe_Cleanup(a_releaseMem)
148 { /*fsprobe_Cleanup */
150 static char rn[] = "fsprobe_Cleanup"; /*Routine name */
151 int code; /*Return code */
152 int conn_idx; /*Current connection index */
153 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to fsprobe connection */
156 * Assume the best, but check the worst.
158 if (!fsprobe_initflag) {
159 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
165 * Take care of all Rx connections first. Check to see that the
166 * server count is a legal value.
168 if (fsprobe_numServers <= 0) {
170 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
171 rn, fsprobe_numServers);
174 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
176 * The fsprobe connection structure array exists. Go through it
177 * and close up any Rx connections it holds.
179 curr_conn = fsprobe_ConnInfo;
/*
 * For each of the fsprobe_numServers entries, destroy whichever of the two
 * Rx connections (rxconn, rxVolconn) is non-null and null the pointer
 * afterwards, so a repeated cleanup skips it.
 * NOTE(review): the statement that advances curr_conn is not visible in
 * this listing.
 */
180 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
181 if (curr_conn->rxconn != (struct rx_connection *)0) {
182 rx_DestroyConnection(curr_conn->rxconn);
183 curr_conn->rxconn = (struct rx_connection *)0;
185 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
186 rx_DestroyConnection(curr_conn->rxVolconn);
187 curr_conn->rxVolconn = (struct rx_connection *)0;
190 } /*for each fsprobe connection */
191 } /*fsprobe connection structure exists */
192 } /*Legal number of servers */
195 * Now, release all the space we've allocated, if asked to.
/*
 * Each of the three arrays is freed only when its pointer is non-null.
 * NOTE(review): the pointers are not reset after free() in the visible
 * lines, and the test of a_releaseMem is not visible in this listing.
 */
198 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
199 free(fsprobe_ConnInfo);
200 if (fsprobe_Results.stats != NULL)
201 free(fsprobe_Results.stats);
202 if (fsprobe_Results.probeOK != (int *)0)
203 free(fsprobe_Results.probeOK);
207 * Return the news, whatever it is.
211 } /*fsprobe_Cleanup */
213 /*------------------------------------------------------------------------
214 * [private] fsprobe_LWP
217 * This LWP iterates over the server connections and gathers up
218 * the desired statistics from each one on a regular basis. When
219 * the sweep is done, the associated handler function is called
220 * to process the new data.
229 * Started by fsprobe_Init(), uses global structures.
233 *------------------------------------------------------------------------*/
238 static char rn[] = "fsprobe_LWP"; /*Routine name */
239 register afs_int32 code; /*Results of calls */
240 struct timeval tv; /*Time structure */
241 int conn_idx; /*Connection index */
242 struct fsprobe_ConnectionInfo *curr_conn; /*Current connection */
243 struct ProbeViceStatistics *curr_stats; /*Current stats region */
244 int *curr_probeOK; /*Current probeOK field */
246 while (1) { /*Service loop */
248 * Iterate through the server connections, gathering data.
249 * Don't forget to bump the probe count and zero the statistics
250 * areas before calling the servers.
254 "[%s] Waking up, collecting data from %d connected servers\n",
255 rn, fsprobe_numServers);
/*
 * Each sweep restarts the three cursors at the first element of the
 * connection, stats and probeOK arrays, counts the probe, and clears both
 * result arrays using the byte counts saved by fsprobe_Init.
 */
256 curr_conn = fsprobe_ConnInfo;
257 curr_stats = fsprobe_Results.stats;
258 curr_probeOK = fsprobe_Results.probeOK;
259 fsprobe_Results.probeNum++;
260 memset(fsprobe_Results.stats, 0, fsprobe_statsBytes);
261 memset(fsprobe_Results.probeOK, 0, fsprobe_probeOKBytes);
263 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
265 * Grab the statistics for the current FileServer, if the
266 * connection is valid.
269 fprintf(stderr, "[%s] Contacting server %s\n", rn,
270 curr_conn->hostName);
271 if (curr_conn->rxconn != (struct rx_connection *)0) {
274 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
277 RXAFS_GetStatistics(curr_conn->rxconn, curr_stats);
281 /*Valid Rx connection */
283 * Call the Volume Server too to get additional stats
286 fprintf(stderr, "[%s] Contacting volume server %s\n", rn,
287 curr_conn->hostName);
288 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
/*
 * NOTE(review): the debug message in this branch names RXAFS_GetStatistics,
 * but the call made per partition below is AFSVolPartitionInfo.
 */
291 struct diskPartition partition;
295 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
297 for (i = 0; i < curr_conn->partCnt; i++) {
298 if (curr_conn->partList.partFlags[i] & PARTVALID) {
299 MapPartIdIntoName(curr_conn->partList.partId[i],
302 AFSVolPartitionInfo(curr_conn->rxVolconn, pname,
306 "Could not get information on server %s partition %s\n",
307 curr_conn->hostName, pname);
309 curr_stats->Disk[i].BlocksAvailable =
311 curr_stats->Disk[i].TotalBlocks =
/*
 * NOTE(review): unbounded strcpy; the sizes of Disk[i].Name and pname are
 * not visible here, and i is bounded only by partCnt - confirm both fit.
 */
313 strcpy(curr_stats->Disk[i].Name, pname);
322 * Advance the fsprobe connection pointer & stats pointer.
328 } /*For each fsprobe connection */
331 * All (valid) connections have been probed. Now, call the
332 * associated handler function. The handler does not take
333 * any explicit parameters, rather gets to the goodies via
334 * some of the objects exported by this module.
338 "[%s] Polling complete, calling associated handler routine.\n",
340 code = fsprobe_Handler();
342 fprintf(stderr, "[%s] Handler routine returned error code %d\n",
346 * Fall asleep for the prescribed number of seconds.
/*
 * IOMGR_Select is called with no descriptors, so it serves purely as a
 * timed wait. NOTE(review): presumably the IOMGR_Cancel() issued by
 * fsprobe_ForceProbeNow ends this wait early - confirm. The assignment of
 * tv.tv_usec is not visible in this listing.
 */
348 tv.tv_sec = fsprobe_ProbeFreqInSecs;
351 fprintf(stderr, "[%s] Falling asleep for %d seconds\n", rn,
352 fsprobe_ProbeFreqInSecs);
353 code = IOMGR_Select(0, /*Num fids */
354 0, /*Descriptors ready for reading */
355 0, /*Descriptors ready for writing */
356 0, /*Descriptors w/exceptional conditions */
357 &tv); /*Ptr to timeout structure */
359 fprintf(stderr, "[%s] IOMGR_Select returned code %d\n", rn, code);
364 /*list all the partitions on <aserver> */
/*
 * Interface selector: set to 1 once a server answers AFSVolXListPartitions
 * with RXGEN_OPCODE, after which the older AFSVolListPartitions call is
 * used. The flag has file scope and is never cleared in the visible code,
 * so it applies to every later call regardless of which server is asked.
 */
365 static int newvolserver = 0;
366 XListPartitions(aconn, ptrPartList, cntp)
367 struct rx_connection *aconn;
368 struct partList *ptrPartList;
372 struct partEntries partEnts;
373 register int i, j = 0, code;
376 if (newvolserver == 1) {
/* Older interface: pre-fill all 26 id slots with -1 so unused ones can be recognized after the call. */
377 for (i = 0; i < 26; i++)
378 partIds.partIds[i] = -1;
380 code = AFSVolListPartitions(aconn, &partIds);
/*
 * Ids other than -1 are stored at index j of the caller list and flagged
 * PARTVALID. NOTE(review): the increment of j and the branch that owns the
 * partFlags[i] = 0 assignment are not visible in this listing.
 */
382 for (i = 0; i < 26; i++) {
383 if ((partIds.partIds[i]) != -1) {
384 ptrPartList->partId[j] = partIds.partIds[i];
385 ptrPartList->partFlags[j] = PARTVALID;
388 ptrPartList->partFlags[i] = 0;
/* Newer interface: the entry array starts empty (length 0, NULL pointer) before the call. */
394 partEnts.partEntries_len = 0;
395 partEnts.partEntries_val = NULL;
396 code = AFSVolXListPartitions(aconn, &partEnts);
398 if (code == RXGEN_OPCODE) {
399 newvolserver = 1; /* Doesn't support new interface */
406 *cntp = partEnts.partEntries_len;
/*
 * NOTE(review): the warning text says only a limited number is processed,
 * but the statement that clamps *cntp to VOLMAXPARTS is not visible in
 * this listing - confirm it exists before the copy loop.
 */
407 if (*cntp > VOLMAXPARTS) {
409 "Warning: number of partitions on the server too high %d (process only %d)\n",
413 for (i = 0; i < *cntp; i++) {
414 ptrPartList->partId[i] = partEnts.partEntries_val[i];
415 ptrPartList->partFlags[i] = PARTVALID;
/* The entry array is released with free() once its contents have been copied out. */
417 free(partEnts.partEntries_val);
422 "Could not fetch the list of partitions from the server\n");
427 /*------------------------------------------------------------------------
428 * [exported] fsprobe_Init
431 * Initialize the fsprobe module: set up Rx connections to the
432 * given set of servers, start up the probe and callback LWPs,
433 * and associate the routine to be called when a probe completes.
436 * int a_numServers : Num. servers to connect to.
437 * struct sockaddr_in *a_socketArray : Array of server sockets.
438 * int a_ProbeFreqInSecs : Probe frequency in seconds.
439 * int (*a_ProbeHandler)() : Ptr to probe handler fcn.
440 * int a_debug; : Turn debugging output on?
444 * -2 for (at least one) connection error,
445 * LWP process creation code, if it failed,
446 * -1 for other fatal errors.
449 * *** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
450 * Also, the server security object CBsecobj MUST be a static,
451 * since it has to stick around after this routine exits.
454 * Sets up just about everything.
455 *------------------------------------------------------------------------*/
458 fsprobe_Init(a_numServers, a_socketArray, a_ProbeFreqInSecs, a_ProbeHandler,
461 struct sockaddr_in *a_socketArray;
462 int a_ProbeFreqInSecs;
463 int (*a_ProbeHandler) ();
468 static char rn[] = "fsprobe_Init"; /*Routine name */
469 register afs_int32 code; /*Return value */
470 static struct rx_securityClass *CBsecobj; /*Callback security object */
471 struct rx_securityClass *secobj; /*Client security object */
472 struct rx_service *rxsrv_afsserver; /*Server for AFS */
473 int arg_errfound; /*Argument error found? */
474 int curr_srv; /*Current server idx */
475 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to current conn */
476 char *hostNameFound; /*Ptr to returned host name */
477 int conn_err; /*Connection error? */
478 int PortToUse; /*Callback port to use */
481 * If we've already been called, snicker at the bozo, gently
482 * remind him of his doubtful heritage, and return success.
484 if (fsprobe_initflag) {
485 fprintf(stderr, "[%s] Called multiple times!\n", rn);
/*
 * The init flag is raised before the arguments are validated.
 * NOTE(review): no visible line clears it again when a later step fails.
 */
488 fsprobe_initflag = 1;
491 * Check the parameters for bogosities.
494 if (a_numServers <= 0) {
495 fprintf(stderr, "[%s] Illegal number of servers: %d\n", rn,
499 if (a_socketArray == (struct sockaddr_in *)0) {
500 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
503 if (a_ProbeFreqInSecs <= 0) {
504 fprintf(stderr, "[%s] Illegal probe frequency: %d\n", rn,
508 if (a_ProbeHandler == (int (*)())0) {
509 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
516 * Record our passed-in info.
518 fsprobe_debug = a_debug;
519 fsprobe_numServers = a_numServers;
520 fsprobe_Handler = a_ProbeHandler;
521 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
524 * Get ready in case we have to do a cleanup - basically, zero
527 fsprobe_CleanupInit();
530 * Allocate the necessary data structures and initialize everything
533 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)
534 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
/*
 * NOTE(review): the two messages below print a size_t product with %d;
 * the multiplication itself is not checked for overflow.
 */
535 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
537 "[%s] Can't allocate %d connection info structs (%d bytes)\n",
539 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
540 return (-1); /*No cleanup needs to be done yet */
544 fprintf(stderr, "[%s] fsprobe_ConnInfo allocated (%d bytes)\n", rn,
545 a_numServers * sizeof(struct fsprobe_ConnectionInfo));
/*
 * The byte counts of the stats and probeOK arrays are kept in file-scope
 * variables because fsprobe_LWP reuses them to clear both arrays at the
 * start of every sweep. On allocation failure fsprobe_Cleanup(1) is called
 * to release what was already allocated.
 */
548 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
549 fsprobe_Results.stats = (struct ProbeViceStatistics *)
550 malloc(fsprobe_statsBytes);
551 if (fsprobe_Results.stats == NULL) {
553 "[%s] Can't allocate %d statistics structs (%d bytes)\n", rn,
554 a_numServers, fsprobe_statsBytes);
555 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
557 } else if (fsprobe_debug)
558 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
559 rn, fsprobe_statsBytes);
561 fsprobe_probeOKBytes = a_numServers * sizeof(int);
562 fsprobe_Results.probeOK = (int *)malloc(fsprobe_probeOKBytes);
563 if (fsprobe_Results.probeOK == (int *)0) {
565 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
566 rn, a_numServers, fsprobe_probeOKBytes);
567 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
569 } else if (fsprobe_debug)
570 fprintf(stderr, "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
571 rn, fsprobe_probeOKBytes);
573 fsprobe_Results.probeNum = 0;
574 fsprobe_Results.probeTime = 0;
/* Only the stats array is zeroed in the visible lines; fsprobe_LWP clears probeOK before each sweep. */
575 memset(fsprobe_Results.stats, 0,
576 (a_numServers * sizeof(struct ProbeViceStatistics)));
579 * Initialize the Rx subsystem, just in case nobody's done it.
582 fprintf(stderr, "[%s] Initializing Rx\n", rn);
/*
 * Start with FSPROBE_CBPORT. NOTE(review): the message for RX_ADDRINUSE
 * says the port is advanced, but the retry loop and the increment of
 * PortToUse are not visible in this listing.
 */
583 PortToUse = FSPROBE_CBPORT;
585 code = rx_Init(htons(PortToUse));
587 if (code == RX_ADDRINUSE) {
590 "[%s] Callback port %d in use, advancing\n", rn,
594 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
600 fprintf(stderr, "[%s] Rx initialized on port %d\n", rn, PortToUse);
603 * Create a null Rx server security object, to be used by the
606 CBsecobj = rxnull_NewServerSecurityObject();
607 if (CBsecobj == (struct rx_securityClass *)0) {
609 "[%s] Can't create null security object for the callback listener.\n",
611 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
615 fprintf(stderr, "[%s] Callback server security object created\n", rn);
618 * Create a null Rx client security object, to be used by the
621 secobj = rxnull_NewClientSecurityObject();
622 if (secobj == (struct rx_securityClass *)0) {
624 "[%s] Can't create client security object for probe LWP.\n",
626 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
630 fprintf(stderr, "[%s] Probe LWP client security object created\n",
633 curr_conn = fsprobe_ConnInfo;
635 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
637 * Copy in the socket info for the current server, resolve its
638 * printable name if possible.
641 fprintf(stderr, "[%s] Copying in the following socket info:\n",
/*
 * NOTE(review): s_addr is printed with %lx here and %lu further down; its
 * type may not be long on every platform - confirm the format specifiers.
 */
643 fprintf(stderr, "[%s] IP addr 0x%lx, port %d\n", rn,
644 (a_socketArray + curr_srv)->sin_addr.s_addr,
645 (a_socketArray + curr_srv)->sin_port);
647 memcpy(&(curr_conn->skt), a_socketArray + curr_srv,
648 sizeof(struct sockaddr_in));
651 hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
652 if (hostNameFound == NULL) {
654 "[%s] Can't map Internet address %lu to a string name\n",
655 rn, curr_conn->skt.sin_addr.s_addr);
656 curr_conn->hostName[0] = '\0';
/*
 * NOTE(review): unbounded strcpy; the size of hostName is not visible
 * here - confirm the resolved name always fits.
 */
658 strcpy(curr_conn->hostName, hostNameFound);
660 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
661 rn, curr_srv, curr_conn->hostName);
665 * Make an Rx connection to the current server.
669 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
670 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
671 curr_conn->skt.sin_port);
/* FileServer connection: address and port come straight from the caller socket entry, service id 1, security index 0. */
672 curr_conn->rxconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
673 curr_conn->skt.sin_port, /*Server port */
674 1, /*AFS service num */
675 secobj, /*Security object */
676 0); /*Number of above */
677 if (curr_conn->rxconn == (struct rx_connection *)0) {
679 "[%s] Can't create Rx connection to server %s (%lu)\n",
680 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
684 fprintf(stderr, "[%s] New connection at 0x%lx\n", rn,
688 * Make an Rx connection to the current volume server.
692 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
693 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
/*
 * Volume-server connection: same host address, but fixed port
 * AFSCONF_VOLUMEPORT and service VOLSERVICE_ID.
 * NOTE(review): the debug message above repeats the FileServer wording
 * (service 1), which does not match the arguments used here.
 */
695 curr_conn->rxVolconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
696 htons(AFSCONF_VOLUMEPORT), /*Volume Server port */
697 VOLSERVICE_ID, /*AFS service num */
698 secobj, /*Security object */
699 0); /*Number of above */
700 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
702 "[%s] Can't create Rx connection to volume server %s (%lu)\n",
703 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
/*
 * Fetch the partition list once, at init time; fsprobe_LWP later walks
 * partList and partCnt on every sweep without refreshing them.
 */
708 memset(&curr_conn->partList, 0, sizeof(struct partList));
709 curr_conn->partCnt = 0;
710 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList,
713 curr_conn->partCnt = cnt;
717 fprintf(stderr, "[%s] New connection at 0x%lx\n", rn,
718 curr_conn->rxVolconn);
722 * Bump the current fsprobe connection to set up.
729 * Create the AFS callback service (listener).
732 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
/* The callback service is registered with the single static security object CBsecobj and the RXAFSCB dispatcher. */
733 rxsrv_afsserver = rx_NewService(0, /*Use default port */
735 "afs", /*Service name */
736 &CBsecobj, /*Ptr to security object(s) */
737 1, /*Number of security objects */
738 RXAFSCB_ExecuteRequest); /*Dispatcher */
739 if (rxsrv_afsserver == (struct rx_service *)0) {
740 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n",
742 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
746 fprintf(stderr, "[%s] Callback listener created\n", rn);
749 * Start up the AFS callback service.
752 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
753 rx_StartServer(0 /*Don't donate yourself to LWP pool */ );
756 * Start up the probe LWP.
759 fprintf(stderr, "[%s] Creating the probe LWP\n", rn);
/* The probe LWP runs fsprobe_LWP with a null parameter; its id is kept in probeLWP_ID for fsprobe_ForceProbeNow. */
760 code = LWP_CreateProcess(fsprobe_LWP, /*Function to start up */
761 LWP_STACK_SIZE, /*Stack size in bytes */
763 (void *)0, /*Parameters */
764 "fsprobe Worker", /*Name to use */
765 &probeLWP_ID); /*Returned LWP process ID */
767 fprintf(stderr, "[%s] Can't create fsprobe LWP! Error is %d\n", rn,
769 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
773 fprintf(stderr, "[%s] Probe LWP process structure located at 0x%x\n",
778 * Do I need to do this?
781 fprintf(stderr, "[%s] Calling osi_Wakeup()\n", rn);
782 osi_Wakeup(&rxsrv_afsserver); /*Wake up anyone waiting for it */
786 * Return the final results.
796 /*------------------------------------------------------------------------
797 * [exported] fsprobe_ForceProbeNow
800 * Wake up the probe LWP, forcing it to execute a probe immediately.
807 * Error value otherwise.
810 * The module must have been initialized.
814 *------------------------------------------------------------------------*/
817 fsprobe_ForceProbeNow()
818 { /*fsprobe_ForceProbeNow */
820 static char rn[] = "fsprobe_ForceProbeNow"; /*Routine name */
823 * There isn't a prayer unless we've been initialized.
825 if (!fsprobe_initflag) {
826 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
831 * Kick the sucker in the side.
/*
 * Cancel the pending IOMGR_Select of the probe LWP whose id fsprobe_Init
 * stored in probeLWP_ID. NOTE(review): the return value of IOMGR_Cancel is
 * ignored, and the flag check above does not prove the LWP was actually
 * created - confirm probeLWP_ID is valid whenever the flag is set.
 */
833 IOMGR_Cancel(probeLWP_ID);
836 * We did it, so report the happy news.
840 } /*fsprobe_ForceProbeNow */