/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

/*------------------------------------------------------------------------
 * Implementation of the AFS FileServer probe facility.
 *------------------------------------------------------------------------*/
#include <afsconfig.h>
#include <afs/param.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fsprobe.h>		/*Interface for this module*/
#include <lwp.h>		/*Lightweight process package*/
#include <afs/cellconfig.h>
34 #define LWP_STACK_SIZE (16 * 1024)
37 * Routines we need that don't have explicit include file definitions.
39 extern int RXAFSCB_ExecuteRequest(); /*AFS callback dispatcher*/
40 extern char *hostutil_GetNameByINet(); /*Host parsing utility*/
43 * Help out the linker by explicitly importing the callback routines.
45 extern afs_int32 SRXAFSCB_CallBack();
46 extern afs_int32 SRXAFSCB_InitCallBackState2();
47 extern afs_int32 SRXAFSCB_Probe();
52 int fsprobe_numServers; /*Num servers connected*/
53 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /*Ptr to connection array*/
54 struct fsprobe_ProbeResults fsprobe_Results; /*Latest probe results*/
55 int fsprobe_ProbeFreqInSecs; /*Probe freq. in seconds*/
60 static int fsprobe_initflag = 0; /*Was init routine called?*/
61 static int fsprobe_debug = 0; /*Debugging output enabled?*/
62 static int (*fsprobe_Handler)(); /*Probe handler routine*/
63 static PROCESS probeLWP_ID; /*Probe LWP process ID*/
64 static int fsprobe_statsBytes; /*Num bytes in stats block*/
65 static int fsprobe_probeOKBytes; /*Num bytes in probeOK block*/
68 * We have to pass a port to Rx to start up our callback listener
69 * service, but 7001 is already taken up by the Cache Manager. So,
72 #define FSPROBE_CBPORT 7101
75 /*------------------------------------------------------------------------
76 * [private] fsprobe_CleanupInit
79 * Set up for recovery after an error in initialization (i.e.,
80 * during a call to fsprobe_Init.
87 * Error value otherwise.
90 * This routine is private to the module.
93 * Zeros out basic data structures.
94 *------------------------------------------------------------------------*/
96 static int fsprobe_CleanupInit()
98 { /*fsprobe_CleanupInit*/
100 afs_int32 code; /*Return code from callback stubs*/
101 struct rx_call *rxcall; /*Bogus param*/
102 AFSCBFids *Fids_Array; /*Bogus param*/
103 AFSCBs *CallBack_Array; /*Bogus param*/
104 struct interfaceAddr *interfaceAddr; /*Bogus param*/
106 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
107 memset(&fsprobe_Results, 0, sizeof(struct fsprobe_ProbeResults));
109 rxcall = (struct rx_call *)0;
110 Fids_Array = (AFSCBFids *)0;
111 CallBack_Array = (AFSCBs *)0;
112 interfaceAddr = NULL;
114 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
117 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
120 code = SRXAFSCB_Probe(rxcall);
123 } /*fsprobe_CleanupInit*/
126 /*------------------------------------------------------------------------
127 * [exported] fsprobe_Cleanup
130 * Clean up our memory and connection state.
133 * int a_releaseMem : Should we free up malloc'ed areas?
136 * 0 on total success,
137 * -1 if the module was never initialized, or there was a problem
138 * with the fsprobe connection array.
141 * fsprobe_numServers should be properly set. We don't do anything
142 * unless fsprobe_Init() has already been called.
145 * Shuts down Rx connections gracefully, frees allocated space
147 *------------------------------------------------------------------------*/
149 int fsprobe_Cleanup(a_releaseMem)
152 { /*fsprobe_Cleanup*/
154 static char rn[] = "fsprobe_Cleanup"; /*Routine name*/
155 int code; /*Return code*/
156 int conn_idx; /*Current connection index*/
157 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to fsprobe connection*/
160 * Assume the best, but check the worst.
162 if (!fsprobe_initflag) {
163 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
170 * Take care of all Rx connections first. Check to see that the
171 * server count is a legal value.
173 if (fsprobe_numServers <= 0) {
175 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
176 rn, fsprobe_numServers);
180 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
182 * The fsprobe connection structure array exists. Go through it
183 * and close up any Rx connections it holds.
185 curr_conn = fsprobe_ConnInfo;
186 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
187 if (curr_conn->rxconn != (struct rx_connection *)0) {
188 rx_DestroyConnection(curr_conn->rxconn);
189 curr_conn->rxconn = (struct rx_connection *)0;
191 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
192 rx_DestroyConnection(curr_conn->rxVolconn);
193 curr_conn->rxVolconn = (struct rx_connection *)0;
196 } /*for each fsprobe connection*/
197 } /*fsprobe connection structure exists*/
198 } /*Legal number of servers*/
201 * Now, release all the space we've allocated, if asked to.
204 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
205 free(fsprobe_ConnInfo);
206 if (fsprobe_Results.stats != NULL)
207 free(fsprobe_Results.stats);
208 if (fsprobe_Results.probeOK != (int *)0)
209 free(fsprobe_Results.probeOK);
213 * Return the news, whatever it is.
217 } /*fsprobe_Cleanup*/
219 /*------------------------------------------------------------------------
220 * [private] fsprobe_LWP
223 * This LWP iterates over the server connections and gathers up
224 * the desired statistics from each one on a regular basis. When
225 * the sweep is done, the associated handler function is called
226 * to process the new data.
235 * Started by fsprobe_Init(), uses global sturctures.
239 *------------------------------------------------------------------------*/
240 static void fsprobe_LWP()
244 static char rn[] = "fsprobe_LWP"; /*Routine name*/
245 register afs_int32 code; /*Results of calls*/
246 struct timeval tv; /*Time structure*/
247 int conn_idx; /*Connection index*/
248 struct fsprobe_ConnectionInfo *curr_conn; /*Current connection*/
249 struct ProbeViceStatistics *curr_stats; /*Current stats region*/
250 int *curr_probeOK; /*Current probeOK field*/
252 while (1) { /*Service loop*/
254 * Iterate through the server connections, gathering data.
255 * Don't forget to bump the probe count and zero the statistics
256 * areas before calling the servers.
259 fprintf(stderr, "[%s] Waking up, collecting data from %d connected servers\n",
260 rn, fsprobe_numServers);
261 curr_conn = fsprobe_ConnInfo;
262 curr_stats = fsprobe_Results.stats;
263 curr_probeOK = fsprobe_Results.probeOK;
264 fsprobe_Results.probeNum++;
265 memset(fsprobe_Results.stats, 0, fsprobe_statsBytes);
266 memset(fsprobe_Results.probeOK, 0, fsprobe_probeOKBytes);
268 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
270 * Grab the statistics for the current FileServer, if the
271 * connection is valid.
274 fprintf(stderr, "[%s] Contacting server %s\n", rn, curr_conn->hostName);
275 if (curr_conn->rxconn != (struct rx_connection *)0) {
277 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
278 *curr_probeOK = RXAFS_GetStatistics(curr_conn->rxconn, curr_stats);
280 } /*Valid Rx connection*/
283 * Call the Volume Server too to get additional stats
286 fprintf(stderr, "[%s] Contacting volume server %s\n", rn, curr_conn->hostName);
287 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
290 struct diskPartition partition;
293 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
294 for (i = 0 ; i < curr_conn->partCnt; i++) {
295 if (curr_conn->partList.partFlags[i] & PARTVALID) {
296 MapPartIdIntoName(curr_conn->partList.partId[i], pname);
297 code = AFSVolPartitionInfo(curr_conn->rxVolconn, pname, &partition);
299 fprintf(stderr, "Could not get information on server %s partition %s\n", curr_conn->hostName, pname);
301 curr_stats->Disk[i].BlocksAvailable = partition.free;
302 curr_stats->Disk[i].TotalBlocks = partition.minFree;
303 strcpy(curr_stats->Disk[i].Name, pname);
312 * Advance the fsprobe connection pointer & stats pointer.
318 } /*For each fsprobe connection*/
321 * All (valid) connections have been probed. Now, call the
322 * associated handler function. The handler does not take
323 * any explicit parameters, rather gets to the goodies via
324 * some of the objects exported by this module.
327 fprintf(stderr, "[%s] Polling complete, calling associated handler routine.\n",
329 code = fsprobe_Handler();
331 fprintf(stderr, "[%s] Handler routine returned error code %d\n", rn, code);
334 * Fall asleep for the prescribed number of seconds.
336 tv.tv_sec = fsprobe_ProbeFreqInSecs;
339 fprintf(stderr, "[%s] Falling asleep for %d seconds\n", rn, fsprobe_ProbeFreqInSecs);
340 code = IOMGR_Select(0, /*Num fids*/
341 0, /*Descriptors ready for reading*/
342 0, /*Descriptors ready for writing*/
343 0, /*Descriptors w/exceptional conditions*/
344 &tv); /*Ptr to timeout structure*/
346 fprintf(stderr, "[%s] IOMGR_Select returned code %d\n", rn, code);
351 /*list all the partitions on <aserver> */
352 static int newvolserver=0;
353 XListPartitions(aconn, ptrPartList, cntp)
354 struct rx_connection *aconn;
355 struct partList *ptrPartList;
359 struct partEntries partEnts;
360 register int i, j=0, code;
363 if (newvolserver == 1) {
364 for(i = 0; i < 26; i++)
365 partIds.partIds[i] = -1;
367 code = AFSVolListPartitions(aconn, &partIds);
369 for (i = 0;i < 26; i++) {
370 if((partIds.partIds[i]) != -1) {
371 ptrPartList->partId[j] = partIds.partIds[i];
372 ptrPartList->partFlags[j] = PARTVALID;
375 ptrPartList->partFlags[i] = 0;
381 partEnts.partEntries_len = 0;
382 partEnts.partEntries_val = NULL;
383 code = AFSVolXListPartitions(aconn, &partEnts);
385 if (code == RXGEN_OPCODE) {
386 newvolserver = 1; /* Doesn't support new interface */
393 *cntp = partEnts.partEntries_len;
394 if (*cntp > VOLMAXPARTS) {
395 fprintf(stderr,"Warning: number of partitions on the server too high %d (process only %d)\n",
399 for (i = 0;i < *cntp; i++) {
400 ptrPartList->partId[i] = partEnts.partEntries_val[i];
401 ptrPartList->partFlags[i] = PARTVALID;
403 free(partEnts.partEntries_val);
407 fprintf(stderr,"Could not fetch the list of partitions from the server\n");
412 /*------------------------------------------------------------------------
413 * [exported] fsprobe_Init
416 * Initialize the fsprobe module: set up Rx connections to the
417 * given set of servers, start up the probe and callback LWPs,
418 * and associate the routine to be called when a probe completes.
421 * int a_numServers : Num. servers to connect to.
422 * struct sockaddr_in *a_socketArray : Array of server sockets.
423 * int a_ProbeFreqInSecs : Probe frequency in seconds.
424 * int (*a_ProbeHandler)() : Ptr to probe handler fcn.
425 * int a_debug; : Turn debugging output on?
429 * -2 for (at least one) connection error,
430 * LWP process creation code, if it failed,
431 * -1 for other fatal errors.
434 * *** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
435 * Also, the server security object CBsecobj MUST be a static,
436 * since it has to stick around after this routine exits.
439 * Sets up just about everything.
440 *------------------------------------------------------------------------*/
442 int fsprobe_Init(a_numServers, a_socketArray, a_ProbeFreqInSecs, a_ProbeHandler, a_debug)
444 struct sockaddr_in *a_socketArray;
445 int a_ProbeFreqInSecs;
446 int (*a_ProbeHandler)();
451 static char rn[] = "fsprobe_Init"; /*Routine name*/
452 register afs_int32 code; /*Return value*/
453 static struct rx_securityClass *CBsecobj; /*Callback security object*/
454 struct rx_securityClass *secobj; /*Client security object*/
455 struct rx_service *rxsrv_afsserver; /*Server for AFS*/
456 int arg_errfound; /*Argument error found?*/
457 int curr_srv; /*Current server idx*/
458 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to current conn*/
459 char *hostNameFound; /*Ptr to returned host name*/
460 int conn_err; /*Connection error?*/
461 int PortToUse; /*Callback port to use*/
464 * If we've already been called, snicker at the bozo, gently
465 * remind him of his doubtful heritage, and return success.
467 if (fsprobe_initflag) {
468 fprintf(stderr, "[%s] Called multiple times!\n", rn);
472 fsprobe_initflag = 1;
475 * Check the parameters for bogosities.
478 if (a_numServers <= 0) {
479 fprintf(stderr, "[%s] Illegal number of servers: %d\n",
483 if (a_socketArray == (struct sockaddr_in *)0) {
484 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
487 if (a_ProbeFreqInSecs <= 0) {
488 fprintf(stderr, "[%s] Illegal probe frequency: %d\n",
489 rn, a_ProbeFreqInSecs);
492 if (a_ProbeHandler == (int (*)())0) {
493 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
500 * Record our passed-in info.
502 fsprobe_debug = a_debug;
503 fsprobe_numServers = a_numServers;
504 fsprobe_Handler = a_ProbeHandler;
505 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
508 * Get ready in case we have to do a cleanup - basically, zero
511 fsprobe_CleanupInit();
514 * Allocate the necessary data structures and initialize everything
518 (struct fsprobe_ConnectionInfo *)
519 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
520 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
522 "[%s] Can't allocate %d connection info structs (%d bytes)\n",
524 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
525 return(-1); /*No cleanup needs to be done yet*/
529 fprintf(stderr, "[%s] fsprobe_ConnInfo allocated (%d bytes)\n",
530 rn, a_numServers * sizeof(struct fsprobe_ConnectionInfo));
533 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
534 fsprobe_Results.stats = (struct ProbeViceStatistics *)
535 malloc(fsprobe_statsBytes);
536 if (fsprobe_Results.stats == NULL) {
538 "[%s] Can't allocate %d statistics structs (%d bytes)\n",
539 rn, a_numServers, fsprobe_statsBytes);
540 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
545 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
546 rn, fsprobe_statsBytes);
548 fsprobe_probeOKBytes = a_numServers * sizeof(int);
549 fsprobe_Results.probeOK = (int *) malloc(fsprobe_probeOKBytes);
550 if (fsprobe_Results.probeOK == (int *)0) {
552 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
553 rn, a_numServers, fsprobe_probeOKBytes);
554 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
560 "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
561 rn, fsprobe_probeOKBytes);
563 fsprobe_Results.probeNum = 0;
564 fsprobe_Results.probeTime = 0;
565 memset(fsprobe_Results.stats, 0, (a_numServers * sizeof(struct ProbeViceStatistics)));
568 * Initialize the Rx subsystem, just in case nobody's done it.
571 fprintf(stderr, "[%s] Initializing Rx\n", rn);
572 PortToUse = FSPROBE_CBPORT;
574 code = rx_Init(htons(PortToUse));
576 if (code == RX_ADDRINUSE) {
578 fprintf(stderr, "[%s] Callback port %d in use, advancing\n",
583 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
589 fprintf(stderr, "[%s] Rx initialized on port %d\n", rn, PortToUse);
592 * Create a null Rx server security object, to be used by the
595 CBsecobj = rxnull_NewServerSecurityObject();
596 if (CBsecobj == (struct rx_securityClass *)0) {
598 "[%s] Can't create null security object for the callback listener.\n",
600 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
604 fprintf(stderr, "[%s] Callback server security object created\n", rn);
607 * Create a null Rx client security object, to be used by the
610 secobj = rxnull_NewClientSecurityObject();
611 if (secobj == (struct rx_securityClass *)0) {
613 "[%s] Can't create client security object for probe LWP.\n",
615 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
619 fprintf(stderr, "[%s] Probe LWP client security object created\n",
622 curr_conn = fsprobe_ConnInfo;
624 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
626 * Copy in the socket info for the current server, resolve its
627 * printable name if possible.
630 fprintf(stderr, "[%s] Copying in the following socket info:\n", rn);
631 fprintf(stderr, "[%s] IP addr 0x%lx, port %d\n", rn,
632 (a_socketArray + curr_srv)->sin_addr.s_addr,
633 (a_socketArray + curr_srv)->sin_port);
635 memcpy(&(curr_conn->skt), a_socketArray + curr_srv, sizeof(struct sockaddr_in));
637 hostNameFound = hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
638 if (hostNameFound == NULL) {
640 "[%s] Can't map Internet address %lu to a string name\n",
641 rn, curr_conn->skt.sin_addr.s_addr);
642 curr_conn->hostName[0] = '\0';
645 strcpy(curr_conn->hostName, hostNameFound);
647 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
648 rn, curr_srv, curr_conn->hostName);
652 * Make an Rx connection to the current server.
656 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
657 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
658 curr_conn->skt.sin_port);
660 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
661 curr_conn->skt.sin_port, /*Server port*/
662 1, /*AFS service num*/
663 secobj, /*Security object*/
664 0); /*Number of above*/
665 if (curr_conn->rxconn == (struct rx_connection *)0) {
667 "[%s] Can't create Rx connection to server %s (%lu)\n",
668 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
672 fprintf(stderr, "[%s] New connection at 0x%lx\n",
673 rn, curr_conn->rxconn);
676 * Make an Rx connection to the current volume server.
680 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
681 rn, curr_srv, curr_conn->skt.sin_addr.s_addr, htons(7005));
682 curr_conn->rxVolconn =
683 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
684 htons(AFSCONF_VOLUMEPORT), /*Volume Server port*/
685 VOLSERVICE_ID, /*AFS service num*/
686 secobj, /*Security object*/
687 0); /*Number of above*/
688 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
690 "[%s] Can't create Rx connection to volume server %s (%lu)\n",
691 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
696 memset(&curr_conn->partList, 0, sizeof(struct partList));
697 curr_conn->partCnt = 0;
698 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList, &cnt);
700 curr_conn->partCnt = cnt;
704 fprintf(stderr, "[%s] New connection at 0x%lx\n",
705 rn, curr_conn->rxVolconn);
709 * Bump the current fsprobe connection to set up.
716 * Create the AFS callback service (listener).
719 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
721 rx_NewService(0, /*Use default port*/
723 "afs", /*Service name*/
724 &CBsecobj, /*Ptr to security object(s)*/
725 1, /*Number of security objects*/
726 RXAFSCB_ExecuteRequest); /*Dispatcher*/
727 if (rxsrv_afsserver == (struct rx_service *)0) {
728 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n", rn);
729 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
733 fprintf(stderr, "[%s] Callback listener created\n", rn);
736 * Start up the AFS callback service.
739 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
740 rx_StartServer(0 /*Don't donate yourself to LWP pool*/);
743 * Start up the probe LWP.
746 fprintf(stderr, "[%s] Creating the probe LWP\n", rn);
748 LWP_CreateProcess(fsprobe_LWP, /*Function to start up*/
749 LWP_STACK_SIZE, /*Stack size in bytes*/
752 "fsprobe Worker", /*Name to use*/
753 &probeLWP_ID); /*Returned LWP process ID*/
755 fprintf(stderr, "[%s] Can't create fsprobe LWP! Error is %d\n", rn, code);
756 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
761 "[%s] Probe LWP process structure located at 0x%x\n",
766 * Do I need to do this?
769 fprintf(stderr, "[%s] Calling osi_Wakeup()\n", rn);
770 osi_Wakeup(&rxsrv_afsserver); /*Wake up anyone waiting for it*/
774 * Return the final results.
784 /*------------------------------------------------------------------------
785 * [exported] fsprobe_ForceProbeNow
788 * Wake up the probe LWP, forcing it to execute a probe immediately.
795 * Error value otherwise.
798 * The module must have been initialized.
802 *------------------------------------------------------------------------*/
804 int fsprobe_ForceProbeNow()
806 { /*fsprobe_ForceProbeNow*/
808 static char rn[] = "fsprobe_ForceProbeNow"; /*Routine name*/
811 * There isn't a prayer unless we've been initialized.
813 if (!fsprobe_initflag) {
814 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
819 * Kick the sucker in the side.
821 IOMGR_Cancel(probeLWP_ID);
824 * We did it, so report the happy news.
828 } /*fsprobe_ForceProbeNow*/