2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Implementation of the AFS FileServer probe facility.
14 *------------------------------------------------------------------------*/
16 #include <afs/param.h>
17 #include <fsprobe.h> /*Interface for this module*/
18 #include <lwp.h> /*Lightweight process package*/
19 #include <afs/cellconfig.h>
/* NOTE(review): the leading numbers on these lines, and the gaps between them, suggest the listing lost lines during extraction. */
/* Stack size, in bytes, passed to LWP_CreateProcess() when fsprobe_Init() starts the probe LWP. */
21 #define LWP_STACK_SIZE (16 * 1024)
24 * Routines we need that don't have explicit include file definitions.
/* Declared K&R-style with empty parentheses, so the compiler performs no argument checking on calls. */
26 extern int RXAFSCB_ExecuteRequest(); /*AFS callback dispatcher*/
27 extern char *hostutil_GetNameByINet(); /*Host parsing utility*/
30 * Help out the linker by explicitly importing the callback routines.
/* These three stubs are invoked with null arguments from fsprobe_CleanupInit(). */
32 extern afs_int32 SRXAFSCB_CallBack();
33 extern afs_int32 SRXAFSCB_InitCallBackState2();
34 extern afs_int32 SRXAFSCB_Probe();
/* Exported module state (no static qualifier): filled in by fsprobe_Init() and read by fsprobe_LWP(). */
39 int fsprobe_numServers; /*Num servers connected*/
40 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /*Ptr to connection array*/
41 struct fsprobe_ProbeResults fsprobe_Results; /*Latest probe results*/
42 int fsprobe_ProbeFreqInSecs; /*Probe freq. in seconds*/
/* File-private state. The two byte counts are computed once in fsprobe_Init() and reused by fsprobe_LWP() to zero the result buffers before each sweep. */
47 static int fsprobe_initflag = 0; /*Was init routine called?*/
48 static int fsprobe_debug = 0; /*Debugging output enabled?*/
49 static int (*fsprobe_Handler)(); /*Probe handler routine*/
50 static PROCESS probeLWP_ID; /*Probe LWP process ID*/
51 static int fsprobe_statsBytes; /*Num bytes in stats block*/
52 static int fsprobe_probeOKBytes; /*Num bytes in probeOK block*/
55 * We have to pass a port to Rx to start up our callback listener
56 * service, but 7001 is already taken up by the Cache Manager. So,
/* First port fsprobe_Init() hands to rx_Init() for the callback listener. */
59 #define FSPROBE_CBPORT 7101
62 /*------------------------------------------------------------------------
63 * [private] fsprobe_CleanupInit
66 * Set up for recovery after an error in initialization (i.e.,
67 * during a call to fsprobe_Init).
74 * Error value otherwise.
77 * This routine is private to the module.
80 * Zeros out basic data structures.
81 *------------------------------------------------------------------------*/
83 static int fsprobe_CleanupInit()
85 { /*fsprobe_CleanupInit*/
87 afs_int32 code; /*Return code from callback stubs*/
88 struct rx_call *rxcall; /*Bogus param*/
89 AFSCBFids *Fids_Array; /*Bogus param*/
90 AFSCBs *CallBack_Array; /*Bogus param*/
91 struct interfaceAddr *interfaceAddr; /*Bogus param*/
93 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
94 bzero(fsprobe_Results, sizeof(struct fsprobe_ProbeResults));
96 rxcall = (struct rx_call *)0;
97 Fids_Array = (AFSCBFids *)0;
98 CallBack_Array = (AFSCBs *)0;
99 interfaceAddr = (struct interfaceAddr *)0;
101 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
104 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
107 code = SRXAFSCB_Probe(rxcall);
110 } /*fsprobe_CleanupInit*/
113 /*------------------------------------------------------------------------
114 * [exported] fsprobe_Cleanup
117 * Clean up our memory and connection state.
120 * int a_releaseMem : Should we free up malloc'ed areas?
123 * 0 on total success,
124 * -1 if the module was never initialized, or there was a problem
125 * with the fsprobe connection array.
128 * fsprobe_numServers should be properly set. We don't do anything
129 * unless fsprobe_Init() has already been called.
132 * Shuts down Rx connections gracefully, frees allocated space
134 *------------------------------------------------------------------------*/
136 int fsprobe_Cleanup(a_releaseMem)
139 { /*fsprobe_Cleanup*/
141 static char rn[] = "fsprobe_Cleanup"; /*Routine name*/
142 int code; /*Return code*/
143 int conn_idx; /*Current connection index*/
144 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to fsprobe connection*/
147 * Assume the best, but check the worst.
149 if (!fsprobe_initflag) {
150 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
157 * Take care of all Rx connections first. Check to see that the
158 * server count is a legal value.
160 if (fsprobe_numServers <= 0) {
162 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
163 rn, fsprobe_numServers);
167 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
169 * The fsprobe connection structure array exists. Go through it
170 * and close up any Rx connections it holds.
172 curr_conn = fsprobe_ConnInfo;
173 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
174 if (curr_conn->rxconn != (struct rx_connection *)0) {
175 rx_DestroyConnection(curr_conn->rxconn);
176 curr_conn->rxconn = (struct rx_connection *)0;
178 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
179 rx_DestroyConnection(curr_conn->rxVolconn);
180 curr_conn->rxVolconn = (struct rx_connection *)0;
183 } /*for each fsprobe connection*/
184 } /*fsprobe connection structure exists*/
185 } /*Legal number of servers*/
188 * Now, release all the space we've allocated, if asked to.
191 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
192 free(fsprobe_ConnInfo);
193 if (fsprobe_Results.stats != (struct ProbeViceStatistics *)0)
194 free(fsprobe_Results.stats);
195 if (fsprobe_Results.probeOK != (int *)0)
196 free(fsprobe_Results.probeOK);
200 * Return the news, whatever it is.
204 } /*fsprobe_Cleanup*/
206 /*------------------------------------------------------------------------
207 * [private] fsprobe_LWP
210 * This LWP iterates over the server connections and gathers up
211 * the desired statistics from each one on a regular basis. When
212 * the sweep is done, the associated handler function is called
213 * to process the new data.
222 * Started by fsprobe_Init(), uses global structures.
226 *------------------------------------------------------------------------*/
/* fsprobe_LWP: endless service loop that probes every configured server, calls the registered handler, then sleeps for fsprobe_ProbeFreqInSecs seconds. */
/* NOTE(review): the leading numbers and the gaps between them suggest lines were lost from this listing (debug guards, pointer advances, closing braces); the comments below describe only the statements that are visible. */
227 static void fsprobe_LWP()
231 static char rn[] = "fsprobe_LWP"; /*Routine name*/
232 register afs_int32 code; /*Results of calls*/
233 struct timeval tv; /*Time structure*/
234 int conn_idx; /*Connection index*/
235 struct fsprobe_ConnectionInfo *curr_conn; /*Current connection*/
236 struct ProbeViceStatistics *curr_stats; /*Current stats region*/
237 int *curr_probeOK; /*Current probeOK field*/
239 while (1) { /*Service loop*/
241 * Iterate through the server connections, gathering data.
242 * Don't forget to bump the probe count and zero the statistics
243 * areas before calling the servers.
246 fprintf(stderr, "[%s] Waking up, collecting data from %d connected servers\n",
247 rn, fsprobe_numServers);
/* Start of a sweep: rewind the three cursors to the first entry of their arrays, count the probe, and clear both result buffers using the byte sizes computed in fsprobe_Init(). */
248 curr_conn = fsprobe_ConnInfo;
249 curr_stats = fsprobe_Results.stats;
250 curr_probeOK = fsprobe_Results.probeOK;
251 fsprobe_Results.probeNum++;
252 bzero(fsprobe_Results.stats, fsprobe_statsBytes);
253 bzero(fsprobe_Results.probeOK, fsprobe_probeOKBytes);
255 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
257 * Grab the statistics for the current FileServer, if the
258 * connection is valid.
261 fprintf(stderr, "[%s] Contacting server %s\n", rn, curr_conn->hostName);
262 if (curr_conn->rxconn != (struct rx_connection *)0) {
264 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
/* The RPC return code is stored directly in this server's probeOK slot. */
265 *curr_probeOK = RXAFS_GetStatistics(curr_conn->rxconn, curr_stats);
267 } /*Valid Rx connection*/
270 * Call the Volume Server too to get additional stats
273 fprintf(stderr, "[%s] Contacting volume server %s\n", rn, curr_conn->hostName);
274 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
277 struct diskPartition partition;
/* NOTE(review): this message names RXAFS_GetStatistics, but the call made in the loop below is AFSVolPartitionInfo. */
280 fprintf(stderr, "[%s] Connection valid, calling RXAFS_GetStatistics\n", rn);
/* Walk the partitions recorded for this server by fsprobe_Init() and query each one flagged PARTVALID. */
281 for (i = 0 ; i < curr_conn->partCnt; i++) {
282 if (curr_conn->partList.partFlags[i] & PARTVALID) {
283 MapPartIdIntoName(curr_conn->partList.partId[i], pname);
284 code = AFSVolPartitionInfo(curr_conn->rxVolconn, pname, &partition);
286 fprintf(stderr, "Could not get information on server %s partition %s\n", curr_conn->hostName, pname);
/* Overwrite the per-disk entry with the volume server figures. */
/* NOTE(review): TotalBlocks is taken from partition.minFree; confirm that this field really carries the partition total. */
288 curr_stats->Disk[i].BlocksAvailable = partition.free;
289 curr_stats->Disk[i].TotalBlocks = partition.minFree;
/* NOTE(review): unbounded strcpy; the capacity of Disk[i].Name is declared outside this listing. */
290 strcpy(curr_stats->Disk[i].Name, pname);
299 * Advance the fsprobe connection pointer & stats pointer.
305 } /*For each fsprobe connection*/
308 * All (valid) connections have been probed. Now, call the
309 * associated handler function. The handler does not take
310 * any explicit parameters, rather gets to the goodies via
311 * some of the objects exported by this module.
314 fprintf(stderr, "[%s] Polling complete, calling associated handler routine.\n",
316 code = fsprobe_Handler();
318 fprintf(stderr, "[%s] Handler routine returned error code %d\n", rn, code);
321 * Fall asleep for the prescribed number of seconds.
/* NOTE(review): only tv_sec is visibly assigned here; confirm tv_usec is set on a line missing from this listing. */
323 tv.tv_sec = fsprobe_ProbeFreqInSecs;
326 fprintf(stderr, "[%s] Falling asleep for %d seconds\n", rn, fsprobe_ProbeFreqInSecs);
/* IOMGR_Select with no descriptors acts as a timed sleep; fsprobe_ForceProbeNow() calls IOMGR_Cancel() on this LWP to force an immediate probe. */
327 code = IOMGR_Select(0, /*Num fids*/
328 0, /*Descriptors ready for reading*/
329 0, /*Descriptors ready for writing*/
330 0, /*Descriptors w/exceptional conditions*/
331 &tv); /*Ptr to timeout structure*/
333 fprintf(stderr, "[%s] IOMGR_Select returned code %d\n", rn, code);
338 /*list all the partitions on <aserver> */
/* File-scope flag remembered across calls: set to 1 once a server answers AFSVolXListPartitions with RXGEN_OPCODE, after which the fixed-size AFSVolListPartitions call is used instead. Despite its name, the value 1 therefore selects the older interface. */
/* NOTE(review): the leading numbers and the gaps between them suggest lines were lost from this listing (the cntp parameter declaration, else branches, returns); comments describe only the visible statements. */
339 static int newvolserver=0;
/* XListPartitions: fill *ptrPartList with the partition ids reported by the volume server reached through aconn, and store the number of entries through cntp. Declared K&R-style with no return type, so it returns int by default. */
340 XListPartitions(aconn, ptrPartList, cntp)
341 struct rx_connection *aconn;
342 struct partList *ptrPartList;
346 struct partEntries partEnts;
347 register int i, j=0, code;
/* Older interface: a fixed table of 26 slots, preset to -1 so unused slots can be recognized after the call. */
350 if (newvolserver == 1) {
351 for(i = 0; i < 26; i++)
352 partIds.partIds[i] = -1;
354 code = AFSVolListPartitions(aconn, &partIds);
/* Compact the used slots to the front of the output list; j counts the entries written. */
356 for (i = 0;i < 26; i++) {
357 if((partIds.partIds[i]) != -1) {
358 ptrPartList->partId[j] = partIds.partIds[i];
359 ptrPartList->partFlags[j] = PARTVALID;
/* NOTE(review): the flag is cleared at index i (source slot) while valid entries are written at index j (output slot). */
362 ptrPartList->partFlags[i] = 0;
/* Newer interface: the server returns a variable-length array, starting from an empty descriptor. */
368 partEnts.partEntries_len = 0;
369 partEnts.partEntries_val = (afs_int32 *)0;
370 code = AFSVolXListPartitions(aconn, &partEnts);
372 if (code == RXGEN_OPCODE) {
373 newvolserver = 1; /* Doesn't support new interface */
380 *cntp = partEnts.partEntries_len;
/* NOTE(review): a warning is printed when the count exceeds VOLMAXPARTS; the statement that clamps *cntp is presumably on a line missing from this listing - confirm. */
381 if (*cntp > VOLMAXPARTS) {
382 fprintf(stderr,"Warning: number of partitions on the server too high %d (process only %d)\n",
386 for (i = 0;i < *cntp; i++) {
387 ptrPartList->partId[i] = partEnts.partEntries_val[i];
388 ptrPartList->partFlags[i] = PARTVALID;
/* Release the array that was filled in by AFSVolXListPartitions. */
390 free(partEnts.partEntries_val);
394 fprintf(stderr,"Could not fetch the list of partitions from the server\n");
399 /*------------------------------------------------------------------------
400 * [exported] fsprobe_Init
403 * Initialize the fsprobe module: set up Rx connections to the
404 * given set of servers, start up the probe and callback LWPs,
405 * and associate the routine to be called when a probe completes.
408 * int a_numServers : Num. servers to connect to.
409 * struct sockaddr_in *a_socketArray : Array of server sockets.
410 * int a_ProbeFreqInSecs : Probe frequency in seconds.
411 * int (*a_ProbeHandler)() : Ptr to probe handler fcn.
412 * int a_debug; : Turn debugging output on?
416 * -2 for (at least one) connection error,
417 * LWP process creation code, if it failed,
418 * -1 for other fatal errors.
421 * *** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
422 * Also, the server security object CBsecobj MUST be a static,
423 * since it has to stick around after this routine exits.
426 * Sets up just about everything.
427 *------------------------------------------------------------------------*/
/* fsprobe_Init: validate the arguments, allocate the connection and result arrays, initialize Rx, open a FileServer and a Volume Server connection per server, create the callback service and start the probe LWP. */
/* NOTE(review): the leading numbers and the gaps between them suggest lines were lost from this listing (debug guards, return statements, assignment targets, closing braces); comments describe only the visible statements. */
429 int fsprobe_Init(a_numServers, a_socketArray, a_ProbeFreqInSecs, a_ProbeHandler, a_debug)
431 struct sockaddr_in *a_socketArray;
432 int a_ProbeFreqInSecs;
433 int (*a_ProbeHandler)();
438 static char rn[] = "fsprobe_Init"; /*Routine name*/
439 register afs_int32 code; /*Return value*/
440 static struct rx_securityClass *CBsecobj; /*Callback security object*/
441 struct rx_securityClass *secobj; /*Client security object*/
442 struct rx_service *rxsrv_afsserver; /*Server for AFS*/
443 int arg_errfound; /*Argument error found?*/
444 int curr_srv; /*Current server idx*/
445 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to current conn*/
446 char *hostNameFound; /*Ptr to returned host name*/
447 int conn_err; /*Connection error?*/
448 int PortToUse; /*Callback port to use*/
451 * If we've already been called, snicker at the bozo, gently
452 * remind him of his doubtful heritage, and return success.
454 if (fsprobe_initflag) {
455 fprintf(stderr, "[%s] Called multiple times!\n", rn);
/* The flag is raised before the arguments are validated, so a call that fails later still leaves the module marked as initialized. */
459 fsprobe_initflag = 1;
462 * Check the parameters for bogosities.
/* Each failed check below is reported on stderr. */
465 if (a_numServers <= 0) {
466 fprintf(stderr, "[%s] Illegal number of servers: %d\n",
470 if (a_socketArray == (struct sockaddr_in *)0) {
471 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
474 if (a_ProbeFreqInSecs <= 0) {
475 fprintf(stderr, "[%s] Illegal probe frequency: %d\n",
476 rn, a_ProbeFreqInSecs);
479 if (a_ProbeHandler == (int (*)())0) {
480 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
487 * Record our passed-in info.
489 fsprobe_debug = a_debug;
490 fsprobe_numServers = a_numServers;
491 fsprobe_Handler = a_ProbeHandler;
492 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
495 * Get ready in case we have to do a cleanup - basically, zero
/* The return value of fsprobe_CleanupInit() is not used. */
498 fsprobe_CleanupInit();
501 * Allocate the necessary data structures and initialize everything
/* One fsprobe_ConnectionInfo per server. */
505 (struct fsprobe_ConnectionInfo *)
506 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
507 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
/* NOTE(review): the byte count printed with %d is an int multiplied by sizeof, which has type size_t; confirm the format specifier (same for the allocation message below). */
509 "[%s] Can't allocate %d connection info structs (%d bytes)\n",
511 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
512 return(-1); /*No cleanup needs to be done yet*/
516 fprintf(stderr, "[%s] fsprobe_ConnInfo allocated (%d bytes)\n",
517 rn, a_numServers * sizeof(struct fsprobe_ConnectionInfo));
/* One ProbeViceStatistics per server; the byte count is kept in a file-scope variable so fsprobe_LWP() can zero the buffer before each sweep. */
520 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
521 fsprobe_Results.stats = (struct ProbeViceStatistics *)
522 malloc(fsprobe_statsBytes);
523 if (fsprobe_Results.stats == (struct ProbeViceStatistics *)0) {
525 "[%s] Can't allocate %d statistics structs (%d bytes)\n",
526 rn, a_numServers, fsprobe_statsBytes);
527 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
532 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
533 rn, fsprobe_statsBytes);
/* One int per server, holding the code returned by that server's last statistics call. */
535 fsprobe_probeOKBytes = a_numServers * sizeof(int);
536 fsprobe_Results.probeOK = (int *) malloc(fsprobe_probeOKBytes);
537 if (fsprobe_Results.probeOK == (int *)0) {
539 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
540 rn, a_numServers, fsprobe_probeOKBytes);
541 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
547 "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
548 rn, fsprobe_probeOKBytes);
550 fsprobe_Results.probeNum = 0;
551 fsprobe_Results.probeTime = 0;
552 bzero(fsprobe_Results.stats,
553 (a_numServers * sizeof(struct ProbeViceStatistics)));
556 * Initialize the Rx subsystem, just in case nobody's done it.
559 fprintf(stderr, "[%s] Initializing Rx\n", rn);
/* rx_Init() is given FSPROBE_CBPORT in network byte order. The RX_ADDRINUSE message indicates the port number is advanced; the retry loop itself is presumably on lines missing from this listing - confirm. */
560 PortToUse = FSPROBE_CBPORT;
562 code = rx_Init(htons(PortToUse));
564 if (code == RX_ADDRINUSE) {
566 fprintf(stderr, "[%s] Callback port %d in use, advancing\n",
571 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
577 fprintf(stderr, "[%s] Rx initialized on port %d\n", rn, PortToUse);
580 * Create a null Rx server security object, to be used by the
/* CBsecobj is a static local because its address is handed to rx_NewService() further down. */
583 CBsecobj = (struct rx_securityClass *) rxnull_NewServerSecurityObject();
584 if (CBsecobj == (struct rx_securityClass *)0) {
586 "[%s] Can't create null security object for the callback listener.\n",
588 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
592 fprintf(stderr, "[%s] Callback server security object created\n", rn);
595 * Create a null Rx client security object, to be used by the
/* The same client security object is passed to both rx_NewConnection() calls in the loop below. */
598 secobj = (struct rx_securityClass *) rxnull_NewClientSecurityObject();
599 if (secobj == (struct rx_securityClass *)0) {
601 "[%s] Can't create client security object for probe LWP.\n",
603 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
607 fprintf(stderr, "[%s] Probe LWP client security object created\n",
/* Per-server setup: copy the socket address, resolve a printable host name, open both connections and fetch the partition list. */
610 curr_conn = fsprobe_ConnInfo;
612 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
614 * Copy in the socket info for the current server, resolve its
615 * printable name if possible.
618 fprintf(stderr, "[%s] Copying in the following socket info:\n", rn);
619 fprintf(stderr, "[%s] IP addr 0x%lx, port %d\n", rn,
620 (a_socketArray + curr_srv)->sin_addr.s_addr,
621 (a_socketArray + curr_srv)->sin_port);
623 bcopy(a_socketArray + curr_srv,
625 sizeof(struct sockaddr_in));
627 hostNameFound = hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
628 if (hostNameFound == (char *)0) {
630 "[%s] Can't map Internet address %lu to a string name\n",
631 rn, curr_conn->skt.sin_addr.s_addr);
632 curr_conn->hostName[0] = '\0';
/* NOTE(review): unbounded strcpy; the capacity of hostName is declared outside this listing. */
635 strcpy(curr_conn->hostName, hostNameFound);
637 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
638 rn, curr_srv, curr_conn->hostName);
642 * Make an Rx connection to the current server.
646 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
647 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
648 curr_conn->skt.sin_port);
/* FileServer connection: the address and port come from the caller-supplied socket, service id 1, security index 0. */
650 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
651 curr_conn->skt.sin_port, /*Server port*/
652 1, /*AFS service num*/
653 secobj, /*Security object*/
654 0); /*Number of above*/
655 if (curr_conn->rxconn == (struct rx_connection *)0) {
657 "[%s] Can't create Rx connection to server %s (%lu)\n",
658 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
/* NOTE(review): a pointer is printed with %lx here and for rxVolconn below; confirm the intended specifier. */
662 fprintf(stderr, "[%s] New connection at 0x%lx\n",
663 rn, curr_conn->rxconn);
666 * Make an Rx connection to the current volume server.
/* NOTE(review): this message prints htons(7005) and the text "service 1", while the call below uses htons(AFSCONF_VOLUMEPORT) and VOLSERVICE_ID. */
670 "[%s] Connecting to srv idx %d, IP addr 0x%lx, port %d, service 1\n",
671 rn, curr_srv, curr_conn->skt.sin_addr.s_addr, htons(7005));
672 curr_conn->rxVolconn =
673 rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr*/
674 htons(AFSCONF_VOLUMEPORT), /*Volume Server port*/
675 VOLSERVICE_ID, /*AFS service num*/
676 secobj, /*Security object*/
677 0); /*Number of above*/
678 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
680 "[%s] Can't create Rx connection to volume server %s (%lu)\n",
681 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
/* Reset the partition list, then ask the volume server for it; the count returned through cnt is what fsprobe_LWP() later iterates over. */
686 bzero(&curr_conn->partList, sizeof(struct partList));
687 curr_conn->partCnt = 0;
688 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList, &cnt);
690 curr_conn->partCnt = cnt;
694 fprintf(stderr, "[%s] New connection at 0x%lx\n",
695 rn, curr_conn->rxVolconn);
699 * Bump the current fsprobe connection to set up.
706 * Create the AFS callback service (listener).
709 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
/* Register the callback service with a single security object and RXAFSCB_ExecuteRequest as its dispatcher. */
711 rx_NewService(0, /*Use default port*/
713 "afs", /*Service name*/
714 &CBsecobj, /*Ptr to security object(s)*/
715 1, /*Number of security objects*/
716 RXAFSCB_ExecuteRequest); /*Dispatcher*/
717 if (rxsrv_afsserver == (struct rx_service *)0) {
718 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n", rn);
719 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
723 fprintf(stderr, "[%s] Callback listener created\n", rn);
726 * Start up the AFS callback service.
729 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
730 rx_StartServer(0 /*Don't donate yourself to LWP pool*/);
733 * Start up the probe LWP.
736 fprintf(stderr, "[%s] Creating the probe LWP\n", rn);
/* The probe loop runs as its own LWP; its id is stored in probeLWP_ID, which fsprobe_ForceProbeNow() passes to IOMGR_Cancel(). */
738 LWP_CreateProcess(fsprobe_LWP, /*Function to start up*/
739 LWP_STACK_SIZE, /*Stack size in bytes*/
742 "fsprobe Worker", /*Name to use*/
743 &probeLWP_ID); /*Returned LWP process ID*/
745 fprintf(stderr, "[%s] Can't create fsprobe LWP! Error is %d\n", rn, code);
746 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas*/
751 "[%s] Probe LWP process structure located at 0x%x\n",
756 * Do I need to do this?
759 fprintf(stderr, "[%s] Calling osi_Wakeup()\n", rn);
760 osi_Wakeup(&rxsrv_afsserver); /*Wake up anyone waiting for it*/
764 * Return the final results.
774 /*------------------------------------------------------------------------
775 * [exported] fsprobe_ForceProbeNow
778 * Wake up the probe LWP, forcing it to execute a probe immediately.
785 * Error value otherwise.
788 * The module must have been initialized.
792 *------------------------------------------------------------------------*/
794 int fsprobe_ForceProbeNow()
796 { /*fsprobe_ForceProbeNow*/
798 static char rn[] = "fsprobe_ForceProbeNow"; /*Routine name*/
801 * There isn't a prayer unless we've been initialized.
803 if (!fsprobe_initflag) {
804 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
809 * Kick the sucker in the side.
811 IOMGR_Cancel(probeLWP_ID);
814 * We did it, so report the happy news.
818 } /*fsprobe_ForceProbeNow*/