2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Implementation of the AFS FileServer probe facility.
14 *------------------------------------------------------------------------*/
16 #include <afsconfig.h>
17 #include <afs/param.h>
22 #include <afs/cellconfig.h>
23 #include <afs/afsint.h>
24 #include <afs/afsutil.h>
25 #include <afs/volser.h>
26 #include <afs/volser_prototypes.h>
27 #define FSINT_COMMON_XG
28 #include <afs/afscbint.h>
30 #include "fsprobe.h" /*Interface for this module */
35 int fsprobe_numServers; /* Number of servers being probed; fsprobe_Init copies it from a_numServers and sizes the arrays below with it */
36 struct fsprobe_ConnectionInfo *fsprobe_ConnInfo; /* Array of per-server connection records, allocated in fsprobe_Init */
37 struct fsprobe_ProbeResults fsprobe_Results; /* Latest probe results: stats and probeOK arrays plus probeNum and probeTime */
38 int fsprobe_ProbeFreqInSecs; /* Seconds added to the current time to form the probe thread wait deadline */
43 static int fsprobe_initflag = 0; /* Set to 1 by fsprobe_Init; checked by fsprobe_Cleanup and fsprobe_ForceProbeNow */
44 static int fsprobe_debug = 0; /* Debugging output enabled? Copied from a_debug in fsprobe_Init */
45 static int (*fsprobe_Handler) (void); /* Handler called by fsprobe_LWP after each sweep over the servers */
46 static pthread_t fsprobe_thread; /* Thread created in fsprobe_Init to run fsprobe_LWP */
47 static int fsprobe_statsBytes; /* Size in bytes of fsprobe_Results.stats; used to zero it before each sweep */
48 static int fsprobe_probeOKBytes; /* Size in bytes of fsprobe_Results.probeOK; used to zero it before each sweep */
49 static opr_mutex_t fsprobe_force_lock; /* Mutex held around every wait on, and signal of, fsprobe_force_cv */
50 static opr_cv_t fsprobe_force_cv; /* Signalled by fsprobe_ForceProbeNow; fsprobe_LWP does its timed wait on it */
52 /*------------------------------------------------------------------------
53 * [private] fsprobe_CleanupInit
56 * Set up for recovery after an error in initialization (i.e.,
57 * during a call to fsprobe_Init).
64 * Error value otherwise.
67 * This routine is private to the module.
70 * Zeros out basic data structures.
71 *------------------------------------------------------------------------*/
/*
 * fsprobe_CleanupInit: put the module globals into a known empty state.
 *
 * Visible steps:
 *   - fsprobe_ConnInfo is set to a null pointer and fsprobe_Results is
 *     zero-filled, so a later fsprobe_Cleanup sees null pointers for
 *     anything that was never allocated.
 *   - Each of the three callback stubs (SRXAFSCB_CallBack,
 *     SRXAFSCB_InitCallBackState2, SRXAFSCB_Probe) is called once with
 *     null placeholder arguments, and its result is stored in code.
 *
 * NOTE(review): this listing skips lines (the embedded numbers jump).
 * No assignment to interfaceAddr is visible before it is passed to
 * SRXAFSCB_InitCallBackState2, and neither the checks on code nor the
 * final return are visible. Confirm all three against the full source.
 * NOTE(review): the stub calls presumably exist only to make sure the
 * callback routines are present and linked in; that reason is not stated
 * in the visible text, so verify it.
 */
74 fsprobe_CleanupInit(void)
75 { /*fsprobe_CleanupInit */
77 afs_int32 code; /* Result of the most recent callback stub call */
78 struct rx_call *rxcall; /* Placeholder argument, set to null below */
79 AFSCBFids *Fids_Array; /* Placeholder argument, set to null below */
80 AFSCBs *CallBack_Array; /* Placeholder argument, set to null below */
81 struct interfaceAddr *interfaceAddr; /* Placeholder argument; its initialization is not visible in this listing */
/* Forget any connection array and wipe the results block. */
83 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)0;
84 memset(&fsprobe_Results, 0, sizeof(struct fsprobe_ProbeResults));
/* Null out the placeholder arguments handed to the stubs. */
86 rxcall = (struct rx_call *)0;
87 Fids_Array = (AFSCBFids *) 0;
88 CallBack_Array = (AFSCBs *) 0;
/* Call each callback stub once; each call overwrites code. */
91 code = SRXAFSCB_CallBack(rxcall, Fids_Array, CallBack_Array);
94 code = SRXAFSCB_InitCallBackState2(rxcall, interfaceAddr);
97 code = SRXAFSCB_Probe(rxcall);
100 } /*fsprobe_CleanupInit */
103 /*------------------------------------------------------------------------
104 * [exported] fsprobe_Cleanup
107 * Clean up our memory and connection state.
110 * int a_releaseMem : Should we free up malloc'ed areas?
113 * 0 on total success,
114 * -1 if the module was never initialized, or there was a problem
115 * with the fsprobe connection array.
118 * fsprobe_numServers should be properly set. We don't do anything
119 * unless fsprobe_Init() has already been called.
122 * Shuts down Rx connections gracefully, frees allocated space
124 *------------------------------------------------------------------------*/
/*
 * fsprobe_Cleanup: tear down Rx connections and release module memory.
 *
 * a_releaseMem: per the header text, says whether the allocated areas
 *               should be freed. The test of this flag is not visible in
 *               this listing.
 *
 * Visible behavior:
 *   - If fsprobe_initflag is clear, a refusal message goes to stderr.
 *   - If fsprobe_numServers is not positive, a complaint goes to stderr.
 *     Otherwise, when fsprobe_ConnInfo is non-null, the loop visits
 *     fsprobe_numServers entries and, for each non-null rxconn and
 *     rxVolconn, calls rx_DestroyConnection and then nulls the field.
 *   - fsprobe_ConnInfo, fsprobe_Results.stats and fsprobe_Results.probeOK
 *     are each freed when non-null.
 *
 * NOTE(review): the statement that advances curr_conn to the next entry,
 * the assignments to code, and the return statements are not visible in
 * this listing. Confirm them against the full source.
 * NOTE(review): no reset of the three freed pointers is visible. If the
 * full source also lacks one, a second call would free them again.
 */
127 fsprobe_Cleanup(int a_releaseMem)
128 { /*fsprobe_Cleanup */
130 static char rn[] = "fsprobe_Cleanup"; /* Routine name used as the prefix of stderr messages */
131 int code; /* Return code; its assignments are not visible in this listing */
132 int conn_idx; /* Loop counter over the connection array */
133 struct fsprobe_ConnectionInfo *curr_conn; /* Cursor into fsprobe_ConnInfo */
136 * Assume the best, but check the worst.
138 if (!fsprobe_initflag) {
139 fprintf(stderr, "[%s] Refused; module not initialized\n", rn);
145 * Take care of all Rx connections first. Check to see that the
146 * server count is a legal value.
148 if (fsprobe_numServers <= 0) {
150 "[%s] Illegal number of servers to clean up (fsprobe_numServers = %d)\n",
151 rn, fsprobe_numServers);
154 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0) {
156 * The fsprobe connection structure array exists. Go through it
157 * and close up any Rx connections it holds.
159 curr_conn = fsprobe_ConnInfo;
160 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
161 if (curr_conn->rxconn != (struct rx_connection *)0) {
162 rx_DestroyConnection(curr_conn->rxconn);
163 curr_conn->rxconn = (struct rx_connection *)0;
165 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
166 rx_DestroyConnection(curr_conn->rxVolconn);
167 curr_conn->rxVolconn = (struct rx_connection *)0;
170 } /*for each fsprobe connection */
171 } /*fsprobe connection structure exists */
172 } /*Legal number of servers */
175 * Now, release all the space we've allocated, if asked to.
178 if (fsprobe_ConnInfo != (struct fsprobe_ConnectionInfo *)0)
179 free(fsprobe_ConnInfo);
180 if (fsprobe_Results.stats != NULL)
181 free(fsprobe_Results.stats);
182 if (fsprobe_Results.probeOK != (int *)0)
183 free(fsprobe_Results.probeOK);
187 * Return the news, whatever it is.
191 } /*fsprobe_Cleanup */
193 /*------------------------------------------------------------------------
194 * [private] fsprobe_LWP
197 * This thread iterates over the server connections and gathers up
198 * the desired statistics from each one on a regular basis. When
199 * the sweep is done, the associated handler function is called
200 * to process the new data.
209 * Started by fsprobe_Init(), uses global structures.
213 *------------------------------------------------------------------------*/
/*
 * fsprobe_LWP: body of the probe thread (fsprobe_Init passes this
 * function to pthread_create). The argument is not used.
 *
 * Before the loop, a buffer for the 64-bit statistics is allocated into
 * stats64.ViceStatistics64_val. Each pass of the endless service loop
 * then does the following:
 *
 *   1. Points the cursors at the start of fsprobe_ConnInfo,
 *      fsprobe_Results.stats and fsprobe_Results.probeOK, increments
 *      fsprobe_Results.probeNum, and zero-fills the stats and probeOK
 *      arrays using fsprobe_statsBytes and fsprobe_probeOKBytes.
 *   2. For every server whose rxconn is non-null, calls
 *      RXAFS_GetStatistics64. When *curr_probeOK equals RXGEN_OPCODE it
 *      calls RXAFS_GetStatistics straight into curr_stats instead. When
 *      *curr_probeOK is 0 it copies the listed fields out of stats64,
 *      narrowing each with RoundInt64ToInt32.
 *   3. For every server whose rxVolconn is non-null, walks partCnt
 *      partitions and, for each one flagged PARTVALID, maps the id to a
 *      name with MapPartIdIntoName and calls AFSVolPartitionInfo64. The
 *      free and minFree fields are narrowed with RoundInt64ToInt31 into
 *      Disk[i].BlocksAvailable and Disk[i].TotalBlocks, and the name is
 *      copied in. When code equals RXGEN_OPCODE the older
 *      AFSVolPartitionInfo call is used.
 *   4. Calls fsprobe_Handler and reports a handler error code on stderr.
 *   5. Builds an absolute deadline of the current time plus
 *      fsprobe_ProbeFreqInSecs and does opr_cv_timedwait on
 *      fsprobe_force_cv while holding fsprobe_force_lock. The function
 *      fsprobe_ForceProbeNow signals that condition variable.
 *
 * The statements after the loop are wrapped in AFS_UNREACHED.
 *
 * NOTE(review): this listing skips lines (the embedded numbers jump).
 * The following are not visible and should be confirmed in the full
 * source: the tail of the stats64 allocation size expression, any null
 * checks on the two malloc results, the assignments that store the RPC
 * results into *curr_probeOK and code, the cursor advances at the end of
 * each per-server pass, and the release of partition64p.
 * NOTE(review): Disk[i].TotalBlocks is filled from the minFree field.
 * Confirm what the volume server stores in that field.
 */
215 fsprobe_LWP(void *unused)
218 static char rn[] = "fsprobe_LWP"; /* Routine name used as the prefix of stderr messages */
219 afs_int32 code; /* Result of the volume RPCs, the handler call and the timed wait */
220 struct timeval tv; /* Current time read with gettimeofday */
221 struct timespec wait; /* Absolute deadline handed to opr_cv_timedwait */
222 int conn_idx; /* Loop counter over the servers */
223 struct fsprobe_ConnectionInfo *curr_conn; /* Cursor into fsprobe_ConnInfo */
224 struct ProbeViceStatistics *curr_stats; /* Cursor into fsprobe_Results.stats */
225 int *curr_probeOK; /* Cursor into fsprobe_Results.probeOK */
226 ViceStatistics64 stats64; /* Receives the 64-bit statistics reply; its value buffer is allocated just below */
227 stats64.ViceStatistics64_val = malloc(STATS64_VERSION *
229 while (1) { /*Service loop */
231 * Iterate through the server connections, gathering data.
232 * Don't forget to bump the probe count and zero the statistics
233 * areas before calling the servers.
237 "[%s] Waking up, collecting data from %d connected servers\n",
238 rn, fsprobe_numServers);
239 curr_conn = fsprobe_ConnInfo;
240 curr_stats = fsprobe_Results.stats;
241 curr_probeOK = fsprobe_Results.probeOK;
242 fsprobe_Results.probeNum++;
243 memset(fsprobe_Results.stats, 0, fsprobe_statsBytes);
244 memset(fsprobe_Results.probeOK, 0, fsprobe_probeOKBytes);
246 for (conn_idx = 0; conn_idx < fsprobe_numServers; conn_idx++) {
248 * Grab the statistics for the current FileServer, if the
249 * connection is valid.
252 fprintf(stderr, "[%s] Contacting server %s\n", rn,
253 curr_conn->hostName);
254 if (curr_conn->rxconn != (struct rx_connection *)0) {
257 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
260 RXAFS_GetStatistics64(curr_conn->rxconn, STATS64_VERSION, &stats64);
261 if (*curr_probeOK == RXGEN_OPCODE)
263 RXAFS_GetStatistics(curr_conn->rxconn, (ViceStatistics *)curr_stats);
264 else if (*curr_probeOK == 0) {
265 curr_stats->CurrentTime = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_CURRENTTIME]);
266 curr_stats->BootTime = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_BOOTTIME]);
267 curr_stats->StartTime = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_STARTTIME]);
268 curr_stats->CurrentConnections = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_CURRENTCONNECTIONS]);
269 curr_stats->TotalFetchs = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_TOTALFETCHES]);
270 curr_stats->TotalStores = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_TOTALSTORES]);
271 curr_stats->WorkStations = RoundInt64ToInt32(stats64.ViceStatistics64_val[STATS64_WORKSTATIONS]);
275 /*Valid Rx connection */
277 * Call the Volume Server too to get additional stats
280 fprintf(stderr, "[%s] Contacting volume server %s\n", rn,
281 curr_conn->hostName);
282 if (curr_conn->rxVolconn != (struct rx_connection *)0) {
285 struct diskPartition partition;
286 struct diskPartition64 *partition64p =
287 malloc(sizeof(struct diskPartition64));
291 "[%s] Connection valid, calling RXAFS_GetStatistics\n",
293 for (i = 0; i < curr_conn->partCnt; i++) {
294 if (curr_conn->partList.partFlags[i] & PARTVALID) {
295 MapPartIdIntoName(curr_conn->partList.partId[i],
298 AFSVolPartitionInfo64(curr_conn->rxVolconn, pname,
302 curr_stats->Disk[i].BlocksAvailable =
303 RoundInt64ToInt31(partition64p->free);
304 curr_stats->Disk[i].TotalBlocks =
305 RoundInt64ToInt31(partition64p->minFree);
306 strcpy(curr_stats->Disk[i].Name, pname);
308 if (code == RXGEN_OPCODE) {
310 AFSVolPartitionInfo(curr_conn->rxVolconn,
313 curr_stats->Disk[i].BlocksAvailable =
315 curr_stats->Disk[i].TotalBlocks =
317 strcpy(curr_stats->Disk[i].Name, pname);
322 "Could not get information on server %s partition %s\n",
323 curr_conn->hostName, pname);
332 * Advance the fsprobe connection pointer & stats pointer.
338 } /*For each fsprobe connection */
341 * All (valid) connections have been probed. Now, call the
342 * associated handler function. The handler does not take
343 * any explicit parameters, rather gets to the goodies via
344 * some of the objects exported by this module.
348 "[%s] Polling complete, calling associated handler routine.\n",
350 code = fsprobe_Handler();
352 fprintf(stderr, "[%s] Handler routine returned error code %d\n",
356 * Fall asleep for the prescribed number of seconds or wakeup
359 gettimeofday(&tv, NULL);
360 wait.tv_sec = tv.tv_sec + fsprobe_ProbeFreqInSecs;
361 wait.tv_nsec = tv.tv_usec * 1000;
362 opr_mutex_enter(&fsprobe_force_lock);
363 code = opr_cv_timedwait(&fsprobe_force_cv, &fsprobe_force_lock, &wait);
364 opr_mutex_exit(&fsprobe_force_lock);
366 AFS_UNREACHED(free(stats64.ViceStatistics64_val));
367 AFS_UNREACHED(return(NULL));
370 /*list all the partitions on <aserver> */
371 static int newvolserver = 0; /* Becomes 1 once AFSVolXListPartitions returns RXGEN_OPCODE; despite the name, 1 selects the older AFSVolListPartitions path below */
/*
 * XListPartitions: fill *ptrPartList with the partition ids reported by
 * the volume server reached through aconn, and report a count via cntp.
 *
 * Two RPC paths are visible:
 *   - newvolserver == 1: every slot of the 26-entry partIds array is
 *     preset to -1, AFSVolListPartitions is called, and each slot that is
 *     no longer -1 is copied to position j of the output list and flagged
 *     PARTVALID.
 *   - otherwise: AFSVolXListPartitions is called with an empty partEnts.
 *     If it returns RXGEN_OPCODE, newvolserver is set to 1. The reported
 *     length is stored in *cntp, a warning is printed when it exceeds
 *     VOLMAXPARTS, each entry is copied and flagged PARTVALID, and the
 *     partEntries_val buffer is freed.
 *
 * NOTE(review): this listing skips lines (the embedded numbers jump).
 * The rest of the parameter list, the declarations of code, i, j and
 * partIds, the increment of j, how the two branches are joined, any
 * clamp of *cntp to VOLMAXPARTS, and the return statements are not
 * visible. Confirm them against the full source.
 */
374 XListPartitions(struct rx_connection *aconn, struct partList *ptrPartList,
378 struct partEntries partEnts; /* Reply buffer for AFSVolXListPartitions */
382 if (newvolserver == 1) {
/* Preset every slot to -1 so that slots the server fills can be told apart afterwards. */
383 for (i = 0; i < 26; i++)
384 partIds.partIds[i] = -1;
386 code = AFSVolListPartitions(aconn, &partIds);
/* Copy the filled slots to position j of the output list. */
388 for (i = 0; i < 26; i++) {
389 if ((partIds.partIds[i]) != -1) {
390 ptrPartList->partId[j] = partIds.partIds[i];
391 ptrPartList->partFlags[j] = PARTVALID;
394 ptrPartList->partFlags[i] = 0;
/* Start from an empty reply; the free below releases whatever partEntries_val holds after the call. */
400 partEnts.partEntries_len = 0;
401 partEnts.partEntries_val = NULL;
402 code = AFSVolXListPartitions(aconn, &partEnts);
404 if (code == RXGEN_OPCODE) {
405 newvolserver = 1; /* Doesn't support new interface */
412 *cntp = partEnts.partEntries_len;
413 if (*cntp > VOLMAXPARTS) {
415 "Warning: number of partitions on the server too high %d (process only %d)\n",
419 for (i = 0; i < *cntp; i++) {
420 ptrPartList->partId[i] = partEnts.partEntries_val[i];
421 ptrPartList->partFlags[i] = PARTVALID;
423 free(partEnts.partEntries_val);
428 "Could not fetch the list of partitions from the server\n");
433 /*------------------------------------------------------------------------
434 * [exported] fsprobe_Init
437 * Initialize the fsprobe module: set up Rx connections to the
438 * given set of servers, start up the probe and callback threads,
439 * and associate the routine to be called when a probe completes.
442 * int a_numServers : Num. servers to connect to.
443 * struct sockaddr_in *a_socketArray : Array of server sockets.
444 * int a_ProbeFreqInSecs : Probe frequency in seconds.
445 * int (*a_ProbeHandler)() : Ptr to probe handler fcn.
446 * int a_debug : Turn debugging output on?
450 * -2 for (at least one) connection error,
451 * thread process creation code, if it failed,
452 * -1 for other fatal errors.
455 * *** MUST BE THE FIRST ROUTINE CALLED FROM THIS PACKAGE ***
456 * Also, the server security object CBsecobj MUST be a static,
457 * since it has to stick around after this routine exits.
460 * Sets up just about everything.
461 *------------------------------------------------------------------------*/
/*
 * fsprobe_Init: one-time setup of the probe module.
 *
 * Visible sequence:
 *   1. If fsprobe_initflag is already set, complain on stderr. Otherwise
 *      set it to 1 and initialize fsprobe_force_lock and fsprobe_force_cv.
 *   2. Validate the arguments: a_numServers and a_ProbeFreqInSecs must be
 *      positive, a_socketArray and a_ProbeHandler must be non-null. Each
 *      violation is reported on stderr.
 *   3. Copy a_debug, a_numServers, a_ProbeHandler and a_ProbeFreqInSecs
 *      into the module globals, then call fsprobe_CleanupInit.
 *   4. Allocate fsprobe_ConnInfo, fsprobe_Results.stats and
 *      fsprobe_Results.probeOK, each sized by a_numServers. A failed
 *      ConnInfo allocation returns -1; a failed stats or probeOK
 *      allocation calls fsprobe_Cleanup(1). The probe counters are
 *      zeroed and the stats array is zero-filled.
 *   5. Report Rx initialization, then create a null server security
 *      object (CBsecobj, kept static) and a null client security object
 *      (secobj). Either failure calls fsprobe_Cleanup(1).
 *   6. For each server: copy its sockaddr_in into the connection record,
 *      look up a host name with hostutil_GetNameByINet (an empty string
 *      is stored when none is found), open an Rx connection to service 1
 *      on the port from the socket, open a second Rx connection to
 *      VOLSERVICE_ID on htons(AFSCONF_VOLUMEPORT), zero the partition
 *      list, call XListPartitions and store cnt in partCnt.
 *   7. Create the callback service with rx_NewService (dispatcher
 *      RXAFSCB_ExecuteRequest), call rx_StartServer(0), and create the
 *      probe thread running fsprobe_LWP. A failure of the service or of
 *      the thread creation calls fsprobe_Cleanup(1).
 *
 * NOTE(review): this listing skips lines (the embedded numbers jump).
 * The last parameter in the signature, the rx_Init call, the cursor
 * advance at the end of the per-server loop, the handling of conn_err
 * and all return statements except one are not visible. Confirm them
 * against the full source.
 * NOTE(review): fsprobe_initflag is set before the arguments are
 * validated, so a call rejected for bad arguments appears to leave the
 * module marked as initialized. Confirm whether that is intended.
 * NOTE(review): the host name is copied with strcpy and the capacity of
 * hostName is not visible here. Confirm the name always fits.
 * NOTE(review): the debug message printed before the volume server
 * connection says service 1, while the call passes VOLSERVICE_ID.
 */
464 fsprobe_Init(int a_numServers, struct sockaddr_in *a_socketArray,
465 int a_ProbeFreqInSecs, int (*a_ProbeHandler)(void),
469 static char rn[] = "fsprobe_Init"; /*Routine name */
470 afs_int32 code; /*Return value */
471 static struct rx_securityClass *CBsecobj; /*Callback security object */
472 struct rx_securityClass *secobj; /*Client security object */
473 struct rx_service *rxsrv_afsserver; /*Server for AFS */
474 int arg_errfound; /*Argument error found? */
475 int curr_srv; /*Current server idx */
476 struct fsprobe_ConnectionInfo *curr_conn; /*Ptr to current conn */
477 char *hostNameFound; /*Ptr to returned host name */
478 int conn_err; /*Connection error? */
481 * If we've already been called, snicker at the bozo, gently
482 * remind him of his doubtful heritage, and return success.
484 if (fsprobe_initflag) {
485 fprintf(stderr, "[%s] Called multiple times!\n", rn);
488 fsprobe_initflag = 1;
490 opr_mutex_init(&fsprobe_force_lock);
491 opr_cv_init(&fsprobe_force_cv);
494 * Check the parameters for bogosities.
497 if (a_numServers <= 0) {
498 fprintf(stderr, "[%s] Illegal number of servers: %d\n", rn,
502 if (a_socketArray == (struct sockaddr_in *)0) {
503 fprintf(stderr, "[%s] Null server socket array argument\n", rn);
506 if (a_ProbeFreqInSecs <= 0) {
507 fprintf(stderr, "[%s] Illegal probe frequency: %d\n", rn,
511 if (a_ProbeHandler == NULL) {
512 fprintf(stderr, "[%s] Null probe handler function argument\n", rn);
519 * Record our passed-in info.
521 fsprobe_debug = a_debug;
522 fsprobe_numServers = a_numServers;
523 fsprobe_Handler = a_ProbeHandler;
524 fsprobe_ProbeFreqInSecs = a_ProbeFreqInSecs;
527 * Get ready in case we have to do a cleanup - basically, zero
530 fsprobe_CleanupInit();
533 * Allocate the necessary data structures and initialize everything
536 fsprobe_ConnInfo = (struct fsprobe_ConnectionInfo *)
537 malloc(a_numServers * sizeof(struct fsprobe_ConnectionInfo));
538 if (fsprobe_ConnInfo == (struct fsprobe_ConnectionInfo *)0) {
540 "[%s] Can't allocate %d connection info structs (%"AFS_SIZET_FMT" bytes)\n",
542 (a_numServers * sizeof(struct fsprobe_ConnectionInfo)));
543 return (-1); /*No cleanup needs to be done yet */
546 fsprobe_statsBytes = a_numServers * sizeof(struct ProbeViceStatistics);
547 fsprobe_Results.stats = (struct ProbeViceStatistics *)
548 malloc(fsprobe_statsBytes);
549 if (fsprobe_Results.stats == NULL) {
551 "[%s] Can't allocate %d statistics structs (%d bytes)\n", rn,
552 a_numServers, fsprobe_statsBytes);
553 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
555 } else if (fsprobe_debug)
556 fprintf(stderr, "[%s] fsprobe_Results.stats allocated (%d bytes)\n",
557 rn, fsprobe_statsBytes);
559 fsprobe_probeOKBytes = a_numServers * sizeof(int);
560 fsprobe_Results.probeOK = malloc(fsprobe_probeOKBytes);
561 if (fsprobe_Results.probeOK == (int *)0) {
563 "[%s] Can't allocate %d probeOK array entries (%d bytes)\n",
564 rn, a_numServers, fsprobe_probeOKBytes);
565 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
567 } else if (fsprobe_debug)
568 fprintf(stderr, "[%s] fsprobe_Results.probeOK allocated (%d bytes)\n",
569 rn, fsprobe_probeOKBytes);
571 fsprobe_Results.probeNum = 0;
572 fsprobe_Results.probeTime = 0;
573 memset(fsprobe_Results.stats, 0,
574 (a_numServers * sizeof(struct ProbeViceStatistics)));
577 * Initialize the Rx subsystem, just in case nobody's done it.
580 fprintf(stderr, "[%s] Initializing Rx\n", rn);
583 fprintf(stderr, "[%s] Fatal error in rx_Init()\n", rn);
587 fprintf(stderr, "[%s] Rx initialized.\n", rn);
590 * Create a null Rx server security object, to be used by the
593 CBsecobj = rxnull_NewServerSecurityObject();
594 if (CBsecobj == (struct rx_securityClass *)0) {
596 "[%s] Can't create null security object for the callback listener.\n",
598 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
602 fprintf(stderr, "[%s] Callback server security object created\n", rn);
605 * Create a null Rx client security object, to be used by the
608 secobj = rxnull_NewClientSecurityObject();
609 if (secobj == (struct rx_securityClass *)0) {
611 "[%s] Can't create client security object for probe thread.\n",
613 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
617 fprintf(stderr, "[%s] Probe thread client security object created\n",
620 curr_conn = fsprobe_ConnInfo;
622 for (curr_srv = 0; curr_srv < a_numServers; curr_srv++) {
624 * Copy in the socket info for the current server, resolve its
625 * printable name if possible.
628 fprintf(stderr, "[%s] Copying in the following socket info:\n",
630 fprintf(stderr, "[%s] IP addr 0x%x, port %d\n", rn,
631 (a_socketArray + curr_srv)->sin_addr.s_addr,
632 (a_socketArray + curr_srv)->sin_port);
634 memcpy(&(curr_conn->skt), a_socketArray + curr_srv,
635 sizeof(struct sockaddr_in));
638 hostutil_GetNameByINet(curr_conn->skt.sin_addr.s_addr);
639 if (hostNameFound == NULL) {
641 "[%s] Can't map Internet address %u to a string name\n",
642 rn, curr_conn->skt.sin_addr.s_addr);
643 curr_conn->hostName[0] = '\0';
645 strcpy(curr_conn->hostName, hostNameFound);
647 fprintf(stderr, "[%s] Host name for server index %d is %s\n",
648 rn, curr_srv, curr_conn->hostName);
652 * Make an Rx connection to the current server.
656 "[%s] Connecting to srv idx %d, IP addr 0x%x, port %d, service 1\n",
657 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
658 curr_conn->skt.sin_port);
659 curr_conn->rxconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
660 curr_conn->skt.sin_port, /*Server port */
661 1, /*AFS service num */
662 secobj, /*Security object */
663 0); /*Number of above */
664 if (curr_conn->rxconn == (struct rx_connection *)0) {
666 "[%s] Can't create Rx connection to server %s (%u)\n",
667 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
671 fprintf(stderr, "[%s] New connection at %p\n", rn,
675 * Make an Rx connection to the current volume server.
679 "[%s] Connecting to srv idx %d, IP addr 0x%x, port %d, service 1\n",
680 rn, curr_srv, curr_conn->skt.sin_addr.s_addr,
682 curr_conn->rxVolconn = rx_NewConnection(curr_conn->skt.sin_addr.s_addr, /*Server addr */
683 htons(AFSCONF_VOLUMEPORT), /*Volume Server port */
684 VOLSERVICE_ID, /*AFS service num */
685 secobj, /*Security object */
686 0); /*Number of above */
687 if (curr_conn->rxVolconn == (struct rx_connection *)0) {
689 "[%s] Can't create Rx connection to volume server %s (%u)\n",
690 rn, curr_conn->hostName, curr_conn->skt.sin_addr.s_addr);
695 memset(&curr_conn->partList, 0, sizeof(struct partList));
696 curr_conn->partCnt = 0;
697 i = XListPartitions(curr_conn->rxVolconn, &curr_conn->partList,
700 curr_conn->partCnt = cnt;
704 fprintf(stderr, "[%s] New connection at %p\n", rn,
705 curr_conn->rxVolconn);
709 * Bump the current fsprobe connection to set up.
716 * Create the AFS callback service (listener).
719 fprintf(stderr, "[%s] Creating AFS callback listener\n", rn);
720 rxsrv_afsserver = rx_NewService(0, /*Use default port */
722 "afs", /*Service name */
723 &CBsecobj, /*Ptr to security object(s) */
724 1, /*Number of security objects */
725 RXAFSCB_ExecuteRequest); /*Dispatcher */
726 if (rxsrv_afsserver == (struct rx_service *)0) {
727 fprintf(stderr, "[%s] Can't create callback Rx service/listener\n",
729 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
733 fprintf(stderr, "[%s] Callback listener created\n", rn);
736 * Start up the AFS callback service.
739 fprintf(stderr, "[%s] Starting up callback listener.\n", rn);
740 rx_StartServer(0 /*Don't donate yourself to thread pool */ );
743 * Start up the probe thread.
746 fprintf(stderr, "[%s] Creating the probe thread\n", rn);
747 code = pthread_create(&fsprobe_thread, NULL, fsprobe_LWP, NULL);
749 fprintf(stderr, "[%s] Can't create fsprobe thread! Error is %d\n", rn,
751 fsprobe_Cleanup(1); /*Delete already-malloc'ed areas */
756 * Return the final results.
766 /*------------------------------------------------------------------------
767 * [exported] fsprobe_ForceProbeNow
770 * Wake up the probe thread, forcing it to execute a probe immediately.
777 * Error value otherwise.
780 * The module must have been initialized.
784 *------------------------------------------------------------------------*/
/*
 * fsprobe_ForceProbeNow: wake the probe thread ahead of its deadline.
 *
 * If fsprobe_initflag is clear, a message asking for fsprobe_Init to be
 * called first goes to stderr. Otherwise fsprobe_force_cv is signalled
 * while fsprobe_force_lock is held. That is the condition variable the
 * probe thread waits on in fsprobe_LWP, so the signal ends its timed
 * wait and lets the next sweep start.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * The header text above promises an error value when the module is not
 * initialized; confirm the exact values against the full source.
 */
787 fsprobe_ForceProbeNow(void)
788 { /*fsprobe_ForceProbeNow */
790 static char rn[] = "fsprobe_ForceProbeNow"; /* Routine name used as the prefix of stderr messages */
793 * There isn't a prayer unless we've been initialized.
795 if (!fsprobe_initflag) {
796 fprintf(stderr, "[%s] Must call fsprobe_Init first!\n", rn);
801 * Kick the sucker in the side.
803 opr_mutex_enter(&fsprobe_force_lock);
804 opr_cv_signal(&fsprobe_force_cv);
805 opr_mutex_exit(&fsprobe_force_lock);
808 * We did it, so report the happy news.
812 } /*fsprobe_ForceProbeNow */
814 /*------------------------------------------------------------------------
815 * [exported] fsprobe_Wait
818 * Wait for the collection to complete.
821 * int sleep_secs : time to wait in seconds. 0 means sleep forever.
825 * Error value otherwise.
828 * The module must have been initialized.
832 *------------------------------------------------------------------------*/
834 fsprobe_Wait(int sleep_secs)
839 if (sleep_secs == 0) {
843 code = select(0, 0, 0, 0, &tv);
848 tv.tv_sec = sleep_secs;
850 code = select(0, 0, 0, 0, &tv);