From 0a8e7b15486f5baa089eef661cdf0924af736160 Mon Sep 17 00:00:00 2001 From: "matt@linuxbox.com" Date: Tue, 6 Oct 2009 08:42:51 -0400 Subject: [PATCH] viced ihandle boost Make ihandle file descriptor cache parameters tunable, and accommodate platforms where max open files is large. Expand the fd cache hash table to 2048 entries. Raise fd cache size automatically to match configured number of lwps. NOTE: This code has been tested on Centos 5.3 x86_64, on VMWare, 2 physical, 2 logical CPUs (in tandem with viced_more_threads). LICENSE BSD Change-Id: If68eda6e1c955e026b250ca52bddf0b8383959c9 Change-Id: I5fbbec95523ea9cd9ff42dcf43f17db94c7bb161 Reviewed-on: http://gerrit.openafs.org/584 Reviewed-by: Derrick Brashear Tested-by: Derrick Brashear --- doc/man-pages/pod8/fileserver.pod | 15 ++++++++++ src/viced/viced.c | 35 ++++++++++++++++++++-- src/vol/ihandle.c | 61 ++++++++++++++++++++++++++++++++------- src/vol/ihandle.h | 41 +++++++++++++++++++++++++- src/vol/volume.c | 3 ++ src/vol/volume.h | 1 + 6 files changed, 143 insertions(+), 13 deletions(-) diff --git a/doc/man-pages/pod8/fileserver.pod b/doc/man-pages/pod8/fileserver.pod index 7b2625c..8ba8c9e 100644 --- a/doc/man-pages/pod8/fileserver.pod +++ b/doc/man-pages/pod8/fileserver.pod @@ -50,6 +50,9 @@ B S<<< [B<-auditlog> >] >>> S<<< [B<-fs-state-dont-save>] >>> S<<< [B<-fs-state-dont-restore>] >>> S<<< [B<-fs-state-verify>] (none | save | restore | both)] >>> + S<<< [B<-vhandle-setaside> >] >>> + S<<< [B<-vhandle-max-cachesize> >] >>> + S<<< [B<-vhandle-initial-cachesize> >] >>> S<<< [B<-vhashsize> >] >>> S<<< [B<-vlrudisable>] >>> S<<< [B<-vlruthresh> >] >>> @@ -542,6 +545,18 @@ The default is C. This option is only supported by the demand-attach file server. +=item B<-vhandle-setaside> > + +Number of file handles set aside for I/O not in the cache. Defaults to 128. + +=item B<-vhandle-max-cachesize> > + +Maximum number of available file handles. 
+ +=item B<-vhandle-initial-cachesize> > + +Number of file handles set aside for I/O in the cache. Defaults to 128. + =item B<-vhashsize > The log(2) number of of volume hash buckets. Default is 8 (i.e., by diff --git a/src/viced/viced.c b/src/viced/viced.c index e034ef4..ad5b134 100644 --- a/src/viced/viced.c +++ b/src/viced/viced.c @@ -903,6 +903,9 @@ FlagMsg(void) fputs("[-rxmaxmtu ] ", stdout); fputs("[-rxbind (bind the Rx socket to one address)] ", stdout); fputs("[-allow-dotted-principals (disable the rxkad principal name dot check)] ", stdout); + fputs("[-vhandle-setaside (fds reserved for non-cache io [default 128])] ", stdout); + fputs("[-vhandle-max-cachesize (max open files [default 128])] ", stdout); + fputs("[-vhandle-initial-cachesize (fds reserved for cache io [default 128])] ", stdout); #ifdef AFS_DEMAND_ATTACH_FS fputs("[-fs-state-dont-save (disable state save during shutdown)] ", stdout); fputs("[-fs-state-dont-restore (disable state restore during startup)] ", stdout); @@ -1036,6 +1039,9 @@ max_fileserver_thread(void) return MAX_FILESERVER_THREAD; } +/* from ihandle.c */ +extern ih_init_params vol_io_params; + static int ParseArgs(int argc, char *argv[]) { @@ -1122,6 +1128,24 @@ ParseArgs(int argc, char *argv[]) } vol_attach_threads = atoi(argv[++i]); #endif /* AFS_PTHREAD_ENV */ + } else if (!strcmp(argv[i], "-vhandle-setaside")) { + if ((i + 1) >= argc) { + fprintf(stderr, "missing argument for %s\n", argv[i]); + return -1; + } + vol_io_params.fd_handle_setaside = atoi(argv[++i]); + } else if (!strcmp(argv[i], "-vhandle-max-cachesize")) { + if ((i + 1) >= argc) { + fprintf(stderr, "missing argument for %s\n", argv[i]); + return -1; + } + vol_io_params.fd_max_cachesize = atoi(argv[++i]); + } else if (!strcmp(argv[i], "-vhandle-initial-cachesize")) { + if ((i + 1) >= argc) { + fprintf(stderr, "missing argument for %s\n", argv[i]); + return -1; + } + vol_io_params.fd_initial_cachesize = atoi(argv[++i]); #ifdef AFS_DEMAND_ATTACH_FS } else if 
(!strcmp(argv[i], "-fs-state-dont-save")) { fs_state.options.fs_state_save = 0; @@ -1909,6 +1933,8 @@ main(int argc, char *argv[]) #ifndef AFS_QUIETFS_ENV console = afs_fopen("/dev/console", "w"); #endif + /* set ihandle package defaults prior to parsing args */ + ih_PkgDefaults(); if (ParseArgs(argc, argv)) { FlagMsg(); @@ -2013,9 +2039,14 @@ main(int argc, char *argv[]) lwps = curLimit; else if (lwps > 16) lwps = 16; /* default to a maximum of 16 threads */ + + /* tune the ihandle fd cache accordingly */ + if (vol_io_params.fd_max_cachesize < curLimit) + vol_io_params.fd_max_cachesize = curLimit + 1; + ViceLog(0, - ("The system supports a max of %d open files and we are starting %d threads\n", - curLimit, lwps)); + ("The system supports a max of %d open files and we are starting %d threads (ihandle fd cache is %d)\n", + curLimit, lwps, vol_io_params.fd_max_cachesize)); } #ifndef AFS_PTHREAD_ENV assert(LWP_InitializeProcessSupport(LWP_MAX_PRIORITY - 2, &parentPid) == diff --git a/src/vol/ihandle.c b/src/vol/ihandle.c index fa6b6f6..8dafd8e 100644 --- a/src/vol/ihandle.c +++ b/src/vol/ihandle.c @@ -77,11 +77,16 @@ FdHandle_t *fdLruHead; FdHandle_t *fdLruTail; int ih_Inited = 0; +int ih_PkgDefaultsSet = 0; /* Most of the servers use fopen/fdopen. Since the FILE structure * only has eight bits for the file descriptor, the cache size * has to be less than 256. The cache can be made larger as long * as you are sure you don't need fopen/fdopen. */ + +/* As noted in ihandle.h, the fileno member of FILE on most platforms + * in 2008 is a 16- or 32-bit signed int. 
-Matt + */ int fdMaxCacheSize = 0; int fdCacheSize = 0; @@ -93,6 +98,28 @@ IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE]; void *ih_sync_thread(void *); +/* start-time configurable I/O limits */ +ih_init_params vol_io_params; + +void ih_PkgDefaults(void) +{ + /* once */ + ih_PkgDefaultsSet = 1; + + /* default to well-known values */ + vol_io_params.fd_handle_setaside = FD_HANDLE_SETASIDE; + + /* initial fd cachesize. the only one that will be used if + * the application does not call ih_UseLargeCache(). set this + * to a value representable in fileno member of the system's + * FILE structure (or equivalent). */ + vol_io_params.fd_initial_cachesize = FD_DEFAULT_CACHESIZE; + + /* fd cache size that will be used if/when ih_UseLargeCache() + * is called */ + vol_io_params.fd_max_cachesize = FD_MAX_CACHESIZE; +} + #ifdef AFS_PTHREAD_ENV /* Initialize the global ihandle mutex */ void @@ -116,14 +143,14 @@ ih_Initialize(void) DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail); } #if defined(AFS_NT40_ENV) - fdMaxCacheSize = FD_MAX_CACHESIZE; + fdMaxCacheSize = vol_io_params.fd_max_cachesize; #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV) { struct rlimit rlim; assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0); rlim.rlim_cur = rlim.rlim_max; assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0); - fdMaxCacheSize = rlim.rlim_cur - FD_HANDLE_SETASIDE; + fdMaxCacheSize = rlim.rlim_cur - vol_io_params.fd_handle_setaside; #ifdef AFS_NBSD_ENV /* XXX this is to avoid using up all system fd netbsd is * somewhat broken and have set maximum fd for a root process @@ -135,7 +162,7 @@ ih_Initialize(void) */ fdMaxCacheSize /= 4; #endif - fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE); + fdMaxCacheSize = MIN(fdMaxCacheSize, vol_io_params.fd_max_cachesize); assert(fdMaxCacheSize > 0); } #elif defined(AFS_HPUX_ENV) @@ -143,11 +170,12 @@ ih_Initialize(void) fdMaxCacheSize = 0; #else { - long fdMax = MAX(sysconf(_SC_OPEN_MAX) - FD_HANDLE_SETASIDE, 0); - fdMaxCacheSize = 
(int)MIN(fdMax, FD_MAX_CACHESIZE); + long fdMax = MAX(sysconf(_SC_OPEN_MAX) - vol_io_params.fd_handle_setaside, + 0); + fdMaxCacheSize = (int)MIN(fdMax, vol_io_params.fd_max_cachesize); } #endif - fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE); + fdCacheSize = MIN(fdMaxCacheSize, vol_io_params.fd_initial_cachesize); { #ifdef AFS_PTHREAD_ENV @@ -155,7 +183,7 @@ ih_Initialize(void) pthread_attr_t tattr; pthread_attr_init(&tattr); - pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED); + pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED); pthread_create(&syncer, &tattr, ih_sync_thread, NULL); #else /* AFS_PTHREAD_ENV */ @@ -168,14 +196,23 @@ ih_Initialize(void) } /* Make the file descriptor cache as big as possible. Don't this call - * if the program uses fopen or fdopen. */ + * if the program uses fopen or fdopen, or if fd_max_cachesize cannot be + * represented in the fileno member of the system FILE structure (or + * equivalent). + */ void ih_UseLargeCache(void) { IH_LOCK; + + if (!ih_PkgDefaultsSet) { + ih_PkgDefaults(); + } + if (!ih_Inited) { - ih_Initialize(); + ih_Initialize(); } + fdCacheSize = fdMaxCacheSize; IH_UNLOCK; @@ -204,9 +241,13 @@ ih_init(int dev, int vid, Inode ino) int ihash = IH_HASH(dev, vid, ino); IHandle_t *ihP; + if (!ih_PkgDefaultsSet) { + ih_PkgDefaults(); + } + IH_LOCK; if (!ih_Inited) { - ih_Initialize(); + ih_Initialize(); } /* Do we already have a handle for this Inode? */ diff --git a/src/vol/ihandle.h b/src/vol/ihandle.h index 922430f..42c635b 100644 --- a/src/vol/ihandle.h +++ b/src/vol/ihandle.h @@ -192,9 +192,35 @@ typedef struct StreamHandle_s { #define FD_HANDLE_MALLOCSIZE ((size_t)((4096/sizeof(FdHandle_t)))) #define STREAM_HANDLE_MALLOCSIZE 1 + +/* READ THIS. 
+ * + * On modern platforms tuned for I/O intensive workloads, there may be + * thousands of file descriptors available (64K on 32-bit Solaris 7, + * for example), and threading in Solaris 9 and Linux 2.6 (NPTL) is + * tuned for (many) thousands of concurrent threads at peak. + * + * On these platforms, it makes sense to allow administrators to set + * appropriate limits for their hardware. Clients may now set desired + * values in the exported vol_io_params, of type ih_init_params. + */ + +typedef struct ih_init_params +{ + afs_uint32 fd_handle_setaside; /* for non-cached i/o, trad. was 128 */ + afs_uint32 fd_initial_cachesize; /* what was 'default' */ + afs_uint32 fd_max_cachesize; /* max open files if large-cache activated */ +} ih_init_params; + + /* Number of file descriptors needed for non-cached I/O */ #define FD_HANDLE_SETASIDE 128 /* Match to MAX_FILESERVER_THREAD */ +/* Which systems have 8-bit fileno? On GNU/Linux systems, the + * fileno member of FILE is an int. On NetBSD 5, it's a short. + * Ditto for OpenBSD 4.5. Through Solaris 10 8/07 it's unsigned char. + */ + /* Don't try to have more than 256 files open at once if you are planning * to use fopen or fdopen. The FILE structure has an eight bit field for * the file descriptor. */ @@ -206,6 +232,17 @@ typedef struct StreamHandle_s { */ #define FD_MAX_CACHESIZE (2000 - FD_HANDLE_SETASIDE) +/* On modern platforms, this is sized higher than the note implies. + * For HP, see http://forums11.itrc.hp.com/service/forums/questionanswer.do?admit=109447626+1242508538748+28353475&threadId=302950 + * On AIX, it's said to be self-tuning (sar -v) + * On Solaris, http://www.princeton.edu/~unix/Solaris/troubleshoot/kerntune.html + * says stdio limit (FILE) may exist, but then backtracks and says the 64bit + * solaris and POLL (rather than select) io avoid the issue. Solaris Internals + * states that Solaris 7 and above deal with up to 64K on 32bit. + * However, extended FILE must be enabled to use this. 
See + * enable_extended_FILE_stdio(3C) + */ + /* Inode handle */ typedef struct IHandle_s { afs_uint32 ih_vid; /* Parent volume id. */ @@ -224,7 +261,8 @@ typedef struct IHandle_s { #define IH_REALLY_CLOSED 1 /* Hash function for inode handles */ -#define I_HANDLE_HASH_SIZE 1024 /* power of 2 */ +#define I_HANDLE_HASH_SIZE 2048 /* power of 2 */ + /* The casts to int's ensure NT gets the xor operation correct. */ #define IH_HASH(D, V, I) ((int)(((D)^(V)^((int)(I)))&(I_HANDLE_HASH_SIZE-1))) @@ -252,6 +290,7 @@ extern FILE *ih_fdopen(FdHandle_t * h, char *fdperms); /* * Prototypes for file descriptor cache routines */ +extern void ih_PkgDefaults(void); extern void ih_Initialize(void); extern void ih_UseLargeCache(void); extern IHandle_t *ih_init(int /*@alt Device@ */ dev, int /*@alt VolId@ */ vid, diff --git a/src/vol/volume.c b/src/vol/volume.c index 47fc80e..cf294fa 100644 --- a/src/vol/volume.c +++ b/src/vol/volume.c @@ -157,6 +157,9 @@ pthread_cond_t vol_init_attach_cond; int vol_attach_threads = 1; #endif /* AFS_PTHREAD_ENV */ +/* start-time configurable I/O parameters */ +ih_init_params vol_io_params; + #ifdef AFS_DEMAND_ATTACH_FS pthread_mutex_t vol_salvsync_mutex; diff --git a/src/vol/volume.h b/src/vol/volume.h index 42741c3..38846cd 100644 --- a/src/vol/volume.h +++ b/src/vol/volume.h @@ -68,6 +68,7 @@ extern pthread_mutex_t vol_glock_mutex; extern pthread_mutex_t vol_trans_mutex; extern pthread_cond_t vol_put_volume_cond; extern pthread_cond_t vol_sleep_cond; +extern ih_init_params vol_io_params; extern int vol_attach_threads; #ifdef VOL_LOCK_DEBUG extern pthread_t vol_glock_holder; -- 1.9.4