2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* ihandle.c - file descriptor caching for Inode handles. */
12 /************************************************************************/
14 #include <afsconfig.h>
15 #include <afs/param.h>
21 #include <sys/types.h>
29 #if defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
30 #include <sys/fcntl.h>
31 #include <sys/resource.h>
42 #include <afs/afsint.h>
44 #include <afs/afssyscalls.h>
47 #include "viceinode.h"
48 #ifdef AFS_PTHREAD_ENV
50 #else /* AFS_PTHREAD_ENV */
51 #include "afs/assert.h"
52 #endif /* AFS_PTHREAD_ENV */
57 #define afs_stat stat64
58 #define afs_fstat fstat64
59 #else /* !O_LARGEFILE */
61 #define afs_fstat fstat
62 #endif /* !O_LARGEFILE */
63 #endif /* AFS_NT40_ENV */
/* Global ihandle lock state; defined only in pthread builds. */
65 #ifdef AFS_PTHREAD_ENV
66 pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
67 pthread_mutex_t ih_glock_mutex;
68 #endif /* AFS_PTHREAD_ENV */
70 /* Linked list of available inode handles; iHandleAllocateChunk() refills
 * it when it is empty. */
71 IHandle_t *ihAvailHead;
72 IHandle_t *ihAvailTail;
74 /* Linked list of available file descriptor handles; refilled by
 * fdHandleAllocateChunk(). */
75 FdHandle_t *fdAvailHead;
76 FdHandle_t *fdAvailTail;
78 /* Linked list of available stream descriptor handles; refilled by
 * streamHandleAllocateChunk(). */
79 StreamHandle_t *streamAvailHead;
80 StreamHandle_t *streamAvailTail;
82 /* LRU list for file descriptor handles: fd_close() appends handles in
 * FD_HANDLE_OPEN state at the tail and ih_open() removes handles from it. */
83 FdHandle_t *fdLruHead;
84 FdHandle_t *fdLruTail;
88 /* Most of the servers use fopen/fdopen. Since the FILE structure
89 * only has eight bits for the file descriptor, the cache size
90 * has to be less than 256. The cache can be made larger as long
91 * as you are sure you don't need fopen/fdopen. The value is assigned
 * during file descriptor cache initialization. */
92 int fdMaxCacheSize = 0;
95 /* Number of in use file descriptors */
98 /* Hash table for inode handles; buckets are selected with IH_HASH() */
99 IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];
/* Pthread builds only: sets up ih_glock_mutex with default attributes.
 * NOTE(review): pthread_mutex_init() runs as the argument of assert(); if the
 * assert macro in effect can be compiled out, ih_glock_mutex would be left
 * uninitialized -- confirm which assert definition applies here. */
102 #ifdef AFS_PTHREAD_ENV
103 /* Initialize the global ihandle mutex */
107 assert(pthread_mutex_init(&ih_glock_mutex, NULL) == 0);
109 #endif /* AFS_PTHREAD_ENV */
111 /* Initialize the file descriptor cache.
 *
 * Resets the inode-handle, fd-handle and LRU lists and every hash bucket,
 * then chooses fdMaxCacheSize per platform:
 *   - AFS_NT40_ENV: FD_MAX_CACHESIZE.
 *   - AFS_SUN5_ENV / AFS_NBSD_ENV: the soft RLIMIT_NOFILE limit is raised to
 *     the hard limit and FD_HANDLE_SETASIDE descriptors are held back; a
 *     later step caps the result at FD_MAX_CACHESIZE and asserts that it is
 *     positive (see the NetBSD note in the body).
 *   - AFS_HPUX_ENV: sysconf(_SC_OPEN_MAX) less FD_HANDLE_SETASIDE (floored
 *     at 0), capped at FD_MAX_CACHESIZE.
 * fdCacheSize then starts at MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE).
 *
 * NOTE(review): getrlimit()/setrlimit() are called inside assert(); if the
 * assert macro in effect can be compiled out, these calls disappear with
 * it -- confirm.
 */
118 DLL_INIT_LIST(ihAvailHead, ihAvailTail);
119 DLL_INIT_LIST(fdAvailHead, fdAvailTail);
120 DLL_INIT_LIST(fdLruHead, fdLruTail);
121 for (i = 0; i < I_HANDLE_HASH_SIZE; i++) {
122 DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
124 #if defined(AFS_NT40_ENV)
125 fdMaxCacheSize = FD_MAX_CACHESIZE;
126 #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
129 assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
130 rlim.rlim_cur = rlim.rlim_max;
131 assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
132 fdMaxCacheSize = rlim.rlim_cur - FD_HANDLE_SETASIDE;
134 /* XXX this is to avoid using up all system fds: netbsd is
135 * somewhat broken and has set the maximum fd for a root process
136 * to the same as the system fds that are available, so if the
137 * fileserver uses up all process fds, all system fds will be
140 * Check for this better
144 fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE);
145 assert(fdMaxCacheSize > 0);
147 #elif defined(AFS_HPUX_ENV)
148 /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
152 long fdMax = MAX(sysconf(_SC_OPEN_MAX) - FD_HANDLE_SETASIDE, 0);
153 fdMaxCacheSize = (int)MIN(fdMax, FD_MAX_CACHESIZE);
156 fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE);
159 /* Make the file descriptor cache as big as possible: fdCacheSize is
160 * set to fdMaxCacheSize. Don't make this call if the program uses
 * fopen or fdopen. */
162 ih_UseLargeCache(void)
168 fdCacheSize = fdMaxCacheSize;
173 /* Allocate a chunk of inode handles.
 *
 * Requires the available list to be empty (asserted). Mallocs an array of
 * I_HANDLE_MALLOCSIZE IHandle_t entries, sets each entry's ih_refcnt to 0
 * and appends it to the tail of the ihAvailHead/ihAvailTail list.
 */
175 iHandleAllocateChunk(void)
180 assert(ihAvailHead == NULL);
181 ihP = (IHandle_t *) malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
183 for (i = 0; i < I_HANDLE_MALLOCSIZE; i++) {
184 ihP[i].ih_refcnt = 0;
185 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
189 /* Initialize an inode handle.
 *
 * dev, vid, ino - identity of the inode; IH_HASH() of the three selects
 *                 the hash bucket.
 *
 * The bucket is searched first for a handle whose ih_ino, ih_vid and ih_dev
 * all match. When a new handle is needed it comes off the available list
 * (a fresh chunk is allocated when that list is empty), must have a zero
 * reference count (asserted), gets an empty descriptor list, and is
 * appended to the tail of its hash bucket.
 */
191 ih_init(int dev, int vid, Inode ino)
193 int ihash = IH_HASH(dev, vid, ino);
201 /* Do we already have a handle for this Inode? */
202 for (ihP = ihashTable[ihash].ihash_head; ihP; ihP = ihP->ih_next) {
203 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
210 /* Allocate and initialize a new Inode handle */
211 if (ihAvailHead == NULL) {
212 iHandleAllocateChunk();
215 assert(ihP->ih_refcnt == 0);
216 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
222 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
223 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
224 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
229 /* Copy an inode handle.
 * The handle must already hold at least one reference (asserted). */
231 ih_copy(IHandle_t * ihP)
235 assert(ihP->ih_refcnt > 0);
241 /* Allocate a chunk of file descriptor handles.
 *
 * Requires the available list to be empty (asserted). Mallocs an array of
 * FD_HANDLE_MALLOCSIZE FdHandle_t entries; each entry is marked
 * FD_HANDLE_AVAIL, given fd_fd = INVALID_FD, and appended to the tail of
 * the fdAvailHead/fdAvailTail list.
 */
243 fdHandleAllocateChunk(void)
248 assert(fdAvailHead == NULL);
249 fdP = (FdHandle_t *) malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
251 for (i = 0; i < FD_HANDLE_MALLOCSIZE; i++) {
252 fdP[i].fd_status = FD_HANDLE_AVAIL;
254 fdP[i].fd_fd = INVALID_FD;
255 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
259 /* Allocate a chunk of stream handles.
 *
 * Requires the available list to be empty (asserted). Mallocs an array of
 * STREAM_HANDLE_MALLOCSIZE StreamHandle_t entries (the allocation result is
 * asserted non-NULL); each entry gets str_fd = INVALID_FD and is appended
 * to the streamAvailHead/streamAvailTail list.
 */
261 streamHandleAllocateChunk(void)
264 StreamHandle_t *streamP;
266 assert(streamAvailHead == NULL);
267 streamP = (StreamHandle_t *)
268 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
269 assert(streamP != NULL);
270 for (i = 0; i < STREAM_HANDLE_MALLOCSIZE; i++) {
271 streamP[i].str_fd = INVALID_FD;
272 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
278 * Get a file descriptor handle given an Inode handle
/* ih_open: hand out a descriptor handle for ihP.
 *
 * - A NULL ihP is checked for up front.
 * - Reuse: the inode's descriptor list is walked from the tail; a handle
 *   that is not FD_HANDLE_INUSE must be FD_HANDLE_OPEN (asserted), is
 *   marked FD_HANDLE_INUSE, taken off the LRU list and rewound with
 *   FDH_SEEK(fdP, 0, SEEK_SET).
 * - Otherwise the inode is opened; INVALID_FD is the failure indication.
 * - The FdHandle_t for the new descriptor is either recycled from the LRU
 *   list (only when fdInUseCount > fdCacheSize and the LRU list is
 *   non-empty; the recycled handle's old descriptor is kept in closeFd) or
 *   taken from the available list, with closeFd = INVALID_FD.
 * - The handle is marked FD_HANDLE_INUSE and appended to ihP's descriptor
 *   list; closeFd is acted on only when it is not INVALID_FD.
 */
281 ih_open(IHandle_t * ihP)
287 if (!ihP) /* XXX should log here in the fileserver */
292 /* Do we already have an open file handle for this Inode? */
293 for (fdP = ihP->ih_fdtail; fdP != NULL; fdP = fdP->fd_ihprev) {
294 if (fdP->fd_status != FD_HANDLE_INUSE) {
295 assert(fdP->fd_status == FD_HANDLE_OPEN);
296 fdP->fd_status = FD_HANDLE_INUSE;
297 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
300 (void) FDH_SEEK(fdP, 0, SEEK_SET);
306 * Try to open the Inode, return NULL on error.
312 if (fd == INVALID_FD) {
318 /* fdCacheSize limits the size of the descriptor cache, but
319 * we permit the number of open files to exceed fdCacheSize.
320 * We only recycle open file descriptors when the number
321 * of open files reaches the size of the cache */
322 if (fdInUseCount > fdCacheSize && fdLruHead != NULL) {
324 assert(fdP->fd_status == FD_HANDLE_OPEN);
325 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
326 DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
327 fd_ihnext, fd_ihprev);
328 closeFd = fdP->fd_fd;
330 if (fdAvailHead == NULL) {
331 fdHandleAllocateChunk();
334 assert(fdP->fd_status == FD_HANDLE_AVAIL);
335 DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
336 closeFd = INVALID_FD;
339 fdP->fd_status = FD_HANDLE_INUSE;
345 /* Add this handle to the Inode's list of open descriptors */
346 DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
349 if (closeFd != INVALID_FD) {
361 * Return a file descriptor handle to the cache
/* fd_close: give an in-use descriptor handle back to the cache.
 *
 * Asserts that fdInUseCount > 0 and that fdP is FD_HANDLE_INUSE. When the
 * owning inode handle has IH_REALLY_CLOSED set, or fdInUseCount exceeds
 * fdCacheSize, the work is delegated to fd_reallyclose() and its result is
 * returned. Otherwise the handle becomes FD_HANDLE_OPEN and goes on the
 * tail of the LRU list. The final step depends on whether ihP->ih_refcnt
 * is greater than 1.
 */
364 fd_close(FdHandle_t * fdP)
373 assert(fdInUseCount > 0);
374 assert(fdP->fd_status == FD_HANDLE_INUSE);
378 /* Call fd_reallyclose to really close the unused file handles if
379 * the previous attempt to close (ih_reallyclose()) all file handles
380 * failed (this is determined by checking the ihandle for the flag
381 * IH_REALLY_CLOSED) or we have too many open files.
383 if (ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
385 return fd_reallyclose(fdP);
388 /* Put this descriptor back into the cache */
389 fdP->fd_status = FD_HANDLE_OPEN;
390 DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
392 /* If this is not the only reference to the Inode then we can decrement
393 * the reference count, otherwise we need to call ih_release.
395 if (ihP->ih_refcnt > 1) {
407 * Actually close the file descriptor handle and return it to
/* fd_reallyclose: retire an in-use descriptor handle.
 *
 * Asserts that fdInUseCount > 0 and that fdP is FD_HANDLE_INUSE. The
 * descriptor number is saved in closeFd, the handle is unlinked from its
 * inode's descriptor list, appended to the available list, marked
 * FD_HANDLE_AVAIL and given fd_fd = INVALID_FD. If the inode handle then
 * has no descriptor handles left, its IH_REALLY_CLOSED flag is cleared.
 * The final step depends on whether ihP->ih_refcnt is greater than 1.
 */
411 fd_reallyclose(FdHandle_t * fdP)
421 assert(fdInUseCount > 0);
422 assert(fdP->fd_status == FD_HANDLE_INUSE);
425 closeFd = fdP->fd_fd;
427 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
428 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
430 fdP->fd_status = FD_HANDLE_AVAIL;
432 fdP->fd_fd = INVALID_FD;
434 /* All the file descriptor handles have been closed; reset
435 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
436 * has completed its job.
438 if (!ihP->ih_fdhead) {
439 ihP->ih_flags &= ~IH_REALLY_CLOSED;
447 /* If this is not the only reference to the Inode then we can decrement
448 * the reference count, otherwise we need to call ih_release. */
449 if (ihP->ih_refcnt > 1) {
460 /* Enable buffered I/O on a file descriptor.
 *
 * fd - descriptor to wrap; stored in the handle's str_fd.
 *
 * A StreamHandle_t is taken from the head of the available list (a chunk is
 * allocated first when the list is empty) and reset: empty buffer
 * (str_buflen and str_bufoff 0), no error, not at EOF, and no I/O direction.
 */
462 stream_fdopen(FD_t fd)
464 StreamHandle_t *streamP;
467 if (streamAvailHead == NULL) {
468 streamHandleAllocateChunk();
470 streamP = streamAvailHead;
471 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
473 streamP->str_fd = fd;
474 streamP->str_buflen = 0;
475 streamP->str_bufoff = 0;
476 streamP->str_error = 0;
477 streamP->str_eof = 0;
478 streamP->str_direction = STREAM_DIRECTION_NONE;
482 /* Open a file for buffered I/O.
 *
 * filename - path handed to OS_OPEN().
 * mode     - one of "r", "r+", "w", "w+", "a", "a+"; the strings are mapped
 *            to O_RDONLY/O_RDWR/O_WRONLY combined with O_TRUNC, O_APPEND
 *            and O_CREAT as shown below. Any other string presumably
 *            reaches assert(FALSE) (not implemented).
 *
 * A successfully opened descriptor is wrapped via stream_fdopen(), whose
 * result is returned.
 *
 * NOTE(review): the third OS_OPEN() argument is 0 even when O_CREAT is
 * given. If OS_OPEN() treats it as the permission mode, new files would be
 * created with no permission bits -- confirm against the OS_OPEN definition.
 */
484 stream_open(const char *filename, const char *mode)
488 if (strcmp(mode, "r") == 0) {
489 fd = OS_OPEN(filename, O_RDONLY, 0);
490 } else if (strcmp(mode, "r+") == 0) {
491 fd = OS_OPEN(filename, O_RDWR, 0);
492 } else if (strcmp(mode, "w") == 0) {
493 fd = OS_OPEN(filename, O_WRONLY | O_TRUNC | O_CREAT, 0);
494 } else if (strcmp(mode, "w+") == 0) {
495 fd = OS_OPEN(filename, O_RDWR | O_TRUNC | O_CREAT, 0);
496 } else if (strcmp(mode, "a") == 0) {
497 fd = OS_OPEN(filename, O_WRONLY | O_APPEND | O_CREAT, 0);
498 } else if (strcmp(mode, "a+") == 0) {
499 fd = OS_OPEN(filename, O_RDWR | O_APPEND | O_CREAT, 0);
501 assert(FALSE); /* not implemented */
504 if (fd == INVALID_FD) {
507 return stream_fdopen(fd);
510 /* fread for buffered I/O handles.
 *
 * ptr     - presumably the destination buffer (TODO confirm).
 * size    - size of one item in bytes.
 * nitems  - number of items requested.
 * streamP - stream handle; must have no direction yet or already be in
 *           read mode (asserted).
 *
 * Returns bytesRead / size.
 * NOTE(review): size == 0 would make the final division divide by zero, and
 * size * nitems is not checked for overflow -- confirm callers never do this.
 */
512 stream_read(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
513 StreamHandle_t * streamP)
515 afs_fsize_t nbytes, bytesRead, bytesToRead;
518 /* Need to seek before changing direction */
519 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
520 streamP->str_direction = STREAM_DIRECTION_READ;
521 streamP->str_bufoff = 0;
522 streamP->str_buflen = 0;
524 assert(streamP->str_direction == STREAM_DIRECTION_READ);
528 nbytes = size * nitems;
530 while (nbytes > 0 && !streamP->str_eof) {
/* Buffer drained: refill it from the descriptor */
531 if (streamP->str_buflen == 0) {
532 streamP->str_bufoff = 0;
533 streamP->str_buflen =
534 OS_READ(streamP->str_fd, streamP->str_buffer,
535 STREAM_HANDLE_BUFSIZE);
/* Read failure: remember errno and leave the buffer empty */
536 if (streamP->str_buflen < 0) {
537 streamP->str_error = errno;
538 streamP->str_buflen = 0;
/* Zero-byte read: flag end of file */
541 } else if (streamP->str_buflen == 0) {
542 streamP->str_eof = 1;
/* Copy out no more than what is currently buffered */
547 bytesToRead = nbytes;
548 if (bytesToRead > streamP->str_buflen) {
549 bytesToRead = streamP->str_buflen;
551 memcpy(p, streamP->str_buffer + streamP->str_bufoff, bytesToRead);
553 streamP->str_bufoff += bytesToRead;
554 streamP->str_buflen -= bytesToRead;
555 bytesRead += bytesToRead;
556 nbytes -= bytesToRead;
559 return (bytesRead / size);
562 /* fwrite for buffered I/O handles.
 *
 * ptr     - presumably the source buffer (TODO confirm).
 * size    - size of one item in bytes.
 * nitems  - number of items to write.
 * streamP - stream handle; must have no direction yet or already be in
 *           write mode (asserted).
 *
 * In write mode str_bufoff is the number of bytes staged in str_buffer and
 * str_buflen is the space still free; it starts at STREAM_HANDLE_BUFSIZE.
 * Returns bytesWritten / size.
 * NOTE(review): size == 0 would make the final division divide by zero --
 * confirm callers never pass it.
 */
564 stream_write(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
565 StreamHandle_t * streamP)
569 afs_fsize_t nbytes, bytesWritten, bytesToWrite;
571 /* Need to seek before changing direction */
572 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
573 streamP->str_direction = STREAM_DIRECTION_WRITE;
574 streamP->str_bufoff = 0;
575 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
577 assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
580 nbytes = size * nitems;
/* No free space left: write out the whole buffer and start over */
584 if (streamP->str_buflen == 0) {
585 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
586 STREAM_HANDLE_BUFSIZE);
588 streamP->str_error = errno;
592 streamP->str_bufoff = 0;
593 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
/* Stage no more than the free space allows */
596 bytesToWrite = nbytes;
597 if (bytesToWrite > streamP->str_buflen) {
598 bytesToWrite = streamP->str_buflen;
600 memcpy(streamP->str_buffer + streamP->str_bufoff, p, bytesToWrite);
602 streamP->str_bufoff += bytesToWrite;
603 streamP->str_buflen -= bytesToWrite;
604 bytesWritten += bytesToWrite;
605 nbytes -= bytesToWrite;
608 return (bytesWritten / size);
611 /* fseek for buffered I/O handles.
 *
 * streamP - stream handle to reposition.
 * offset, whence - passed straight through to OS_SEEK().
 *
 * When the stream is in write mode with staged bytes, those str_bufoff
 * bytes are written out first. The buffer state is then cleared, the EOF
 * flag reset and the direction returned to STREAM_DIRECTION_NONE before
 * the descriptor is repositioned. errno is recorded in str_error on the
 * write-failure path and when OS_SEEK() returns a negative value.
 */
613 stream_seek(StreamHandle_t * streamP, afs_foff_t offset, int whence)
618 if (streamP->str_direction == STREAM_DIRECTION_WRITE
619 && streamP->str_bufoff > 0) {
620 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
621 streamP->str_bufoff);
623 streamP->str_error = errno;
627 streamP->str_bufoff = 0;
628 streamP->str_buflen = 0;
629 streamP->str_eof = 0;
630 streamP->str_direction = STREAM_DIRECTION_NONE;
631 if (OS_SEEK(streamP->str_fd, offset, whence) < 0) {
632 streamP->str_error = errno;
638 /* fflush for buffered I/O handles.
 *
 * Acts only when the stream is in write mode and has staged bytes: the
 * first str_bufoff bytes of str_buffer are written with OS_WRITE() (errno
 * is recorded in str_error on the failure path), after which the buffer is
 * reset to empty with STREAM_HANDLE_BUFSIZE bytes free.
 */
640 stream_flush(StreamHandle_t * streamP)
645 if (streamP->str_direction == STREAM_DIRECTION_WRITE
646 && streamP->str_bufoff > 0) {
647 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
648 streamP->str_bufoff);
650 streamP->str_error = errno;
653 streamP->str_bufoff = 0;
654 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
660 /* Free a buffered I/O handle.
 *
 * streamP     - handle to release; must not be NULL (asserted).
 * reallyClose - NOTE(review): presumably controls whether OS_CLOSE() is
 *               applied to str_fd -- confirm.
 *
 * Bytes staged by a write-mode stream are written out first. The handle's
 * str_fd is set to INVALID_FD and the handle goes back on the available
 * stream-handle list.
 */
662 stream_close(StreamHandle_t * streamP, int reallyClose)
667 assert(streamP != NULL);
668 if (streamP->str_direction == STREAM_DIRECTION_WRITE
669 && streamP->str_bufoff > 0) {
670 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
671 streamP->str_bufoff);
677 rc = OS_CLOSE(streamP->str_fd);
682 streamP->str_fd = INVALID_FD;
685 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
691 /* Close all unused file descriptors associated with the inode
692 * handle. Called with IH_LOCK held. May drop and reacquire
693 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
694 * if it fails to close all file handles.
 *
 * Descriptor handles in FD_HANDLE_OPEN state are unlinked from both the
 * inode's list and the LRU list and collected on a temporary list. Their
 * descriptors are then closed with OS_CLOSE() (its result is not checked
 * in that loop), the handles are marked FD_HANDLE_AVAIL with
 * fd_fd = INVALID_FD, fdInUseCount is reduced by closeCount, and the
 * temporary list is spliced onto the available list.
 * Returns 0 when no file descriptors were closed.
697 ih_fdclose(IHandle_t * ihP)
699 int closeCount, closedAll;
700 FdHandle_t *fdP, *head, *tail, *next;
702 assert(ihP->ih_refcnt > 0);
705 DLL_INIT_LIST(head, tail);
706 ihP->ih_flags &= ~IH_REALLY_CLOSED;
709 * Remove the file descriptors for this Inode from the LRU queue
710 * and the IHandle queue and put them on a temporary queue so we
711 * can drop the lock before we close the files.
713 for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
714 next = fdP->fd_ihnext;
715 assert(fdP->fd_ih == ihP);
716 assert(fdP->fd_status == FD_HANDLE_OPEN
717 || fdP->fd_status == FD_HANDLE_INUSE);
718 if (fdP->fd_status == FD_HANDLE_OPEN) {
719 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
721 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
722 DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
725 ihP->ih_flags |= IH_REALLY_CLOSED;
729 /* If the ihandle reference count is 1, we should have
730 * closed all file descriptors.
732 if (ihP->ih_refcnt == 1 || closedAll) {
734 assert(!ihP->ih_fdhead);
735 assert(!ihP->ih_fdtail);
739 return 0; /* No file descriptors closed */
744 * Close the file descriptors
747 for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
748 OS_CLOSE(fdP->fd_fd);
749 fdP->fd_status = FD_HANDLE_AVAIL;
750 fdP->fd_fd = INVALID_FD;
756 assert(fdInUseCount >= closeCount);
757 fdInUseCount -= closeCount;
760 * Append the temporary queue to the list of available descriptors
762 if (fdAvailHead == NULL) {
766 fdAvailTail->fd_next = head;
767 head->fd_prev = fdAvailTail;
774 /* Close all cached file descriptors for this inode.
 * The handle must hold at least one reference (asserted). */
776 ih_reallyclose(IHandle_t * ihP)
782 assert(ihP->ih_refcnt > 0);
789 /* Release an Inode handle. All cached file descriptors for this
790 * inode are closed when the last reference to this handle is released
 *
 * Requires ih_refcnt > 0 (asserted). A handle with more than one reference
 * takes the early branch; otherwise it is unlinked from its IH_HASH()
 * bucket and appended to the available inode-handle list.
793 ih_release(IHandle_t * ihP)
801 assert(ihP->ih_refcnt > 0);
803 if (ihP->ih_refcnt > 1) {
809 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
810 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
811 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
817 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
823 /* Sync an inode to disk if its handle isn't NULL.
 * The status code is the result of FDH_SYNC() on a descriptor handle. */
825 ih_condsync(IHandle_t * ihP)
837 code = FDH_SYNC(fdP);
845 /*************************************************************************
846 * OS specific support routines.
847 *************************************************************************/
/* ih_icreate (compiled only when AFS_NAMEI_ENV is not defined): creates an
 * inode through ICREATE(dev, part, nI, p1, p2, p3, p4). The case
 * p2 == INODESPECIAL is checked for first (see viceinode.h). */
848 #ifndef AFS_NAMEI_ENV
850 ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1, int p2,
855 /* See viceinode.h */
856 if (p2 == INODESPECIAL) {
862 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
865 #endif /* AFS_NAMEI_ENV */
/* Report the size of the file behind fd: afs_fstat() fills in status and
 * status.st_size is returned; a negative afs_fstat() result is handled
 * separately. */
872 struct afs_stat status;
873 if (afs_fstat(fd, &status) < 0)
875 return status.st_size;