2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* ihandle.c - file descriptor caching for Inode handles. */
12 /************************************************************************/
14 #include <afsconfig.h>
15 #include <afs/param.h>
21 #include <sys/types.h>
30 #if defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
31 #include <sys/fcntl.h>
32 #include <sys/resource.h>
37 #include <afs/afsint.h>
39 #include <afs/afssyscalls.h>
42 #include "viceinode.h"
43 #ifdef AFS_PTHREAD_ENV
45 #else /* AFS_PTHREAD_ENV */
46 #include "afs/assert.h"
47 #endif /* AFS_PTHREAD_ENV */
52 #define afs_stat stat64
53 #define afs_fstat fstat64
54 #else /* !O_LARGEFILE */
56 #define afs_fstat fstat
57 #endif /* !O_LARGEFILE */
58 #endif /* AFS_NT40_ENV */
60 #ifdef AFS_PTHREAD_ENV
61 pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
62 pthread_mutex_t ih_glock_mutex;
63 #endif /* AFS_PTHREAD_ENV */
65 /* Linked list of available inode handles */
66 IHandle_t *ihAvailHead;
67 IHandle_t *ihAvailTail;
69 /* Linked list of available file descriptor handles */
70 FdHandle_t *fdAvailHead;
71 FdHandle_t *fdAvailTail;
73 /* Linked list of available stream descriptor handles */
74 StreamHandle_t *streamAvailHead;
75 StreamHandle_t *streamAvailTail;
77 /* LRU list for file descriptor handles */
78 FdHandle_t *fdLruHead;
79 FdHandle_t *fdLruTail;
83 /* Most of the servers use fopen/fdopen. Since the FILE structure
84 * only has eight bits for the file descriptor, the cache size
85 * has to be less than 256. The cache can be made larger as long
86 * as you are sure you don't need fopen/fdopen. */
87 int fdMaxCacheSize = 0;
90 /* Number of in use file descriptors */
93 /* Hash table for inode handles */
94 IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];
97 #ifdef AFS_PTHREAD_ENV
98 /* Initialize the global ihandle mutex */
102 assert(pthread_mutex_init(&ih_glock_mutex, NULL) == 0);
104 #endif /* AFS_PTHREAD_ENV */
106 /* Initialize the file descriptor cache */
113 DLL_INIT_LIST(ihAvailHead, ihAvailTail);
114 DLL_INIT_LIST(fdAvailHead, fdAvailTail);
115 DLL_INIT_LIST(fdLruHead, fdLruTail);
116 for (i = 0; i < I_HANDLE_HASH_SIZE; i++) {
117 DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
119 #if defined(AFS_NT40_ENV)
120 fdMaxCacheSize = FD_MAX_CACHESIZE;
121 #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
124 assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
125 rlim.rlim_cur = rlim.rlim_max;
126 assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
127 fdMaxCacheSize = rlim.rlim_cur - FD_HANDLE_SETASIDE;
129 /* XXX This is to avoid using up all system fds. NetBSD is
130 * somewhat broken and has set the maximum fd for a root process
131 * to the same as the system fds that are available, so if the
132 * fileserver uses up all process fds, all system fds will be
135 * Check for this better
139 fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE);
140 assert(fdMaxCacheSize > 0);
142 #elif defined(AFS_HPUX_ENV)
143 /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
147 long fdMax = MAX(sysconf(_SC_OPEN_MAX) - FD_HANDLE_SETASIDE, 0);
148 fdMaxCacheSize = (int)MIN(fdMax, FD_MAX_CACHESIZE);
151 fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE);
154 void *ih_sync_thread();
155 #ifdef AFS_PTHREAD_ENV
157 pthread_attr_t tattr;
159 pthread_attr_init(&tattr);
160 pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED);
162 pthread_create(&syncer, &tattr, ih_sync_thread, NULL);
163 #else /* AFS_PTHREAD_ENV */
165 LWP_CreateProcess(ih_sync_thread, 16*1024, LWP_MAX_PRIORITY - 2,
166 NULL, "ih_syncer", &syncer);
167 #endif /* AFS_PTHREAD_ENV */
172 /* Make the file descriptor cache as big as possible. Don't make this
173 * call if the program uses fopen or fdopen. */
175 ih_UseLargeCache(void)
/* Raise the working cache size to the maximum computed for this platform. */
181 fdCacheSize = fdMaxCacheSize;
186 /* Allocate a chunk of inode handles.
 *
 * Obtains I_HANDLE_MALLOCSIZE handles with a single malloc, zeroes each
 * reference count and appends every handle to the available list
 * (ihAvailHead/ihAvailTail). Asserts that the available list is empty
 * on entry. */
188 iHandleAllocateChunk(void)
193 assert(ihAvailHead == NULL);
194 ihP = (IHandle_t *) malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
/* Every new handle starts unreferenced and goes onto the available list. */
196 for (i = 0; i < I_HANDLE_MALLOCSIZE; i++) {
197 ihP[i].ih_refcnt = 0;
198 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
202 /* Initialize an inode handle.
 *
 * The (dev, vid, ino) triple selects a hash bucket via IH_HASH. The bucket
 * is searched for a handle with the same triple; a new handle is taken
 * from the available list (allocating a fresh chunk when that list is
 * empty) and appended to the bucket. */
204 ih_init(int dev, int vid, Inode ino)
206 int ihash = IH_HASH(dev, vid, ino);
214 /* Do we already have a handle for this Inode? */
215 for (ihP = ihashTable[ihash].ihash_head; ihP; ihP = ihP->ih_next) {
216 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
223 /* Allocate and initialize a new Inode handle */
224 if (ihAvailHead == NULL) {
225 iHandleAllocateChunk();
/* A handle sitting on the available list must be unreferenced. */
228 assert(ihP->ih_refcnt == 0);
229 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
/* The new handle starts with an empty list of file descriptors. */
235 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
/* Make the handle findable by linking it into its hash bucket. */
236 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
237 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
242 /* Copy an inode handle. The handle passed in must already hold at
 * least one reference; this is asserted. */
244 ih_copy(IHandle_t * ihP)
248 assert(ihP->ih_refcnt > 0);
254 /* Allocate a chunk of file descriptor handles.
 *
 * Obtains FD_HANDLE_MALLOCSIZE handles with a single malloc, marks each
 * one FD_HANDLE_AVAIL with no OS descriptor (INVALID_FD) and appends it
 * to the available list (fdAvailHead/fdAvailTail). Asserts that the
 * available list is empty on entry. */
256 fdHandleAllocateChunk(void)
261 assert(fdAvailHead == NULL);
262 fdP = (FdHandle_t *) malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
264 for (i = 0; i < FD_HANDLE_MALLOCSIZE; i++) {
265 fdP[i].fd_status = FD_HANDLE_AVAIL;
267 fdP[i].fd_fd = INVALID_FD;
268 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
272 /* Allocate a chunk of stream handles.
 *
 * Obtains STREAM_HANDLE_MALLOCSIZE handles with a single malloc, sets
 * each handle's descriptor to INVALID_FD and appends it to the available
 * list (streamAvailHead/streamAvailTail). Asserts that the available
 * list is empty on entry. */
274 streamHandleAllocateChunk(void)
277 StreamHandle_t *streamP;
279 assert(streamAvailHead == NULL);
280 streamP = (StreamHandle_t *)
281 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
/* Allocation failure is treated as fatal. */
282 assert(streamP != NULL);
283 for (i = 0; i < STREAM_HANDLE_MALLOCSIZE; i++) {
284 streamP[i].str_fd = INVALID_FD;
285 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
291 * Get a file descriptor handle given an Inode handle
294 ih_open(IHandle_t * ihP)
/* Outline: reuse a cached descriptor of this inode that is not in use;
 * otherwise obtain a descriptor handle, either by recycling the cached
 * one selected from the LRU list or by taking one from the available
 * list, and link it onto the inode's descriptor list. */
300 if (!ihP) /* XXX should log here in the fileserver */
305 /* Do we already have an open file handle for this Inode? */
306 for (fdP = ihP->ih_fdtail; fdP != NULL; fdP = fdP->fd_ihprev) {
307 if (fdP->fd_status != FD_HANDLE_INUSE) {
/* Any descriptor on the inode's list that is not in use must be a
 * cached (FD_HANDLE_OPEN) one; claim it and take it off the LRU list. */
308 assert(fdP->fd_status == FD_HANDLE_OPEN);
309 fdP->fd_status = FD_HANDLE_INUSE;
310 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
/* Rewind to offset 0 before reuse; the seek result is deliberately
 * discarded. */
313 (void)FDH_SEEK(fdP, 0, SEEK_SET);
319 * Try to open the Inode, return NULL on error.
325 if (fd == INVALID_FD) {
331 /* fdCacheSize limits the size of the descriptor cache, but
332 * we permit the number of open files to exceed fdCacheSize.
333 * We only recycle open file descriptors when the number
334 * of open files reaches the size of the cache */
335 if (fdInUseCount > fdCacheSize && fdLruHead != NULL) {
/* Recycle a cached handle: unlink it from the LRU list and from the
 * descriptor list of the inode that owns it, and remember its OS
 * descriptor in closeFd. */
337 assert(fdP->fd_status == FD_HANDLE_OPEN);
338 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
339 DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
340 fd_ihnext, fd_ihprev);
341 closeFd = fdP->fd_fd;
/* Otherwise use a handle from the available list, growing that list
 * first when it is empty; there is no old descriptor in this case. */
343 if (fdAvailHead == NULL) {
344 fdHandleAllocateChunk();
347 assert(fdP->fd_status == FD_HANDLE_AVAIL);
348 DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
349 closeFd = INVALID_FD;
352 fdP->fd_status = FD_HANDLE_INUSE;
358 /* Add this handle to the Inode's list of open descriptors */
359 DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
/* closeFd is a real descriptor only when a cached handle was recycled
 * above. */
362 if (closeFd != INVALID_FD) {
374 * Return a file descriptor handle to the cache
377 fd_close(FdHandle_t * fdP)
386 assert(fdInUseCount > 0);
387 assert(fdP->fd_status == FD_HANDLE_INUSE);
391 /* Call fd_reallyclose to really close the unused file handles if
392 * the previous attempt to close (ih_reallyclose()) all file handles
393 * failed (this is determined by checking the ihandle for the flag
394 * IH_REALLY_CLOSED) or we have too many open files.
396 if (ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
398 return fd_reallyclose(fdP);
401 /* Put this descriptor back into the cache */
402 fdP->fd_status = FD_HANDLE_OPEN;
403 DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
405 /* If this is not the only reference to the Inode then we can decrement
406 * the reference count, otherwise we need to call ih_release.
408 if (ihP->ih_refcnt > 1) {
420 * Actually close the file descriptor handle and return it to
424 fd_reallyclose(FdHandle_t * fdP)
434 assert(fdInUseCount > 0);
435 assert(fdP->fd_status == FD_HANDLE_INUSE);
438 closeFd = fdP->fd_fd;
440 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
441 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
443 fdP->fd_status = FD_HANDLE_AVAIL;
445 fdP->fd_fd = INVALID_FD;
447 /* All the file descriptor handles have been closed; reset
448 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
449 * has completed its job.
451 if (!ihP->ih_fdhead) {
452 ihP->ih_flags &= ~IH_REALLY_CLOSED;
460 /* If this is not the only reference to the Inode then we can decrement
461 * the reference count, otherwise we need to call ih_release. */
462 if (ihP->ih_refcnt > 1) {
473 /* Enable buffered I/O on a file descriptor.
 *
 * Takes a stream handle from the available list (allocating a new chunk
 * when the list is empty), attaches fd to it and resets its buffer,
 * error, end-of-file and direction state. */
475 stream_fdopen(FD_t fd)
477 StreamHandle_t *streamP;
480 if (streamAvailHead == NULL) {
481 streamHandleAllocateChunk();
483 streamP = streamAvailHead;
484 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
/* Start with an empty buffer, no recorded error or EOF, and no I/O
 * direction chosen yet. */
486 streamP->str_fd = fd;
487 streamP->str_buflen = 0;
488 streamP->str_bufoff = 0;
489 streamP->str_error = 0;
490 streamP->str_eof = 0;
491 streamP->str_direction = STREAM_DIRECTION_NONE;
495 /* Open a file for buffered I/O.
 *
 * mode is one of the fopen-style strings "r", "r+", "w", "w+", "a" or
 * "a+", each mapped to the corresponding OS_OPEN flags below. Any other
 * mode string trips an assertion. The opened descriptor is wrapped with
 * stream_fdopen().
 *
 * NOTE(review): the O_CREAT variants pass 0 as the third OS_OPEN
 * argument; if that argument is the creation mode, newly created files
 * get no permission bits. Confirm against the OS_OPEN definition. */
497 stream_open(const char *filename, const char *mode)
501 if (strcmp(mode, "r") == 0) {
502 fd = OS_OPEN(filename, O_RDONLY, 0);
503 } else if (strcmp(mode, "r+") == 0) {
504 fd = OS_OPEN(filename, O_RDWR, 0);
505 } else if (strcmp(mode, "w") == 0) {
506 fd = OS_OPEN(filename, O_WRONLY | O_TRUNC | O_CREAT, 0);
507 } else if (strcmp(mode, "w+") == 0) {
508 fd = OS_OPEN(filename, O_RDWR | O_TRUNC | O_CREAT, 0);
509 } else if (strcmp(mode, "a") == 0) {
510 fd = OS_OPEN(filename, O_WRONLY | O_APPEND | O_CREAT, 0);
511 } else if (strcmp(mode, "a+") == 0) {
512 fd = OS_OPEN(filename, O_RDWR | O_APPEND | O_CREAT, 0);
514 assert(FALSE); /* not implemented */
/* A failed open is detected here, before any stream handle is used. */
517 if (fd == INVALID_FD) {
520 return stream_fdopen(fd);
523 /* fread for buffered I/O handles.
 *
 * Copies up to size * nitems bytes from the stream into ptr, refilling
 * the handle's buffer with OS_READ in units of STREAM_HANDLE_BUFSIZE,
 * and returns the number of whole items read (bytesRead / size). A
 * failed read is recorded in str_error; a zero-length read sets str_eof.
 *
 * While reading, str_buflen is the number of unread bytes left in the
 * buffer and str_bufoff is the offset of the next unread byte.
 *
 * NOTE(review): size == 0 would make the final division divide by zero;
 * confirm that callers never pass a zero item size. */
525 stream_read(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
526 StreamHandle_t * streamP)
528 afs_fsize_t nbytes, bytesRead, bytesToRead;
531 /* Need to seek before changing direction */
532 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
533 streamP->str_direction = STREAM_DIRECTION_READ;
534 streamP->str_bufoff = 0;
535 streamP->str_buflen = 0;
537 assert(streamP->str_direction == STREAM_DIRECTION_READ);
541 nbytes = size * nitems;
543 while (nbytes > 0 && !streamP->str_eof) {
/* Buffer exhausted: refill it from the descriptor. */
544 if (streamP->str_buflen == 0) {
545 streamP->str_bufoff = 0;
546 streamP->str_buflen =
547 OS_READ(streamP->str_fd, streamP->str_buffer,
548 STREAM_HANDLE_BUFSIZE);
549 if (streamP->str_buflen < 0) {
550 streamP->str_error = errno;
551 streamP->str_buflen = 0;
554 } else if (streamP->str_buflen == 0) {
555 streamP->str_eof = 1;
/* Hand out as much as is both wanted and currently buffered. */
560 bytesToRead = nbytes;
561 if (bytesToRead > streamP->str_buflen) {
562 bytesToRead = streamP->str_buflen;
564 memcpy(p, streamP->str_buffer + streamP->str_bufoff, bytesToRead);
566 streamP->str_bufoff += bytesToRead;
567 streamP->str_buflen -= bytesToRead;
568 bytesRead += bytesToRead;
569 nbytes -= bytesToRead;
572 return (bytesRead / size);
575 /* fwrite for buffered I/O handles.
 *
 * Copies size * nitems bytes from ptr into the handle's buffer, writing
 * the buffer out with OS_WRITE whenever it becomes full, and returns the
 * number of whole items accepted (bytesWritten / size). A write error is
 * recorded in str_error.
 *
 * While writing, str_bufoff is the number of bytes already buffered and
 * str_buflen is the free space remaining in the buffer.
 *
 * NOTE(review): size == 0 would make the final division divide by zero;
 * confirm that callers never pass a zero item size. */
577 stream_write(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
578 StreamHandle_t * streamP)
582 afs_fsize_t nbytes, bytesWritten, bytesToWrite;
584 /* Need to seek before changing direction */
585 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
586 streamP->str_direction = STREAM_DIRECTION_WRITE;
587 streamP->str_bufoff = 0;
588 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
590 assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
593 nbytes = size * nitems;
/* No free space left: write out the whole buffer and start over. */
597 if (streamP->str_buflen == 0) {
598 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
599 STREAM_HANDLE_BUFSIZE);
601 streamP->str_error = errno;
605 streamP->str_bufoff = 0;
606 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
/* Buffer as much as is both pending and fits in the free space. */
609 bytesToWrite = nbytes;
610 if (bytesToWrite > streamP->str_buflen) {
611 bytesToWrite = streamP->str_buflen;
613 memcpy(streamP->str_buffer + streamP->str_bufoff, p, bytesToWrite);
615 streamP->str_bufoff += bytesToWrite;
616 streamP->str_buflen -= bytesToWrite;
617 bytesWritten += bytesToWrite;
618 nbytes -= bytesToWrite;
621 return (bytesWritten / size);
624 /* fseek for buffered I/O handles.
 *
 * Writes out any bytes still buffered from a preceding write, discards
 * the buffer, clears the EOF flag and returns the handle to
 * STREAM_DIRECTION_NONE so that the next read or write may pick either
 * direction, then repositions the descriptor with OS_SEEK. Failures are
 * recorded in str_error. */
626 stream_seek(StreamHandle_t * streamP, afs_foff_t offset, int whence)
/* Only a write stream with buffered bytes has anything to flush. */
631 if (streamP->str_direction == STREAM_DIRECTION_WRITE
632 && streamP->str_bufoff > 0) {
633 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
634 streamP->str_bufoff);
636 streamP->str_error = errno;
640 streamP->str_bufoff = 0;
641 streamP->str_buflen = 0;
642 streamP->str_eof = 0;
643 streamP->str_direction = STREAM_DIRECTION_NONE;
644 if (OS_SEEK(streamP->str_fd, offset, whence) < 0) {
645 streamP->str_error = errno;
651 /* fflush for buffered I/O handles.
 *
 * When the handle is in write mode and holds buffered bytes, writes the
 * first str_bufoff bytes of the buffer with OS_WRITE and records a
 * failure in str_error. The buffer is then reset to the empty write
 * state (offset 0, STREAM_HANDLE_BUFSIZE bytes free). */
653 stream_flush(StreamHandle_t * streamP)
658 if (streamP->str_direction == STREAM_DIRECTION_WRITE
659 && streamP->str_bufoff > 0) {
660 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
661 streamP->str_bufoff);
663 streamP->str_error = errno;
666 streamP->str_bufoff = 0;
667 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
673 /* Free a buffered I/O handle.
 *
 * Writes out any bytes still buffered from a preceding write, detaches
 * the descriptor from the handle and returns the handle to the available
 * list. streamP must not be NULL (asserted).
 *
 * NOTE(review): OS_CLOSE is presumably only reached when reallyClose is
 * non-zero; the guarding condition is not visible here. Confirm. */
675 stream_close(StreamHandle_t * streamP, int reallyClose)
680 assert(streamP != NULL);
/* Only a write stream with buffered bytes has anything to flush. */
681 if (streamP->str_direction == STREAM_DIRECTION_WRITE
682 && streamP->str_bufoff > 0) {
683 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
684 streamP->str_bufoff);
690 rc = OS_CLOSE(streamP->str_fd);
/* Mark the handle as having no descriptor before it is recycled. */
695 streamP->str_fd = INVALID_FD;
698 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
704 /* Close all unused file descriptors associated with the inode
705 * handle. Called with IH_LOCK held. May drop and reacquire
706 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
707 * if it fails to close all file handles.
710 ih_fdclose(IHandle_t * ihP)
712 int closeCount, closedAll;
713 FdHandle_t *fdP, *head, *tail, *next;
715 assert(ihP->ih_refcnt > 0);
718 DLL_INIT_LIST(head, tail);
719 ihP->ih_flags &= ~IH_REALLY_CLOSED;
722 * Remove the file descriptors for this Inode from the LRU queue
723 * and the IHandle queue and put them on a temporary queue so we
724 * can drop the lock before we close the files.
726 for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
727 next = fdP->fd_ihnext;
728 assert(fdP->fd_ih == ihP);
729 assert(fdP->fd_status == FD_HANDLE_OPEN
730 || fdP->fd_status == FD_HANDLE_INUSE);
731 if (fdP->fd_status == FD_HANDLE_OPEN) {
732 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
734 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
735 DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
738 ihP->ih_flags |= IH_REALLY_CLOSED;
742 /* If the ihandle reference count is 1, we should have
743 * closed all file descriptors.
745 if (ihP->ih_refcnt == 1 || closedAll) {
747 assert(!ihP->ih_fdhead);
748 assert(!ihP->ih_fdtail);
752 return 0; /* No file descriptors closed */
757 * Close the file descriptors
760 for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
761 OS_CLOSE(fdP->fd_fd);
762 fdP->fd_status = FD_HANDLE_AVAIL;
763 fdP->fd_fd = INVALID_FD;
769 assert(fdInUseCount >= closeCount);
770 fdInUseCount -= closeCount;
773 * Append the temporary queue to the list of available descriptors
775 if (fdAvailHead == NULL) {
779 fdAvailTail->fd_next = head;
780 head->fd_prev = fdAvailTail;
787 /* Close all cached file descriptors for this inode. The handle must
 * hold at least one reference; this is asserted. */
789 ih_reallyclose(IHandle_t * ihP)
795 assert(ihP->ih_refcnt > 0);
802 /* Release an Inode handle. All cached file descriptors for this
803 * inode are closed when the last reference to this handle is released
806 ih_release(IHandle_t * ihP)
814 assert(ihP->ih_refcnt > 0);
816 if (ihP->ih_refcnt > 1) {
822 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
823 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
824 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
830 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
836 /* Sync an inode to disk if its handle isn't NULL */
838 ih_condsync(IHandle_t * ihP)
850 code = FDH_SYNC(fdP);
861 for (ihash = 0; ihash < I_HANDLE_HASH_SIZE; ihash++) {
862 IHandle_t *ihP, *ihPnext;
864 ihP = ihashTable[ihash].ihash_head;
866 ihP->ih_refcnt++; /* must not disappear over unlock */
867 for (; ihP; ihP = ihPnext) {
869 if (ihP->ih_synced) {
876 if (fdP) OS_SYNC(fdP->fd_fd);
882 /* when decrementing the refcount, the ihandle might disappear
883 and we might not even be able to proceed to the next one.
884 Hence the gymnastics putting a hold on the next one already */
885 ihPnext = ihP->ih_next;
886 if (ihPnext) ihPnext->ih_refcnt++;
888 if (ihP->ih_refcnt > 1) {
905 #ifdef AFS_PTHREAD_ENV
907 #else /* AFS_PTHREAD_ENV */
909 #endif /* AFS_PTHREAD_ENV */
919 /*************************************************************************
920 * OS specific support routines.
921 *************************************************************************/
922 #ifndef AFS_NAMEI_ENV
/* Create an inode through ICREATE. Only compiled when AFS_NAMEI_ENV is
 * not defined. A p2 value of INODESPECIAL is singled out before the
 * ICREATE call; see viceinode.h for the meaning of p1..p4. */
924 ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1, int p2,
929 /* See viceinode.h */
930 if (p2 == INODESPECIAL) {
936 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
939 #endif /* AFS_NAMEI_ENV */
946 struct afs_stat status;
947 if (afs_fstat(fd, &status) < 0)
949 return status.st_size;