2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* ihandle.c - file descriptor caching for Inode handles. */
12 /************************************************************************/
14 #include <afsconfig.h>
15 #include <afs/param.h>
19 #include <sys/types.h>
28 #if defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
29 #include <sys/fcntl.h>
30 #include <sys/resource.h>
35 #include <afs/afsint.h>
37 #include <afs/afssyscalls.h>
40 #include "viceinode.h"
41 #ifdef AFS_PTHREAD_ENV
43 #else /* AFS_PTHREAD_ENV */
44 #include "afs/assert.h"
45 #endif /* AFS_PTHREAD_ENV */
50 #define afs_stat stat64
51 #define afs_fstat fstat64
52 #else /* !O_LARGEFILE */
54 #define afs_fstat fstat
55 #endif /* !O_LARGEFILE */
56 #endif /* AFS_NT40_ENV */
58 #ifdef AFS_PTHREAD_ENV
59 pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
60 pthread_mutex_t ih_glock_mutex;
61 #endif /* AFS_PTHREAD_ENV */
63 /* Linked list of available inode handles */
64 IHandle_t *ihAvailHead;
65 IHandle_t *ihAvailTail;
67 /* Linked list of available file descriptor handles */
68 FdHandle_t *fdAvailHead;
69 FdHandle_t *fdAvailTail;
71 /* Linked list of available stream descriptor handles */
72 StreamHandle_t *streamAvailHead;
73 StreamHandle_t *streamAvailTail;
75 /* LRU list for file descriptor handles */
76 FdHandle_t *fdLruHead;
77 FdHandle_t *fdLruTail;
81 /* Most of the servers use fopen/fdopen. Since the FILE structure
82 * only has eight bits for the file descriptor, the cache size
83 * has to be less than 256. The cache can be made larger as long
84 * as you are sure you don't need fopen/fdopen. */
85 int fdMaxCacheSize = 0;
88 /* Number of in use file descriptors */
91 /* Hash table for inode handles */
92 IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];
94 void *ih_sync_thread(void *);
96 #ifdef AFS_PTHREAD_ENV
97 /* Initialize the global ihandle mutex */
101 assert(pthread_mutex_init(&ih_glock_mutex, NULL) == 0);
103 #endif /* AFS_PTHREAD_ENV */
105 /* Initialize the file descriptor cache */
112 DLL_INIT_LIST(ihAvailHead, ihAvailTail);
113 DLL_INIT_LIST(fdAvailHead, fdAvailTail);
114 DLL_INIT_LIST(fdLruHead, fdLruTail);
115 for (i = 0; i < I_HANDLE_HASH_SIZE; i++) {
116 DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
118 #if defined(AFS_NT40_ENV)
119 fdMaxCacheSize = FD_MAX_CACHESIZE;
120 #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
123 assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
124 rlim.rlim_cur = rlim.rlim_max;
125 assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
126 fdMaxCacheSize = rlim.rlim_cur - FD_HANDLE_SETASIDE;
128 /* XXX this is to avoid using up all system fds. NetBSD is
129 * somewhat broken and sets the maximum fd count for a root process
130 * to the same as the system fds that are available, so if the
131 * fileserver uses up all process fds, all system fds will be
134 * Check for this better
138 fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE);
139 assert(fdMaxCacheSize > 0);
141 #elif defined(AFS_HPUX_ENV)
142 /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
146 long fdMax = MAX(sysconf(_SC_OPEN_MAX) - FD_HANDLE_SETASIDE, 0);
147 fdMaxCacheSize = (int)MIN(fdMax, FD_MAX_CACHESIZE);
150 fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE);
153 #ifdef AFS_PTHREAD_ENV
155 pthread_attr_t tattr;
157 pthread_attr_init(&tattr);
158 pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED);
160 pthread_create(&syncer, &tattr, ih_sync_thread, NULL);
161 #else /* AFS_PTHREAD_ENV */
163 LWP_CreateProcess(ih_sync_thread, 16*1024, LWP_MAX_PRIORITY - 2,
164 NULL, "ih_syncer", &syncer);
165 #endif /* AFS_PTHREAD_ENV */
170 /* Make the file descriptor cache as big as possible. Don't make this call
171 * if the program uses fopen or fdopen. */
173 ih_UseLargeCache(void)
179 fdCacheSize = fdMaxCacheSize;
184 /* Allocate a chunk of inode handles */
186 iHandleAllocateChunk(void)
191 assert(ihAvailHead == NULL);
192 ihP = (IHandle_t *) malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
194 for (i = 0; i < I_HANDLE_MALLOCSIZE; i++) {
195 ihP[i].ih_refcnt = 0;
196 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
200 /* Initialize an inode handle */
202 ih_init(int dev, int vid, Inode ino)
204 int ihash = IH_HASH(dev, vid, ino);
212 /* Do we already have a handle for this Inode? */
213 for (ihP = ihashTable[ihash].ihash_head; ihP; ihP = ihP->ih_next) {
214 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
221 /* Allocate and initialize a new Inode handle */
222 if (ihAvailHead == NULL) {
223 iHandleAllocateChunk();
226 assert(ihP->ih_refcnt == 0);
227 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
233 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
234 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
235 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
240 /* Copy an inode handle */
242 ih_copy(IHandle_t * ihP)
246 assert(ihP->ih_refcnt > 0);
252 /* Allocate a chunk of file descriptor handles */
254 fdHandleAllocateChunk(void)
259 assert(fdAvailHead == NULL);
260 fdP = (FdHandle_t *) malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
262 for (i = 0; i < FD_HANDLE_MALLOCSIZE; i++) {
263 fdP[i].fd_status = FD_HANDLE_AVAIL;
265 fdP[i].fd_fd = INVALID_FD;
266 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
270 /* Allocate a chunk of stream handles */
272 streamHandleAllocateChunk(void)
275 StreamHandle_t *streamP;
277 assert(streamAvailHead == NULL);
278 streamP = (StreamHandle_t *)
279 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
280 assert(streamP != NULL);
281 for (i = 0; i < STREAM_HANDLE_MALLOCSIZE; i++) {
282 streamP[i].str_fd = INVALID_FD;
283 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
289 * Get a file descriptor handle given an Inode handle
292 ih_open(IHandle_t * ihP)
298 if (!ihP) /* XXX should log here in the fileserver */
303 /* Do we already have an open file handle for this Inode? */
304 for (fdP = ihP->ih_fdtail; fdP != NULL; fdP = fdP->fd_ihprev) {
305 if (fdP->fd_status != FD_HANDLE_INUSE) {
306 assert(fdP->fd_status == FD_HANDLE_OPEN);
307 fdP->fd_status = FD_HANDLE_INUSE;
308 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
311 (void)FDH_SEEK(fdP, 0, SEEK_SET);
317 * Try to open the Inode, return NULL on error.
324 if (fd == INVALID_FD && (errno != EMFILE || fdLruHead == NULL) ) {
330 /* fdCacheSize limits the size of the descriptor cache, but
331 * we permit the number of open files to exceed fdCacheSize.
332 * We only recycle open file descriptors when the number
333 * of open files reaches the size of the cache */
334 if ((fdInUseCount > fdCacheSize || fd == INVALID_FD) && fdLruHead != NULL) {
336 assert(fdP->fd_status == FD_HANDLE_OPEN);
337 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
338 DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
339 fd_ihnext, fd_ihprev);
340 closeFd = fdP->fd_fd;
341 if (fd == INVALID_FD) {
342 fdCacheSize--; /* reduce in order to not run into here too often */
343 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
344 fdP->fd_status = FD_HANDLE_AVAIL;
346 fdP->fd_fd = INVALID_FD;
352 if (fdAvailHead == NULL) {
353 fdHandleAllocateChunk();
356 assert(fdP->fd_status == FD_HANDLE_AVAIL);
357 DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
358 closeFd = INVALID_FD;
361 fdP->fd_status = FD_HANDLE_INUSE;
367 /* Add this handle to the Inode's list of open descriptors */
368 DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
371 if (closeFd != INVALID_FD) {
383 * Return a file descriptor handle to the cache
386 fd_close(FdHandle_t * fdP)
395 assert(fdInUseCount > 0);
396 assert(fdP->fd_status == FD_HANDLE_INUSE);
400 /* Call fd_reallyclose to really close the unused file handles if
401 * the previous attempt to close (ih_reallyclose()) all file handles
402 * failed (this is determined by checking the ihandle for the flag
403 * IH_REALLY_CLOSED) or we have too many open files.
405 if (ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
407 return fd_reallyclose(fdP);
410 /* Put this descriptor back into the cache */
411 fdP->fd_status = FD_HANDLE_OPEN;
412 DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
414 /* If this is not the only reference to the Inode then we can decrement
415 * the reference count, otherwise we need to call ih_release.
417 if (ihP->ih_refcnt > 1) {
429 * Actually close the file descriptor handle and return it to
433 fd_reallyclose(FdHandle_t * fdP)
443 assert(fdInUseCount > 0);
444 assert(fdP->fd_status == FD_HANDLE_INUSE);
447 closeFd = fdP->fd_fd;
449 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
450 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
452 fdP->fd_status = FD_HANDLE_AVAIL;
454 fdP->fd_fd = INVALID_FD;
456 /* All the file descriptor handles have been closed; reset
457 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
458 * has completed its job.
460 if (!ihP->ih_fdhead) {
461 ihP->ih_flags &= ~IH_REALLY_CLOSED;
469 /* If this is not the only reference to the Inode then we can decrement
470 * the reference count, otherwise we need to call ih_release. */
471 if (ihP->ih_refcnt > 1) {
482 /* Enable buffered I/O on a file descriptor */
484 stream_fdopen(FD_t fd)
486 StreamHandle_t *streamP;
489 if (streamAvailHead == NULL) {
490 streamHandleAllocateChunk();
492 streamP = streamAvailHead;
493 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
495 streamP->str_fd = fd;
496 streamP->str_buflen = 0;
497 streamP->str_bufoff = 0;
498 streamP->str_error = 0;
499 streamP->str_eof = 0;
500 streamP->str_direction = STREAM_DIRECTION_NONE;
504 /* Open a file for buffered I/O */
506 stream_open(const char *filename, const char *mode)
508 FD_t fd = INVALID_FD;
510 if (strcmp(mode, "r") == 0) {
511 fd = OS_OPEN(filename, O_RDONLY, 0);
512 } else if (strcmp(mode, "r+") == 0) {
513 fd = OS_OPEN(filename, O_RDWR, 0);
514 } else if (strcmp(mode, "w") == 0) {
515 fd = OS_OPEN(filename, O_WRONLY | O_TRUNC | O_CREAT, 0);
516 } else if (strcmp(mode, "w+") == 0) {
517 fd = OS_OPEN(filename, O_RDWR | O_TRUNC | O_CREAT, 0);
518 } else if (strcmp(mode, "a") == 0) {
519 fd = OS_OPEN(filename, O_WRONLY | O_APPEND | O_CREAT, 0);
520 } else if (strcmp(mode, "a+") == 0) {
521 fd = OS_OPEN(filename, O_RDWR | O_APPEND | O_CREAT, 0);
523 assert(FALSE); /* not implemented */
526 if (fd == INVALID_FD) {
529 return stream_fdopen(fd);
532 /* fread for buffered I/O handles */
534 stream_read(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
535 StreamHandle_t * streamP)
537 afs_fsize_t nbytes, bytesRead, bytesToRead;
540 /* Need to seek before changing direction */
541 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
542 streamP->str_direction = STREAM_DIRECTION_READ;
543 streamP->str_bufoff = 0;
544 streamP->str_buflen = 0;
546 assert(streamP->str_direction == STREAM_DIRECTION_READ);
550 nbytes = size * nitems;
552 while (nbytes > 0 && !streamP->str_eof) {
553 if (streamP->str_buflen == 0) {
554 streamP->str_bufoff = 0;
555 streamP->str_buflen =
556 OS_READ(streamP->str_fd, streamP->str_buffer,
557 STREAM_HANDLE_BUFSIZE);
558 if (streamP->str_buflen < 0) {
559 streamP->str_error = errno;
560 streamP->str_buflen = 0;
563 } else if (streamP->str_buflen == 0) {
564 streamP->str_eof = 1;
569 bytesToRead = nbytes;
570 if (bytesToRead > streamP->str_buflen) {
571 bytesToRead = streamP->str_buflen;
573 memcpy(p, streamP->str_buffer + streamP->str_bufoff, bytesToRead);
575 streamP->str_bufoff += bytesToRead;
576 streamP->str_buflen -= bytesToRead;
577 bytesRead += bytesToRead;
578 nbytes -= bytesToRead;
581 return (bytesRead / size);
584 /* fwrite for buffered I/O handles */
586 stream_write(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
587 StreamHandle_t * streamP)
591 afs_fsize_t nbytes, bytesWritten, bytesToWrite;
593 /* Need to seek before changing direction */
594 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
595 streamP->str_direction = STREAM_DIRECTION_WRITE;
596 streamP->str_bufoff = 0;
597 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
599 assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
602 nbytes = size * nitems;
606 if (streamP->str_buflen == 0) {
607 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
608 STREAM_HANDLE_BUFSIZE);
610 streamP->str_error = errno;
614 streamP->str_bufoff = 0;
615 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
618 bytesToWrite = nbytes;
619 if (bytesToWrite > streamP->str_buflen) {
620 bytesToWrite = streamP->str_buflen;
622 memcpy(streamP->str_buffer + streamP->str_bufoff, p, bytesToWrite);
624 streamP->str_bufoff += bytesToWrite;
625 streamP->str_buflen -= bytesToWrite;
626 bytesWritten += bytesToWrite;
627 nbytes -= bytesToWrite;
630 return (bytesWritten / size);
633 /* fseek for buffered I/O handles */
635 stream_seek(StreamHandle_t * streamP, afs_foff_t offset, int whence)
640 if (streamP->str_direction == STREAM_DIRECTION_WRITE
641 && streamP->str_bufoff > 0) {
642 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
643 streamP->str_bufoff);
645 streamP->str_error = errno;
649 streamP->str_bufoff = 0;
650 streamP->str_buflen = 0;
651 streamP->str_eof = 0;
652 streamP->str_direction = STREAM_DIRECTION_NONE;
653 if (OS_SEEK(streamP->str_fd, offset, whence) < 0) {
654 streamP->str_error = errno;
660 /* fflush for buffered I/O handles */
662 stream_flush(StreamHandle_t * streamP)
667 if (streamP->str_direction == STREAM_DIRECTION_WRITE
668 && streamP->str_bufoff > 0) {
669 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
670 streamP->str_bufoff);
672 streamP->str_error = errno;
675 streamP->str_bufoff = 0;
676 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
682 /* Free a buffered I/O handle */
684 stream_close(StreamHandle_t * streamP, int reallyClose)
689 assert(streamP != NULL);
690 if (streamP->str_direction == STREAM_DIRECTION_WRITE
691 && streamP->str_bufoff > 0) {
692 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
693 streamP->str_bufoff);
699 rc = OS_CLOSE(streamP->str_fd);
704 streamP->str_fd = INVALID_FD;
707 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
713 /* Close all unused file descriptors associated with the inode
714 * handle. Called with IH_LOCK held. May drop and reacquire
715 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
716 * if it fails to close all file handles.
719 ih_fdclose(IHandle_t * ihP)
721 int closeCount, closedAll;
722 FdHandle_t *fdP, *head, *tail, *next;
724 assert(ihP->ih_refcnt > 0);
727 DLL_INIT_LIST(head, tail);
728 ihP->ih_flags &= ~IH_REALLY_CLOSED;
731 * Remove the file descriptors for this Inode from the LRU queue
732 * and the IHandle queue and put them on a temporary queue so we
733 * can drop the lock before we close the files.
735 for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
736 next = fdP->fd_ihnext;
737 assert(fdP->fd_ih == ihP);
738 assert(fdP->fd_status == FD_HANDLE_OPEN
739 || fdP->fd_status == FD_HANDLE_INUSE);
740 if (fdP->fd_status == FD_HANDLE_OPEN) {
741 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
743 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
744 DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
747 ihP->ih_flags |= IH_REALLY_CLOSED;
751 /* If the ihandle reference count is 1, we should have
752 * closed all file descriptors.
754 if (ihP->ih_refcnt == 1 || closedAll) {
756 assert(!ihP->ih_fdhead);
757 assert(!ihP->ih_fdtail);
761 return 0; /* No file descriptors closed */
766 * Close the file descriptors
769 for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
770 OS_CLOSE(fdP->fd_fd);
771 fdP->fd_status = FD_HANDLE_AVAIL;
772 fdP->fd_fd = INVALID_FD;
778 assert(fdInUseCount >= closeCount);
779 fdInUseCount -= closeCount;
782 * Append the temporary queue to the list of available descriptors
784 if (fdAvailHead == NULL) {
788 fdAvailTail->fd_next = head;
789 head->fd_prev = fdAvailTail;
796 /* Close all cached file descriptors for this inode. */
798 ih_reallyclose(IHandle_t * ihP)
804 ihP->ih_refcnt++; /* must not disappear over unlock */
805 if (ihP->ih_synced) {
818 assert(ihP->ih_refcnt > 0);
823 if (ihP->ih_refcnt > 1) {
833 /* Release an Inode handle. All cached file descriptors for this
834 * inode are closed when the last reference to this handle is released
837 ih_release(IHandle_t * ihP)
845 assert(ihP->ih_refcnt > 0);
847 if (ihP->ih_refcnt > 1) {
853 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
854 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
855 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
861 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
867 /* Sync an inode to disk if its handle isn't NULL */
869 ih_condsync(IHandle_t * ihP)
881 code = FDH_SYNC(fdP);
893 for (ihash = 0; ihash < I_HANDLE_HASH_SIZE; ihash++) {
894 IHandle_t *ihP, *ihPnext;
896 ihP = ihashTable[ihash].ihash_head;
898 ihP->ih_refcnt++; /* must not disappear over unlock */
899 for (; ihP; ihP = ihPnext) {
901 if (ihP->ih_synced) {
916 /* when decrementing the refcount, the ihandle might disappear
917 and we might not even be able to proceed to the next one.
918 Hence the gymnastics putting a hold on the next one already */
919 ihPnext = ihP->ih_next;
920 if (ihPnext) ihPnext->ih_refcnt++;
922 if (ihP->ih_refcnt > 1) {
936 ih_sync_thread(void *dummy) {
939 #ifdef AFS_PTHREAD_ENV
941 #else /* AFS_PTHREAD_ENV */
943 #endif /* AFS_PTHREAD_ENV */
954 /*************************************************************************
955 * OS specific support routines.
956 *************************************************************************/
957 #ifndef AFS_NAMEI_ENV
959 ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1, int p2,
964 /* See viceinode.h */
965 if (p2 == INODESPECIAL) {
971 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
974 #endif /* AFS_NAMEI_ENV */
981 struct afs_stat status;
982 if (afs_fstat(fd, &status) < 0)
984 return status.st_size;