2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* ihandle.c - file descriptor caching for Inode handles. */
12 /************************************************************************/
14 #include <afsconfig.h>
15 #include <afs/param.h>
21 #include <sys/types.h>
29 #if defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
30 #include <sys/fcntl.h>
31 #include <sys/resource.h>
42 #include <afs/afsint.h>
44 #include <afs/afssyscalls.h>
47 #include "viceinode.h"
48 #ifdef AFS_PTHREAD_ENV
50 #else /* AFS_PTHREAD_ENV */
51 #include "afs/assert.h"
52 #endif /* AFS_PTHREAD_ENV */
57 #define afs_stat stat64
58 #define afs_fstat fstat64
59 #else /* !O_LARGEFILE */
61 #define afs_fstat fstat
62 #endif /* !O_LARGEFILE */
63 #endif /* AFS_NT40_ENV */
65 #ifdef AFS_PTHREAD_ENV
/* Once-control and mutex backing the global ihandle lock (pthread builds
 * only). The mutex is initialized with pthread_mutex_init() later in this
 * file. */
66 pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
67 pthread_mutex_t ih_glock_mutex;
68 #endif /* AFS_PTHREAD_ENV */
70 /* Linked list of available (unused) inode handles. Refilled a chunk at a
 * time by iHandleAllocateChunk(); ih_init() takes handles from it and
 * ih_release() appends them back. */
71 IHandle_t *ihAvailHead;
72 IHandle_t *ihAvailTail;
74 /* Linked list of available (unused) file descriptor handles. Refilled by
 * fdHandleAllocateChunk(); entries on it carry status FD_HANDLE_AVAIL. */
75 FdHandle_t *fdAvailHead;
76 FdHandle_t *fdAvailTail;
78 /* Linked list of available stream descriptor handles. Refilled by
 * streamHandleAllocateChunk(); stream_fdopen() takes from the head and
 * stream_close() appends to the tail. */
79 StreamHandle_t *streamAvailHead;
80 StreamHandle_t *streamAvailTail;
82 /* LRU list for file descriptor handles. fd_close() appends descriptors it
 * keeps cached (status FD_HANDLE_OPEN) at the tail. */
83 FdHandle_t *fdLruHead;
84 FdHandle_t *fdLruTail;
88 /* Most of the servers use fopen/fdopen. Since the FILE structure
89 * only has eight bits for the file descriptor, the cache size
90 * has to be less than 256. The cache can be made larger as long
91 * as you are sure you don't need fopen/fdopen. */
/* Upper bound for the descriptor cache. Starts at 0 and is computed per
 * platform by the initialization code below; ih_UseLargeCache() raises
 * fdCacheSize to this value. */
92 int fdMaxCacheSize = 0;
95 /* Number of in-use file descriptors */
98 /* Hash table for inode handles; buckets are selected with
 * IH_HASH(dev, vid, ino) and each bucket is a doubly linked list. */
99 IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];
102 #ifdef AFS_PTHREAD_ENV
103 /* Initialize the global ihandle mutex (pthread builds only).
 *
 * NOTE(review): pthread_mutex_init() is called inside assert(). If this
 * file is ever compiled with an assert() that expands to nothing (for
 * example <assert.h> with NDEBUG defined), the mutex would never be
 * initialized -- confirm which assert implementation is in effect here. */
107 assert(pthread_mutex_init(&ih_glock_mutex, NULL) == 0);
109 #endif /* AFS_PTHREAD_ENV */
111 /* Initialize the file descriptor cache: reset the three free/LRU lists
 * and every hash bucket, work out the platform's descriptor ceiling
 * (fdMaxCacheSize) and derive the initial cache size (fdCacheSize). */
118 DLL_INIT_LIST(ihAvailHead, ihAvailTail);
119 DLL_INIT_LIST(fdAvailHead, fdAvailTail);
120 DLL_INIT_LIST(fdLruHead, fdLruTail);
121 for (i = 0; i < I_HANDLE_HASH_SIZE; i++) {
122 DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
124 #if defined(AFS_NT40_ENV)
125 fdMaxCacheSize = FD_MAX_CACHESIZE;
126 #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
/* Raise the soft RLIMIT_NOFILE limit to the hard limit, then size the
 * cache as that limit minus FD_HANDLE_SETASIDE.
 * NOTE(review): getrlimit()/setrlimit() are invoked inside assert(); they
 * would not run at all if assert() compiled to nothing -- confirm. */
129 assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
130 rlim.rlim_cur = rlim.rlim_max;
131 assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
132 fdMaxCacheSize = rlim.rlim_cur - FD_HANDLE_SETASIDE;
134 /* XXX this is to avoid using up all system fds. NetBSD is
135 * somewhat broken and has set the maximum fd for a root process
136 * to the same as the system fds that are available, so if the
137 * fileserver uses up all process fds, all system fds will be
140 * Check for this better
144 fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE);
145 assert(fdMaxCacheSize > 0);
147 #elif defined(AFS_HPUX_ENV)
148 /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
152 long fdMax = MAX(sysconf(_SC_OPEN_MAX) - FD_HANDLE_SETASIDE, 0);
153 fdMaxCacheSize = (int)MIN(fdMax, FD_MAX_CACHESIZE);
/* Start with the default cache size, capped at the platform maximum. */
156 fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE);
/* Start the ih_sync_thread background thread: a detached pthread in
 * pthread builds, otherwise an LWP named ih_syncer with a 16KB stack at
 * priority LWP_MAX_PRIORITY - 2. The pthread_create() result is not
 * checked on the line shown here. */
159 void *ih_sync_thread();
160 #ifdef AFS_PTHREAD_ENV
162 pthread_attr_t tattr;
164 pthread_attr_init(&tattr);
165 pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED);
167 pthread_create(&syncer, &tattr, ih_sync_thread, NULL);
168 #else /* AFS_PTHREAD_ENV */
170 LWP_CreateProcess(ih_sync_thread, 16*1024, LWP_MAX_PRIORITY - 2,
171 NULL, "ih_syncer", &syncer);
172 #endif /* AFS_PTHREAD_ENV */
177 /* Make the file descriptor cache as big as possible. Don't make this call
178 * if the program uses fopen or fdopen: it raises fdCacheSize to
 * fdMaxCacheSize, and the note on fdMaxCacheSize explains that the FILE
 * structure only has eight bits for the file descriptor. */
180 ih_UseLargeCache(void)
186 fdCacheSize = fdMaxCacheSize;
191 /* Allocate a chunk of I_HANDLE_MALLOCSIZE inode handles with a single
 * malloc() and append each one, with a zero reference count, to the
 * available list. Must only be called when the available list is empty
 * (asserted). */
193 iHandleAllocateChunk(void)
198 assert(ihAvailHead == NULL);
199 ihP = (IHandle_t *) malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
201 for (i = 0; i < I_HANDLE_MALLOCSIZE; i++) {
202 ihP[i].ih_refcnt = 0;
203 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
207 /* Initialize an inode handle for (dev, vid, ino).
 * The hash chain selected by IH_HASH(dev, vid, ino) is searched first for a
 * handle whose inode, volume id and device all match. If none is found, a
 * handle is taken from the available list (allocating another chunk when
 * that list is empty), its descriptor list is reset, and it is appended to
 * the hash chain. */
209 ih_init(int dev, int vid, Inode ino)
211 int ihash = IH_HASH(dev, vid, ino);
219 /* Do we already have a handle for this Inode? */
220 for (ihP = ihashTable[ihash].ihash_head; ihP; ihP = ihP->ih_next) {
221 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
228 /* Allocate and initialize a new Inode handle */
229 if (ihAvailHead == NULL) {
230 iHandleAllocateChunk();
233 assert(ihP->ih_refcnt == 0);
234 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
240 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
241 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
242 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
247 /* Copy an inode handle. The handle passed in must already be referenced
 * (ih_refcnt > 0, asserted). */
249 ih_copy(IHandle_t * ihP)
253 assert(ihP->ih_refcnt > 0);
259 /* Allocate a chunk of FD_HANDLE_MALLOCSIZE file descriptor handles with a
 * single malloc(). Each handle is marked FD_HANDLE_AVAIL with fd_fd set to
 * INVALID_FD and appended to the available list. Must only be called when
 * the available list is empty (asserted). */
261 fdHandleAllocateChunk(void)
266 assert(fdAvailHead == NULL);
267 fdP = (FdHandle_t *) malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
269 for (i = 0; i < FD_HANDLE_MALLOCSIZE; i++) {
270 fdP[i].fd_status = FD_HANDLE_AVAIL;
272 fdP[i].fd_fd = INVALID_FD;
273 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
277 /* Allocate a chunk of STREAM_HANDLE_MALLOCSIZE stream handles with a
 * single malloc() (asserted to succeed). Each handle gets str_fd set to
 * INVALID_FD and is appended to the available stream list. Must only be
 * called when that list is empty (asserted). */
279 streamHandleAllocateChunk(void)
282 StreamHandle_t *streamP;
284 assert(streamAvailHead == NULL);
285 streamP = (StreamHandle_t *)
286 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
287 assert(streamP != NULL);
288 for (i = 0; i < STREAM_HANDLE_MALLOCSIZE; i++) {
289 streamP[i].str_fd = INVALID_FD;
290 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
296 * Get a file descriptor handle given an Inode handle
299 ih_open(IHandle_t * ihP)
/* Order of work, as far as the visible lines show:
 *  1. Walk this ihandle's descriptor list from the tail; the first entry
 *     that is not FD_HANDLE_INUSE (asserted to be FD_HANDLE_OPEN) is marked
 *     in use, taken off the LRU list and rewound to offset 0.
 *  2. Otherwise open the inode; INVALID_FD is the failure case.
 *  3. Pick a descriptor handle for the new fd: recycle one from the LRU
 *     list when fdInUseCount > fdCacheSize and the LRU list is non-empty
 *     (its previous fd is remembered in closeFd), else take one from the
 *     available list, allocating a new chunk if that list is empty.
 *  4. Mark the handle FD_HANDLE_INUSE and append it to the ihandle's
 *     descriptor list. */
305 if (!ihP) /* XXX should log here in the fileserver */
310 /* Do we already have an open file handle for this Inode? */
311 for (fdP = ihP->ih_fdtail; fdP != NULL; fdP = fdP->fd_ihprev) {
312 if (fdP->fd_status != FD_HANDLE_INUSE) {
313 assert(fdP->fd_status == FD_HANDLE_OPEN);
314 fdP->fd_status = FD_HANDLE_INUSE;
315 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
318 (void)FDH_SEEK(fdP, 0, SEEK_SET);
324 * Try to open the Inode, return NULL on error.
330 if (fd == INVALID_FD) {
336 /* fdCacheSize limits the size of the descriptor cache, but
337 * we permit the number of open files to exceed fdCacheSize.
338 * We only recycle open file descriptors when the number
339 * of open files reaches the size of the cache.
 * NOTE(review): the test below is a strict greater-than, so recycling
 * starts only once fdInUseCount exceeds fdCacheSize. */
340 if (fdInUseCount > fdCacheSize && fdLruHead != NULL) {
342 assert(fdP->fd_status == FD_HANDLE_OPEN);
343 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
344 DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
345 fd_ihnext, fd_ihprev);
346 closeFd = fdP->fd_fd;
348 if (fdAvailHead == NULL) {
349 fdHandleAllocateChunk();
352 assert(fdP->fd_status == FD_HANDLE_AVAIL);
353 DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
354 closeFd = INVALID_FD;
357 fdP->fd_status = FD_HANDLE_INUSE;
363 /* Add this handle to the Inode's list of open descriptors */
364 DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
367 if (closeFd != INVALID_FD) {
379 * Return a file descriptor handle to the cache
382 fd_close(FdHandle_t * fdP)
/* The descriptor must currently be FD_HANDLE_INUSE (asserted). It is either
 * handed to fd_reallyclose() or marked FD_HANDLE_OPEN and appended to the
 * tail of the LRU list. ihP is presumably fdP->fd_ih; the assignment is
 * not visible in this listing. */
391 assert(fdInUseCount > 0);
392 assert(fdP->fd_status == FD_HANDLE_INUSE);
396 /* Call fd_reallyclose to really close the unused file handles if
397 * the previous attempt to close (ih_reallyclose()) all file handles
398 * failed (this is determined by checking the ihandle for the flag
399 * IH_REALLY_CLOSED) or we have too many open files.
401 if (ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
403 return fd_reallyclose(fdP);
406 /* Put this descriptor back into the cache */
407 fdP->fd_status = FD_HANDLE_OPEN;
408 DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
410 /* If this is not the only reference to the Inode then we can decrement
411 * the reference count, otherwise we need to call ih_release.
413 if (ihP->ih_refcnt > 1) {
425 * Actually close the file descriptor handle and return it to
 * the available list (fdAvailHead/fdAvailTail). The handle must be
 * FD_HANDLE_INUSE (asserted). It is unlinked from its ihandle's descriptor
 * list, marked FD_HANDLE_AVAIL and has fd_fd reset to INVALID_FD. The
 * descriptor saved in closeFd is presumably closed in lines that are not
 * visible in this listing.
429 fd_reallyclose(FdHandle_t * fdP)
439 assert(fdInUseCount > 0);
440 assert(fdP->fd_status == FD_HANDLE_INUSE);
443 closeFd = fdP->fd_fd;
445 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
446 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
448 fdP->fd_status = FD_HANDLE_AVAIL;
450 fdP->fd_fd = INVALID_FD;
452 /* All the file descriptor handles have been closed; reset
453 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
454 * has completed its job.
456 if (!ihP->ih_fdhead) {
457 ihP->ih_flags &= ~IH_REALLY_CLOSED;
465 /* If this is not the only reference to the Inode then we can decrement
466 * the reference count, otherwise we need to call ih_release. */
467 if (ihP->ih_refcnt > 1) {
478 /* Enable buffered I/O on a file descriptor.
 * Takes a stream handle from the head of the available list (allocating a
 * new chunk first if the list is empty), binds it to fd and resets its
 * buffer length/offset, error, EOF flag and direction. */
480 stream_fdopen(FD_t fd)
482 StreamHandle_t *streamP;
485 if (streamAvailHead == NULL) {
486 streamHandleAllocateChunk();
488 streamP = streamAvailHead;
489 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
491 streamP->str_fd = fd;
492 streamP->str_buflen = 0;
493 streamP->str_bufoff = 0;
494 streamP->str_error = 0;
495 streamP->str_eof = 0;
496 streamP->str_direction = STREAM_DIRECTION_NONE;
500 /* Open a file for buffered I/O.
 * mode is an fopen-style string; the six spellings r, r+, w, w+, a and a+
 * are mapped to OS_OPEN flag sets, and any other mode trips assert(FALSE).
 * A successfully opened descriptor is wrapped with stream_fdopen().
 * NOTE(review): the creating modes pass O_CREAT with a permission argument
 * of 0. If OS_OPEN maps onto open(2), newly created files would get no
 * permission bits -- confirm that this is intended. */
502 stream_open(const char *filename, const char *mode)
506 if (strcmp(mode, "r") == 0) {
507 fd = OS_OPEN(filename, O_RDONLY, 0);
508 } else if (strcmp(mode, "r+") == 0) {
509 fd = OS_OPEN(filename, O_RDWR, 0);
510 } else if (strcmp(mode, "w") == 0) {
511 fd = OS_OPEN(filename, O_WRONLY | O_TRUNC | O_CREAT, 0);
512 } else if (strcmp(mode, "w+") == 0) {
513 fd = OS_OPEN(filename, O_RDWR | O_TRUNC | O_CREAT, 0);
514 } else if (strcmp(mode, "a") == 0) {
515 fd = OS_OPEN(filename, O_WRONLY | O_APPEND | O_CREAT, 0);
516 } else if (strcmp(mode, "a+") == 0) {
517 fd = OS_OPEN(filename, O_RDWR | O_APPEND | O_CREAT, 0);
519 assert(FALSE); /* not implemented */
522 if (fd == INVALID_FD) {
525 return stream_fdopen(fd);
528 /* fread for buffered I/O handles.
 * Copies up to size * nitems bytes out of the stream buffer, refilling the
 * buffer with OS_READ (at most STREAM_HANDLE_BUFSIZE bytes at a time)
 * whenever it is empty. A negative read result stores errno in str_error;
 * a zero-length read sets str_eof. The return value is the number of bytes
 * read divided by size.
 * NOTE(review): the final division has no guard for size == 0 in the lines
 * visible here, and size * nitems is not checked for overflow. */
530 stream_read(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
531 StreamHandle_t * streamP)
533 afs_fsize_t nbytes, bytesRead, bytesToRead;
536 /* Need to seek before changing direction */
537 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
538 streamP->str_direction = STREAM_DIRECTION_READ;
539 streamP->str_bufoff = 0;
540 streamP->str_buflen = 0;
542 assert(streamP->str_direction == STREAM_DIRECTION_READ);
546 nbytes = size * nitems;
548 while (nbytes > 0 && !streamP->str_eof) {
549 if (streamP->str_buflen == 0) {
550 streamP->str_bufoff = 0;
551 streamP->str_buflen =
552 OS_READ(streamP->str_fd, streamP->str_buffer,
553 STREAM_HANDLE_BUFSIZE);
554 if (streamP->str_buflen < 0) {
555 streamP->str_error = errno;
556 streamP->str_buflen = 0;
559 } else if (streamP->str_buflen == 0) {
560 streamP->str_eof = 1;
565 bytesToRead = nbytes;
566 if (bytesToRead > streamP->str_buflen) {
567 bytesToRead = streamP->str_buflen;
569 memcpy(p, streamP->str_buffer + streamP->str_bufoff, bytesToRead);
571 streamP->str_bufoff += bytesToRead;
572 streamP->str_buflen -= bytesToRead;
573 bytesRead += bytesToRead;
574 nbytes -= bytesToRead;
577 return (bytesRead / size);
580 /* fwrite for buffered I/O handles.
 * In write direction str_bufoff is the number of bytes queued in the buffer
 * and str_buflen is the free space left (it starts at STREAM_HANDLE_BUFSIZE
 * and shrinks as data is copied in). When no space is left the whole
 * buffer is written with OS_WRITE and the counters are reset. The return
 * value is the number of bytes accepted divided by size.
 * NOTE(review): as in stream_read, the division by size is unguarded in
 * the lines visible here. */
582 stream_write(void *ptr, afs_fsize_t size, afs_fsize_t nitems,
583 StreamHandle_t * streamP)
587 afs_fsize_t nbytes, bytesWritten, bytesToWrite;
589 /* Need to seek before changing direction */
590 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
591 streamP->str_direction = STREAM_DIRECTION_WRITE;
592 streamP->str_bufoff = 0;
593 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
595 assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
598 nbytes = size * nitems;
602 if (streamP->str_buflen == 0) {
603 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
604 STREAM_HANDLE_BUFSIZE);
606 streamP->str_error = errno;
610 streamP->str_bufoff = 0;
611 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
614 bytesToWrite = nbytes;
615 if (bytesToWrite > streamP->str_buflen) {
616 bytesToWrite = streamP->str_buflen;
618 memcpy(streamP->str_buffer + streamP->str_bufoff, p, bytesToWrite);
620 streamP->str_bufoff += bytesToWrite;
621 streamP->str_buflen -= bytesToWrite;
622 bytesWritten += bytesToWrite;
623 nbytes -= bytesToWrite;
626 return (bytesWritten / size);
629 /* fseek for buffered I/O handles.
 * If the stream is in write direction with queued bytes, those str_bufoff
 * bytes are written out first. The buffer counters, the EOF flag and the
 * direction are then reset (direction becomes STREAM_DIRECTION_NONE, so
 * the next read or write may pick either direction) before OS_SEEK is
 * called. Failures record errno in str_error. */
631 stream_seek(StreamHandle_t * streamP, afs_foff_t offset, int whence)
636 if (streamP->str_direction == STREAM_DIRECTION_WRITE
637 && streamP->str_bufoff > 0) {
638 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
639 streamP->str_bufoff);
641 streamP->str_error = errno;
645 streamP->str_bufoff = 0;
646 streamP->str_buflen = 0;
647 streamP->str_eof = 0;
648 streamP->str_direction = STREAM_DIRECTION_NONE;
649 if (OS_SEEK(streamP->str_fd, offset, whence) < 0) {
650 streamP->str_error = errno;
656 /* fflush for buffered I/O handles.
 * Only acts on a stream in write direction that has queued bytes: the
 * str_bufoff pending bytes are written with OS_WRITE, errno is recorded in
 * str_error on the failure path, and the buffer is reset to empty with
 * STREAM_HANDLE_BUFSIZE bytes of free space. */
658 stream_flush(StreamHandle_t * streamP)
663 if (streamP->str_direction == STREAM_DIRECTION_WRITE
664 && streamP->str_bufoff > 0) {
665 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
666 streamP->str_bufoff);
668 streamP->str_error = errno;
671 streamP->str_bufoff = 0;
672 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
678 /* Free a buffered I/O handle.
 * Pending write data (str_bufoff bytes) is written out first. The handle
 * then has str_fd reset to INVALID_FD and is appended to the available
 * stream list. The OS_CLOSE call is presumably conditional on reallyClose;
 * the test itself is not visible in this listing. */
680 stream_close(StreamHandle_t * streamP, int reallyClose)
685 assert(streamP != NULL);
686 if (streamP->str_direction == STREAM_DIRECTION_WRITE
687 && streamP->str_bufoff > 0) {
688 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
689 streamP->str_bufoff);
695 rc = OS_CLOSE(streamP->str_fd);
700 streamP->str_fd = INVALID_FD;
703 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
709 /* Close all unused file descriptors associated with the inode
710 * handle. Called with IH_LOCK held. May drop and reacquire
711 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
712 * if it fails to close all file handles.
 *
 * Only descriptors in state FD_HANDLE_OPEN are unlinked (from both the
 * ihandle's list and the LRU list) and moved to a temporary queue; after
 * they are closed and marked FD_HANDLE_AVAIL the whole queue is spliced
 * onto the tail of the available list. IH_REALLY_CLOSED is cleared on
 * entry; presumably it is set again when an FD_HANDLE_INUSE descriptor is
 * encountered (the branch structure is not fully visible here).
715 ih_fdclose(IHandle_t * ihP)
717 int closeCount, closedAll;
718 FdHandle_t *fdP, *head, *tail, *next;
720 assert(ihP->ih_refcnt > 0);
723 DLL_INIT_LIST(head, tail);
724 ihP->ih_flags &= ~IH_REALLY_CLOSED;
727 * Remove the file descriptors for this Inode from the LRU queue
728 * and the IHandle queue and put them on a temporary queue so we
729 * can drop the lock before we close the files.
731 for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
732 next = fdP->fd_ihnext;
733 assert(fdP->fd_ih == ihP);
734 assert(fdP->fd_status == FD_HANDLE_OPEN
735 || fdP->fd_status == FD_HANDLE_INUSE);
736 if (fdP->fd_status == FD_HANDLE_OPEN) {
737 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext,
739 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
740 DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
743 ihP->ih_flags |= IH_REALLY_CLOSED;
747 /* If the ihandle reference count is 1, we should have
748 * closed all file descriptors.
750 if (ihP->ih_refcnt == 1 || closedAll) {
752 assert(!ihP->ih_fdhead);
753 assert(!ihP->ih_fdtail);
757 return 0; /* No file descriptors closed */
762 * Close the file descriptors
765 for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
766 OS_CLOSE(fdP->fd_fd);
767 fdP->fd_status = FD_HANDLE_AVAIL;
768 fdP->fd_fd = INVALID_FD;
774 assert(fdInUseCount >= closeCount);
775 fdInUseCount -= closeCount;
778 * Append the temporary queue to the list of available descriptors
780 if (fdAvailHead == NULL) {
784 fdAvailTail->fd_next = head;
785 head->fd_prev = fdAvailTail;
792 /* Close all cached file descriptors for this inode. The handle must be
 * referenced (ih_refcnt > 0, asserted). The closing itself is presumably
 * delegated to ih_fdclose(); the call is not visible in this listing. */
794 ih_reallyclose(IHandle_t * ihP)
800 assert(ihP->ih_refcnt > 0);
807 /* Release an Inode handle. All cached file descriptors for this
808 * inode are closed when the last reference to this handle is released
 *
 * The handle must be referenced on entry (ih_refcnt > 0, asserted). The
 * ih_refcnt > 1 case is handled separately (presumably by just dropping
 * one reference); otherwise the handle is unlinked from the hash chain
 * selected by IH_HASH(dev, vid, ino) and appended to the available list.
811 ih_release(IHandle_t * ihP)
819 assert(ihP->ih_refcnt > 0);
821 if (ihP->ih_refcnt > 1) {
827 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
828 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
829 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
835 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
841 /* Sync an inode to disk if its handle isn't NULL. The sync itself is
 * issued with FDH_SYNC on a descriptor handle; how that handle is obtained
 * and released is not visible in this listing. */
843 ih_condsync(IHandle_t * ihP)
855 code = FDH_SYNC(fdP);
/* Walk every bucket of the inode-handle hash table. An extra reference is
 * taken on the handle being visited, and on its successor before the
 * current one is let go, so neither can be recycled while the walk is in
 * progress. Handles with ih_synced set appear to be the ones that get
 * OS_SYNC applied to an open descriptor (the full branch is not visible in
 * this listing). */
866 for (ihash = 0; ihash < I_HANDLE_HASH_SIZE; ihash++) {
867 IHandle_t *ihP, *ihPnext;
869 ihP = ihashTable[ihash].ihash_head;
871 ihP->ih_refcnt++; /* must not disappear over unlock */
872 for (; ihP; ihP = ihPnext) {
874 if (ihP->ih_synced) {
881 if (fdP) OS_SYNC(fdP->fd_fd);
887 /* When decrementing the refcount, the ihandle might disappear
888 and we might not even be able to proceed to the next one.
889 Hence the gymnastics of putting a hold on the next one in advance. */
890 ihPnext = ihP->ih_next;
891 if (ihPnext) ihPnext->ih_refcnt++;
893 if (ihP->ih_refcnt > 1) {
910 #ifdef AFS_PTHREAD_ENV
912 #else /* AFS_PTHREAD_ENV */
914 #endif /* AFS_PTHREAD_ENV */
924 /*************************************************************************
925 * OS specific support routines.
926 *************************************************************************/
927 #ifndef AFS_NAMEI_ENV
/* Create an inode through ICREATE(dev, part, nI, p1, p2, p3, p4). Only
 * built when AFS_NAMEI_ENV is not defined. Calls with p2 == INODESPECIAL
 * get separate handling first; what that handling does is not visible in
 * this listing (the original comment points at viceinode.h). */
929 ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1, int p2,
934 /* See viceinode.h */
935 if (p2 == INODESPECIAL) {
941 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
944 #endif /* AFS_NAMEI_ENV */
/* Report the size of the file open on fd, taken from st_size as filled in
 * by afs_fstat(). What is returned when afs_fstat() fails is not visible
 * in this listing. */
951 struct afs_stat status;
952 if (afs_fstat(fd, &status) < 0)
954 return status.st_size;