2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* ihandle.c - file descriptor caching for Inode handles. */
12 /************************************************************************/
14 #include <afsconfig.h>
15 #include <afs/param.h>
20 #include <sys/types.h>
28 #if defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
29 #include <sys/fcntl.h>
30 #include <sys/resource.h>
41 #include <afs/afsint.h>
43 #include <afs/afssyscalls.h>
46 #include "viceinode.h"
47 #ifdef AFS_PTHREAD_ENV
49 #else /* AFS_PTHREAD_ENV */
50 #include "afs/assert.h"
51 #endif /* AFS_PTHREAD_ENV */
54 extern afs_int32 DErrno;
56 #ifdef AFS_PTHREAD_ENV
/* Global ihandle mutex, plus a once-control that is presumably used to run
 * its initializer a single time (the pthread_once call is not visible in
 * this section -- confirm). */
57 pthread_once_t ih_glock_once = PTHREAD_ONCE_INIT;
58 pthread_mutex_t ih_glock_mutex;
59 #endif /* AFS_PTHREAD_ENV */
61 /* Linked list of available inode handles; ih_init() refills it with
 * iHandleAllocateChunk() when it is empty. */
62 IHandle_t *ihAvailHead;
63 IHandle_t *ihAvailTail;
65 /* Linked list of available file descriptor handles; ih_open() refills it
 * with fdHandleAllocateChunk() when it is empty. */
66 FdHandle_t *fdAvailHead;
67 FdHandle_t *fdAvailTail;
69 /* Linked list of available stream descriptor handles; stream_fdopen()
 * refills it with streamHandleAllocateChunk() when it is empty. */
70 StreamHandle_t *streamAvailHead;
71 StreamHandle_t *streamAvailTail;
73 /* LRU list for file descriptor handles. fd_close() appends handles it puts
 * back into the cache (status FD_HANDLE_OPEN) at the tail. */
74 FdHandle_t *fdLruHead;
75 FdHandle_t *fdLruTail;
79 /* Most of the servers use fopen/fdopen. Since the FILE structure
80 * only has eight bits for the file descriptor, the cache size
81 * has to be less than 256. The cache can be made larger as long
82 * as you are sure you don't need fopen/fdopen.
 *
 * Starts at 0; ih_Initialize() assigns the platform-specific limit. */
83 int fdMaxCacheSize = 0;
86 /* Number of in use file descriptors */
89 /* Hash table for inode handles, indexed by IH_HASH(dev, vid, ino) */
90 IHashBucket_t ihashTable[I_HANDLE_HASH_SIZE];
93 #ifdef AFS_PTHREAD_ENV
94 /* Initialize the global ihandle mutex.
 *
 * NOTE(review): pthread_mutex_init() is called inside the assert()
 * argument. If the assert in use here can be compiled out (as the standard
 * <assert.h> macro is when NDEBUG is defined), the mutex would never be
 * initialized -- confirm which assert this build uses.
 */
97 assert(pthread_mutex_init(&ih_glock_mutex, NULL) == 0);
99 #endif /* AFS_PTHREAD_ENV */
101 /* Initialize the file descriptor cache.
 *
 * Resets the available inode-handle list, the available fd-handle list,
 * the fd LRU list and every bucket of ihashTable to empty, then chooses
 * fdMaxCacheSize for the platform:
 *   - AFS_NT40_ENV: FD_MAX_CACHESIZE.
 *   - AFS_SUN5_ENV / AFS_NBSD_ENV: raises the RLIMIT_NOFILE soft limit to
 *     the hard limit and uses that limit minus FD_HANDLE_SETASIDE.
 *   - AFS_HPUX_ENV: sysconf(_SC_OPEN_MAX) minus FD_HANDLE_SETASIDE (never
 *     below 0), capped at FD_MAX_CACHESIZE.
 * Finally fdCacheSize becomes the smaller of fdMaxCacheSize and
 * FD_DEFAULT_CACHESIZE.
 *
 * NOTE(review): getrlimit() and setrlimit() are called inside assert()
 * arguments. If the assert in use can be compiled out (as the standard
 * <assert.h> macro is when NDEBUG is defined), the limit would never be
 * read or raised -- confirm which assert this build uses.
 * NOTE(review): the MIN(fdMaxCacheSize, FD_MAX_CACHESIZE) clamp after the
 * NetBSD remark is presumably NetBSD-only; its guarding preprocessor line
 * is not visible in this view -- confirm.
 */
102 void ih_Initialize(void) {
106 DLL_INIT_LIST(ihAvailHead, ihAvailTail);
107 DLL_INIT_LIST(fdAvailHead, fdAvailTail);
108 DLL_INIT_LIST(fdLruHead, fdLruTail);
109 for (i = 0 ; i < I_HANDLE_HASH_SIZE ; i++) {
110 DLL_INIT_LIST(ihashTable[i].ihash_head, ihashTable[i].ihash_tail);
112 #if defined(AFS_NT40_ENV)
113 fdMaxCacheSize = FD_MAX_CACHESIZE;
114 #elif defined(AFS_SUN5_ENV) || defined(AFS_NBSD_ENV)
117 assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
118 rlim.rlim_cur = rlim.rlim_max;
119 assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
120 fdMaxCacheSize = rlim.rlim_cur-FD_HANDLE_SETASIDE;
122 /* XXX this is to avoid using up all system fd netbsd is
123 * somewhat broken and have set maximum fd for a root process
124 * to the same as system fd that is available, so if the
125 * fileserver uses all up process fds, all system fd will be
128 * Check for this better
132 fdMaxCacheSize = MIN(fdMaxCacheSize, FD_MAX_CACHESIZE);
133 assert(fdMaxCacheSize > 0);
135 #elif defined(AFS_HPUX_ENV)
136 /* Avoid problems with "UFSOpen: igetinode failed" panics on HPUX 11.0 */
140 long fdMax = MAX(sysconf(_SC_OPEN_MAX)-FD_HANDLE_SETASIDE, 0);
141 fdMaxCacheSize = (int) MIN(fdMax, FD_MAX_CACHESIZE);
144 fdCacheSize = MIN(fdMaxCacheSize, FD_DEFAULT_CACHESIZE);
147 /* Make the file descriptor cache as big as possible. Don't make this call
148 * if the program uses fopen or fdopen.
 *
 * The visible effect is that fdCacheSize is set to fdMaxCacheSize, the
 * limit computed by ih_Initialize().
 */
149 void ih_UseLargeCache(void) {
155 fdCacheSize = fdMaxCacheSize;
160 /* Allocate a chunk of inode handles.
 *
 * Must only be called when the available list is empty (asserted).
 * Allocates I_HANDLE_MALLOCSIZE handles with a single malloc() and appends
 * each one, with a zero reference count, to the ihAvailHead/ihAvailTail
 * list.
 *
 * NOTE(review): no check of the malloc() result is visible in this view
 * (streamHandleAllocateChunk asserts that its pointer is non-NULL) --
 * confirm one exists here.
 */
161 void iHandleAllocateChunk(void)
166 assert(ihAvailHead == NULL);
167 ihP = (IHandle_t *)malloc(I_HANDLE_MALLOCSIZE * sizeof(IHandle_t));
169 for (i = 0 ; i < I_HANDLE_MALLOCSIZE ; i++) {
170 ihP[i].ih_refcnt = 0;
171 DLL_INSERT_TAIL(&ihP[i], ihAvailHead, ihAvailTail, ih_next, ih_prev);
175 /* Initialize an inode handle.
 *
 * Searches the ihashTable bucket selected by IH_HASH(dev, vid, ino) for an
 * existing handle with the same device, volume id and inode. What happens
 * on a match is not visible in this view (presumably the existing handle
 * is returned with its reference count raised -- confirm).
 *
 * Otherwise a handle is removed from the available list (a new chunk is
 * allocated first if that list is empty; the handle's reference count is
 * asserted to be 0), its descriptor list is emptied, and it is appended to
 * the hash bucket.
 */
176 IHandle_t *ih_init(int dev, int vid, Inode ino)
178 int ihash = IH_HASH(dev, vid, ino);
187 /* Do we already have a handle for this Inode? */
188 for (ihP = ihashTable[ihash].ihash_head ; ihP ; ihP = ihP->ih_next) {
189 if (ihP->ih_ino == ino && ihP->ih_vid == vid && ihP->ih_dev == dev) {
196 /* Allocate and initialize a new Inode handle */
197 if (ihAvailHead == NULL) {
198 iHandleAllocateChunk();
201 assert(ihP->ih_refcnt == 0);
202 DLL_DELETE(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
208 DLL_INIT_LIST(ihP->ih_fdhead, ihP->ih_fdtail);
209 DLL_INSERT_TAIL(ihP, ihashTable[ihash].ihash_head,
210 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
215 /* Copy an inode handle.
 *
 * Requires that ihP already holds at least one reference (asserted). The
 * rest of the body is not visible in this view; presumably it takes an
 * additional reference and returns ihP -- confirm.
 */
216 IHandle_t *ih_copy(IHandle_t *ihP)
220 assert(ihP->ih_refcnt > 0);
226 /* Allocate a chunk of file descriptor handles.
 *
 * Must only be called when the available list is empty (asserted).
 * Allocates FD_HANDLE_MALLOCSIZE handles with a single malloc(), marks each
 * one FD_HANDLE_AVAIL with fd_fd = INVALID_FD, and appends it to the
 * fdAvailHead/fdAvailTail list.
 *
 * NOTE(review): no check of the malloc() result is visible in this view
 * (streamHandleAllocateChunk asserts that its pointer is non-NULL) --
 * confirm one exists here.
 */
227 void fdHandleAllocateChunk(void)
232 assert(fdAvailHead == NULL);
233 fdP = (FdHandle_t *)malloc(FD_HANDLE_MALLOCSIZE * sizeof(FdHandle_t));
235 for (i = 0 ; i < FD_HANDLE_MALLOCSIZE ; i++) {
236 fdP[i].fd_status = FD_HANDLE_AVAIL;
238 fdP[i].fd_fd = INVALID_FD;
239 DLL_INSERT_TAIL(&fdP[i], fdAvailHead, fdAvailTail, fd_next, fd_prev);
243 /* Allocate a chunk of stream handles.
 *
 * Must only be called when the available list is empty (asserted).
 * Allocates STREAM_HANDLE_MALLOCSIZE handles with a single malloc()
 * (asserting that the allocation succeeded), sets each str_fd to
 * INVALID_FD, and appends each handle to the
 * streamAvailHead/streamAvailTail list.
 */
244 void streamHandleAllocateChunk(void)
247 StreamHandle_t *streamP;
249 assert(streamAvailHead == NULL);
250 streamP = (StreamHandle_t *)
251 malloc(STREAM_HANDLE_MALLOCSIZE * sizeof(StreamHandle_t));
252 assert(streamP != NULL);
253 for (i = 0 ; i < STREAM_HANDLE_MALLOCSIZE ; i++) {
254 streamP[i].str_fd = INVALID_FD;
255 DLL_INSERT_TAIL(&streamP[i], streamAvailHead, streamAvailTail,
/*
 * ih_open: obtain an in-use file descriptor handle for inode handle ihP.
 *
 * A NULL ihP is special-cased first (the action taken is not visible in
 * this view; presumably NULL is returned -- confirm).
 *
 * 1. Reuse: ihP's descriptor list is walked from the tail. The first handle
 *    that is not FD_HANDLE_INUSE (asserted to be FD_HANDLE_OPEN) is marked
 *    FD_HANDLE_INUSE, removed from the LRU list and repositioned to offset
 *    0 with FDH_SEEK.
 * 2. Otherwise the inode is opened (the call itself is not visible here);
 *    INVALID_FD is the failure case.
 * 3. A handle structure is then chosen. If fdInUseCount > fdCacheSize and
 *    the LRU list is not empty, an LRU handle (asserted FD_HANDLE_OPEN) is
 *    unlinked from the LRU list and from its owning inode's list, and its
 *    old descriptor is remembered in closeFd. Otherwise a handle is taken
 *    from the available list (allocating a new chunk if it is empty) and
 *    closeFd is INVALID_FD.
 * 4. The handle is marked FD_HANDLE_INUSE and appended to ihP's list of
 *    open descriptors. A valid closeFd is then dealt with (presumably
 *    closed; that code is not visible here -- confirm).
 *
 * NOTE(review): the comment in the body says descriptors are recycled when
 * the count "reaches" the cache size, while the test is strictly
 * greater-than (fdInUseCount > fdCacheSize).
 */
261 * Get a file descriptor handle given an Inode handle
263 FdHandle_t *ih_open(IHandle_t *ihP)
269 if (!ihP) /* XXX should log here in the fileserver */
274 /* Do we already have an open file handle for this Inode? */
275 for (fdP = ihP->ih_fdtail ; fdP != NULL ; fdP = fdP->fd_ihprev) {
276 if (fdP->fd_status != FD_HANDLE_INUSE) {
277 assert(fdP->fd_status == FD_HANDLE_OPEN);
278 fdP->fd_status = FD_HANDLE_INUSE;
279 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
282 (void) FDH_SEEK(fdP, 0, SEEK_SET);
288 * Try to open the Inode, return NULL on error.
294 if (fd == INVALID_FD) {
300 /* fdCacheSize limits the size of the descriptor cache, but
301 * we permit the number of open files to exceed fdCacheSize.
302 * We only recycle open file descriptors when the number
303 * of open files reaches the size of the cache */
304 if (fdInUseCount > fdCacheSize && fdLruHead != NULL) {
306 assert(fdP->fd_status == FD_HANDLE_OPEN);
307 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
308 DLL_DELETE(fdP, fdP->fd_ih->ih_fdhead, fdP->fd_ih->ih_fdtail,
309 fd_ihnext, fd_ihprev);
310 closeFd = fdP->fd_fd;
312 if (fdAvailHead == NULL) {
313 fdHandleAllocateChunk();
316 assert(fdP->fd_status == FD_HANDLE_AVAIL);
317 DLL_DELETE(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
318 closeFd = INVALID_FD;
321 fdP->fd_status = FD_HANDLE_INUSE;
327 /* Add this handle to the Inode's list of open descriptors */
328 DLL_INSERT_TAIL(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
330 if (closeFd != INVALID_FD) {
/*
 * fd_close: give an in-use descriptor handle back to the cache.
 *
 * Preconditions (asserted): fdInUseCount > 0 and fdP is FD_HANDLE_INUSE.
 *
 * If the owning inode handle has IH_REALLY_CLOSED set, or fdInUseCount
 * exceeds fdCacheSize, the work is delegated to fd_reallyclose() and its
 * result is returned. Otherwise the handle is marked FD_HANDLE_OPEN and
 * appended to the tail of the LRU list, leaving the descriptor open for
 * reuse. The inode handle's reference is then dropped: per the comment in
 * the body, by decrementing when ih_refcnt > 1 and through ih_release
 * otherwise (those statements are not visible in this view).
 */
342 * Return a file descriptor handle to the cache
344 int fd_close(FdHandle_t *fdP)
355 assert(fdInUseCount > 0);
356 assert(fdP->fd_status == FD_HANDLE_INUSE);
360 /* Call fd_reallyclose to really close the unused file handles if
361 * the previous attempt to close (ih_reallyclose()) all file handles
362 * failed (this is determined by checking the ihandle for the flag
363 * IH_REALLY_CLOSED) or we have too many open files.
365 if (ihP->ih_flags & IH_REALLY_CLOSED || fdInUseCount > fdCacheSize) {
367 return fd_reallyclose(fdP);
370 /* Put this descriptor back into the cache */
371 fdP->fd_status = FD_HANDLE_OPEN;
372 DLL_INSERT_TAIL(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
374 /* If this is not the only reference to the Inode then we can decrement
375 * the reference count, otherwise we need to call ih_release.
377 if (ihP->ih_refcnt > 1) {
/*
 * fd_reallyclose: close the descriptor instead of caching it.
 *
 * Preconditions (asserted): fdInUseCount > 0 and fdP is FD_HANDLE_INUSE.
 *
 * The OS descriptor is saved in closeFd, the handle is unlinked from its
 * inode's descriptor list, appended to the available list, marked
 * FD_HANDLE_AVAIL and given fd_fd = INVALID_FD. When the inode's
 * descriptor list becomes empty, IH_REALLY_CLOSED is cleared on the inode
 * handle. The inode handle's reference is then dropped: per the comment in
 * the body, by decrementing when ih_refcnt > 1 and through ih_release
 * otherwise. The actual close of closeFd and the return value are not
 * visible in this view.
 */
389 * Actually close the file descriptor handle and return it to
392 int fd_reallyclose(FdHandle_t *fdP)
403 assert(fdInUseCount > 0);
404 assert(fdP->fd_status == FD_HANDLE_INUSE);
407 closeFd = fdP->fd_fd;
409 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail, fd_ihnext, fd_ihprev);
410 DLL_INSERT_TAIL(fdP, fdAvailHead, fdAvailTail, fd_next, fd_prev);
412 fdP->fd_status = FD_HANDLE_AVAIL;
414 fdP->fd_fd = INVALID_FD;
416 /* All the file descriptor handles have been closed; reset
417 * the IH_REALLY_CLOSED flag indicating that ih_reallyclose
418 * has completed its job.
420 if (!ihP->ih_fdhead) {
421 ihP->ih_flags &= ~IH_REALLY_CLOSED;
430 /* If this is not the only reference to the Inode then we can decrement
431 * the reference count, otherwise we need to call ih_release. */
432 if (ihP->ih_refcnt > 1) {
443 /* Enable buffered I/O on a file descriptor.
 *
 * Takes a stream handle from the head of the available list (allocating a
 * new chunk first if the list is empty), attaches fd to it and resets its
 * state: empty buffer (str_buflen = str_bufoff = 0), no error, not at EOF,
 * direction STREAM_DIRECTION_NONE. The return statement is not visible in
 * this view; presumably the handle is returned -- confirm.
 */
444 StreamHandle_t *stream_fdopen(FD_t fd)
446 StreamHandle_t *streamP;
449 if (streamAvailHead == NULL) {
450 streamHandleAllocateChunk();
452 streamP = streamAvailHead;
453 DLL_DELETE(streamP, streamAvailHead, streamAvailTail, str_next, str_prev);
456 streamP->str_fd = fd;
457 streamP->str_buflen = 0;
458 streamP->str_bufoff = 0;
459 streamP->str_error = 0;
460 streamP->str_eof = 0;
461 streamP->str_direction = STREAM_DIRECTION_NONE;
465 /* Open a file for buffered I/O.
 *
 * Maps an fopen-style mode string onto OS_OPEN flags:
 *   "r"  -> O_RDONLY
 *   "r+" -> O_RDWR
 *   "w"  -> O_WRONLY|O_TRUNC|O_CREAT
 *   "w+" -> O_RDWR|O_TRUNC|O_CREAT
 *   "a"  -> O_WRONLY|O_APPEND|O_CREAT
 *   "a+" -> O_RDWR|O_APPEND|O_CREAT
 * Any other mode string trips assert(FALSE). On success the descriptor is
 * wrapped with stream_fdopen() and that result is returned; the INVALID_FD
 * path is not visible in this view (presumably NULL is returned --
 * confirm).
 *
 * NOTE(review): the third OS_OPEN argument is 0 even when O_CREAT is set.
 * If OS_OPEN maps to open(2), newly created files get no permission bits --
 * confirm this is intended.
 */
466 StreamHandle_t *stream_open(const char *filename, const char *mode)
470 if (strcmp(mode, "r") == 0) {
471 fd = OS_OPEN(filename, O_RDONLY, 0);
472 } else if (strcmp(mode, "r+") == 0) {
473 fd = OS_OPEN(filename, O_RDWR, 0);
474 } else if (strcmp(mode, "w") == 0) {
475 fd = OS_OPEN(filename, O_WRONLY|O_TRUNC|O_CREAT, 0);
476 } else if (strcmp(mode, "w+") == 0) {
477 fd = OS_OPEN(filename, O_RDWR|O_TRUNC|O_CREAT, 0);
478 } else if (strcmp(mode, "a") == 0) {
479 fd = OS_OPEN(filename, O_WRONLY|O_APPEND|O_CREAT, 0);
480 } else if (strcmp(mode, "a+") == 0) {
481 fd = OS_OPEN(filename, O_RDWR|O_APPEND|O_CREAT, 0);
483 assert(FALSE); /* not implemented */
486 if (fd == INVALID_FD) {
489 return stream_fdopen(fd);
492 /* fread for buffered I/O handles.
 *
 * Copies up to size * nitems bytes from the stream into ptr. Whenever the
 * buffer is empty it is refilled with one OS_READ of up to
 * STREAM_HANDLE_BUFSIZE bytes. In read mode str_buflen is the number of
 * unread bytes left in str_buffer and str_bufoff is the offset of the next
 * one. A failed read stores errno in str_error and empties the buffer; a
 * zero-length read sets str_eof, which ends the loop.
 *
 * An idle stream (STREAM_DIRECTION_NONE) is switched to read mode with an
 * empty buffer; a stream in write mode trips the assert, so a seek is
 * needed before changing direction.
 *
 * Returns the number of complete items read (bytesRead / size).
 *
 * NOTE(review): a size of 0 would divide by zero in the return expression;
 * no guard is visible in this view -- confirm callers never pass 0.
 */
493 int stream_read(void *ptr, int size, int nitems, StreamHandle_t *streamP)
495 int nbytes, bytesRead, bytesToRead;
498 /* Need to seek before changing direction */
499 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
500 streamP->str_direction = STREAM_DIRECTION_READ;
501 streamP->str_bufoff = 0;
502 streamP->str_buflen = 0;
504 assert(streamP->str_direction == STREAM_DIRECTION_READ);
508 nbytes = size * nitems;
510 while (nbytes > 0 && !streamP->str_eof) {
511 if (streamP->str_buflen == 0) {
512 streamP->str_bufoff = 0;
513 streamP->str_buflen = OS_READ(streamP->str_fd, streamP->str_buffer,
514 STREAM_HANDLE_BUFSIZE);
515 if (streamP->str_buflen < 0) {
516 streamP->str_error = errno;
517 streamP->str_buflen = 0;
520 } else if (streamP->str_buflen == 0) {
521 streamP->str_eof = 1;
526 bytesToRead = nbytes;
527 if (bytesToRead > streamP->str_buflen) {
528 bytesToRead = streamP->str_buflen;
530 memcpy(p, streamP->str_buffer+streamP->str_bufoff, bytesToRead);
532 streamP->str_bufoff += bytesToRead;
533 streamP->str_buflen -= bytesToRead;
534 bytesRead += bytesToRead;
535 nbytes -= bytesToRead;
538 return (bytesRead/size);
541 /* fwrite for buffered I/O handles.
 *
 * Copies size * nitems bytes from ptr into str_buffer. In write mode
 * str_bufoff is the number of bytes already buffered and str_buflen is the
 * free space remaining. When the free space reaches 0 the whole buffer
 * (STREAM_HANDLE_BUFSIZE bytes) is written with OS_WRITE and the buffer is
 * reset; a failed write stores errno in str_error.
 *
 * An idle stream (STREAM_DIRECTION_NONE) is switched to write mode with an
 * empty buffer; a stream in read mode trips the assert, so a seek is
 * needed before changing direction.
 *
 * Returns the number of complete items accepted (bytesWritten / size).
 * Data still in the buffer on return reaches the file only on a later
 * flush, seek or close.
 *
 * NOTE(review): a size of 0 would divide by zero in the return expression;
 * no guard is visible in this view -- confirm callers never pass 0.
 */
542 int stream_write(void *ptr, int size, int nitems, StreamHandle_t *streamP)
545 int rc, nbytes, bytesWritten, bytesToWrite;
547 /* Need to seek before changing direction */
548 if (streamP->str_direction == STREAM_DIRECTION_NONE) {
549 streamP->str_direction = STREAM_DIRECTION_WRITE;
550 streamP->str_bufoff = 0;
551 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
553 assert(streamP->str_direction == STREAM_DIRECTION_WRITE);
556 nbytes = size * nitems;
560 if (streamP->str_buflen == 0) {
561 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
562 STREAM_HANDLE_BUFSIZE);
564 streamP->str_error = errno;
568 streamP->str_bufoff = 0;
569 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
572 bytesToWrite = nbytes;
573 if (bytesToWrite > streamP->str_buflen) {
574 bytesToWrite = streamP->str_buflen;
576 memcpy(streamP->str_buffer+streamP->str_bufoff, p, bytesToWrite);
578 streamP->str_bufoff += bytesToWrite;
579 streamP->str_buflen -= bytesToWrite;
580 bytesWritten += bytesToWrite;
581 nbytes -= bytesToWrite;
584 return (bytesWritten/size);
587 /* fseek for buffered I/O handles.
 *
 * If the stream is in write mode with buffered data, the first str_bufoff
 * bytes of str_buffer are written out with OS_WRITE first. The buffer
 * state and the EOF flag are then cleared, the direction returns to
 * STREAM_DIRECTION_NONE (so either a read or a write may follow), and the
 * descriptor is repositioned with OS_SEEK(offset, whence). Failures store
 * errno in str_error; the return values are not visible in this view.
 */
588 int stream_seek(StreamHandle_t *streamP, int offset, int whence)
593 if (streamP->str_direction == STREAM_DIRECTION_WRITE &&
594 streamP->str_bufoff > 0) {
595 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
596 streamP->str_bufoff);
598 streamP->str_error = errno;
602 streamP->str_bufoff = 0;
603 streamP->str_buflen = 0;
604 streamP->str_eof = 0;
605 streamP->str_direction = STREAM_DIRECTION_NONE;
606 if (OS_SEEK(streamP->str_fd, offset, whence) < 0) {
607 streamP->str_error = errno;
613 /* fflush for buffered I/O handles.
 *
 * Only acts on a stream in write mode with buffered data: the first
 * str_bufoff bytes of str_buffer are written with OS_WRITE (a failure
 * stores errno in str_error), after which the buffer is marked empty again
 * (str_bufoff = 0, str_buflen = STREAM_HANDLE_BUFSIZE). The stream stays in
 * write mode. The return values are not visible in this view.
 */
614 int stream_flush(StreamHandle_t *streamP)
619 if (streamP->str_direction == STREAM_DIRECTION_WRITE &&
620 streamP->str_bufoff > 0) {
621 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
622 streamP->str_bufoff);
624 streamP->str_error = errno;
627 streamP->str_bufoff = 0;
628 streamP->str_buflen = STREAM_HANDLE_BUFSIZE;
634 /* Free a buffered I/O handle.
 *
 * streamP must not be NULL (asserted). Buffered write data, if any, is
 * written out with OS_WRITE first. The descriptor is closed with OS_CLOSE
 * (presumably only when reallyClose is non-zero; the guarding test is not
 * visible in this view -- confirm), str_fd is reset to INVALID_FD, and the
 * handle is appended to the available stream list for reuse. The return
 * value is not visible in this view.
 */
635 int stream_close(StreamHandle_t *streamP, int reallyClose)
640 assert(streamP != NULL);
641 if (streamP->str_direction == STREAM_DIRECTION_WRITE &&
642 streamP->str_bufoff > 0) {
643 rc = OS_WRITE(streamP->str_fd, streamP->str_buffer,
644 streamP->str_bufoff);
650 rc = OS_CLOSE(streamP->str_fd);
655 streamP->str_fd = INVALID_FD;
658 DLL_INSERT_TAIL(streamP, streamAvailHead, streamAvailTail,
/*
 * ih_fdclose: close the cached (not in-use) descriptors of ihP.
 *
 * The caller must hold a reference on ihP (asserted). IH_REALLY_CLOSED is
 * cleared, then ihP's descriptor list is walked. Every handle must belong
 * to ihP and be either FD_HANDLE_OPEN or FD_HANDLE_INUSE (asserted).
 * Handles in state FD_HANDLE_OPEN are unlinked from the inode's list and
 * from the LRU list and collected on a private list. IH_REALLY_CLOSED is
 * set on the inode handle in the remaining case (presumably when a handle
 * is still in use; the enclosing branch is not visible in this view --
 * confirm).
 *
 * The collected descriptors are then passed to OS_CLOSE and their handles
 * marked FD_HANDLE_AVAIL with fd_fd = INVALID_FD. fdInUseCount is reduced
 * by closeCount, and the private list is spliced onto the tail of the
 * available list (or becomes the available list if that was empty).
 *
 * Returns 0 when no descriptors were closed; other return values are not
 * visible in this view.
 *
 * NOTE(review): the OS_CLOSE() result is not checked in the close loop.
 */
665 /* Close all unused file descriptors associated with the inode
666 * handle. Called with IH_LOCK held. May drop and reacquire
667 * IH_LOCK. Sets the IH_REALLY_CLOSED flag in the inode handle
668 * if it fails to close all file handles.
670 static int ih_fdclose(IHandle_t *ihP)
672 int closeCount, closedAll;
673 FdHandle_t *fdP, *head, *tail, *next;
675 assert(ihP->ih_refcnt > 0);
678 DLL_INIT_LIST(head, tail);
679 ihP->ih_flags &= ~IH_REALLY_CLOSED;
682 * Remove the file descriptors for this Inode from the LRU queue
683 * and the IHandle queue and put them on a temporary queue so we
684 * can drop the lock before we close the files.
686 for (fdP = ihP->ih_fdhead; fdP != NULL; fdP = next) {
687 next = fdP->fd_ihnext;
688 assert(fdP->fd_ih == ihP);
689 assert(fdP->fd_status == FD_HANDLE_OPEN ||
690 fdP->fd_status == FD_HANDLE_INUSE);
691 if (fdP->fd_status == FD_HANDLE_OPEN) {
692 DLL_DELETE(fdP, ihP->ih_fdhead, ihP->ih_fdtail,
693 fd_ihnext, fd_ihprev);
694 DLL_DELETE(fdP, fdLruHead, fdLruTail, fd_next, fd_prev);
695 DLL_INSERT_TAIL(fdP, head, tail, fd_next, fd_prev);
698 ihP->ih_flags |= IH_REALLY_CLOSED;
702 /* If the ihandle reference count is 1, we should have
703 * closed all file descriptors.
705 if (ihP->ih_refcnt == 1 || closedAll) {
707 assert(!ihP->ih_fdhead);
708 assert(!ihP->ih_fdtail);
712 return 0; /* No file descriptors closed */
718 * Close the file descriptors
721 for (fdP = head; fdP != NULL; fdP = fdP->fd_next) {
722 OS_CLOSE(fdP->fd_fd);
723 fdP->fd_status = FD_HANDLE_AVAIL;
724 fdP->fd_fd = INVALID_FD;
731 assert(fdInUseCount >= closeCount);
732 fdInUseCount -= closeCount;
735 * Append the temporary queue to the list of available descriptors
737 if (fdAvailHead == NULL) {
741 fdAvailTail->fd_next = head;
742 head->fd_prev = fdAvailTail;
749 /* Close all cached file descriptors for this inode.
 *
 * The caller must hold a reference on ihP (asserted). The rest of the body
 * is not visible in this view; presumably it runs ih_fdclose() under the
 * ihandle lock -- confirm.
 */
750 int ih_reallyclose(IHandle_t *ihP)
757 assert(ihP->ih_refcnt > 0);
/*
 * ih_release: drop one reference on ihP.
 *
 * The handle must hold at least one reference (asserted). When other
 * references remain (ih_refcnt > 1) a shorter path is taken (presumably
 * the count is just decremented; those statements are not visible in this
 * view -- confirm). On the last reference the handle is unlinked from its
 * ihashTable bucket, selected by IH_HASH(ih_dev, ih_vid, ih_ino), and
 * appended to the available inode-handle list for reuse. The closing of
 * cached descriptors described below and the return value are not visible
 * in this view.
 */
765 /* Release an Inode handle. All cached file descriptors for this
766 * inode are closed when the last reference to this handle is released
768 int ih_release(IHandle_t *ihP)
777 assert(ihP->ih_refcnt > 0);
779 if (ihP->ih_refcnt > 1) {
785 ihash = IH_HASH(ihP->ih_dev, ihP->ih_vid, ihP->ih_ino);
786 DLL_DELETE(ihP, ihashTable[ihash].ihash_head,
787 ihashTable[ihash].ihash_tail, ih_next, ih_prev);
793 DLL_INSERT_TAIL(ihP, ihAvailHead, ihAvailTail, ih_next, ih_prev);
800 /* Sync an inode to disk if its handle isn't NULL.
 *
 * The visible part of the body stores the result of FDH_SYNC(fdP) in code.
 * How fdP is obtained from ihP and how it is released afterwards is not
 * visible in this view (presumably via ih_open/fd_close -- confirm).
 */
801 int ih_condsync(IHandle_t *ihP)
813 code = FDH_SYNC(fdP);
821 /*************************************************************************
822 * OS specific support routines.
823 *************************************************************************/
824 #ifndef AFS_NAMEI_ENV
/*
 * ih_icreate: create an inode through ICREATE(dev, part, nI, p1, p2, p3, p4).
 *
 * Only compiled when AFS_NAMEI_ENV is not defined. The case
 * p2 == INODESPECIAL is handled specially before the ICREATE call (that
 * handling, the remaining parameters and the return statement are not
 * visible in this view; presumably the new inode number is returned --
 * confirm).
 */
825 Inode ih_icreate(IHandle_t *ih, int dev, char *part, Inode nI, int p1, int p2,
830 /* See viceinode.h */
831 if (p2 == INODESPECIAL) {
837 ino = ICREATE(dev, part, nI, p1, p2, p3, p4);
840 #endif /* AFS_NAMEI_ENV */
847 if (fstat(fd, &status)<0)
849 return status.st_size;