2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* An IHandle_t is an abstraction allowing the file and volume operations to
11 * pass the elements required to identify a file to the underlying file
12 * system. For the usual Vice inode operations, this is no more than the
13 * usual device and inode numbers. For the user space file system used on NT
14 * we also need the volume id to identify the file.
16 * An FdHandle_t is an abstraction used to associate file descriptors
17 * with Inode handles. IH_OPEN is used to get a file descriptor that
18 * can be used in subsequent I/O operations. File descriptor handles are
19 * cached by FDH_CLOSE. To make sure a file descriptor is really closed, call FDH_REALLYCLOSE.
22 * The IHandle_t also provides a place to do other optimizations. In the
23 * NT user space file system, we keep a separate special file for the
24 * link counts and using the IHandle_t allows keeping the details of
25 * that at a lower level than the IDEC and IINC calls.
27 * To use the IHandle_t there is a new set of IH_xxxx/FDH_XXXX operations.
28 * Each takes a pointer to an IHandle_t or an FdHandle_t as the first
29 * argument. This pointer is considered an in/out variable. In particular,
30 * the open file descriptors for a given Inode are stored in a linked list
31 * of FdHandle_t hanging off of each IHandle_t. IH_OPEN returns NULL on error,
32 * and a valid FdHandle_t otherwise. All other IH_xxxx/FDH_xxxx macros return
33 * -1 on error and 0 on success.
35 * Inode handle operations:
36 * IH_INIT - Initialize the Inode handle with the device, volume id, and ino
37 * IH_COPY - Copy Inode handle info to a new handle with no open descriptors.
38 * IH_REALLYCLOSE - Close all cached file descriptors for Inode handle
39 * IH_RELEASE - release an Inode handle, close all cached file descriptors
40 * IH_CONDSYNC - sync the Inode if it has any open file descriptors
42 * Inode operation replacements:
43 * IH_CREATE - create a file in the underlying filesystem and setup the
44 * information needed to reference this file in the IHandle_t.
45 * IH_OPEN - open the file belonging to the associated inode and set the
47 * IH_IREAD/IH_IWRITE - read/write an Inode.
48 * IH_INC/IH_DEC - increment/decrement the link count.
50 * Replacements for C runtime file operations
51 * FDH_READ/FDH_WRITE - read/write using the file descriptor.
52 * FDH_READV/FDH_WRITEV - readv/writev (Unix only)
53 * FDH_SEEK - set file handle's read/write position
54 * FDH_CLOSE - return a file descriptor to the cache
55 * FDH_REALLYCLOSE - Close a file descriptor, do not return to the cache
56 * FDH_SYNC - Unconditionally sync an open file.
57 * FDH_TRUNC - Truncate a file
60 * FDH_SIZE - returns the size of the file.
61 * FDH_NLINK - returns the link count of the file.
64 * FDH_FDOPEN - create a descriptor for buffered I/O
65 * STREAM_READ/STREAM_WRITE - buffered file I/O
71 #ifdef AFS_PTHREAD_ENV
/* Global ihandle lock state, declared for AFS_PTHREAD_ENV builds.
 * ih_glock_once and ih_glock_init are passed to pthread_once() before the
 * lock is taken; ih_glock_mutex is the mutex that is entered and later
 * exited through IH_UNLOCK. */
73 extern pthread_once_t ih_glock_once;
74 extern pthread_mutex_t ih_glock_mutex;
75 extern void ih_glock_init(void);
77 do { osi_Assert(pthread_once(&ih_glock_once, ih_glock_init) == 0); \
78 MUTEX_ENTER(&ih_glock_mutex); \
/* Release the global ihandle mutex (ih_glock_mutex). */
80 #define IH_UNLOCK MUTEX_EXIT(&ih_glock_mutex)
81 #else /* AFS_PTHREAD_ENV */
84 #endif /* AFS_PTHREAD_ENV */
88 * Macro to initialize a doubly linked list, lifted from Encina
90 #define DLL_INIT_LIST(head, tail) \
97 * Macro to remove an element from a doubly linked list
99 #define DLL_DELETE(ptr,head,tail,next,prev) \
102 (ptr)->next->prev = (ptr)->prev; \
104 (tail) = (ptr)->prev; \
106 (ptr)->prev->next = (ptr)->next; \
108 (head) = (ptr)->next; \
109 (ptr)->next = (ptr)->prev = NULL; \
110 osi_Assert(!(head) || !((head)->prev)); \
114 * Macro to insert an element at the tail of a doubly linked list
116 #define DLL_INSERT_TAIL(ptr,head,tail,next,prev) \
118 (ptr)->next = NULL; \
119 (ptr)->prev = (tail); \
122 (ptr)->prev->next = (ptr); \
125 osi_Assert((head) && ((head)->prev == NULL)); \
128 #endif /* DLL_INIT_LIST */
131 typedef __int64 Inode;
133 #include <afs/afssyscalls.h>
136 /* The dir package's page hashing function is dependent upon the layout of
137 * IHandle_t as well as the containing DirHandle in viced/viced.h. Make
138 * sure the volume id is still used as the hash after any changes to either
142 /* forward declaration */
145 /* File descriptors are HANDLE's on NT. The following typedef helps catch
146 * type errors. duplicated in libadmin/vos/afs_vosAdmin.c
/* Sentinel meaning "no descriptor": the value -1 converted to FD_t. */
153 #define INVALID_FD ((FD_t)-1)
155 /* file descriptor handle */
/* One descriptor associated with an inode handle. Besides the descriptor
 * itself it carries link fields for two separate lists: the LRU/avail list
 * (fd_next/fd_prev) and the owning inode handle's descriptor list
 * (fd_ihnext/fd_ihprev). */
156 typedef struct FdHandle_s {
157 int fd_status; /* status flags (see the FD_HANDLE_* status values) */
158 int fd_refcnt; /* refcnt */
159 FD_t fd_fd; /* file descriptor */
160 struct IHandle_s *fd_ih; /* Pointer to Inode handle */
161 struct FdHandle_s *fd_next; /* LRU/Avail list pointers */
162 struct FdHandle_s *fd_prev;
163 struct FdHandle_s *fd_ihnext; /* Inode handle's list of file descriptors */
164 struct FdHandle_s *fd_ihprev;
167 /* File descriptor status values */
168 #define FD_HANDLE_AVAIL 1 /* handle is not open and available */
169 #define FD_HANDLE_OPEN 2 /* handle is open and not in use */
170 #define FD_HANDLE_INUSE 3 /* handle is open and in use */
172 /* buffered file descriptor handle */
173 #define STREAM_HANDLE_BUFSIZE 2048 /* size of str_buffer, the buffer behind STREAM_READ/STREAM_WRITE */
/* State for buffered I/O on one descriptor: the descriptor, the data buffer
 * with its fill level and offsets, sticky error/EOF indicators (read back
 * through STREAM_ERROR and STREAM_EOF), and avail-list links. */
174 typedef struct StreamHandle_s {
175 FD_t str_fd; /* file descriptor */
176 int str_direction; /* current read/write direction (see STREAM_DIRECTION_*) */
177 afs_sfsize_t str_buflen; /* bytes remaining in buffer */
178 afs_foff_t str_bufoff; /* current offset into buffer */
179 afs_foff_t str_fdoff; /* current offset into file */
180 int str_error; /* error code */
181 int str_eof; /* end of file flag */
182 struct StreamHandle_s *str_next; /* Avail list pointers */
183 struct StreamHandle_s *str_prev;
184 char str_buffer[STREAM_HANDLE_BUFSIZE]; /* data buffer */
187 #define STREAM_DIRECTION_NONE 1 /* stream is in initial mode */
188 #define STREAM_DIRECTION_READ 2 /* stream is in input mode */
189 #define STREAM_DIRECTION_WRITE 3 /* stream is in output mode */
191 /* Number of handles allocated at a shot: as many IHandle_t (or FdHandle_t)
 * structures as fit in 4096 bytes, using integer division, and a count of 1
 * for stream handles. */
192 #define I_HANDLE_MALLOCSIZE ((size_t)((4096/sizeof(IHandle_t))))
193 #define FD_HANDLE_MALLOCSIZE ((size_t)((4096/sizeof(FdHandle_t))))
194 #define STREAM_HANDLE_MALLOCSIZE 1
199 * On modern platforms tuned for I/O intensive workloads, there may be
200 * thousands of file descriptors available (64K on 32-bit Solaris 7,
201 * for example), and threading in Solaris 9 and Linux 2.6 (NPTL) are
202 * tuned for (many) thousands of concurrent threads at peak.
204 * On these platforms, it makes sense to allow administrators to set
205 * appropriate limits for their hardware. Clients may now set desired
206 * values in the exported vol_io_params, of type ih_init_params.
209 typedef struct ih_init_params
211 afs_uint32 fd_handle_setaside; /* for non-cached i/o, trad. was 128 */
212 afs_uint32 fd_initial_cachesize; /* what was 'default' */
213 afs_uint32 fd_max_cachesize; /* max open files if large-cache activated */
217 /* Number of file descriptors needed for non-cached I/O */
218 #define FD_HANDLE_SETASIDE 128 /* Match to MAX_FILESERVER_THREAD */
220 /* Which systems have 8-bit fileno? On GNU/Linux systems, the
221 * fileno member of FILE is an int. On NetBSD 5, it's a short.
222 * Ditto for OpenBSD 4.5. Through Solaris 10 8/07 it's unsigned char.
225 /* Don't try to have more than 256 files open at once if you are planning
226 * to use fopen or fdopen. The FILE structure has an eight bit field for
227 * the file descriptor. */
228 #define FD_DEFAULT_CACHESIZE (255-FD_HANDLE_SETASIDE)
230 /* We need some limit on the number of files open at once. Some systems
231 * say we can open lots of files, but when we do they run out of slots
234 #define FD_MAX_CACHESIZE (2000 - FD_HANDLE_SETASIDE)
236 /* On modern platforms, this is sized higher than the note implies.
237 * For HP, see http://forums11.itrc.hp.com/service/forums/questionanswer.do?admit=109447626+1242508538748+28353475&threadId=302950
238 * On AIX, it's said to be self-tuning (sar -v)
239 * On Solaris, http://www.princeton.edu/~unix/Solaris/troubleshoot/kerntune.html
240 * says stdio limit (FILE) may exist, but then backtracks and says the 64bit
241 * solaris and POLL (rather than select) io avoid the issue. Solaris Internals
242 * states that Solaris 7 and above deal with up to 64K on 32bit.
243 * However, extended FILE must be enabled to use this. See
244 * enable_extended_FILE_stdio(3C)
/* Inode handle: identifies a file by parent volume id, device and inode
 * number, and anchors the list of FdHandle_s descriptors opened for it
 * (ih_fdhead/ih_fdtail). ih_next/ih_prev link the handle into either the
 * avail list or a hash chain. */
248 typedef struct IHandle_s {
249 afs_uint32 ih_vid; /* Parent volume id. */
250 int ih_dev; /* device id. */
251 int ih_flags; /* Flags */
252 int ih_synced; /* should be synced next time (set to 1 by FDH_SYNC) */
253 Inode ih_ino; /* Inode number */
254 int ih_refcnt; /* reference count */
255 struct FdHandle_s *ih_fdhead; /* List of open file descriptors */
256 struct FdHandle_s *ih_fdtail;
257 struct IHandle_s *ih_next; /* Links for avail list/hash chains */
258 struct IHandle_s *ih_prev;
261 /* Flags for the Inode handle */
262 #define IH_REALLY_CLOSED 1
264 /* Hash function for inode handles */
265 #define I_HANDLE_HASH_SIZE 2048 /* power of 2 */
267 /* The casts to int's ensure NT gets the xor operation correct. */
/* IH_HASH XORs device D, volume id V and inode I (I is cast to int before
 * the XOR), masks the result with I_HANDLE_HASH_SIZE-1 and casts it to int,
 * giving a value in 0 .. I_HANDLE_HASH_SIZE-1. The mask is only equivalent
 * to a modulo because the table size is a power of 2. */
268 #define IH_HASH(D, V, I) ((int)(((D)^(V)^((int)(I)))&(I_HANDLE_HASH_SIZE-1)))
271 * Hash buckets for inode handles
273 typedef struct IHashBucket_s {
274 IHandle_t *ihash_head;
275 IHandle_t *ihash_tail;
278 /* Prototypes for handle support routines. */
283 #include "namei_ops.h"
285 extern void ih_clear(IHandle_t * h);
286 extern Inode ih_create(IHandle_t * h, int dev, char *part, Inode nI, int p1,
287 int p2, int p3, int p4);
288 extern FILE *ih_fdopen(FdHandle_t * h, char *fdperms);
289 #endif /* AFS_NAMEI_ENV */
292 * Prototypes for file descriptor cache routines
294 extern void ih_PkgDefaults(void);
295 extern void ih_Initialize(void);
296 extern void ih_UseLargeCache(void);
297 extern IHandle_t *ih_init(int /*@alt Device@ */ dev, int /*@alt VolId@ */ vid,
/* Functions behind the IH_COPY and IH_OPEN macros. */
299 extern IHandle_t *ih_copy(IHandle_t * ihP);
300 extern FdHandle_t *ih_open(IHandle_t * ihP);
/* Functions behind FDH_CLOSE and FDH_REALLYCLOSE; note those macros discard
 * the int result returned here. */
301 extern int fd_close(FdHandle_t * fdP);
302 extern int fd_reallyclose(FdHandle_t * fdP);
/* Buffered stream I/O, reached through the STREAM_* and FDH_FDOPEN macros. */
303 extern StreamHandle_t *stream_fdopen(FD_t fd);
304 extern StreamHandle_t *stream_open(const char *file, const char *mode);
305 extern afs_sfsize_t stream_read(void *ptr, afs_fsize_t size,
306 afs_fsize_t nitems, StreamHandle_t * streamP);
307 extern afs_sfsize_t stream_write(void *ptr, afs_fsize_t size,
309 StreamHandle_t * streamP);
/* Functions behind STREAM_ASEEK and STREAM_FLUSH. */
310 extern int stream_aseek(StreamHandle_t * streamP, afs_foff_t offset);
311 extern int stream_flush(StreamHandle_t * streamP);
/* reallyClose is passed as 0 by STREAM_CLOSE and as 1 by STREAM_REALLYCLOSE. */
312 extern int stream_close(StreamHandle_t * streamP, int reallyClose);
/* Functions behind IH_REALLYCLOSE, IH_RELEASE and IH_CONDSYNC. */
313 extern int ih_reallyclose(IHandle_t * ihP);
314 extern int ih_release(IHandle_t * ihP);
315 extern int ih_condsync(IHandle_t * ihP);
317 /* Macros common to user space and inode API's. */
/* IH_INIT and IH_COPY assign the new handle to their first argument, which
 * must therefore be an assignable expression. */
318 #define IH_INIT(H, D, V, I) ((H) = ih_init((D), (V), (I)))
320 #define IH_COPY(D, S) ((D) = ih_copy(S))
322 #define IH_NLINK(H) ih_nlink(H)
324 #define IH_OPEN(H) ih_open(H)
/* FDH_CLOSE and FDH_REALLYCLOSE call the close routine, throw away its
 * return value, set H to NULL so the stale handle cannot be reused, and
 * always evaluate to 0. */
326 #define FDH_CLOSE(H) (fd_close(H), (H)=NULL, 0)
328 #define FDH_REALLYCLOSE(H) (fd_reallyclose(H), (H)=NULL, 0)
/* The second argument of FDH_FDOPEN (A) and of STREAM_FDOPEN (B) is accepted
 * but not used in the expansion. */
330 #define FDH_FDOPEN(H, A) stream_fdopen((H)->fd_fd)
332 #define STREAM_FDOPEN(A, B) stream_fdopen(A)
334 #define STREAM_OPEN(A, B) stream_open(A, B)
336 #define STREAM_READ(A, B, C, H) stream_read(A, B, C, H)
338 #define STREAM_WRITE(A, B, C, H) stream_write(A, B, C, H)
340 #define STREAM_ASEEK(H, A) stream_aseek(H, A)
342 #define STREAM_FLUSH(H) stream_flush(H)
/* Direct reads of the stream handle's error and end-of-file fields. */
344 #define STREAM_ERROR(H) ((H)->str_error)
346 #define STREAM_EOF(H) ((H)->str_eof)
/* Both close variants call stream_close(); they differ only in the
 * reallyClose argument (0 versus 1). */
348 #define STREAM_CLOSE(H) stream_close(H, 0)
350 #define STREAM_REALLYCLOSE(H) stream_close(H, 1)
/* Like FDH_CLOSE: the result of ih_release() is discarded, H is set to NULL
 * and the expression evaluates to 0. */
352 #define IH_RELEASE(H) (ih_release(H), (H)=NULL, 0)
354 #define IH_REALLYCLOSE(H) ih_reallyclose(H)
356 #define IH_CONDSYNC(H) ih_condsync(H)
/* Positional read/write selection. Going by the #else/#endif labels below:
 * with HAVE_PIO and O_LARGEFILE the 64-bit calls are used, with HAVE_PIO
 * alone the plain pread/pwrite, and without HAVE_PIO the ih_pread/ih_pwrite
 * functions declared here. */
360 #define OS_PREAD(FD, B, S, O) pread64(FD, B, S, O)
361 #define OS_PWRITE(FD, B, S, O) pwrite64(FD, B, S, O)
362 #else /* !O_LARGEFILE */
363 #define OS_PREAD(FD, B, S, O) pread(FD, B, S, O)
364 #define OS_PWRITE(FD, B, S, O) pwrite(FD, B, S, O)
365 #endif /* !O_LARGEFILE */
366 #else /* !HAVE_PIO */
367 extern ssize_t ih_pread(int fd, void * buf, size_t count, afs_foff_t offset);
368 extern ssize_t ih_pwrite(int fd, const void * buf, size_t count, afs_foff_t offset);
369 #define OS_PREAD(FD, B, S, O) ih_pread(FD, B, S, O)
370 #define OS_PWRITE(FD, B, S, O) ih_pwrite(FD, B, S, O)
371 #endif /* !HAVE_PIO */
/* AFS_NT40_ENV mapping: every OS_xxx and IH_xxx operation forwards to the
 * nt_* routine of the same purpose. IH_CREATE takes D and N for interface
 * compatibility but does not pass them to nt_icreate(). */
376 #define IH_CREATE(H, D, P, N, P1, P2, P3, P4) \
377 nt_icreate(H, P, P1, P2, P3, P4)
379 #define OS_IOPEN(H) nt_iopen(H)
380 #define OS_OPEN(F, M, P) nt_open(F, M, P)
381 #define OS_CLOSE(FD) nt_close(FD)
383 #define OS_READ(FD, B, S) nt_read(FD, B, S)
384 #define OS_WRITE(FD, B, S) nt_write(FD, B, S)
385 #define OS_SEEK(FD, O, F) nt_seek(FD, O, F)
387 #define OS_SYNC(FD) nt_fsync(FD)
388 #define OS_TRUNC(FD, L) nt_ftruncate(FD, L)
389 #define OS_SIZE(FD) nt_size(FD)
/* Link-count and whole-inode read/write operations. */
391 #define IH_INC(H, I, P) nt_inc(H, I, P)
392 #define IH_DEC(H, I, P) nt_dec(H, I, P)
393 #define IH_IREAD(H, O, B, S) nt_iread(H, O, B, S)
394 #define IH_IWRITE(H, O, B, S) nt_iwrite(H, O, B, S)
396 #else /* AFS_NT40_ENV */
398 /*@+fcnmacros +macrofcndecl@*/
/* Function-style prototypes carrying the same names and argument order as
 * the IH_xxx / OS_xxx macros defined further down.
 * NOTE(review): the closing "#endif" of this region is labelled S_SPLINT_S
 * and the parameters carry splint annotations, so these are presumably seen
 * only by the splint checker and never by the compiler - confirm against
 * the full file. */
400 extern Inode IH_CREATE(IHandle_t * H, int /*@alt Device @ */ D,
401 char *P, Inode N, int /*@alt VolumeId @ */ P1,
402 int /*@alt VnodeId @ */ P2,
403 int /*@alt Unique @ */ P3,
404 int /*@alt unsigned @ */ P4);
405 extern FD_t OS_IOPEN(IHandle_t * H);
406 extern int OS_OPEN(const char *F, int M, mode_t P);
407 extern int OS_CLOSE(int FD);
408 extern ssize_t OS_READ(int FD, void *B, size_t S);
409 extern ssize_t OS_WRITE(int FD, void *B, size_t S);
410 extern ssize_t OS_PREAD(int FD, void *B, size_t S, afs_foff_t O);
411 extern ssize_t OS_PWRITE(int FD, void *B, size_t S, afs_foff_t O);
412 extern int OS_SYNC(int FD);
413 extern afs_sfsize_t OS_SIZE(int FD);
414 extern int IH_INC(IHandle_t * H, Inode I, int /*@alt VolId, VolumeId @ */ P);
415 extern int IH_DEC(IHandle_t * H, Inode I, int /*@alt VolId, VolumeId @ */ P);
416 extern afs_sfsize_t IH_IREAD(IHandle_t * H, afs_foff_t O, void *B,
418 extern afs_sfsize_t IH_IWRITE(IHandle_t * H, afs_foff_t O, void *B,
421 extern off64_t OS_SEEK(int FD, off64_t O, int F);
422 extern int OS_TRUNC(int FD, off64_t L);
423 #else /* !O_LARGEFILE */
424 extern off_t OS_SEEK(int FD, off_t O, int F);
425 extern int OS_TRUNC(int FD, off_t L);
426 #endif /* !O_LARGEFILE */
427 #endif /*S_SPLINT_S */
/* Mapping used in the non-NT part of the AFS_NAMEI_ENV branch: inode-level
 * operations forward to namei_* routines, descriptor-level operations to the
 * C library calls. IH_CREATE takes D and N for interface compatibility but
 * does not pass them to namei_icreate(). */
429 #define IH_CREATE(H, D, P, N, P1, P2, P3, P4) \
430 namei_icreate(H, P, P1, P2, P3, P4)
432 #define OS_IOPEN(H) namei_iopen(H)
/* Per the #else labels, the 64-bit call is the O_LARGEFILE variant. */
434 #define OS_OPEN(F, M, P) open64(F, M, P)
435 #else /* !O_LARGEFILE */
436 #define OS_OPEN(F, M, P) open(F, M, P)
437 #endif /* !O_LARGEFILE */
438 #define OS_CLOSE(FD) close(FD)
440 #define OS_READ(FD, B, S) read(FD, B, S)
441 #define OS_WRITE(FD, B, S) write(FD, B, S)
/* The offset is cast to the offset type of the call being used. */
443 #define OS_SEEK(FD, O, F) lseek64(FD, (off64_t) (O), F)
444 #else /* !O_LARGEFILE */
445 #define OS_SEEK(FD, O, F) lseek(FD, (off_t) (O), F)
446 #endif /* !O_LARGEFILE */
448 #define OS_SYNC(FD) fsync(FD)
/* The length is cast to the offset type of the call being used. */
450 #define OS_TRUNC(FD, L) ftruncate64(FD, (off64_t) (L))
451 #else /* !O_LARGEFILE */
452 #define OS_TRUNC(FD, L) ftruncate(FD, (off_t) (L))
453 #endif /* !O_LARGEFILE */
/* File size is obtained through the ih_size() helper declared here. */
454 #define OS_SIZE(FD) ih_size(FD)
455 extern afs_sfsize_t ih_size(int fd);
/* Link-count and whole-inode read/write operations. */
457 #define IH_INC(H, I, P) namei_inc(H, I, P)
458 #define IH_DEC(H, I, P) namei_dec(H, I, P)
459 #define IH_IREAD(H, O, B, S) namei_iread(H, O, B, S)
460 #define IH_IWRITE(H, O, B, S) namei_iwrite(H, O, B, S)
461 /*@=fcnmacros =macrofcndecl@*/
462 #endif /* AFS_NT40_ENV */
464 #else /* AFS_NAMEI_ENV */
/* Branch taken when AFS_NAMEI_ENV is not defined: IH_CREATE forwards all
 * eight arguments, unchanged and in order, to ih_icreate(). The other
 * IH_CREATE definitions in this header drop D and N instead. */
465 extern Inode ih_icreate(IHandle_t * ih, int dev, char *part, Inode nI, int p1,
466 int p2, int p3, int p4);
468 #define IH_CREATE(H, D, P, N, P1, P2, P3, P4) \
469 ih_icreate(H, D, P, N, P1, P2, P3, P4)
/* Descriptor-level operations for the branch without AFS_NAMEI_ENV.
 * With AFS_LINUX22_ENV, OS_IOPEN is a stub that evaluates to -1. The other
 * two variants open by device and inode number through IOPEN, one of them
 * adding O_LARGEFILE to the flags. */
471 #ifdef AFS_LINUX22_ENV
472 #define OS_IOPEN(H) -1
475 #define OS_IOPEN(H) (IOPEN((H)->ih_dev, (H)->ih_ino, O_RDWR|O_LARGEFILE))
477 #define OS_IOPEN(H) (IOPEN((H)->ih_dev, (H)->ih_ino, O_RDWR))
480 #define OS_OPEN(F, M, P) open(F, M, P)
481 #define OS_CLOSE(FD) close(FD)
/* Per the #else labels, the 64-bit calls are the O_LARGEFILE variants; the
 * offset/length is cast to the offset type of the call being used. */
484 #define OS_SEEK(FD, O, F) lseek64(FD, (off64_t) (O), F)
485 #else /* !O_LARGEFILE */
486 #define OS_SEEK(FD, O, F) lseek(FD, (off_t) (O), F)
487 #endif /* !O_LARGEFILE */
489 #define OS_SYNC(FD) fsync(FD)
491 #define OS_TRUNC(FD, L) ftruncate64(FD, (off64_t) (L))
492 #else /* !O_LARGEFILE */
493 #define OS_TRUNC(FD, L) ftruncate(FD, (off_t) (L))
494 #endif /* !O_LARGEFILE */
/* File size is obtained through the ih_size() helper declared here. */
495 #define OS_SIZE(FD) ih_size(FD)
496 extern afs_sfsize_t ih_size(int fd);
/* With AFS_LINUX22_ENV the inode-level operations are stubs that ignore
 * their arguments and evaluate to -1. */
498 #ifdef AFS_LINUX22_ENV
499 #define IH_INC(H, I, P) -1
500 #define IH_DEC(H, I, P) -1
501 #define IH_IREAD(H, O, B, S) -1
502 #define IH_IWRITE(H, O, B, S) -1
/* Otherwise the link count is adjusted through IINC/IDEC using the device
 * recorded in the handle. */
504 #define IH_INC(H, I, P) IINC((H)->ih_dev, I, P)
505 #define IH_DEC(H, I, P) IDEC((H)->ih_dev, I, P)
506 #define IH_IREAD(H, O, B, S) inode_read((H)->ih_dev, (H)->ih_ino, (H)->ih_vid,\
508 #define IH_IWRITE(H, O, B, S) \
509 inode_write((H)->ih_dev, (H)->ih_ino, (H)->ih_vid, O, B, S)
510 #endif /* AFS_LINUX22_ENV */
513 #endif /* AFS_NAMEI_ENV */
/* Descriptor-handle I/O wrappers: each pulls the raw descriptor out of the
 * FdHandle_t (H)->fd_fd and forwards the remaining arguments unchanged.
 * The vectored forms call the C library directly. */
516 #define FDH_READV(H, I, N) readv((H)->fd_fd, I, N)
517 #define FDH_WRITEV(H, I, N) writev((H)->fd_fd, I, N)
521 #define FDH_PREADV(H, I, N, O) preadv((H)->fd_fd, I, N, O)
522 #define FDH_PWRITEV(H, I, N, O) pwritev((H)->fd_fd, I, N, O)
/* The remaining forms go through the platform-selected OS_xxx macros. */
525 #define FDH_PREAD(H, B, S, O) OS_PREAD((H)->fd_fd, B, S, O)
526 #define FDH_PWRITE(H, B, S, O) OS_PWRITE((H)->fd_fd, B, S, O)
527 #define FDH_READ(H, B, S) OS_READ((H)->fd_fd, B, S)
528 #define FDH_WRITE(H, B, S) OS_WRITE((H)->fd_fd, B, S)
529 #define FDH_SEEK(H, O, F) OS_SEEK((H)->fd_fd, O, F)
/*
 * FDH_SYNC - mark the inode handle behind descriptor handle H as needing a
 * sync by setting its ih_synced field to 1; no sync call is issued here.
 *
 * Evaluates to 0 when the flag was set (the assigned value 1, minus 1) and
 * to 1 when H has no inode handle attached (fd_ih is NULL).
 *
 * H is parenthesized on every use so that arguments such as "p + 1" or
 * "*pp" expand correctly. H is evaluated more than once, so it must not
 * have side effects.
 */
#define FDH_SYNC(H) (((H)->fd_ih != NULL) ? ((H)->fd_ih->ih_synced = 1) - 1 : 1)
/* Truncate / size wrappers: pull the raw descriptor out of the FdHandle_t
 * (H)->fd_fd and forward to the platform-selected OS_TRUNC / OS_SIZE. */
532 #define FDH_TRUNC(H, L) OS_TRUNC((H)->fd_fd, L)
533 #define FDH_SIZE(H) OS_SIZE((H)->fd_fd)
535 #endif /* _IHANDLE_H_ */