2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
14 * afs_StoreOnLastReference
19 #include <afsconfig.h>
20 #include "afs/param.h"
23 #include "afs/sysincludes.h" /* Standard vendor system headers */
24 #include "afsincludes.h" /* Afs-based standard headers */
25 #include "afs/afs_stats.h" /* statistics */
26 #include "afs/afs_cbqueue.h"
27 #include "afs/nfsclient.h"
28 #include "afs/afs_osidnlc.h"
31 extern unsigned char *afs_indexFlags;
33 /* Called by all write-on-close routines: regular afs_close,
34 * store via background daemon and store via the
35 * afs_FlushActiveVCaches routine (when CCORE is on).
36 * avc->lock must be write-locked.
/*
 * afs_StoreOnLastReference
 *
 * Push a file's dirty cached data back to the fileserver when the last
 * writer reference goes away.
 *
 * avc  - vcache whose data is stored; caller holds avc->lock write-locked
 *        (see comment above).
 * treq - vrequest carrying the caller's credential/retry state.
 *
 * NOTE(review): the return type, local declarations, and the final
 * return path are not visible in this excerpt — confirm against the
 * full source; it appears to return the afs_StoreAllSegments() code.
 */
39 afs_StoreOnLastReference(struct vcache *avc,
40 struct vrequest *treq)
44 AFS_STATCNT(afs_StoreOnLastReference);
45 /* if CCore flag is set, we clear it and do the extra decrement
46 * ourselves now. If we're called by the CCore clearer, the CCore
47 * flag will already be clear, so we don't have to worry about
48 * clearing it twice. */
49 if (avc->f.states & CCore) {
52 avc->f.states &= ~CCore;
53 #if defined(AFS_SGI_ENV)
/* SGI sanity check: open/writer counts must still be positive here. */
54 osi_Assert(avc->opens > 0 && avc->execsOrWriters > 0);
56 /* WARNING: Our linux cm code treats the execsOrWriters counter differently
57 * depending on the flags the file was opened with. So, if you make any
58 * changes to the way the execsOrWriters flag is handled check with the
/* Undo the extra writer reference taken when CCore was set. */
61 avc->execsOrWriters--;
62 AFS_RELE(AFSTOV(avc)); /* VN_HOLD at set CCore(afs_FakeClose) */
63 cred = (afs_ucred_t *)avc->linkData; /* "crheld" in afs_FakeClose */
/* Connected: do the real store to the fileserver now. */
68 if (!AFS_IS_DISCONNECTED) {
71 /* Now, send the file back. Used to require 0 writers left, but now do
72 * it on every close for write, since two closes in a row are harmless
73 * since first will clean all chunks, and second will be noop. Note that
74 * this will also save confusion when someone keeps a file open
75 * inadvertently, since with old system, writes to the server would never
78 code = afs_StoreAllSegments(avc, treq, AFS_LASTSTORE /*!sync-to-disk */ );
80 * We have to do these after the above store in done: in some systems
81 * like aix they'll need to flush all the vm dirty pages to the disk via
82 * the strategy routine. During that all procedure (done under no avc
83 * locks) opens, refcounts would be zero, since it didn't reach the
84 * afs_{rd,wr} routines which means the vcache is a perfect candidate
/* Disconnected read-write: just record the dirty close locally. */
87 } else if (AFS_IS_DISCON_RW) {
88 afs_DisconAddDirty(avc, VDisconWriteClose, 0);
89 } /* if not disconnected */
91 #if defined(AFS_SGI_ENV)
92 osi_Assert(avc->opens > 0 && avc->execsOrWriters > 0);
/* Balance the writer count now that the store has been issued. */
96 avc->execsOrWriters--;
/*
 * afs_UFSWriteUIO
 *
 * Write the data described by 'tuiop' into the on-disk (UFS) cache file
 * identified by 'inode', via the platform-specific vnode write
 * interface.  Each preprocessor branch below is the same operation in
 * that platform's VOP/VNOP vocabulary, usually bracketed by the
 * platform's vnode locking calls.
 *
 * NOTE(review): the return type, NULL-check on osi_UFSOpen(), file
 * close, and some #else/#endif lines are not visible in this excerpt —
 * confirm in the full source.
 */
101 afs_UFSWriteUIO(struct vcache *avc, afs_dcache_id_t *inode, struct uio *tuiop)
103 struct osi_file *tfile;
/* Open the cache file backing this dcache entry. */
106 tfile = (struct osi_file *)osi_UFSOpen(inode);
110 #if defined(AFS_AIX41_ENV)
112 code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, tuiop, NULL, NULL,
113 NULL, afs_osi_credp);
115 #elif defined(AFS_AIX32_ENV)
116 code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, tuiop, NULL, NULL);
117 #elif defined(AFS_AIX_ENV)
118 code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, (off_t) &offset,
119 tuiop, NULL, NULL, -1);
120 #elif defined(AFS_SUN5_ENV)
/* Solaris: VOP_WRITE requires the vnode rw-locked for write; Solaris 10
 * adds a caller_context_t argument to the VOP calls. */
122 # ifdef AFS_SUN510_ENV
123 VOP_RWLOCK(tfile->vnode, 1, NULL);
124 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp, NULL);
125 VOP_RWUNLOCK(tfile->vnode, 1, NULL);
127 VOP_RWLOCK(tfile->vnode, 1);
128 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
129 VOP_RWUNLOCK(tfile->vnode, 1);
134 #elif defined(AFS_SGI_ENV)
/* SGI: mark the vcache so other code knows a UFS write is in flight. */
136 avc->f.states |= CWritingUFS;
137 AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_WRITE);
138 AFS_VOP_WRITE(tfile->vnode, tuiop, IO_ISLOCKED, afs_osi_credp, code);
139 AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_WRITE);
140 avc->f.states &= ~CWritingUFS;
142 #elif defined(AFS_HPUX100_ENV)
145 code = VOP_RDWR(tfile->vnode, tuiop, UIO_WRITE, 0, afs_osi_credp);
148 #elif defined(AFS_LINUX_ENV)
/* Linux: osi_rdwr wraps the kernel file write. */
150 code = osi_rdwr(tfile, tuiop, UIO_WRITE);
152 #elif defined(AFS_DARWIN80_ENV)
154 code = VNOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
156 #elif defined(AFS_DARWIN_ENV)
158 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
159 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
160 VOP_UNLOCK(tfile->vnode, 0, current_proc());
162 #elif defined(AFS_FBSD_ENV)
164 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
165 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
166 VOP_UNLOCK(tfile->vnode, 0);
168 #elif defined(AFS_NBSD_ENV)
170 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
171 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
/* NetBSD 6.0 removed the flags argument from VOP_UNLOCK. */
172 #if defined(AFS_NBSD60_ENV)
173 VOP_UNLOCK(tfile->vnode);
175 VOP_UNLOCK(tfile->vnode, 0);
178 #elif defined(AFS_XBSD_ENV)
180 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
181 code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
182 VOP_UNLOCK(tfile->vnode, 0, curproc);
/* Fallback for other platforms: plain VOP_RDWR, with sync I/O off. */
186 tuio.uio_fpflags &= ~FSYNCIO; /* don't do sync io */
188 code = VOP_RDWR(tfile->vnode, tuiop, UIO_WRITE, 0, afs_osi_credp);
195 /* called on writes */
/*
 * afs_write
 *
 * VFS-level write entry point: copy the user data in 'auio' into the
 * AFS cache, one dcache chunk per loop iteration, extending the cached
 * file length as needed.  The dirty chunks are stored back to the
 * fileserver later (on close/fsync, or early via afs_DoPartialWrite).
 *
 * avc    - target vcache
 * auio   - source data, file position, and residual byte count
 * aio    - I/O flags (IO_APPEND, IO_SYNC, ...)
 * acred  - caller credentials
 * noLock - NOTE(review): appears to mean "caller already holds locks";
 *          only the fsync-on-close paths below visibly test it in this
 *          excerpt — confirm against the full source.
 *
 * Returns 0 or an error code (filtered through afs_CheckCode).
 */
197 afs_write(struct vcache *avc, struct uio *auio, int aio,
198 afs_ucred_t *acred, int noLock)
200 afs_size_t totalLength;
202 afs_size_t offset, len;
212 #if defined(AFS_FBSD_ENV) || defined(AFS_DFBSD_ENV)
213 struct vnode *vp = AFSTOV(avc);
215 struct uio *tuiop = NULL;
217 struct vrequest *treq = NULL;
219 AFS_STATCNT(afs_write);
/* Fail fast if an earlier store already marked this vcache bad. */
222 return avc->vc_error;
/* Disconnected but not in read-write disconnected mode: cannot write. */
224 if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW)
227 startDate = osi_Time();
228 if ((code = afs_CreateReq(&treq, acred)))
230 /* otherwise we read */
231 totalLength = AFS_UIO_RESID(auio);
232 filePos = AFS_UIO_OFFSET(auio);
234 afs_Trace4(afs_iclSetp, CM_TRACE_WRITE, ICL_TYPE_POINTER, avc,
235 ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_OFFSET,
236 ICL_HANDLE_OFFSET(totalLength), ICL_TYPE_OFFSET,
237 ICL_HANDLE_OFFSET(avc->f.m.Length));
239 afs_MaybeWakeupTruncateDaemon();
240 ObtainWriteLock(&avc->lock, 556);
242 #if defined(AFS_SGI_ENV)
246 * afs_xwrite handles setting m.Length
247 * and handles APPEND mode.
248 * Since we are called via strategy, we need to trim the write to
249 * the actual size of the file
251 osi_Assert(filePos <= avc->f.m.Length);
252 diff = avc->f.m.Length - filePos;
253 AFS_UIO_SETRESID(auio, MIN(totalLength, diff));
254 totalLength = AFS_UIO_RESID(auio);
257 if (aio & IO_APPEND) {
258 /* append mode, start it at the right spot */
259 #if defined(AFS_SUN5_ENV)
260 auio->uio_loffset = 0;
262 filePos = avc->f.m.Length;
263 AFS_UIO_SETOFFSET(auio, avc->f.m.Length);
267 * Note that we use startDate rather than calling osi_Time() here.
268 * This is to avoid counting lock-waiting time in file date (for ranlib).
270 avc->f.m.Date = startDate;
272 #if defined(AFS_HPUX_ENV)
/* HP-UX: enforce the process RLIMIT_FSIZE; the limit is kept in
 * 512-byte blocks, hence the >> 9. */
273 #if defined(AFS_HPUX101_ENV)
274 if ((totalLength + filePos) >> 9 >
275 p_rlimit(u.u_procp)[RLIMIT_FSIZE].rlim_cur) {
277 if ((totalLength + filePos) >> 9 > u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
280 ReleaseWriteLock(&avc->lock);
281 afs_DestroyReq(treq);
285 #if defined(AFS_VM_RDWR_ENV) && !defined(AFS_FAKEOPEN_ENV)
287 * If write is implemented via VM, afs_FakeOpen() is called from the
288 * high-level write op.
290 if (avc->execsOrWriters <= 0) {
291 afs_warn("WARNING: afs_ufswr vcp=%lx, exOrW=%d\n", (unsigned long)avc,
292 avc->execsOrWriters);
/* Main copy loop: one dcache chunk per iteration until auio drained. */
298 while (totalLength > 0) {
300 * Note that we must set CDirty for every iteration of this loop.
301 * CDirty may get cleared below (such as during afs_DoPartialStore),
302 * but we're still writing to the file, so make sure CDirty is set
305 avc->f.states |= CDirty;
307 tdc = afs_ObtainDCacheForWriting(avc, filePos, totalLength, treq,
313 len = totalLength; /* write this amount by default */
314 offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
315 max = AFS_CHUNKTOSIZE(tdc->f.chunk); /* max size of this chunk */
316 if (max <= len + offset) { /*if we'd go past the end of this chunk */
317 /* it won't all fit in this chunk, so write as much
/* Build a uio covering just this chunk's share of the user data. */
325 tuiop = afsio_partialcopy(auio, trimlen);
326 AFS_UIO_SETOFFSET(tuiop, offset);
/* Dispatch to the cache backend (e.g. afs_UFSWriteUIO above) to write
 * this chunk's cache file. */
328 code = (*(afs_cacheType->vwriteUIO))(avc, &tdc->f.inode, tuiop);
/* On error, discard the chunk entirely: its contents are suspect. */
334 ZapDCE(tdc); /* bad data */
335 cfile = afs_CFileOpen(&tdc->f.inode);
337 afs_CFileTruncate(cfile, 0);
338 afs_CFileClose(cfile);
339 afs_AdjustSize(tdc, 0); /* sets f.chunkSize to 0 */
341 afs_stats_cmperf.cacheCurrDirtyChunks--;
342 afs_indexFlags[tdc->index] &= ~IFDataMod; /* so it does disappear */
343 ReleaseWriteLock(&tdc->lock);
347 /* otherwise we've written some, fixup length, etc and continue with next seg */
348 len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */
350 afsio_skip(auio, tlen); /* advance auio over data written */
351 /* compute new file size */
352 if (offset + len > tdc->f.chunkBytes) {
353 afs_int32 tlength = offset + len;
354 afs_AdjustSize(tdc, tlength);
355 if (tdc->validPos < filePos + len)
356 tdc->validPos = filePos + len;
360 #if defined(AFS_SGI_ENV)
361 /* afs_xwrite handles setting m.Length */
362 osi_Assert(filePos <= avc->f.m.Length);
/* The write extended the file: grow the cached length (and, when
 * disconnected, ensure the skipped range has dcache entries). */
364 if (filePos > avc->f.m.Length) {
365 if (AFS_IS_DISCON_RW)
366 afs_PopulateDCache(avc, filePos, treq);
367 afs_Trace4(afs_iclSetp, CM_TRACE_SETLENGTH, ICL_TYPE_STRING,
368 __FILE__, ICL_TYPE_LONG, __LINE__, ICL_TYPE_OFFSET,
369 ICL_HANDLE_OFFSET(avc->f.m.Length), ICL_TYPE_OFFSET,
370 ICL_HANDLE_OFFSET(filePos));
371 avc->f.m.Length = filePos;
372 #if defined(AFS_FBSD_ENV) || defined(AFS_DFBSD_ENV)
/* Keep the VM pager's notion of the file size in sync. */
373 vnode_pager_setsize(vp, filePos);
377 ReleaseWriteLock(&tdc->lock);
379 #if !defined(AFS_VM_RDWR_ENV)
381 * If write is implemented via VM, afs_DoPartialWrite() is called from
382 * the high-level write op.
385 code = afs_DoPartialWrite(avc, treq);
393 #if !defined(AFS_VM_RDWR_ENV) || defined(AFS_FAKEOPEN_ENV)
394 afs_FakeClose(avc, acred);
396 error = afs_CheckCode(error, treq, 7);
397 /* This set is here so we get the CheckCode. */
398 if (error && !avc->vc_error)
399 avc->vc_error = error;
401 ReleaseWriteLock(&avc->lock);
405 #ifndef AFS_VM_RDWR_ENV
407 * If write is implemented via VM, afs_fsync() is called from the high-level
/* Synchronous-write detection: each platform signals it differently. */
410 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
411 if (noLock && (aio & IO_SYNC)) {
414 /* On hpux on synchronous writes syncio will be set to IO_SYNC. If
415 * we're doing them because the file was opened with O_SYNCIO specified,
416 * we have to look in the u area. No single mechanism here!!
418 if (noLock && ((aio & IO_SYNC) | (auio->uio_fpflags & FSYNCIO))) {
420 if (noLock && (aio & FSYNC)) {
423 if (!AFS_NFSXLATORREQ(acred))
424 afs_fsync(avc, acred);
427 afs_DestroyReq(treq);
431 /* do partial write if we're low on unmodified chunks */
/*
 * afs_DoPartialWrite
 *
 * If the global count of dirty cache chunks has exceeded the configured
 * maximum, push this file's dirty segments back to the fileserver now
 * rather than waiting for close.  A no-op when under the limit or when
 * running disconnected.
 *
 * avc  - vcache being written
 * areq - request/credential state for the store
 *
 * Returns 0 when nothing needed doing; otherwise the
 * afs_StoreAllSegments() result — NOTE(review): the declaration of
 * 'sync'/'code' and the final return are not visible in this excerpt;
 * confirm in the full source.
 */
433 afs_DoPartialWrite(struct vcache *avc, struct vrequest *areq)
438 if (afs_stats_cmperf.cacheCurrDirtyChunks <=
439 afs_stats_cmperf.cacheMaxDirtyChunks
440 || AFS_IS_DISCONNECTED)
441 return 0; /* nothing to do */
442 /* otherwise, call afs_StoreDCache (later try to do this async, if possible) */
443 afs_Trace2(afs_iclSetp, CM_TRACE_PARTIALWRITE, ICL_TYPE_POINTER, avc,
444 ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));
/* Platform-specific extra VM-sync flags for the store. */
447 #if defined(AFS_SUN5_ENV)
448 sync |= AFS_VMSYNC_INVAL;
449 #elif defined(AFS_FBSD_ENV)
450 sync |= AFS_NOVMSYNC;
452 code = afs_StoreAllSegments(avc, areq, sync);
/* Controls whether afs_close may hand the final store off to a
 * background daemon (nonzero = always store in the foreground).
 * NOTE(review): the two alternative definitions below are presumably
 * selected by an #if/#else not visible in this excerpt — confirm which
 * build configuration disables background stores. */
457 static int bkg_store_disabled = 1;
459 static int bkg_store_disabled = 0;
462 /* handle any closing cleanup stuff */
/*
 * afs_close
 *
 * VFS close entry point.  Releases byte-range locks held by the closing
 * process, and — if the file was open for write/truncate — stores the
 * dirty data back to the fileserver, either synchronously or by queuing
 * a BOP_STORE request to a background daemon.  The signature varies by
 * platform (SGI passes lastclose, Solaris passes count/offset).
 *
 * Returns 0 or an error code; VNOVNODE from the store is deliberately
 * tolerated (see comment below).
 *
 * NOTE(review): many intermediate lines (error checks, #else/#endif,
 * return statements) are elided from this excerpt.
 */
464 #if defined(AFS_SGI_ENV)
465 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, lastclose_t lastclose,
467 #elif defined(AFS_SUN5_ENV)
468 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, int count, offset_t offset,
471 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred)
475 afs_int32 code_checkcode = 0;
477 struct vrequest *treq = NULL;
481 struct afs_fakestat_state fakestat;
484 AFS_STATCNT(afs_close);
485 afs_Trace2(afs_iclSetp, CM_TRACE_CLOSE, ICL_TYPE_POINTER, avc,
486 ICL_TYPE_INT32, aflags);
487 code = afs_CreateReq(&treq, acred);
/* Resolve fakestat mount points so we operate on the real target. */
490 afs_InitFakeStat(&fakestat);
491 code = afs_EvalFakeStat(&avc, &fakestat, treq);
493 afs_PutFakeStat(&fakestat);
494 afs_DestroyReq(treq);
/* Release any advisory locks this process still holds on the file. */
499 if (avc->flockCount) {
500 HandleFlock(avc, LOCK_UN, treq, 0, 1 /*onlymine */ );
503 #if defined(AFS_SGI_ENV)
505 afs_PutFakeStat(&fakestat);
507 afs_DestroyReq(treq);
510 /* unlock any locks for pid - could be wrong for child .. */
511 AFS_RWLOCK((vnode_t *) avc, VRWLOCK_WRITE);
512 get_current_flid(&flid);
513 cleanlocks((vnode_t *) avc, flid.fl_pid, flid.fl_sysid);
514 HandleFlock(avc, LOCK_UN, treq, flid.fl_pid, 1 /*onlymine */ );
515 /* afs_chkpgoob will drop and re-acquire the global lock. */
516 afs_chkpgoob(&avc->v, btoc(avc->f.m.Length));
517 #elif defined(AFS_SUN5_ENV)
519 /* The vfs layer may call this repeatedly with higher "count"; only
520 * on the last close (i.e. count = 1) we should actually proceed
522 afs_PutFakeStat(&fakestat);
524 afs_DestroyReq(treq);
528 if (avc->flockCount) { /* Release Lock */
529 HandleFlock(avc, LOCK_UN, treq, 0, 1 /*onlymine */ );
/* File was open for write or truncate: the dirty data must go back. */
532 if (aflags & (FWRITE | FTRUNC)) {
533 if (bkg_store_disabled || afs_BBusy() || (AFS_NFSXLATORREQ(acred)) || AFS_IS_DISCONNECTED) {
534 /* do it yourself if daemons are all busy */
535 ObtainWriteLock(&avc->lock, 124);
536 code = afs_StoreOnLastReference(avc, treq);
537 ReleaseWriteLock(&avc->lock);
538 #if defined(AFS_SGI_ENV)
539 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
542 #if defined(AFS_SGI_ENV)
543 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
545 /* at least one daemon is idle, so ask it to do the store.
546 * Also, note that we don't lock it any more... */
547 tb = afs_BQueue(BOP_STORE, avc, 0, 1, acred,
548 (afs_size_t) afs_cr_uid(acred), (afs_size_t) 0,
549 (void *)0, (void *)0, (void *)0);
550 /* sleep waiting for the store to start, then retrieve error code */
551 while ((tb->flags & BUVALID) == 0) {
556 code_checkcode = tb->code_checkcode;
560 /* VNOVNODE is "acceptable" error code from close, since
561 * may happen when deleting a file on another machine while
562 * it is open here. */
563 if (code == VNOVNODE)
566 /* Ensure last closer gets the error. If another thread caused
567 * DoPartialWrite and this thread does not actually store the data,
568 * it may not see the quota error.
570 ObtainWriteLock(&avc->lock, 406);
573 osi_ReleaseVM(avc, acred);
575 /* We don't know what the original raw error code was, so set
576 * 'code' to 0. But we have the afs_CheckCode-translated error
577 * code, so put that in code_checkcode. We cannot just set code
578 * to avc->vc_error, since vc_error is a checkcode-translated
579 * error code, and 'code' is supposed to be a raw error code. */
581 code_checkcode = avc->vc_error;
584 ReleaseWriteLock(&avc->lock);
586 /* some codes merit specific complaint */
588 afs_warnuser("afs: failed to store file (network problems)\n");
591 else if (code == ENOSPC || code_checkcode == ENOSPC) {
593 ("afs: failed to store file (over quota or partition full)\n");
596 else if (code == ENOSPC || code_checkcode == ENOSPC) {
597 afs_warnuser("afs: failed to store file (partition full)\n");
598 } else if (code == EDQUOT || code_checkcode == EDQUOT) {
599 afs_warnuser("afs: failed to store file (over quota)\n");
602 else if (code || code_checkcode)
603 afs_warnuser("afs: failed to store file (%d/%d)\n", code, code_checkcode);
605 /* finally, we flush any text pages lying around here */
609 #if defined(AFS_SGI_ENV)
610 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
611 osi_Assert(avc->opens > 0);
613 /* file open for read */
614 ObtainWriteLock(&avc->lock, 411);
617 osi_ReleaseVM(avc, acred);
/* Propagate a stored vc_error to the read-only closer as well. */
620 code_checkcode = avc->vc_error;
623 #if defined(AFS_FBSD_ENV)
/* FreeBSD debug aid: dump the vnode's state flags on this path. */
626 afs_int32 opens, is_free, is_gone, is_doomed, iflag;
627 struct vnode *vp = AFSTOV(avc);
629 is_doomed = vp->v_iflag & VI_DOOMED;
630 is_free = vp->v_iflag & VI_FREE;
631 is_gone = vp->v_iflag & VI_DOINGINACT;
635 afs_warn("afs_close avc %p vp %p opens %d free %d doinginact %d doomed %d iflag %d\n",
636 avc, vp, opens, is_free, is_gone, is_doomed, iflag);
640 ReleaseWriteLock(&avc->lock);
643 afs_PutFakeStat(&fakestat);
/* Prefer the already-translated code when we only have that. */
645 if (code_checkcode) {
646 code = code_checkcode;
648 code = afs_CheckCode(code, treq, 5);
650 afs_DestroyReq(treq);
/*
 * afs_fsync
 *
 * VFS fsync entry point: if the file has active writers, store all
 * dirty segments synchronously back to the fileserver (or, when
 * disconnected read-write, record a pending flush).  The signature
 * varies by platform: SGI/Solaris take a flag (and SGI a byte range).
 *
 * NOTE(review): this excerpt ends mid-function; the final return (and
 * some elided error checks) are not visible — confirm in the full
 * source.
 */
656 #if defined(AFS_SGI_ENV) || defined(AFS_SUN5_ENV)
657 afs_fsync(OSI_VC_DECL(avc), int flag, afs_ucred_t *acred
659 , off_t start, off_t stop
660 # endif /* AFS_SGI_ENV */
662 #else /* !SUN5 && !SGI */
663 afs_fsync(OSI_VC_DECL(avc), afs_ucred_t *acred)
667 struct vrequest *treq = NULL;
/* Fail fast if a previous store already marked this vcache bad. */
671 return avc->vc_error;
673 #if defined(AFS_SUN5_ENV)
674 /* back out if called from NFS server */
675 if (curthread->t_flag & T_DONTPEND)
679 AFS_STATCNT(afs_fsync);
680 afs_Trace1(afs_iclSetp, CM_TRACE_FSYNC, ICL_TYPE_POINTER, avc);
681 if ((code = afs_CreateReq(&treq, acred)))
684 #if defined(AFS_SGI_ENV)
685 AFS_RWLOCK((vnode_t *) avc, VRWLOCK_WRITE);
686 if (flag & FSYNC_INVAL)
687 osi_VM_FSyncInval(avc);
688 #endif /* AFS_SGI_ENV */
690 ObtainSharedLock(&avc->lock, 18);
/* Only bother when someone is actually writing/executing the file. */
692 if (avc->execsOrWriters > 0) {
693 if (!AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
694 /* Your average flush. */
696 /* put the file back */
697 UpgradeSToWLock(&avc->lock, 41);
698 code = afs_StoreAllSegments(avc, treq, AFS_SYNC);
699 ConvertWToSLock(&avc->lock);
/* Disconnected read-write: queue the flush for reconnection time. */
701 UpgradeSToWLock(&avc->lock, 711);
702 afs_DisconAddDirty(avc, VDisconWriteFlush, 1);
703 ConvertWToSLock(&avc->lock);
704 } /* if not disconnected */
705 } /* if (avc->execsOrWriters > 0) */
707 #if defined(AFS_SGI_ENV)
708 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
709 if (code == VNOVNODE) {
710 /* syncing an unlinked file! - non-informative to pass an errno
711 * 102 (== VNOVNODE) to user
717 code = afs_CheckCode(code, treq, 33);
718 afs_DestroyReq(treq);
719 ReleaseSharedLock(&avc->lock);