2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
14 * afs_StoreOnLastReference
19 #include <afsconfig.h>
20 #include "afs/param.h"
23 #include "afs/sysincludes.h" /* Standard vendor system headers */
24 #include "afsincludes.h" /* Afs-based standard headers */
25 #include "afs/afs_stats.h" /* statistics */
26 #include "afs/afs_cbqueue.h"
27 #include "afs/nfsclient.h"
28 #include "afs/afs_osidnlc.h"
31 extern unsigned char *afs_indexFlags;
/* Called by all write-on-close routines: regular afs_close,
 * store via background daemon and store via the
 * afs_FlushActiveVCaches routine (when CCORE is on).
 * avc->lock must be write-locked.
 *
 * NOTE(review): several original lines (return type, local declarations,
 * closing braces and #else/#endif branches) are elided from this view;
 * only the visible statements are annotated.
 */
afs_StoreOnLastReference(struct vcache *avc,
			 struct vrequest *treq)

    AFS_STATCNT(afs_StoreOnLastReference);

    /* if CCore flag is set, we clear it and do the extra decrement
     * ourselves now. If we're called by the CCore clearer, the CCore
     * flag will already be clear, so we don't have to worry about
     * clearing it twice. */
    if (avc->f.states & CCore) {
	avc->f.states &= ~CCore;
#if defined(AFS_SGI_ENV)
	/* the fake open recorded by afs_FakeClose must still be counted */
	osi_Assert(avc->opens > 0 && avc->execsOrWriters > 0);

	/* WARNING: Our linux cm code treats the execsOrWriters counter differently
	 * depending on the flags the file was opened with. So, if you make any
	 * changes to the way the execsOrWriters flag is handled check with the
	 * [remainder of original comment elided from this view] */
	avc->execsOrWriters--;
	AFS_RELE(AFSTOV(avc));	/* VN_HOLD at set CCore(afs_FakeClose) */
	crfree((afs_ucred_t *)avc->linkData);	/* "crheld" in afs_FakeClose */

    if (!AFS_IS_DISCONNECTED) {
	/* Now, send the file back. Used to require 0 writers left, but now do
	 * it on every close for write, since two closes in a row are harmless
	 * since first will clean all chunks, and second will be noop. Note that
	 * this will also save confusion when someone keeps a file open
	 * inadvertently, since with old system, writes to the server would never
	 * [remainder of original comment elided from this view] */
	code = afs_StoreAllSegments(avc, treq, AFS_LASTSTORE /*!sync-to-disk */ );
	/*
	 * We have to do these after the above store in done: in some systems
	 * like aix they'll need to flush all the vm dirty pages to the disk via
	 * the strategy routine. During that all procedure (done under no avc
	 * locks) opens, refcounts would be zero, since it didn't reach the
	 * afs_{rd,wr} routines which means the vcache is a perfect candidate
	 * [remainder of original comment elided from this view] */
    } else if (AFS_IS_DISCON_RW) {
	/* disconnected read-write mode: just record the dirty close */
	afs_DisconAddDirty(avc, VDisconWriteClose, 0);
    }				/* if not disconnected */

#if defined(AFS_SGI_ENV)
    osi_Assert(avc->opens > 0 && avc->execsOrWriters > 0);

    avc->execsOrWriters--;	/* balance count taken at open-for-write */
/* afs_UFSWriteUIO
 * Write the data described by tuiop into the on-disk (UFS) cache file
 * identified by *inode; this is the UFS back end invoked through the
 * afs_cacheType->vwriteUIO operation.  The #ifdef ladder selects the
 * native vnode/file write primitive for each supported platform.
 * NOTE(review): the return type, some local declarations, several
 * #else/#endif lines and the return path are elided from this view.
 */
afs_UFSWriteUIO(struct vcache *avc, afs_dcache_id_t *inode, struct uio *tuiop)

    struct osi_file *tfile;

    tfile = (struct osi_file *)osi_UFSOpen(inode);	/* open cache file */
#if defined(AFS_AIX41_ENV)
    code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, tuiop, NULL, NULL,
		     NULL, afs_osi_credp);
#elif defined(AFS_AIX32_ENV)
    code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, tuiop, NULL, NULL);
#elif defined(AFS_AIX_ENV)
    code = VNOP_RDWR(tfile->vnode, UIO_WRITE, FWRITE, (off_t) &offset,
		     tuiop, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
# ifdef AFS_SUN510_ENV
    /* Solaris 10+: vnode ops take a caller_context_t */
    VOP_RWLOCK(tfile->vnode, 1, &ct);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp, &ct);
    VOP_RWUNLOCK(tfile->vnode, 1, &ct);
    /* older Solaris variant (the # else line is elided from this view) */
    VOP_RWLOCK(tfile->vnode, 1);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
    VOP_RWUNLOCK(tfile->vnode, 1);
    /* message printed when the cache partition fills (call line elided) */
    ("\n\n\n*** Cache partition is full - decrease cachesize!!! ***\n\n\n");
#elif defined(AFS_SGI_ENV)
    avc->f.states |= CWritingUFS;	/* flag UFS write in progress */
    AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_WRITE);
    AFS_VOP_WRITE(tfile->vnode, tuiop, IO_ISLOCKED, afs_osi_credp, code);
    AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_WRITE);
    avc->f.states &= ~CWritingUFS;
#elif defined(AFS_HPUX100_ENV)
    code = VOP_RDWR(tfile->vnode, tuiop, UIO_WRITE, 0, afs_osi_credp);
#elif defined(AFS_LINUX20_ENV)
    code = osi_rdwr(tfile, tuiop, UIO_WRITE);
#elif defined(AFS_DARWIN80_ENV)
    code = VNOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
#elif defined(AFS_DARWIN_ENV)
    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
    VOP_UNLOCK(tfile->vnode, 0, current_proc());
#elif defined(AFS_FBSD80_ENV)
    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
    VOP_UNLOCK(tfile->vnode, 0);
#elif defined(AFS_FBSD_ENV)
    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
    VOP_UNLOCK(tfile->vnode, 0, curthread);
#elif defined(AFS_NBSD_ENV)
    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
#if defined(AFS_NBSD60_ENV)
    VOP_UNLOCK(tfile->vnode);	/* NetBSD 6.0+ dropped the flags argument */
    VOP_UNLOCK(tfile->vnode, 0);
#elif defined(AFS_XBSD_ENV)
    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
    code = VOP_WRITE(tfile->vnode, tuiop, 0, afs_osi_credp);
    VOP_UNLOCK(tfile->vnode, 0, curproc);

    tuio.uio_fpflags &= ~FSYNCIO;	/* don't do sync io */
    code = VOP_RDWR(tfile->vnode, tuiop, UIO_WRITE, 0, afs_osi_credp);
/* called on writes */
/*
 * afs_write
 * Cache-manager write entry point: copies the user data described by
 * auio into cache chunks (dcaches) one chunk at a time, adjusting chunk
 * sizes and the cached file length.  The data is stored back to the
 * fileserver later (close/fsync/partial-write), not here.
 *   avc    - vcache of the file being written (write-locked below)
 *   auio   - source data, starting offset, residual byte count
 *   aio    - IO flags from the caller (IO_APPEND, IO_SYNC, ...)
 *   acred  - credentials of the writer
 *   noLock - nonzero when the caller already holds the relevant locks
 * NOTE(review): many original lines (local declarations, braces,
 * #else/#endif lines and error-exit paths) are elided from this view;
 * only the visible statements are annotated.
 */
afs_write(struct vcache *avc, struct uio *auio, int aio,
	  afs_ucred_t *acred, int noLock)

    afs_size_t totalLength;	/* total bytes requested */
    afs_size_t transferLength;	/* bytes successfully transferred so far */
    afs_size_t offset, len;	/* per-chunk offset and write length */
#if defined(AFS_FBSD_ENV) || defined(AFS_DFBSD_ENV)
    struct vnode *vp = AFSTOV(avc);	/* native vnode, for pager resize */
    struct uio *tuiop = NULL;	/* per-chunk partial copy of auio */
    struct vrequest treq;

    AFS_STATCNT(afs_write);

    /* a previously recorded vcache error aborts the write (guard elided) */
	return avc->vc_error;

    /* no writes allowed while disconnected unless in disconnected-RW mode */
    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW)

    startDate = osi_Time();
    if ((code = afs_InitReq(&treq, acred)))

    /* otherwise we read */
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);

    afs_Trace4(afs_iclSetp, CM_TRACE_WRITE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(totalLength), ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(avc->f.m.Length));

	afs_MaybeWakeupTruncateDaemon();	/* cache space may be low */
	ObtainWriteLock(&avc->lock, 556);

#if defined(AFS_SGI_ENV)
    /*
     * afs_xwrite handles setting m.Length
     * and handles APPEND mode.
     * Since we are called via strategy, we need to trim the write to
     * the actual size of the file
     */
    osi_Assert(filePos <= avc->f.m.Length);
    diff = avc->f.m.Length - filePos;
    AFS_UIO_SETRESID(auio, MIN(totalLength, diff));
    totalLength = AFS_UIO_RESID(auio);

    if (aio & IO_APPEND) {
	/* append mode, start it at the right spot */
#if defined(AFS_SUN56_ENV)
	auio->uio_loffset = 0;
	filePos = avc->f.m.Length;
	AFS_UIO_SETOFFSET(auio, avc->f.m.Length);

    /*
     * Note that we use startDate rather than calling osi_Time() here.
     * This is to avoid counting lock-waiting time in file date (for ranlib).
     */
    avc->f.m.Date = startDate;

#if defined(AFS_HPUX_ENV)
#if defined(AFS_HPUX101_ENV)
    /* enforce the per-process file-size rlimit (512-byte units) */
    if ((totalLength + filePos) >> 9 >
	p_rlimit(u.u_procp)[RLIMIT_FSIZE].rlim_cur) {
    if ((totalLength + filePos) >> 9 > u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
	ReleaseWriteLock(&avc->lock);

#if defined(AFS_VM_RDWR_ENV) && !defined(AFS_FAKEOPEN_ENV)
    /*
     * If write is implemented via VM, afs_FakeOpen() is called from the
     * high-level write op.
     */
    if (avc->execsOrWriters <= 0) {
	afs_warn("WARNING: afs_ufswr vcp=%lx, exOrW=%d\n", (unsigned long)avc,
		 avc->execsOrWriters);

    avc->f.states |= CDirty;	/* file now has unstored modifications */

    /* copy chunk-by-chunk until the request is exhausted */
    while (totalLength > 0) {
	tdc = afs_ObtainDCacheForWriting(avc, filePos, totalLength, &treq,

	len = totalLength;	/* write this amount by default */
	offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	max = AFS_CHUNKTOSIZE(tdc->f.chunk);	/* max size of this chunk */
	if (max <= len + offset) {	/*if we'd go past the end of this chunk */
	    /* it won't all fit in this chunk, so write as much
	     * [remainder of original comment elided from this view] */

	tuiop = afsio_partialcopy(auio, trimlen);
	AFS_UIO_SETOFFSET(tuiop, offset);

	code = (*(afs_cacheType->vwriteUIO))(avc, &tdc->f.inode, tuiop);

	    /* cache write failed: discard chunk so stale data isn't kept */
	    ZapDCE(tdc);	/* bad data */
	    cfile = afs_CFileOpen(&tdc->f.inode);
	    afs_CFileTruncate(cfile, 0);
	    afs_CFileClose(cfile);
	    afs_AdjustSize(tdc, 0);	/* sets f.chunkSize to 0 */

	    afs_stats_cmperf.cacheCurrDirtyChunks--;
	    afs_indexFlags[tdc->index] &= ~IFDataMod;	/* so it does disappear */
	    ReleaseWriteLock(&tdc->lock);

	/* otherwise we've written some, fixup length, etc and continue with next seg */
	len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
	afsio_skip(auio, tlen);	/* advance auio over data written */

	/* compute new file size */
	if (offset + len > tdc->f.chunkBytes) {
	    afs_int32 tlength = offset + len;
	    afs_AdjustSize(tdc, tlength);
	    if (tdc->validPos < filePos + len)
		tdc->validPos = filePos + len;

	transferLength += len;

#if defined(AFS_SGI_ENV)
	/* afs_xwrite handles setting m.Length */
	osi_Assert(filePos <= avc->f.m.Length);
	if (filePos > avc->f.m.Length) {
	    if (AFS_IS_DISCON_RW)
		afs_PopulateDCache(avc, filePos, &treq);
	    afs_Trace4(afs_iclSetp, CM_TRACE_SETLENGTH, ICL_TYPE_STRING,
		       __FILE__, ICL_TYPE_LONG, __LINE__, ICL_TYPE_OFFSET,
		       ICL_HANDLE_OFFSET(avc->f.m.Length), ICL_TYPE_OFFSET,
		       ICL_HANDLE_OFFSET(filePos));
	    avc->f.m.Length = filePos;	/* writing past EOF extends file */
#if defined(AFS_FBSD_ENV) || defined(AFS_DFBSD_ENV)
	    vnode_pager_setsize(vp, filePos);	/* keep VM pager in sync */

	ReleaseWriteLock(&tdc->lock);

#if !defined(AFS_VM_RDWR_ENV)
	/*
	 * If write is implemented via VM, afs_DoPartialWrite() is called from
	 * the high-level write op.
	 */
	    code = afs_DoPartialWrite(avc, &treq);

#if !defined(AFS_VM_RDWR_ENV) || defined(AFS_FAKEOPEN_ENV)
	afs_FakeClose(avc, acred);

    error = afs_CheckCode(error, &treq, 7);
    /* This set is here so we get the CheckCode. */
    if (error && !avc->vc_error)
	avc->vc_error = error;

	ReleaseWriteLock(&avc->lock);

#ifndef AFS_VM_RDWR_ENV
    /*
     * If write is implemented via VM, afs_fsync() is called from the high-level
     * [remainder of original comment elided from this view]
     */
#if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
    if (noLock && (aio & IO_SYNC)) {

    /* On hpux on synchronous writes syncio will be set to IO_SYNC. If
     * we're doing them because the file was opened with O_SYNCIO specified,
     * we have to look in the u area. No single mechanism here!!
     */
    if (noLock && ((aio & IO_SYNC) | (auio->uio_fpflags & FSYNCIO))) {

    if (noLock && (aio & FSYNC)) {
	if (!AFS_NFSXLATORREQ(acred))
	    afs_fsync(avc, acred);
/* do partial write if we're low on unmodified chunks */
/*
 * afs_DoPartialWrite
 * Store dirty chunks back to the fileserver asynchronously once the
 * dirty-chunk count exceeds the configured maximum, to keep the cache
 * from filling with unstored data during a long write.
 * NOTE(review): return type, braces and the trailing return are elided
 * from this view.
 */
afs_DoPartialWrite(struct vcache *avc, struct vrequest *areq)

    /* below threshold, or disconnected (can't store back): nothing to do */
    if (afs_stats_cmperf.cacheCurrDirtyChunks <=
	afs_stats_cmperf.cacheMaxDirtyChunks
	|| AFS_IS_DISCONNECTED)
	return 0;		/* nothing to do */

    /* otherwise, call afs_StoreDCache (later try to do this async, if possible) */
    afs_Trace2(afs_iclSetp, CM_TRACE_PARTIALWRITE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));

#if defined(AFS_SUN5_ENV)
    /* Solaris: also sync/invalidate VM pages during the store */
    code = afs_StoreAllSegments(avc, areq, AFS_ASYNC | AFS_VMSYNC_INVAL);
    code = afs_StoreAllSegments(avc, areq, AFS_ASYNC);
450 /* handle any closing cleanup stuff */
452 #if defined(AFS_SGI65_ENV)
453 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, lastclose_t lastclose,
455 #elif defined(AFS_SGI64_ENV)
456 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, lastclose_t lastclose,
457 off_t offset, afs_ucred_t *acred, struct flid *flp)
458 #elif defined(AFS_SGI_ENV)
459 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, lastclose_t lastclose
460 off_t offset, afs_ucred_t *acred)
461 #elif defined(AFS_SUN5_ENV)
462 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, int count, offset_t offset,
465 afs_close(OSI_VC_DECL(avc), afs_int32 aflags, afs_ucred_t *acred)
470 struct vrequest treq;
474 struct afs_fakestat_state fakestat;
477 AFS_STATCNT(afs_close);
478 afs_Trace2(afs_iclSetp, CM_TRACE_CLOSE, ICL_TYPE_POINTER, avc,
479 ICL_TYPE_INT32, aflags);
480 code = afs_InitReq(&treq, acred);
483 afs_InitFakeStat(&fakestat);
484 code = afs_EvalFakeStat(&avc, &fakestat, &treq);
486 afs_PutFakeStat(&fakestat);
491 if (avc->flockCount) {
492 HandleFlock(avc, LOCK_UN, &treq, 0, 1 /*onlymine */ );
495 #if defined(AFS_SGI_ENV)
497 afs_PutFakeStat(&fakestat);
501 /* unlock any locks for pid - could be wrong for child .. */
502 AFS_RWLOCK((vnode_t *) avc, VRWLOCK_WRITE);
503 # ifdef AFS_SGI65_ENV
504 get_current_flid(&flid);
505 cleanlocks((vnode_t *) avc, flid.fl_pid, flid.fl_sysid);
506 HandleFlock(avc, LOCK_UN, &treq, flid.fl_pid, 1 /*onlymine */ );
508 # ifdef AFS_SGI64_ENV
509 cleanlocks((vnode_t *) avc, flp);
510 # else /* AFS_SGI64_ENV */
511 cleanlocks((vnode_t *) avc, u.u_procp->p_epid, u.u_procp->p_sysid);
512 # endif /* AFS_SGI64_ENV */
513 HandleFlock(avc, LOCK_UN, &treq, OSI_GET_CURRENT_PID(), 1 /*onlymine */ );
514 # endif /* AFS_SGI65_ENV */
515 /* afs_chkpgoob will drop and re-acquire the global lock. */
516 afs_chkpgoob(&avc->v, btoc(avc->f.m.Length));
517 #elif defined(AFS_SUN5_ENV)
519 /* The vfs layer may call this repeatedly with higher "count"; only
520 * on the last close (i.e. count = 1) we should actually proceed
522 afs_PutFakeStat(&fakestat);
527 if (avc->flockCount) { /* Release Lock */
528 HandleFlock(avc, LOCK_UN, &treq, 0, 1 /*onlymine */ );
531 if (aflags & (FWRITE | FTRUNC)) {
532 if (afs_BBusy() || (AFS_NFSXLATORREQ(acred)) || AFS_IS_DISCONNECTED) {
533 /* do it yourself if daemons are all busy */
534 ObtainWriteLock(&avc->lock, 124);
535 code = afs_StoreOnLastReference(avc, &treq);
536 ReleaseWriteLock(&avc->lock);
537 #if defined(AFS_SGI_ENV)
538 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
541 #if defined(AFS_SGI_ENV)
542 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
544 /* at least one daemon is idle, so ask it to do the store.
545 * Also, note that we don't lock it any more... */
546 tb = afs_BQueue(BOP_STORE, avc, 0, 1, acred,
547 (afs_size_t) afs_cr_uid(acred), (afs_size_t) 0,
548 (void *)0, (void *)0, (void *)0);
549 /* sleep waiting for the store to start, then retrieve error code */
550 while ((tb->flags & BUVALID) == 0) {
558 /* VNOVNODE is "acceptable" error code from close, since
559 * may happen when deleting a file on another machine while
560 * it is open here. We do the same for ENOENT since in afs_CheckCode we map VNOVNODE -> ENOENT */
561 if (code == VNOVNODE || code == ENOENT)
564 /* Ensure last closer gets the error. If another thread caused
565 * DoPartialWrite and this thread does not actually store the data,
566 * it may not see the quota error.
568 ObtainWriteLock(&avc->lock, 406);
571 osi_ReleaseVM(avc, acred);
573 /* printf("avc->vc_error=%d\n", avc->vc_error); */
574 code = avc->vc_error;
577 ReleaseWriteLock(&avc->lock);
579 /* some codes merit specific complaint */
581 afs_warnuser("afs: failed to store file (network problems)\n");
584 else if (code == ENOSPC) {
586 ("afs: failed to store file (over quota or partition full)\n");
589 else if (code == ENOSPC) {
590 afs_warnuser("afs: failed to store file (partition full)\n");
591 } else if (code == EDQUOT) {
592 afs_warnuser("afs: failed to store file (over quota)\n");
596 afs_warnuser("afs: failed to store file (%d)\n", code);
598 /* finally, we flush any text pages lying around here */
602 #if defined(AFS_SGI_ENV)
603 AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
604 osi_Assert(avc->opens > 0);
606 /* file open for read */
607 ObtainWriteLock(&avc->lock, 411);
610 osi_ReleaseVM(avc, acred);
612 code = avc->vc_error;
615 #if defined(AFS_FBSD80_ENV)
618 afs_int32 opens, is_free, is_gone, is_doomed, iflag;
619 struct vnode *vp = AFSTOV(avc);
621 is_doomed = vp->v_iflag & VI_DOOMED;
622 is_free = vp->v_iflag & VI_FREE;
623 is_gone = vp->v_iflag & VI_DOINGINACT;
627 afs_warn("afs_close avc %p vp %p opens %d free %d doinginact %d doomed %d iflag %d\n",
628 avc, vp, opens, is_free, is_gone, is_doomed, iflag);
632 ReleaseWriteLock(&avc->lock);
635 afs_PutFakeStat(&fakestat);
636 code = afs_CheckCode(code, &treq, 5);
/*
 * afs_fsync
 * VFS fsync entry point: if the file has active writers/executors and we
 * are connected, synchronously store all dirty segments back to the
 * fileserver; in disconnected read-write mode just mark the vcache dirty
 * for later replay.  The prototype varies per platform via the #if ladder.
 * NOTE(review): braces, #endif lines, local declarations and the trailing
 * return are elided from this view; the function continues past the last
 * visible line.
 */
#if defined(AFS_SGI_ENV) || defined(AFS_SUN53_ENV)
afs_fsync(OSI_VC_DECL(avc), int flag, afs_ucred_t *acred
# ifdef AFS_SGI65_ENV
	  , off_t start, off_t stop
# endif /* AFS_SGI65_ENV */
#else /* !SUN53 && !SGI */
afs_fsync(OSI_VC_DECL(avc), afs_ucred_t *acred)

    struct vrequest treq;

    /* fail fast on a previously recorded vcache error (guard elided) */
	return avc->vc_error;

#if defined(AFS_SUN5_ENV)
    /* back out if called from NFS server */
    if (curthread->t_flag & T_DONTPEND)

    AFS_STATCNT(afs_fsync);
    afs_Trace1(afs_iclSetp, CM_TRACE_FSYNC, ICL_TYPE_POINTER, avc);
    if ((code = afs_InitReq(&treq, acred)))

#if defined(AFS_SGI_ENV)
    AFS_RWLOCK((vnode_t *) avc, VRWLOCK_WRITE);
    if (flag & FSYNC_INVAL)
	osi_VM_FSyncInval(avc);	/* invalidate VM pages as requested */
#endif /* AFS_SGI_ENV */

    ObtainSharedLock(&avc->lock, 18);

    if (avc->execsOrWriters > 0) {
	if (!AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	    /* Your average flush. */

	    /* put the file back */
	    UpgradeSToWLock(&avc->lock, 41);
	    code = afs_StoreAllSegments(avc, &treq, AFS_SYNC);
	    ConvertWToSLock(&avc->lock);

	    /* disconnected-RW: record the flush for later replay */
	    UpgradeSToWLock(&avc->lock, 711);
	    afs_DisconAddDirty(avc, VDisconWriteFlush, 1);
	    ConvertWToSLock(&avc->lock);
	}			/* if not disconnected */
    }				/* if (avc->execsOrWriters > 0) */

#if defined(AFS_SGI_ENV)
    AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE);
    if (code == VNOVNODE) {
	/* syncing an unlinked file! - non-informative to pass an errno
	 * 102 (== VNOVNODE) to user
	 * [remainder of original comment elided from this view] */

    code = afs_CheckCode(code, &treq, 33);
    ReleaseSharedLock(&avc->lock);