2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 * This package is used to actively manage the expiration of callbacks,
12 * so that the rest of the cache manager doesn't need to compute
13 * whether a callback has expired or not, but can tell with one simple
14 * check, that is, whether the CStatd bit is on or off.
16 * The base of the hash table moves periodically (every 128 seconds)
17 * QueueCallback rarely touches the first 3 slots in the hash table
18 * (only when called from CheckCallbacks) since MinTimeOut in
19 * viced/callback.c is currently 7 minutes.
20 * Therefore, CheckCallbacks should be able to run concurrently with
21 * QueueCallback, given the proper locking, of course.
24 * 1. CheckCallbacks and BumpBase never run simultaneously. This is because
25 * they are only called from afs_Daemon. Therefore, base and basetime will
26 * always be consistent during CheckCallbacks.
27 * 2. cbHashT [base] rarely (if ever) gets stuff queued in it. The only way
28 * that could happen is CheckCallbacks might fencepost and move something in
29 * place, or BumpBase might push some stuff up.
30 * 3. Hash chains aren't particularly sorted.
31 * 4. The file server keeps its callback state around for 3 minutes
32 * longer than it promises the cache manager in order to account for
33 * clock skew, network delay, and other bogeymen.
35 * For now I just use one large lock, which is fine on a uniprocessor,
36 * since it's not held during any RPCs or low-priority I/O operations.
37 * To make this code MP-fast, you need no more locks than processors,
38 * but probably more than one. In measurements on MP-safe implementations,
39 * I have never seen any contention over the xcbhash lock.
41 * Incompatible operations:
42 * Enqueue and "dequeue of first vcache" in same slot
43 * dequeue and "dequeue of preceding vcache" in same slot
44 * dequeue and "dequeue of successive vcache" in same slot
45 * BumpBase pushing a list and enqueue in the new base slot
46 * Two enqueues in same slot
49 * Certain invariants exist:
50 * 1 Callback expiration times granted by a file server will never
51 * decrease for a particular vnode UNLESS a CallBack RPC is invoked
52 * by the server in the interim.
53 * 2 A vcache will always expire no sooner than the slot in which it is
54 * currently enqueued. Callback times granted by the server may
55 * increase, in which case the vcache will be updated in-place. As a
56 * result, it may expire later than the slot in which it is enqueued.
57 * Not to worry, the CheckCallbacks code will move it if necessary.
58 * This approach means that busy vnodes won't be continually moved
59 * around within the expiry queue: they are only moved when they
60 * finally advance to the lead bucket.
61 * 3 Anything which has a callback on it must be in the expiry
62 * queue. In AFS 3.3, that means everything but symlinks (which
63 * are immutable), including contents of Read-Only volumes
64 * (which have callbacks by virtue of the whole-volume callback)
66 * QueueCallback only checks that its vcache is in the list
67 * somewhere, counting on invariant #1 to guarantee that the vcache
68 * won't be in a slot later than QueueCallback would otherwise place
69 * it. Therefore, whenever we turn off the CStatd bit on the vcache, we
70 * *must* remove the vcache from the expiry queue. Otherwise, we
71 * might have missed a CallBack RPC, and a subsequent callback might be
72 * granted with a shorter expiration time.
74 #include <afsconfig.h>
75 #include "../afs/param.h"
79 #include "../afs/sysincludes.h" /*Standard vendor system headers*/
80 #include "../afs/afsincludes.h" /*AFS-based standard headers*/
81 #include "../afs/afs_cbqueue.h"
83 #include "../afs/lock.h"
84 #include "../afs/afs_stats.h"
/* File-scope state for the callback-expiry hash table.  'base' is the index
 * of the current lead bucket in cbHashT and 'basetime' is the wall-clock time
 * that bucket corresponds to; the two advance together as the hash base is
 * periodically bumped (see the base-bumping code at the end of this file).
 * All of this state is protected by the single afs_xcbhash write lock. */
86 static unsigned int base = 0;
87 static unsigned int basetime = 0;
88 static struct vcache *debugvc; /* used only for post-mortem debugging */
91 /* struct afs_lock lock; only if you want lots of locks... */
93 struct bucket cbHashT[CBHTSIZE]; /* the expiry hash table itself */
94 struct afs_lock afs_xcbhash; /* one big lock guarding the whole table */
95 extern afs_int32 afs_cacheStats;
96 extern struct volume * afs_FindVolume();
97 extern unsigned int afs_paniconwarn;
101 * Takes a write-locked vcache pointer and a callback expiration time
102 * as returned by the file server (ie, in units of 128 seconds from "now").
104 * Uses the time as an index into a hash table, and inserts the vcache
105 * structure into the overflow chain.
107 * If the vcache is already on some hash chain, leave it there.
108 * CheckCallbacks will get to it eventually. In the meantime, it
109 * might get flushed, or it might already be on the right hash chain,
110 * so why bother messing with it now?
112 * NOTE: The caller must hold a write lock on afs_xcbhash
/* afs_QueueCallback
 * Hash avc into the expiry bucket selected by atime (offset from the moving
 * base, modulo CBHTSIZE) and, if a volume pointer avp is supplied, pull the
 * volume's expiration time forward to at least avc->cbExpires.
 * If the vcache is already on some chain (callsort.next non-NULL), it is
 * left where it is — CheckCallbacks will rehash it later if needed.
 * Caller must hold a write lock on afs_xcbhash.
 * NOTE(review): this listing elides intervening lines (K&R parameter
 * declarations and some braces are not visible here). */
115 void afs_QueueCallback(avc, atime, avp)
122 if (avp && (avp->expireTime < avc->cbExpires))
123 avp->expireTime = avc->cbExpires;
124 if (!(avc->callsort.next)) {
125 atime = (atime + base) % CBHTSIZE; /* rotate slot by the moving base */
126 QAdd(&(cbHashT[atime].head), &(avc->callsort));
130 } /* afs_QueueCallback */
132 /* afs_DequeueCallback
133 * Takes a write-locked vcache pointer and removes it from the callback
134 * hash table, without knowing beforehand which slot it was in.
136 * for now, just get a lock on everything when doing the dequeue, don't
137 * worry about getting a lock on the individual slot.
139 * the only other places that do anything like dequeues are CheckCallbacks
142 * NOTE: The caller must hold a write lock on afs_xcbhash
/* afs_DequeueCallback
 * Remove avc from whichever expiry-hash chain it is on (QRemove needs no
 * slot index since the chains are doubly linked), then NULL both link
 * pointers so a later QueueCallback sees it as unqueued.  A NULL prev
 * pointer means someone already dequeued it — that race is tolerated.
 * Also invalidates the symlink hint for this vcache.
 * Caller must hold a write lock on afs_xcbhash. */
144 void afs_DequeueCallback(avc)
149 if (avc->callsort.prev) {
150 QRemove(&(avc->callsort));
151 avc->callsort.prev = avc->callsort.next = NULL;
153 else ; /* must have got dequeued in a race */
154 afs_symhint_inval(avc);
157 } /* afs_DequeueCallback */
159 /* afs_CheckCallbacks
160 * called periodically to determine which callbacks are likely to
161 * expire in the next n second interval. Preemptively marks them as
162 * expired. Rehashes items which are now in the wrong hash bucket.
163 * Preemptively renews recently-accessed items. Only removes things
164 * from the first and second bucket (as long as secs < 128), and
165 * inserts things into other, later buckets. either need to advance
166 * to the second bucket if secs spans two intervals, or else be
167 * certain to call afs_CheckCallbacks immediately after calling
168 * BumpBase (allows a little more slop but it's ok because file server
169 * keeps 3 minutes of slop time)
171 * There is a little race between CheckCallbacks and any code which
172 * updates cbExpires, always just prior to calling QueueCallback. We
173 * don't lock the vcache struct here (can't, or we'd risk deadlock),
174 * so GetVCache (for example) may update cbExpires before or after #1
175 * below. If before, CheckCallbacks moves this entry to its proper
176 * slot. If after, GetVCache blocks in the call to QueueCallbacks,
177 * this code dequeues the vcache, and then QueueCallbacks re-enqueues it.
179 * XXX to avoid the race, make QueueCallback take the "real" time
180 * and update cbExpires under the xcbhash lock.
182 * NB #1: There's a little optimization here: if I go to invalidate a
183 * RO vcache or volume, first check to see if the server is down. If
184 * it _is_, don't invalidate it, cuz we might just as well keep using
185 * it. Possibly, we could do the same thing for items in RW volumes,
186 * but that bears some thinking about.
188 * Don't really need to invalidate the hints, we could just wait to see if
189 * the dv has changed after a subsequent FetchStatus, but this is safer.
192 /* Sanity check on the callback queue. Allow for slop in the computation. */
194 extern afs_int32 afs_maxvcount;
/* Upper bound on how many chain entries CheckCallbacks may walk before
 * declaring the queue corrupt.  Two alternative definitions appear here;
 * NOTE(review): the #if/#else lines selecting between them are elided from
 * this listing — presumably platform-conditional (confirm in full source). */
195 #define CBQ_LIMIT (afs_maxvcount + 10)
197 #define CBQ_LIMIT (afs_cacheStats + afs_stats_cmperf.vcacheXAllocs + 10)
/* afs_CheckCallbacks
 * Walk the lead bucket (cbHashT[base]) and preemptively expire entries whose
 * callback will lapse within the next 'secs' seconds: clear CStatd/CMValid/
 * CUnique, purge the dnlc for directories, and drop the entry's queue links.
 * Read-only entries whose volume callback is still good (and whose servers
 * are not all down) are instead refreshed from the volume and requeued.
 * Entries found to have been renewed are rehashed into their proper slot.
 * The 'safety' counter bounds the walk at CBQ_LIMIT to catch a corrupted
 * chain; exceeding it panics (or warns, depending on afs_paniconwarn).
 * Takes the afs_xcbhash write lock for the duration.
 * NOTE(review): many lines are elided from this listing (loop increment,
 * several closing braces, local declarations); comments below describe only
 * what the visible lines establish. */
200 void afs_CheckCallbacks(secs)
204 register struct afs_q *tq;
210 ObtainWriteLock(&afs_xcbhash,85); /* pretty likely I'm going to remove something */
212 for(safety = 0, tq = cbHashT[base].head.prev;
213 (safety <= CBQ_LIMIT) && (tq != &(cbHashT[base].head));
218 if (tvc->cbExpires < now + secs) { /* race #1 here */
219 /* Get the volume, and if its callback expiration time is more than secs
220 * seconds into the future, update this vcache entry and requeue it below
222 if ((tvc->states & CRO) && (tvp=afs_FindVolume(&(tvc->fid), READ_LOCK))) {
223 if (tvp->expireTime > now + secs) {
224 tvc->cbExpires = tvp->expireTime; /* XXX race here */
228 for (i=0; i < MAXHOSTS && tvp->serverHost[i]; i++) {
229 if (!(tvp->serverHost[i]->flags & SRVR_ISDOWN)) {
230 /* What about locking xvcache or vrefcount++ or
231 * write locking tvc? */
233 tq->prev = tq->next = NULL; /* unlink from the expiry chain */
234 tvc->states &= ~(CStatd | CMValid | CUnique);
235 if ((tvc->fid.Fid.Vnode & 1) || (vType(tvc) == VDIR))
236 osi_dnlc_purgedp(tvc); /* directories: flush name cache entries */
237 tvc->quick.stamp = 0;
238 tvc->h1.dchint = NULL;/*invalidate em */
239 afs_ResetVolumeInfo(tvp);
244 afs_PutVolume(tvp, READ_LOCK); /* release ref taken by afs_FindVolume */
247 /* Do I need to worry about things like execsorwriters?
248 * What about locking xvcache or vrefcount++ or write locking tvc?
251 tq->prev = tq->next = NULL; /* non-RO case: unlink and expire */
252 tvc->states &= ~(CStatd | CMValid | CUnique);
253 if ((tvc->fid.Fid.Vnode & 1) || (vType(tvc) == VDIR))
254 osi_dnlc_purgedp(tvc);
/* Entry not (or no longer) expiring: if it hashes past the lead slot,
 * it was renewed — move it to its proper bucket so we terminate. */
258 if ((tvc->cbExpires > basetime) && CBHash(tvc->cbExpires - basetime)) {
259 /* it's been renewed on us. Have to be careful not to put it back
260 * into this slot, or we may never get out of here.
263 slot = (CBHash(tvc->cbExpires - basetime)+base)%CBHTSIZE ;
266 QRemove(&(tvc->callsort));
267 QAdd( &(cbHashT[slot].head), &(tvc->callsort) );
268 /* XXX remember to update volume expiration time */
269 /* -- not needed for correctness, though */
/* Walk ran past the sanity limit: the chain is suspect. */
274 if (safety > CBQ_LIMIT) {
275 afs_stats_cmperf.cbloops++;
277 osi_Panic ("CheckCallbacks");
279 afs_warn("AFS Internal Error (minor): please contact AFS Product Support.\n");
280 ReleaseWriteLock(&afs_xcbhash);
284 else ReleaseWriteLock(&afs_xcbhash);
287 /* XXX future optimization:
288 if this item has been recently accessed, queue up a stat for it.
292 if ((adc = tvc->quick.dc) && (adc->stamp == tvc->quick.stamp)
293 && (afs_indexTimes[adc->index] > afs_indexCounter - 20)) {
294 queue up the stat request
300 } /* afs_CheckCallback */
/* Flush-all-callbacks routine.
 * NOTE(review): the function header line is elided from this listing;
 * judging by the body this walks every vcache hash chain (afs_vhashT) and
 * drops every callback locally — confirm the exact name/signature in the
 * full source. */
303 * to be used only in dire circumstances, this drops all callbacks on
304 * the floor, without giving them back to the server. It's ok, the server can
305 * deal with it, but it is a little bit rude.
310 register struct vcache *tvc;
312 ObtainWriteLock(&afs_xcbhash,86); /* pretty likely I'm going to remove something */
314 for (i = 0; i < VCSIZE; i++) /* reset all the vnodes */
315 for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) {
317 tvc->quick.stamp = 0;
318 tvc->h1.dchint = NULL; /* invalidate hints */
319 tvc->states &= ~(CStatd); /* no longer statd: callback is gone */
320 if ((tvc->fid.Fid.Vnode & 1) || (vType(tvc) == VDIR))
321 osi_dnlc_purgedp(tvc); /* directories: flush name cache entries */
322 tvc->callsort.prev = tvc->callsort.next = NULL; /* off the expiry queue */
327 ReleaseWriteLock(&afs_xcbhash);
330 /* afs_FlushServerCBs
331 * to be used only in dire circumstances, this drops all callbacks on
332 * the floor for a specific server, without giving them back to the server.
333 * It's ok, the server can deal with it, but it is a little bit rude.
/* Walks every vcache hash chain and, for each vcache whose callback was
 * granted by srvp, invalidates hints, clears CStatd, purges the dnlc for
 * directories, and dequeues the entry from the expiry hash (per invariant:
 * turning off CStatd requires removal from the expiry queue). */
335 void afs_FlushServerCBs(srvp)
339 register struct vcache *tvc;
341 ObtainWriteLock(&afs_xcbhash,86); /* pretty likely I'm going to remove something */
343 for (i=0; i<VCSIZE; i++) { /* reset all the vnodes */
344 for (tvc=afs_vhashT[i]; tvc; tvc=tvc->hnext) {
345 if (tvc->callback == srvp) { /* only entries owned by this server */
347 tvc->quick.stamp = 0;
348 tvc->h1.dchint = NULL; /* invalidate hints */
349 tvc->states &= ~(CStatd);
350 if ((tvc->fid.Fid.Vnode & 1) || (vType(tvc) == VDIR)) {
351 osi_dnlc_purgedp(tvc);
353 afs_DequeueCallback(tvc);
358 ReleaseWriteLock(&afs_xcbhash);
362 * called to initialize static and global variables associated with
363 * the Callback expiration management mechanism.
/* afs_InitCBQueue
 * Zero the expiry hash table, initialize each bucket's queue head, record
 * the current time as basetime, and (conditionally, per the doLockInit
 * parameter — guard lines elided from this listing) initialize the
 * afs_xcbhash lock. */
365 void afs_InitCBQueue(doLockInit)
370 bzero((char *)cbHashT, CBHTSIZE*sizeof(struct bucket));
371 for (i=0;i<CBHTSIZE;i++) {
372 QInit(&(cbHashT[i].head));
373 /* Lock_Init(&(cbHashT[i].lock)); only if you want lots of locks, which
374 * don't seem too useful at present. */
377 basetime = osi_Time(); /* anchor the moving hash base to "now" */
379 Lock_Init(&afs_xcbhash);
382 /* Because there are no real-time guarantees, and especially because a
383 * thread may wait on a lock indefinitely, this routine has to be
384 * careful that it doesn't get permanently out-of-date. Important
385 * assumption: this routine is only called from afs_Daemon, so there
386 * can't be more than one instance of this running at any one time.
387 * Presumes that basetime is never 0, and is always sane.
389 * Before calling this routine, be sure that the first slot is pretty
390 * empty. This -20 is because the granularity of the checks in
391 * afs_Daemon is pretty large, so I'd rather err on the side of safety
392 * sometimes. The fact that I only bump basetime by CBHTSLOTLEN-1
393 * instead of the whole CBHTSLOTLEN is also for "safety".
394 * Conceptually, it makes this clock run just a little faster than the
395 * clock governing which slot a callback gets hashed into. Both of these
396 * things make CheckCallbacks work a little harder than it would have to
397 * if I wanted to cut things finer.
398 * Everything from the old first slot is carried over into the new first
399 * slot. Thus, if there were some things that ought to have been invalidated,
400 * but weren't (say, if the server was down), they will be examined at every
401 * opportunity thereafter.
/* Base-bumping routine (header line elided from this listing; presumably
 * the BumpBase routine referenced by the comments above — confirm name in
 * the full source).  Under the afs_xcbhash write lock, advances basetime
 * by (CBHTSLOTLEN-1) and base by one slot for every elapsed slot interval
 * (with 20 seconds of slop, per the preceding comment), concatenating the
 * old lead bucket's contents onto the new lead bucket so nothing is lost. */
407 struct vcache *tvca, *tvcb;
411 ObtainWriteLock(&afs_xcbhash,87);
414 while ( basetime + (CBHTSLOTLEN-20) <= now ) {
416 basetime += CBHTSLOTLEN-1; /* deliberately one short of a full slot */
417 base = (base+1) % CBHTSIZE;
419 if (!QEmpty(&(cbHashT[oldbase].head))) {
420 QCat(&(cbHashT[oldbase].head), &(cbHashT[base].head)); /* carry over */
423 ReleaseWriteLock(&afs_xcbhash);