2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* Copyright (C) 1994 Cazamar Systems, Inc. */
12 #include <afs/param.h>
/* Locking hierarchy for these critical sections:
 *
 * 1. lock osi_sleepFDCS
 * 2. lock osi_critSec[i]
 * 3. lock osi_sleepInfoAllocCS
 */
28 /* file descriptor for iterating over sleeping threads */
29 osi_fdOps_t osi_sleepFDOps = {
36 * Thread-local storage for sleep Info structures
/* critical section serializing contents of all sleep FDs, so that
 * concurrent GetInfo calls don't damage each other if applied
 * to the same FD.
 */
44 CRITICAL_SECTION osi_sleepFDCS;
/* critical regions used for SleepSched to guarantee atomicity.
 * protects all sleep info structures while they're in the
 * sleep info hash table.
 */
50 static CRITICAL_SECTION osi_critSec[OSI_SLEEPHASHSIZE];
/* the sleep info structure hash table.
 * all active entries are in here.  In addition, deleted entries
 * may be present, referenced by file descriptors from remote
 * debuggers; these will have OSI_SLEEPINFO_DELETED set and
 * are freed once their reference count drops to zero.
 */
58 static osi_sleepInfo_t *osi_sleepers[OSI_SLEEPHASHSIZE];
60 /* allocate space for lock operations */
61 osi_lockOps_t *osi_lockOps[OSI_NLOCKTYPES];
63 /* some global statistics */
64 long osi_totalSleeps = 0;
/* critical section protecting sleepInfoFreeListp and all sleep entries in
 * the free list.
 */
69 CRITICAL_SECTION osi_sleepInfoAllocCS;
71 /* sleep entry free list */
72 osi_sleepInfo_t *osi_sleepInfoFreeListp;
75 unsigned long osi_bootTime;
77 /* count of free entries in free list, protected by osi_sleepInfoAllocCS */
78 long osi_sleepInfoCount=0;
80 /* count of # of allocates of sleep info structures */
81 long osi_sleepInfoAllocs = 0;
/* the sleep bucket lock must be held.
 * Releases the reference count and frees the structure if the item has
 * been marked deleted and no references remain.
 */
/* Drop one reference on sleep-info entry ap: ap->refCount is pre-decremented
 * and, only when it reaches 0 while OSI_SLEEPINFO_DELETED is set in
 * ap->states, the entry is handed to osi_FreeSleepInfo().
 */
87 void osi_ReleaseSleepInfo(osi_sleepInfo_t *ap)
89 if (--ap->refCount == 0 && (ap->states & OSI_SLEEPINFO_DELETED))
90 osi_FreeSleepInfo(ap);
/* must be called with sleep bucket locked.
 * Frees the structure if it has a 0 reference count (and removes it
 * from the hash bucket).  Otherwise, we simply mark the item
 * for deleting when the ref count hits zero.
 */
/* Dispose of sleep-info entry ap.
 * While ap->refCount is still positive the entry is only flagged
 * OSI_SLEEPINFO_DELETED, and the calling thread's osi_SleepSlot TLS value is
 * cleared so the entry is not picked up again.  The remaining visible code
 * unlinks the entry from osi_sleepers[osi_SLEEPHASH(ap->value)] when
 * OSI_SLEEPINFO_INHASH is set and, when the entry is flagged DELETED, pushes
 * it onto osi_sleepInfoFreeListp (bumping osi_sleepInfoCount) under
 * osi_sleepInfoAllocCS.
 * NOTE(review): some statements of this function are missing from this
 * copy of the source; confirm the control flow against the full file.
 */
98 void osi_FreeSleepInfo(osi_sleepInfo_t *ap)
102 if (ap->refCount > 0) {
103 TlsSetValue(osi_SleepSlot, NULL); /* don't reuse me */
104 ap->states |= OSI_SLEEPINFO_DELETED;
108 /* remove from hash if still there */
109 if (ap->states & OSI_SLEEPINFO_INHASH) {
110 ap->states &= ~OSI_SLEEPINFO_INHASH;
111 idx = osi_SLEEPHASH(ap->value);
112 osi_QRemove((osi_queue_t **) &osi_sleepers[idx], &ap->q);
115 if (ap->states & OSI_SLEEPINFO_DELETED) {
116 EnterCriticalSection(&osi_sleepInfoAllocCS);
117 ap->q.nextp = (osi_queue_t *) osi_sleepInfoFreeListp;
118 osi_sleepInfoFreeListp = ap;
119 osi_sleepInfoCount++;
120 LeaveCriticalSection(&osi_sleepInfoAllocCS);
124 /* allocate a new sleep structure from the free list */
/* Obtain a sleep-info entry while holding osi_sleepInfoAllocCS.
 * When osi_sleepInfoFreeListp is empty a fresh entry is malloc'ed, given a
 * semaphore (initial count 0, maximum 65536) and osi_sleepInfoAllocs is
 * incremented; the other visible path pops the free-list head and decrements
 * osi_sleepInfoCount.  The entry is then stamped with the current thread id
 * and its states field is reset to 0.
 * NOTE(review): no failure check on malloc() or CreateSemaphore() is visible
 * here before the results are used -- confirm against the full source.
 */
125 osi_sleepInfo_t *osi_AllocSleepInfo()
129 EnterCriticalSection(&osi_sleepInfoAllocCS);
130 if (!(ap = osi_sleepInfoFreeListp)) {
131 ap = (osi_sleepInfo_t *) malloc(sizeof(osi_sleepInfo_t));
132 ap->sema = CreateSemaphore(NULL, 0, 65536, (char *) 0);
133 osi_sleepInfoAllocs++;
136 osi_sleepInfoFreeListp = (osi_sleepInfo_t *) ap->q.nextp;
137 osi_sleepInfoCount--;
139 ap->tid = GetCurrentThreadId();
140 ap->states = 0; /* not signalled yet */
141 LeaveCriticalSection(&osi_sleepInfoAllocCS);
/* Begin a one-time-initialization check on argp.
 * The flag argp->atomic is acquired by looping on InterlockedExchange(.., 1)
 * until the previous value comes back as 0; argp->done is then tested.  On
 * the already-initialized path the flag is cleared again with
 * InterlockedExchange(.., 0) before returning.
 * NOTE(review): the body of the "done == 0" branch and the return values are
 * not present in this copy of the source; the caller at the init routine
 * treats a zero result as "already initialized".
 */
146 int osi_Once(osi_once_t *argp)
150 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
154 if (argp->done == 0) {
159 /* otherwise we've already been initialized, so clear lock and return */
160 InterlockedExchange(&argp->atomic, 0);
/* Clear argp->atomic (the flag that osi_Once() sets to 1) with an
 * interlocked exchange.
 */
164 void osi_EndOnce(osi_once_t *argp)
166 InterlockedExchange(&argp->atomic, 0);
/* Report whether argp has been marked done, without changing it.
 * The argp->atomic flag is taken with the same InterlockedExchange loop used
 * by osi_Once(), argp->done is copied to a local, and the flag is cleared.
 * Returns 0 when done was nonzero and 1 when it was zero.
 */
169 int osi_TestOnce(osi_once_t *argp)
174 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
178 localDone = argp->done;
181 InterlockedExchange(&argp->atomic, 0);
183 return (localDone? 0 : 1);
/* Initialize the package, should be called while single-threaded.
 * Can be safely called multiple times.
 * Must be called before any osi package calls.
 */
/* Body of the package initialization routine (its signature line is not
 * present in this copy of the source).
 * Guarded by osi_Once(&once): a zero result returns immediately.  Otherwise:
 *  - osi_bootTime is derived from the current system time converted to a
 *    FILETIME, rebased by subtracting 28000000 from the high dword, and
 *    divided by 10,000,000 (only the low part of the quotient is kept);
 *  - a TLS slot is allocated into osi_SleepSlot;
 *  - every osi_critSec[i] is initialized and every osi_sleepers[i] is set
 *    to NULL, then osi_sleepInfoAllocCS and osi_sleepFDCS are initialized;
 *  - the "sleep" FD type is registered with osi_sleepFDOps and format info
 *    is added for integer regions 0, 1 and 2.
 * NOTE(review): the result of TlsAlloc() is not checked in the visible code.
 */
193 static osi_once_t once;
194 unsigned long remainder; /* for division output */
198 osi_hyper_t bootTime;
200 /* check to see if already initialized; if so, claim success */
201 if (!osi_Once(&once)) return;
203 /* setup boot time values */
204 GetSystemTime(&sysTime);
205 SystemTimeToFileTime(&sysTime, &fileTime);
207 /* change the base of the time so it won't be negative for a long time */
208 fileTime.dwHighDateTime -= 28000000;
210 bootTime.HighPart = fileTime.dwHighDateTime;
211 bootTime.LowPart = fileTime.dwLowDateTime;
212 /* now, bootTime is in 100 nanosecond units, and we'd really rather
213 * have it in 1 second units, units 10,000,000 times bigger.
216 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
217 osi_bootTime = bootTime.LowPart;
219 /* initialize thread-local storage for sleep Info structures */
220 osi_SleepSlot = TlsAlloc();
225 /* initialize critical regions and semaphores */
226 for(i=0;i<OSI_SLEEPHASHSIZE; i++) {
227 InitializeCriticalSection(&osi_critSec[i]);
228 osi_sleepers[i] = (osi_sleepInfo_t *) NULL;
232 InitializeCriticalSection(&osi_sleepInfoAllocCS);
234 /* initialize cookie system */
235 InitializeCriticalSection(&osi_sleepFDCS);
237 /* register the FD type */
238 typep = osi_RegisterFDType("sleep", &osi_sleepFDOps, NULL);
240 /* add formatting info */
241 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 0,
242 "Sleep address", OSI_DBRPC_HEX);
243 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 1,
245 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 2,
246 "States", OSI_DBRPC_HEX);
/* Block the calling thread on turnstile turnp.
 * The thread's sleep-info entry is fetched from the osi_SleepSlot TLS slot
 * (osi_AllocSleepInfo() plus TlsSetValue() appear on the allocation path).
 * waitFor and patchp are recorded in the entry, the entry is added to the
 * queue headed by turnp->firstp (turnp->lastp is set when it was NULL), and
 * releasep is left.  The thread then waits on sp->sema with an INFINITE
 * timeout until WaitForSingleObject() returns WAIT_OBJECT_0, asserts that
 * OSI_SLEEPINFO_SIGNALLED is set, frees the entry with osi_FreeSleepInfo()
 * and re-enters releasep before returning.
 * NOTE(review): patchp is stored via a cast to long, which would truncate
 * the pointer on platforms where long is narrower than a pointer.
 */
258 void osi_TWait(osi_turnstile_t *turnp, int waitFor, void *patchp, CRITICAL_SECTION *releasep)
263 sp = TlsGetValue(osi_SleepSlot);
265 sp = osi_AllocSleepInfo();
266 TlsSetValue(osi_SleepSlot, sp);
271 sp->waitFor = waitFor;
272 sp->value = (long) patchp;
273 osi_QAdd((osi_queue_t **) &turnp->firstp, &sp->q);
274 if (!turnp->lastp) turnp->lastp = sp;
275 LeaveCriticalSection(releasep);
277 /* now wait for the signal */
280 code = WaitForSingleObject(sp->sema,
281 /* timeout */ INFINITE);
283 /* if the reason for the wakeup was that we were signalled,
284 * break out, otherwise try again, since the semaphore count is
285 * decreased only when we get WAIT_OBJECT_0 back.
287 if (code == WAIT_OBJECT_0) break;
288 } /* while we're waiting */
290 /* we're the only one who should be looking at or changing this
291 * structure after it gets signalled. Sema sp->sema isn't signalled
292 * any longer after we're back from WaitForSingleObject, so we can
293 * free this element directly.
295 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
297 osi_FreeSleepInfo(sp);
299 /* reobtain, since caller commonly needs it */
300 EnterCriticalSection(releasep);
/* must be called with a critical section held that guards the turnstile
 * structure.  We remove the sleepInfo structure from the queue so we don't
 * wake the guy again, but we don't free it because we're still using the
 * semaphore until the guy waiting wakes up.
 */
/* Wake one waiter on turnstile turnp.
 * Returns at once when turnp->lastp is NULL.  Otherwise turnp->lastp is
 * moved to the predecessor of the chosen entry, the entry is removed from
 * the turnp->firstp queue, flagged OSI_SLEEPINFO_SIGNALLED, and its
 * semaphore is released with a count of 1.
 * NOTE(review): the statement that selects sp is missing from this copy of
 * the source.
 */
308 void osi_TSignal(osi_turnstile_t *turnp)
312 if (!turnp->lastp) return;
315 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&sp->q);
316 osi_QRemove((osi_queue_t **) &turnp->firstp, &sp->q);
317 sp->states |= OSI_SLEEPINFO_SIGNALLED;
318 ReleaseSemaphore(sp->sema, 1, (long *) 0);
321 /* like TSignal, only wake *everyone* */
/* Wake every waiter on turnstile turnp.
 * Repeats while turnp->lastp is non-NULL: step lastp to the entry's
 * predecessor, remove the entry from the turnp->firstp queue, flag it
 * OSI_SLEEPINFO_SIGNALLED and release its semaphore with a count of 1.
 */
322 void osi_TBroadcast(osi_turnstile_t *turnp)
326 while(sp = turnp->lastp) {
327 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&sp->q);
328 osi_QRemove((osi_queue_t **) &turnp->firstp, &sp->q);
329 sp->states |= OSI_SLEEPINFO_SIGNALLED;
330 ReleaseSemaphore(sp->sema, 1, (long *) 0);
331 } /* while someone's still asleep */
/* special turnstile signal for mutexes and locks.  Wakes up only those who
 * will really be able to lock the lock.  The assumption is that everyone who
 * already can use the lock has already been woken (and is thus not in the
 * turnstile any longer).
 *
 * The stillHaveReaders parm is set to 1 if this is a convert from write to read,
 * indicating that there is still at least one reader, and we should only wake
 * up other readers.  We use it in a tricky manner: we just pretend we already woke
 * a reader, and that is sufficient to prevent us from waking a writer.
 *
 * The crit sec. csp is released before the threads are woken, but after they
 * are removed from the turnstile.  It helps ensure that we won't have a spurious
 * context swap back to us if the release performs a context swap for some reason.
 */
/* Selectively wake waiters on turnp after a mutex or lock release.
 * wokeReader is seeded from stillHaveReaders.  Scanning from turnp->lastp,
 * a waiter flagged OSI_SLEEPINFO_W4WRITE stops the scan when wokeReader is
 * already set.  Each selected waiter is unlinked from the turnstile, the
 * lock word addressed by tsp->value is patched (OSI_LOCKFLAG_EXCL is or'ed
 * in for a writer), and the waiter is pushed onto the private wakeupListp;
 * the scan ends once wokeReader is still clear after a wakeup was queued.
 * csp, when non-NULL, is then left, and finally each collected waiter is
 * flagged OSI_SLEEPINFO_SIGNALLED and its semaphore released -- the next
 * pointer is read before ReleaseSemaphore() because the waiter may free the
 * entry as soon as it runs.
 * NOTE(review): several statements (reader patching, wokeReader updates,
 * wakeupListp initialization) are missing from this copy of the source.
 */
348 void osi_TSignalForMLs(osi_turnstile_t *turnp, int stillHaveReaders, CRITICAL_SECTION *csp)
350 osi_sleepInfo_t *tsp; /* a temp */
351 osi_sleepInfo_t *nsp; /* a temp */
352 osi_queue_t *wakeupListp; /* list of dudes to wakeup after dropping lock */
357 wokeReader = stillHaveReaders;
359 while(tsp = turnp->lastp) {
360 /* look at each sleepInfo until we find someone we're not supposed to
363 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
364 if (wokeReader) break;
368 /* otherwise, we will wake this guy. For now, remove from this list
369 * and move to private one, so we can do the wakeup after releasing
372 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&tsp->q);
373 osi_QRemove((osi_queue_t **) &turnp->firstp, &tsp->q);
375 /* do the patching required for lock obtaining */
376 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
377 cp = (void *) tsp->value;
378 (*cp) |= OSI_LOCKFLAG_EXCL;
380 else if (tsp->waitFor & OSI_SLEEPINFO_W4READ) {
381 sp = (void *) tsp->value;
385 /* and add to our own list */
386 tsp->q.nextp = wakeupListp;
387 wakeupListp = &tsp->q;
389 /* now if we woke a writer, we're done, since it is pointless
390 * to wake more than one writer.
392 if (!wokeReader) break;
395 /* hit end, or found someone we're not supposed to wakeup */
396 if (csp) LeaveCriticalSection(csp);
398 /* finally, wakeup everyone we found. Don't free things since the sleeper
399 * will free the sleepInfo structure.
401 for(tsp = (osi_sleepInfo_t *) wakeupListp; tsp; tsp = nsp) {
402 /* pull this out first, since *tsp *could* get freed immediately
403 * after the ReleaseSemaphore, if a context swap occurs.
405 nsp = (osi_sleepInfo_t *) tsp->q.nextp;
406 tsp->states |= OSI_SLEEPINFO_SIGNALLED;
407 ReleaseSemaphore(tsp->sema, 1, (long *) 0);
/* utility function to atomically (with respect to WakeSched)
 * release an atomic counter spin lock and sleep on an
 * address (sleepValue).
 * Called with no locks held.
 */
/* Sleep on sleepValue, releasing releasep once the sleeper is queued.
 * The thread's sleep-info entry comes from the osi_SleepSlot TLS slot
 * (osi_AllocSleepInfo() plus TlsSetValue() appear on the allocation path).
 * Under the bucket lock osi_critSec[osi_SLEEPHASH(sleepValue)] the entry is
 * added to osi_sleepers[idx] and flagged OSI_SLEEPINFO_INHASH; releasep is
 * left before the bucket lock is dropped, so a waker that takes the bucket
 * lock afterwards will find the entry.  The thread then waits on sp->sema
 * with an INFINITE timeout until WAIT_OBJECT_0, re-enters the bucket lock,
 * asserts OSI_SLEEPINFO_SIGNALLED, frees the entry with osi_FreeSleepInfo()
 * and leaves the bucket lock.
 * NOTE(review): osi_totalSleeps is incremented after both locks have been
 * released in the visible code.
 */
416 void osi_SleepSpin(long sleepValue, CRITICAL_SECTION *releasep)
421 CRITICAL_SECTION *csp;
423 sp = TlsGetValue(osi_SleepSlot);
425 sp = osi_AllocSleepInfo();
426 TlsSetValue(osi_SleepSlot, sp);
431 sp->value = sleepValue;
432 idx = osi_SLEEPHASH(sleepValue);
433 csp = &osi_critSec[idx];
434 EnterCriticalSection(csp);
435 osi_QAdd((osi_queue_t **) &osi_sleepers[idx], &sp->q);
436 sp->states |= OSI_SLEEPINFO_INHASH;
437 LeaveCriticalSection(releasep);
438 LeaveCriticalSection(csp);
439 osi_totalSleeps++; /* stats */
442 code = WaitForSingleObject(sp->sema,
443 /* timeout */ INFINITE);
445 /* if the reason for the wakeup was that we were signalled,
446 * break out, otherwise try again, since the semaphore count is
447 * decreased only when we get WAIT_OBJECT_0 back.
449 if (code == WAIT_OBJECT_0) break;
453 EnterCriticalSection(csp);
455 /* must be signalled */
456 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
458 /* free the sleep structure, must be done under bucket lock
459 * so that we can check reference count and serialize with
460 * those who change it.
462 osi_FreeSleepInfo(sp);
464 LeaveCriticalSection(csp);
467 /* utility function to wakeup someone sleeping in SleepSched */
/* Wake the threads sleeping on sleepValue.
 * Holding osi_critSec[osi_SLEEPHASH(sleepValue)], walks the bucket's
 * osi_sleepers[] chain; every entry whose value equals sleepValue and that
 * has neither OSI_SLEEPINFO_DELETED nor OSI_SLEEPINFO_SIGNALLED set gets its
 * semaphore released with a count of 1 and is flagged SIGNALLED, so an
 * entry is signalled at most once.
 */
468 void osi_WakeupSpin(long sleepValue)
471 register CRITICAL_SECTION *csp;
472 register osi_sleepInfo_t *tsp;
474 idx = osi_SLEEPHASH(sleepValue);
475 csp = &osi_critSec[idx];
476 EnterCriticalSection(csp);
477 for(tsp=osi_sleepers[idx]; tsp; tsp=(osi_sleepInfo_t *) osi_QNext(&tsp->q)) {
478 if ((!(tsp->states & (OSI_SLEEPINFO_DELETED|OSI_SLEEPINFO_SIGNALLED)))
479 && tsp->value == sleepValue) {
480 ReleaseSemaphore(tsp->sema, 1, (long *) 0);
481 tsp->states |= OSI_SLEEPINFO_SIGNALLED;
484 LeaveCriticalSection(csp);
/* Sleep on sleepVal.  Enters osi_baseAtomicCS[0] and passes it to
 * osi_SleepSpin() as the critical section to release; osi_SleepSpin()
 * leaves it once the sleeper has been queued.
 */
487 void osi_Sleep(long sleepVal)
489 CRITICAL_SECTION *csp;
491 /* may as well save some code by using SleepSched again */
492 csp = &osi_baseAtomicCS[0];
493 EnterCriticalSection(csp);
494 osi_SleepSpin(sleepVal, csp);
/* Wake sleepers on sleepVal by forwarding to osi_WakeupSpin(). */
497 void osi_Wakeup(long sleepVal)
499 /* how do we do osi_Wakeup on a per-lock package type? */
501 osi_WakeupSpin(sleepVal);
/* Create a sleep FD: allocates an osi_sleepFD_t with malloc() and zero-fills
 * it.
 * NOTE(review): no NULL check on the malloc() result is visible before the
 * memset(); the rest of the function (including how *outpp and the return
 * code are set) is missing from this copy of the source.
 */
504 long osi_SleepFDCreate(osi_fdType_t *fdTypep, osi_fd_t **outpp)
508 cp = (osi_sleepFD_t *)malloc(sizeof(*cp));
509 memset((void *) cp, 0, sizeof(*cp));
518 long osi_SleepFDClose(osi_fd_t *cp)
/* called with osi_sleepFDCS locked; returns with same, so that
 * we know that the sleep info pointed to by the cookie won't change
 * until the caller releases the lock.
 */
/* Step the iterator in cp to another sleep-info entry.
 * Buckets are visited while idx < OSI_SLEEPHASHSIZE, starting at 0, with
 * osi_critSec[idx] held while a bucket's entries are examined (oidx keeps
 * the index that was actually locked so the same lock is released).  When
 * cp->sip is NULL the scan starts from the bucket head osi_sleepers[idx];
 * the entry being left is dropped with osi_ReleaseSleepInfo() only after
 * its q.nextp has been read, and a newly selected entry has its refCount
 * incremented.
 * NOTE(review): several statements (how idx advances, where cp->sip is
 * updated, loop exit) are missing from this copy of the source.
 */
528 void osi_AdvanceSleepFD(osi_sleepFD_t *cp)
530 int idx; /* index we're dealing with */
531 int oidx; /* index we locked */
532 osi_sleepInfo_t *sip;
533 osi_sleepInfo_t *nsip;
535 idx = 0; /* so we go around once safely */
537 while(idx < OSI_SLEEPHASHSIZE) {
538 /* cp->sip should be held */
540 EnterCriticalSection(&osi_critSec[idx]);
541 oidx = idx; /* remember original index; that's the one we locked */
543 /* if there's a sleep info structure in the FD, it should be held; it
544 * is the one we just processed, so we want to move on to the next.
545 * If not, then we want to process the chain in the bucket idx points
548 if ((sip = cp->sip) == NULL) {
549 sip = osi_sleepers[idx];
551 else sip->refCount++;
554 /* it is safe to release the current sleep info guy now
555 * since we hold the bucket lock. Pull next guy out first,
556 * since if sip is deleted, Release will move him into
559 nsip = (osi_sleepInfo_t *) sip->q.nextp;
560 osi_ReleaseSleepInfo(sip);
563 if (sip) sip->refCount++;
568 LeaveCriticalSection(&osi_critSec[oidx]);
570 /* now, if we advanced to a new sleep info structure, we're
571 * done, otherwise we continue and look at the next hash bucket
572 * until we're out of them.
/* Return information about the next sleeper for a sleep FD.
 * Entirely serialized by osi_sleepFDCS.  The FD is first advanced with
 * osi_AdvanceSleepFD(); if it then references an entry, that entry's value,
 * tid and states are copied to parmsp->idata[0], [1] and [2] (matching the
 * three integer regions registered for the "sleep" FD type).  When no entry
 * is left, code is set to OSI_DBRPC_EOF.
 * NOTE(review): the success-path return code and other parmsp fields are
 * set on lines missing from this copy of the source.
 */
579 long osi_SleepFDGetInfo(osi_fd_t *ifdp, osi_remGetInfoParms_t *parmsp)
581 osi_sleepFD_t *fdp = (osi_sleepFD_t *) ifdp;
582 osi_sleepInfo_t *sip;
585 /* now, grab a mutex serializing all iterations over FDs, so that
586 * if the RPC screws up and sends us two calls on the same FD, we don't
587 * crash and burn advancing the same FD concurrently. Probably paranoia,
588 * but you generally shouldn't trust stuff coming over the network.
590 EnterCriticalSection(&osi_sleepFDCS);
592 /* this next call advances the FD to the next guy, and simultaneously validates
593 * that the info from the network is valid. If it isn't, we do our best to
594 * resynchronize our position, but we might return some info multiple times.
596 osi_AdvanceSleepFD(fdp);
598 /* now copy out info */
599 if (sip = fdp->sip) { /* one '=' */
600 parmsp->idata[0] = sip->value;
601 parmsp->idata[1] = sip->tid;
602 parmsp->idata[2] = sip->states;
607 else code = OSI_DBRPC_EOF;
609 LeaveCriticalSection(&osi_sleepFDCS);
614 /* finally, DLL-specific code for NT */
615 BOOL APIENTRY DLLMain(HANDLE inst, DWORD why, char *reserved)
620 /* some misc functions for setting hash table sizes */
/* Return 1 iff x is prime, 0 otherwise.
 *
 * 0 and 1 are reported as not prime.  Trial division is done by odd
 * candidates only, up to and including the integer square root of x.
 */
int osi_IsPrime(unsigned long x)
{
    unsigned long c;

    /* 0 and 1 are not prime; without this check 1 would fall through the
     * loop below and be reported as prime.
     */
    if (x < 2) return 0;

    /* even numbers aren't prime, except for 2 itself */
    if ((x & 1) == 0) return (x == 2);

    /* only have to test divisors up to the square root of x.  The bound is
     * written as c <= x / c rather than c*c <= x so that it cannot wrap
     * around for values of x near ULONG_MAX.
     */
    for (c = 3; c <= x / c; c += 2) {
        /* see if x is divisible by c */
        if ((x % c) == 0) return 0;	/* yup, it ain't prime */
    }

    /* no divisor found */
    return 1;
}
644 /* return first prime number less than or equal to x */
/* Search downward from x for a prime: candidates c = x, x-1, ... are tested
 * with osi_IsPrime() while c > 1, and the first one that tests prime is
 * returned.
 * NOTE(review): the value returned when no candidate qualifies (x <= 1) is
 * on a line missing from this copy of the source.
 */
645 unsigned long osi_PrimeLessThan(unsigned long x) {
648 for(c = x; c > 1; c--) {
649 if (osi_IsPrime(c)) return c;
656 /* return the # of seconds since some fixed date */
657 unsigned long osi_GetBootTime(void)
662 static int (*notifFunc)(char *, char *, long) = NULL;
/* Install the panic notification callback: stores anotifFunc in the
 * file-scope notifFunc pointer, which osi_panic() later calls with
 * (message, file, line).
 */
664 void osi_InitPanic(void *anotifFunc)
666 notifFunc = anotifFunc;
/* Report a panic: logs the location with osi_LogPanic(filep, line) and then
 * calls the callback installed by osi_InitPanic() with (msgp, filep, line).
 * NOTE(review): notifFunc starts out NULL; a line is missing just before
 * the call in this copy of the source -- confirm the call is NULL-guarded.
 */
669 void osi_panic(char *msgp, char *filep, long line)
671 osi_LogPanic(filep, line);
674 (*notifFunc)(msgp, filep, line);
677 /* get time in seconds since some relatively recent time */
/* Return the current time in seconds on the package's rebased clock.
 * The system time is converted to a FILETIME, 28000000 is subtracted from
 * its high dword, and the resulting 64-bit count is divided by 10,000,000;
 * only the low part of the quotient is returned.  This is the same
 * conversion the initialization code uses to compute osi_bootTime.
 */
678 unsigned long osi_Time(void)
682 unsigned long remainder;
683 LARGE_INTEGER bootTime;
685 /* setup boot time values */
686 GetSystemTime(&sysTime);
687 SystemTimeToFileTime(&sysTime, &fileTime);
689 /* change the base of the time so it won't be negative for a long time */
690 fileTime.dwHighDateTime -= 28000000;
692 bootTime.HighPart = fileTime.dwHighDateTime;
693 bootTime.LowPart = fileTime.dwLowDateTime;
694 /* now, bootTime is in 100 nanosecond units, and we'd really rather
695 * have it in 1 second units, units 10,000,000 times bigger.
698 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
699 return bootTime.LowPart;
702 /* get time in seconds since some relatively recent time */
/* Store the current time on the package's rebased clock into timesp, which
 * must have room for two longs: timesp[0] receives seconds and timesp[1]
 * the microseconds within that second.
 * The rebased FILETIME count is first divided by 10, then by 1,000,000;
 * the low part of the second quotient is the seconds value and the
 * remainder of that second division is the microseconds value.
 */
703 void osi_GetTime(long *timesp)
707 unsigned long remainder;
708 LARGE_INTEGER bootTime;
710 /* setup boot time values */
711 GetSystemTime(&sysTime);
712 SystemTimeToFileTime(&sysTime, &fileTime);
714 /* change the base of the time so it won't be negative for a long time */
715 fileTime.dwHighDateTime -= 28000000;
717 bootTime.HighPart = fileTime.dwHighDateTime;
718 bootTime.LowPart = fileTime.dwLowDateTime;
719 /* now, bootTime is in 100 nanosecond units, and we'd really rather
720 * have it in 1 microsecond units, units 10 times bigger.
723 bootTime = ExtendedLargeIntegerDivide(bootTime, 10, &remainder);
724 bootTime = ExtendedLargeIntegerDivide(bootTime, 1000000, &remainder);
725 timesp[0] = bootTime.LowPart; /* seconds */
726 timesp[1] = remainder; /* microseconds */