2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* Copyright (C) 1994 Cazamar Systems, Inc. */
12 #include <afs/param.h>
21 #include "osi_internal.h"
23 /* Locking hierarchy for these critical sections:
25 * 1. lock osi_sleepFDCS
26 * 2. lock osi_critSec[i]
27 * 3. lock osi_sleepInfoAllocCS
30 /* file descriptor for iterating over sleeping threads */
31 osi_fdOps_t osi_sleepFDOps = {
38 * Thread-local storage for sleep Info structures
42 /* critical section serializing contents of all sleep FDs, so that
43 * concurrent GetInfo calls don't damage each other if applied
46 CRITICAL_SECTION osi_sleepFDCS;
48 /* critical regions used for SleepSched to guarantee atomicity.
49 * protects all sleep info structures while they're in the
52 static CRITICAL_SECTION osi_critSec[OSI_SLEEPHASHSIZE];
54 /* the sleep info structure hash table.
55 * all active entries are in here. In addition, deleted entries
56 * may be present, referenced by file descriptors from remote
57 * debuggers; these will have OSI_SLEEPINFO_DELETED set and
60 static osi_sleepInfo_t *osi_sleepers[OSI_SLEEPHASHSIZE];
61 static osi_sleepInfo_t *osi_sleepersEnd[OSI_SLEEPHASHSIZE];
63 /* allocate space for lock operations */
64 osi_lockOps_t *osi_lockOps[OSI_NLOCKTYPES];
66 /* some global statistics */
67 long osi_totalSleeps = 0;
69 /* critical section protecting sleepInfoFreeListp and all sleep entries in
72 CRITICAL_SECTION osi_sleepInfoAllocCS;
74 /* sleep entry free list */
75 osi_sleepInfo_t *osi_sleepInfoFreeListp;
78 unsigned long osi_bootTime;
80 /* count of free entries in free list, protected by osi_sleepInfoAllocCS */
81 long osi_sleepInfoCount=0;
83 /* count of # of allocates of sleep info structures */
84 long osi_sleepInfoAllocs = 0;
86 /* the sleep bucket lock must be held.
87 * Releases the reference count and frees the structure if the item has
/* Drop one reference on sp.
 * The reference count is decremented atomically; when that brings it to
 * zero and OSI_SLEEPINFO_DELETED is set in sp->states, the entry is passed
 * to osi_FreeSleepInfo.
 */
90 void osi_ReleaseSleepInfo(osi_sleepInfo_t *sp)
92 if (InterlockedDecrement(&sp->refCount) == 0 && (sp->states & OSI_SLEEPINFO_DELETED))
93 osi_FreeSleepInfo(sp);
96 /* must be called with sleep bucket locked.
97 * Frees the structure if it has a 0 reference count (and removes it
98 * from the hash bucket). Otherwise, we simply mark the item
99 * for deleting when the ref count hits zero.
/* Retire the sleep info entry sp.
 *
 * While sp->refCount is still positive, the calling thread's osi_SleepSlot
 * TLS value is cleared (so the entry is not reused) and the entry is only
 * flagged OSI_SLEEPINFO_DELETED.
 * NOTE(review): presumably the function returns at that point -- confirm.
 *
 * Otherwise an entry flagged OSI_SLEEPINFO_INHASH is unlinked from the
 * bucket selected by osi_SLEEPHASH(sp->value), and an entry flagged
 * OSI_SLEEPINFO_DELETED is pushed onto osi_sleepInfoFreeListp with the
 * DELETED flag cleared again.
 */
101 void osi_FreeSleepInfo(osi_sleepInfo_t *sp)
105 if (sp->refCount > 0) {
106 TlsSetValue(osi_SleepSlot, NULL); /* don't reuse me */
107 _InterlockedOr(&sp->states, OSI_SLEEPINFO_DELETED);
111 /* remove from hash if still there */
112 if (sp->states & OSI_SLEEPINFO_INHASH) {
113 idx = osi_SLEEPHASH(sp->value);
114 osi_QRemoveHT((osi_queue_t **) &osi_sleepers[idx], (osi_queue_t **) &osi_sleepersEnd[idx], &sp->q);
115 _InterlockedAnd(&sp->states, ~OSI_SLEEPINFO_INHASH);
118 if (sp->states & OSI_SLEEPINFO_DELETED) {
/* push onto the free list; the push and the count update both happen inside osi_sleepInfoAllocCS */
119 EnterCriticalSection(&osi_sleepInfoAllocCS);
120 sp->q.nextp = (osi_queue_t *) osi_sleepInfoFreeListp;
121 osi_sleepInfoFreeListp = sp;
122 _InterlockedAnd(&sp->states, ~OSI_SLEEPINFO_DELETED);
123 InterlockedIncrement(&osi_sleepInfoCount);
124 LeaveCriticalSection(&osi_sleepInfoAllocCS);
128 /* allocate a new sleep structure from the free list */
/* Obtain a sleep info entry for the calling thread.
 *
 * Runs under osi_sleepInfoAllocCS.  When the free list is empty, a new
 * entry is malloc'ed, zero-filled, given a semaphore created with initial
 * count 0 and maximum count 65536, and osi_sleepInfoAllocs is incremented.
 * The free-list pop below (presumably the else branch -- confirm) advances
 * osi_sleepInfoFreeListp and decrements osi_sleepInfoCount.
 * In both cases sp->tid is set to the current thread id.
 *
 * NOTE(review): neither the malloc result nor the CreateSemaphore handle is
 * checked in the visible lines; memset would dereference NULL on allocation
 * failure.
 */
129 osi_sleepInfo_t *osi_AllocSleepInfo()
133 EnterCriticalSection(&osi_sleepInfoAllocCS);
134 if (!(sp = osi_sleepInfoFreeListp)) {
135 sp = (osi_sleepInfo_t *) malloc(sizeof(osi_sleepInfo_t));
136 memset(sp, 0, sizeof(*sp));
137 sp->sema = CreateSemaphore(NULL, 0, 65536, NULL);
138 InterlockedIncrement(&osi_sleepInfoAllocs);
/* take the head entry off the free list and adjust the free count */
141 osi_sleepInfoFreeListp = (osi_sleepInfo_t *) sp->q.nextp;
142 InterlockedDecrement(&osi_sleepInfoCount);
144 sp->tid = GetCurrentThreadId();
145 LeaveCriticalSection(&osi_sleepInfoAllocCS);
/* Begin a run-once section guarded by argp.
 *
 * Loops until InterlockedExchange(&argp->atomic, 1) returns 0, i.e. until
 * this caller owns the flag, then tests argp->done.  When initialization
 * has already been done, the flag is cleared again before returning.
 * NOTE(review): the not-yet-done branch and the return values are not
 * visible here -- confirm them before relying on this description.
 */
150 int osi_Once(osi_once_t *argp)
154 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
158 if (argp->done == 0) {
163 /* otherwise we've already been initialized, so clear lock and return */
164 InterlockedExchange(&argp->atomic, 0);
/* Release the once-flag: atomically stores 0 into argp->atomic. */
168 void osi_EndOnce(osi_once_t *argp)
170 InterlockedExchange(&argp->atomic, 0);
/* Report whether the once-section guarded by argp still needs to run.
 *
 * Acquires argp->atomic (looping until the exchange returns 0), takes a
 * snapshot of argp->done, and releases the flag again.
 * Returns 0 when done was nonzero, 1 otherwise.
 */
173 int osi_TestOnce(osi_once_t *argp)
178 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
182 localDone = argp->done;
185 InterlockedExchange(&argp->atomic, 0);
187 return (localDone? 0 : 1);
190 /* Initialize the package, should be called while single-threaded.
191 * Can be safely called multiple times.
192 * Must be called before any osi package calls.
197 static osi_once_t once;
198 unsigned long remainder; /* for division output */
202 osi_hyper_t bootTime;
204 /* check to see if already initialized; if so, claim success */
205 if (!osi_Once(&once))
208 /* setup boot time values */
209 GetSystemTime(&sysTime);
210 SystemTimeToFileTime(&sysTime, &fileTime);
212 /* change the base of the time so it won't be negative for a long time */
213 fileTime.dwHighDateTime -= 28000000;
215 bootTime.HighPart = fileTime.dwHighDateTime;
216 bootTime.LowPart = fileTime.dwLowDateTime;
217 /* now, bootTime is in 100 nanosecond units, and we'd really rather
218 * have it in 1 second units, units 10,000,000 times bigger.
221 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
222 osi_bootTime = bootTime.LowPart;
224 /* initialize thread-local storage for sleep Info structures */
225 osi_SleepSlot = TlsAlloc();
230 /* initialize critical regions and semaphores */
231 for(i=0;i<OSI_SLEEPHASHSIZE; i++) {
232 InitializeCriticalSection(&osi_critSec[i]);
233 osi_sleepers[i] = NULL;
234 osi_sleepersEnd[i] = NULL;
238 InitializeCriticalSection(&osi_sleepInfoAllocCS);
240 /* initialize cookie system */
241 InitializeCriticalSection(&osi_sleepFDCS);
243 /* register the FD type */
244 typep = osi_RegisterFDType("sleep", &osi_sleepFDOps, NULL);
246 /* add formatting info */
247 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 0,
248 "Sleep address", OSI_DBRPC_HEX);
249 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 1,
251 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 2,
252 "States", OSI_DBRPC_HEX);
/* Wait on a turnstile: forwards every argument to osi_TWaitExt with
 * prepend set to TRUE.
 */
264 void osi_TWait(osi_turnstile_t *turnp, int waitFor, void *patchp, DWORD *tidp, CRITICAL_SECTION *releasep)
266 osi_TWaitExt(turnp, waitFor, patchp, tidp, releasep, TRUE);
/* Queue the calling thread on turnstile turnp and block until signalled.
 *
 * The thread's sleep info entry comes from the osi_SleepSlot TLS slot; a
 * new one is allocated and stored there when needed (the test itself is
 * not visible here).  The entry's states are cleared, waitFor and patchp
 * are recorded, and the entry is added to the turnstile queue at the head
 * (osi_QAddH) or tail (osi_QAddT) -- presumably chosen by prepend; confirm.
 * releasep is then left, the thread waits on the entry's semaphore until
 * WAIT_OBJECT_0, the entry is freed, and releasep is re-entered before
 * returning.
 */
269 void osi_TWaitExt(osi_turnstile_t *turnp, int waitFor, void *patchp, DWORD *tidp, CRITICAL_SECTION *releasep, int prepend)
/* look up this thread's cached sleep info entry */
274 sp = TlsGetValue(osi_SleepSlot);
276 sp = osi_AllocSleepInfo();
277 TlsSetValue(osi_SleepSlot, sp);
280 _InterlockedAnd(&sp->states, 0);
282 sp->waitFor = waitFor;
283 sp->value = (LONG_PTR) patchp;
287 osi_QAddH((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
289 osi_QAddT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
/* the caller's lock is dropped only after the entry is on the queue */
290 LeaveCriticalSection(releasep);
292 /* now wait for the signal */
295 code = WaitForSingleObject(sp->sema,
296 /* timeout */ INFINITE);
298 /* if the reason for the wakeup was that we were signalled,
299 * break out, otherwise try again, since the semaphore count is
300 * decreased only when we get WAIT_OBJECT_0 back.
302 if (code == WAIT_OBJECT_0) break;
303 } /* while we're waiting */
305 /* we're the only one who should be looking at or changing this
306 * structure after it gets signalled. Sema sp->sema isn't signalled
307 * any longer after we're back from WaitForSingleObject, so we can
308 * free this element directly.
310 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
312 osi_FreeSleepInfo(sp);
314 /* reobtain, since caller commonly needs it */
315 EnterCriticalSection(releasep);
318 /* must be called with a critical section held that guards the turnstile
319 * structure. We remove the sleepInfo structure from the queue so we don't
320 * wake the guy again, but we don't free it because we're still using the
321 * semaphore until the guy waiting wakes up.
/* Wake a single waiter on turnp.
 *
 * The chosen entry sp is unlinked from the turnstile queue, flagged
 * OSI_SLEEPINFO_SIGNALLED, and its semaphore is released once.
 * NOTE(review): how sp is selected is not visible here; presumably it is
 * turnp->lastp, as in osi_TBroadcast -- confirm.
 */
323 void osi_TSignal(osi_turnstile_t *turnp)
331 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
332 _InterlockedOr(&sp->states, OSI_SLEEPINFO_SIGNALLED);
333 ReleaseSemaphore(sp->sema, 1, NULL);
336 /* like TSignal, only wake *everyone* */
/* Wake every waiter on turnp.
 *
 * Repeatedly takes turnp->lastp, unlinks it from the turnstile queue, flags
 * it OSI_SLEEPINFO_SIGNALLED and releases its semaphore once, until lastp
 * is NULL.
 */
337 void osi_TBroadcast(osi_turnstile_t *turnp)
341 while(sp = turnp->lastp) {
342 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
343 _InterlockedOr(&sp->states, OSI_SLEEPINFO_SIGNALLED);
344 ReleaseSemaphore(sp->sema, 1, NULL);
345 } /* while someone's still asleep */
348 /* special turnstile signal for mutexes and locks. Wakes up only those who
349 * will really be able to lock the lock. The assumption is that everyone who
350 * already can use the lock has already been woken (and is thus not in the
351 * turnstile any longer).
353 * The stillHaveReaders parm is set to 1 if this is a convert from write to read,
354 * indicating that there is still at least one reader, and we should only wake
355 * up other readers. We use it in a tricky manner: we just pretend we already woke
356 * a reader, and that is sufficient to prevent us from waking a writer.
358 * The crit sec. csp is released before the threads are woken, but after they
359 * are removed from the turnstile. It helps ensure that we won't have a spurious
360 * context swap back to us if the release performs a context swap for some reason.
/* Wake the waiters on turnp that can actually take the lock.
 *
 * wokeReader starts out as stillHaveReaders.  Waiters are examined starting
 * from turnp->lastp; each one selected is unlinked from the turnstile and
 * patched before being woken:
 *   - OSI_SLEEPINFO_W4WRITE: OSI_LOCKFLAG_EXCL is OR-ed into the word at
 *     tsp->value and tsp->tid is stored in tsp->tidp[0];
 *   - OSI_SLEEPINFO_W4READ: the word at tsp->value is used as an index, and
 *     tsp->tid is stored in tsp->tidp[index] when index is below
 *     OSI_RWLOCK_THREADS (any update of that word is not visible here).
 * Selected waiters are chained through q.nextp onto a private list.  csp is
 * then left, and only afterwards is each chained waiter flagged
 * OSI_SLEEPINFO_SIGNALLED and its semaphore released; the next pointer is
 * read before the release.
 * NOTE(review): the conditions that stop the scan are not visible here.
 */
362 void osi_TSignalForMLs(osi_turnstile_t *turnp, int stillHaveReaders, CRITICAL_SECTION *csp)
364 osi_sleepInfo_t *tsp; /* a temp */
365 osi_sleepInfo_t *nsp; /* a temp */
366 osi_queue_t *wakeupListp; /* list of dudes to wakeup after dropping lock */
371 wokeReader = stillHaveReaders;
373 while(tsp = turnp->lastp) {
374 /* look at each sleepInfo until we find someone we're not supposed to
377 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
384 /* otherwise, we will wake this guy. For now, remove from this list
385 * and move to private one, so we can do the wakeup after releasing
388 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &tsp->q);
390 /* do the patching required for lock obtaining */
391 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
392 cp = (void *) tsp->value;
393 (*cp) |= OSI_LOCKFLAG_EXCL;
394 tsp->tidp[0] = tsp->tid;
396 else if (tsp->waitFor & OSI_SLEEPINFO_W4READ) {
397 sp = (void *) tsp->value;
399 if ((*sp) < OSI_RWLOCK_THREADS)
400 tsp->tidp[*sp] = tsp->tid;
405 /* and add to our own list */
406 tsp->q.nextp = wakeupListp;
407 wakeupListp = &tsp->q;
409 /* now if we woke a writer, we're done, since it is pointless
410 * to wake more than one writer.
416 /* hit end, or found someone we're not supposed to wakeup */
418 LeaveCriticalSection(csp);
420 /* finally, wakeup everyone we found. Don't free things since the sleeper
421 * will free the sleepInfo structure.
423 for(tsp = (osi_sleepInfo_t *) wakeupListp; tsp; tsp = nsp) {
424 /* pull this out first, since *tsp *could* get freed immediately
425 * after the ReleaseSemaphore, if a context swap occurs.
427 nsp = (osi_sleepInfo_t *) tsp->q.nextp;
428 _InterlockedOr(&tsp->states, OSI_SLEEPINFO_SIGNALLED);
429 ReleaseSemaphore(tsp->sema, 1, NULL);
433 /* utility function to atomically (with respect to WakeSched)
434 * release an atomic counter spin lock and sleep on an
436 * Called with no locks held.
/* Sleep on sleepValue, leaving releasep once the sleeper is registered.
 *
 * The thread's sleep info entry comes from the osi_SleepSlot TLS slot; a
 * new one is allocated and stored there when needed (the test itself is
 * not visible here).  Its states are cleared and sleepValue recorded.  The
 * entry is appended to the bucket osi_SLEEPHASH(sleepValue) and flagged
 * OSI_SLEEPINFO_INHASH while that bucket's critical section is held.
 * After the bucket lock and releasep are both left, osi_totalSleeps is
 * incremented and the thread waits on the entry's semaphore until
 * WAIT_OBJECT_0.  It then re-enters the bucket lock, asserts that the entry
 * is flagged OSI_SLEEPINFO_SIGNALLED, frees it, and leaves the bucket lock.
 * NOTE(review): releasep is not re-entered in the visible lines.
 */
438 void osi_SleepSpin(LONG_PTR sleepValue, CRITICAL_SECTION *releasep)
442 CRITICAL_SECTION *csp;
444 sp = TlsGetValue(osi_SleepSlot);
446 sp = osi_AllocSleepInfo();
447 TlsSetValue(osi_SleepSlot, sp);
450 _InterlockedAnd(&sp->states, 0);
453 sp->value = sleepValue;
455 sp->idx = osi_SLEEPHASH(sleepValue);
456 csp = &osi_critSec[sp->idx];
/* add the entry to its hash bucket while holding the bucket lock */
457 EnterCriticalSection(csp);
458 osi_QAddT((osi_queue_t **) &osi_sleepers[sp->idx], (osi_queue_t **) &osi_sleepersEnd[sp->idx], &sp->q);
459 _InterlockedOr(&sp->states, OSI_SLEEPINFO_INHASH);
460 LeaveCriticalSection(csp);
461 LeaveCriticalSection(releasep);
462 InterlockedIncrement(&osi_totalSleeps); /* stats */
465 code = WaitForSingleObject(sp->sema,
466 /* timeout */ INFINITE);
468 /* if the reason for the wakeup was that we were signalled,
469 * break out, otherwise try again, since the semaphore count is
470 * decreased only when we get WAIT_OBJECT_0 back.
472 if (code == WAIT_OBJECT_0) break;
476 EnterCriticalSection(csp);
478 /* must be signalled */
479 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
481 /* free the sleep structure, must be done under bucket lock
482 * so that we can check reference count and serialize with
483 * those who change it.
485 osi_FreeSleepInfo(sp);
487 LeaveCriticalSection(csp);
490 /* utility function to wakeup someone sleeping in SleepSched */
/* Signal sleepers registered in the hash table under sleepValue.
 *
 * With the bucket osi_SLEEPHASH(sleepValue) locked, walks that bucket's
 * queue; each entry whose value equals sleepValue and which is neither
 * OSI_SLEEPINFO_DELETED nor already OSI_SLEEPINFO_SIGNALLED is flagged
 * SIGNALLED and has its semaphore released once.
 * NOTE(review): no early exit from the loop is visible, so every matching
 * entry appears to be woken -- confirm.
 */
491 void osi_WakeupSpin(LONG_PTR sleepValue)
494 CRITICAL_SECTION *csp;
495 osi_sleepInfo_t *tsp;
497 idx = osi_SLEEPHASH(sleepValue);
498 csp = &osi_critSec[idx];
499 EnterCriticalSection(csp);
500 for(tsp=osi_sleepers[idx]; tsp; tsp=(osi_sleepInfo_t *) osi_QNext(&tsp->q)) {
/* skip entries that are deleted or have already been signalled */
501 if ((!(tsp->states & (OSI_SLEEPINFO_DELETED|OSI_SLEEPINFO_SIGNALLED)))
502 && tsp->value == sleepValue) {
503 _InterlockedOr(&tsp->states, OSI_SLEEPINFO_SIGNALLED);
504 ReleaseSemaphore(tsp->sema, 1, NULL);
507 LeaveCriticalSection(csp);
/* Sleep on sleepVal.
 *
 * Enters osi_baseAtomicCS[0] and passes it to osi_SleepSpin as the
 * critical section to leave once the sleeper has been registered.
 */
510 void osi_Sleep(LONG_PTR sleepVal)
512 CRITICAL_SECTION *csp;
514 /* may as well save some code by using SleepSched again */
515 csp = &osi_baseAtomicCS[0];
516 EnterCriticalSection(csp);
517 osi_SleepSpin(sleepVal, csp);
/* Wake sleepers waiting on sleepVal; delegates to osi_WakeupSpin. */
520 void osi_Wakeup(LONG_PTR sleepVal)
522 /* how do we do osi_Wakeup on a per-lock package type? */
524 osi_WakeupSpin(sleepVal);
/* Create a sleep file descriptor object.
 *
 * Allocates an osi_sleepFD_t with malloc and zero-fills it.  How the object
 * is handed back through outpp, and the return code, are not visible here.
 * NOTE(review): the malloc result is not checked before memset in the
 * visible lines.
 */
527 long osi_SleepFDCreate(osi_fdType_t *fdTypep, osi_fd_t **outpp)
531 cp = (osi_sleepFD_t *)malloc(sizeof(*cp));
532 memset((void *) cp, 0, sizeof(*cp));
541 long osi_SleepFDClose(osi_fd_t *cp)
547 /* called with osi_sleepFDCS locked; returns with same, so that
548 * we know that the sleep info pointed to by the cookie won't change
549 * until the caller releases the lock.
/* Move the iterator cp on to the next sleep info entry.
 *
 * Visits hash buckets from index 0 while idx < OSI_SLEEPHASHSIZE, holding
 * osi_critSec[idx] for the bucket under inspection; oidx remembers which
 * bucket was locked so that the same lock is left at the end of the pass.
 * With no current entry in cp->sip the scan starts at the bucket head
 * osi_sleepers[idx].  Otherwise the current entry's q.nextp is read before
 * the current entry is released with osi_ReleaseSleepInfo.  Entries are
 * referenced via InterlockedIncrement on refCount.
 * NOTE(review): the branch structure, the updates of cp->sip and idx, and
 * the loop exit are not visible here -- confirm before relying on this.
 */
551 void osi_AdvanceSleepFD(osi_sleepFD_t *cp)
553 int idx; /* index we're dealing with */
554 int oidx; /* index we locked */
555 osi_sleepInfo_t *sip;
556 osi_sleepInfo_t *nsip;
558 idx = 0; /* so we go around once safely */
560 while(idx < OSI_SLEEPHASHSIZE) {
561 /* cp->sip should be held */
563 EnterCriticalSection(&osi_critSec[idx]);
564 oidx = idx; /* remember original index; that's the one we locked */
566 /* if there's a sleep info structure in the FD, it should be held; it
567 * is the one we just processed, so we want to move on to the next.
568 * If not, then we want to process the chain in the bucket idx points
571 if ((sip = cp->sip) == NULL) {
572 sip = osi_sleepers[idx];
575 InterlockedIncrement(&sip->refCount);
578 /* it is safe to release the current sleep info guy now
579 * since we hold the bucket lock. Pull next guy out first,
580 * since if sip is deleted, Release will move him into
583 nsip = (osi_sleepInfo_t *) sip->q.nextp;
584 osi_ReleaseSleepInfo(sip);
588 InterlockedIncrement(&sip->refCount);
594 LeaveCriticalSection(&osi_critSec[oidx]);
596 /* now, if we advanced to a new sleep info structure, we're
597 * done, otherwise we continue and look at the next hash bucket
598 * until we're out of them.
/* GetInfo operation for sleep file descriptors.
 *
 * With osi_sleepFDCS held, advances the descriptor with osi_AdvanceSleepFD.
 * If it now points at an entry, that entry's value, tid and states are
 * copied into parmsp->idata[0], [1] and [2]; otherwise code is set to
 * OSI_DBRPC_EOF.  The critical section is left before returning.
 * NOTE(review): the success code and the return statement are not visible
 * here.
 */
606 long osi_SleepFDGetInfo(osi_fd_t *ifdp, osi_remGetInfoParms_t *parmsp)
608 osi_sleepFD_t *fdp = (osi_sleepFD_t *) ifdp;
609 osi_sleepInfo_t *sip;
612 /* now, grab a mutex serializing all iterations over FDs, so that
613 * if the RPC screws up and sends us two calls on the same FD, we don't
614 * crash and burn advancing the same FD concurrently. Probably paranoia,
615 * but you generally shouldn't trust stuff coming over the network.
617 EnterCriticalSection(&osi_sleepFDCS);
619 /* this next call advances the FD to the next guy, and simultaneously validates
620 * that the info from the network is valid. If it isn't, we do our best to
621 * resynchronize our position, but we might return some info multiple times.
623 osi_AdvanceSleepFD(fdp);
625 /* now copy out info */
626 if (sip = fdp->sip) { /* one '=' */
627 parmsp->idata[0] = sip->value;
628 parmsp->idata[1] = sip->tid;
629 parmsp->idata[2] = sip->states;
634 else code = OSI_DBRPC_EOF;
636 LeaveCriticalSection(&osi_sleepFDCS);
641 /* finally, DLL-specific code for NT */
642 BOOL APIENTRY DLLMain(HANDLE inst, DWORD why, char *reserved)
647 /* some misc functions for setting hash table sizes */
649 /* return true iff x is prime */
/* Return 1 iff x is prime, 0 otherwise.
 *
 * 0 and 1 are reported as not prime.  Odd candidates are tested by trial
 * division up to the square root of x; the bound is written as c <= x / c
 * so that it cannot overflow for large x the way c * c could.
 */
int osi_IsPrime(unsigned long x)
{
    unsigned long c;

    /* 0 and 1 are not prime */
    if (x < 2)
        return 0;

    /* even numbers aren't prime, except for 2 itself */
    if ((x & 1) == 0)
        return x == 2;

    for (c = 3; c <= x / c; c += 2) {
        /* see if x is divisible by c */
        if ((x % c) == 0)
            return 0;   /* yup, it ain't prime */
    }

    /* no odd divisor up to sqrt(x), so x is prime */
    return 1;
}
673 /* return first prime number less than or equal to x */
/* Search downward from x for a prime.
 *
 * The loop tries c = x, x - 1, ... and stops before reaching 1.
 * NOTE(review): the loop body and the value returned when the loop ends
 * without a hit (x < 2) are not visible here; presumably candidates are
 * tested with osi_IsPrime -- confirm.
 */
674 unsigned long osi_PrimeLessThan(unsigned long x) {
677 for(c = x; c > 1; c--) {
686 /* return the # of seconds since some fixed date */
687 unsigned long osi_GetBootTime(void)
/* Callback invoked by osi_panic with (message, file, line); NULL until
 * osi_InitPanic installs one.
 */
692 static int (*notifFunc)(char *, char *, long) = NULL;
/* Install the panic callback: stores anotifFunc in notifFunc. */
694 void osi_InitPanic(void *anotifFunc)
696 notifFunc = anotifFunc;
/* Report a panic raised at filep:line with message msgp.
 *
 * Calls the installed callback (*notifFunc)(msgp, filep, line) and then
 * osi_LogPanic with the same arguments.
 * NOTE(review): whether the callback call is guarded against a NULL
 * notifFunc is not visible here -- confirm.
 */
699 void osi_panic(char *msgp, char *filep, long line)
702 (*notifFunc)(msgp, filep, line);
704 osi_LogPanic(msgp, filep, line);
707 /* get time in seconds since some relatively recent time */
/* Return the current time in whole seconds on a shifted time base.
 *
 * Reads the system time as a FILETIME (100 ns units), subtracts 28000000
 * from its high 32 bits to move the base, and divides the 64-bit value by
 * 10,000,000 to get seconds.
 * NOTE(review): two return statements are visible (QuadPart and LowPart);
 * what selects between them, presumably a preprocessor conditional, is not
 * visible here.
 */
708 time_t osi_Time(void)
712 unsigned long remainder;
713 LARGE_INTEGER bootTime;
715 /* setup boot time values */
716 GetSystemTime(&sysTime);
717 SystemTimeToFileTime(&sysTime, &fileTime);
719 /* change the base of the time so it won't be negative for a long time */
720 fileTime.dwHighDateTime -= 28000000;
722 bootTime.HighPart = fileTime.dwHighDateTime;
723 bootTime.LowPart = fileTime.dwLowDateTime;
724 /* now, bootTime is in 100 nanosecond units, and we'd really rather
725 * have it in 1 second units, units 10,000,000 times bigger.
728 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
730 return bootTime.QuadPart;
732 return bootTime.LowPart;
736 /* get time as seconds and microseconds since some relatively recent time */
/* Store the current time, on a shifted time base, into timesp.
 *
 * timesp[0] receives whole seconds and timesp[1] the microsecond remainder.
 * The system time is read as a FILETIME (100 ns units) and 28000000 is
 * subtracted from its high 32 bits to move the base.  The value is divided
 * by 10 to give microseconds, then by 1,000,000 to give seconds; the
 * remainder of that second division is the microsecond part.
 */
737 void osi_GetTime(long *timesp)
741 unsigned long remainder;
742 LARGE_INTEGER bootTime;
744 /* setup boot time values */
745 GetSystemTime(&sysTime);
746 SystemTimeToFileTime(&sysTime, &fileTime);
748 /* change the base of the time so it won't be negative for a long time */
749 fileTime.dwHighDateTime -= 28000000;
751 bootTime.HighPart = fileTime.dwHighDateTime;
752 bootTime.LowPart = fileTime.dwLowDateTime;
753 /* now, bootTime is in 100 nanosecond units, and we'd really rather
754 * have it in 1 microsecond units, units 10 times bigger.
757 bootTime = ExtendedLargeIntegerDivide(bootTime, 10, &remainder);
758 bootTime = ExtendedLargeIntegerDivide(bootTime, 1000000, &remainder);
759 timesp[0] = bootTime.LowPart; /* seconds */
760 timesp[1] = remainder; /* microseconds */