2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* Copyright (C) 1994 Cazamar Systems, Inc. */
12 #include <afs/param.h>
21 /* Locking hierarchy for these critical sections:
23 * 1. lock osi_sleepFDCS
24 * 2. lock osi_critSec[i]
25 * 3. lock osi_sleepInfoAllocCS
/* File-scope state of the sleep/wakeup package: the operations table for
 * "sleep" file descriptors, the critical sections that serialize FD access,
 * hash buckets and the free list, the sleeper hash table (head and tail
 * arrays), the lock operations table, the boot time, and the statistics
 * and allocation counters.
 */
28 /* file descriptor for iterating over sleeping threads */
29 osi_fdOps_t osi_sleepFDOps = {
36 * Thread-local storage for sleep Info structures
40 /* critical section serializing contents of all sleep FDs, so that
41 * concurrent GetInfo calls don't damage each other if applied
44 CRITICAL_SECTION osi_sleepFDCS;
46 /* critical regions used for SleepSched to guarantee atomicity.
47 * protects all sleep info structures while they're in the
50 static CRITICAL_SECTION osi_critSec[OSI_SLEEPHASHSIZE];
52 /* the sleep info structure hash table.
53 * all active entries are in here. In addition, deleted entries
54 * may be present, referenced by file descriptors from remote
55 * debuggers; these will have OSI_SLEEPINFO_DELETED set and
58 static osi_sleepInfo_t *osi_sleepers[OSI_SLEEPHASHSIZE];
59 static osi_sleepInfo_t *osi_sleepersEnd[OSI_SLEEPHASHSIZE];
61 /* allocate space for lock operations */
62 osi_lockOps_t *osi_lockOps[OSI_NLOCKTYPES];
64 /* some global statistics */
65 long osi_totalSleeps = 0;
67 /* critical section protecting sleepInfoFreeListp and all sleep entries in
70 CRITICAL_SECTION osi_sleepInfoAllocCS;
72 /* sleep entry free list */
73 osi_sleepInfo_t *osi_sleepInfoFreeListp;
76 unsigned long osi_bootTime;
78 /* count of free entries in free list, protected by osi_sleepInfoAllocCS */
79 long osi_sleepInfoCount=0;
81 /* count of # of allocates of sleep info structures */
82 long osi_sleepInfoAllocs = 0;
84 /* the sleep bucket lock must be held.
85 * Releases the reference count and frees the structure if the item has
88 void osi_ReleaseSleepInfo(osi_sleepInfo_t *ap)
90 if (--ap->refCount == 0 && (ap->states & OSI_SLEEPINFO_DELETED))
91 osi_FreeSleepInfo(ap);
94 /* must be called with sleep bucket locked.
95 * Frees the structure if it has a 0 reference count (and removes it
96 * from the hash bucket). Otherwise, we simply mark the item
97 * for deleting when the ref count hits zero.
/* Retire a sleep info structure.
 *
 * While references remain (refCount > 0) the entry is only marked
 * OSI_SLEEPINFO_DELETED, and the calling thread's osi_SleepSlot TLS value
 * is cleared so the entry is not picked up for reuse.
 * Otherwise the entry is unlinked from its hash bucket if it is flagged
 * OSI_SLEEPINFO_INHASH, and, if it is flagged OSI_SLEEPINFO_DELETED, it is
 * pushed onto osi_sleepInfoFreeListp under osi_sleepInfoAllocCS.
 *
 * NOTE(review): this excerpt appears to be missing lines (braces, the
 * declaration of idx, and presumably a return after the refCount > 0
 * branch) -- confirm against the complete file.
 */
99 void osi_FreeSleepInfo(osi_sleepInfo_t *ap)
103 if (ap->refCount > 0) {
104 TlsSetValue(osi_SleepSlot, NULL); /* don't reuse me */
105 ap->states |= OSI_SLEEPINFO_DELETED;
109 /* remove from hash if still there */
110 if (ap->states & OSI_SLEEPINFO_INHASH) {
111 ap->states &= ~OSI_SLEEPINFO_INHASH;
112 idx = osi_SLEEPHASH(ap->value);
113 osi_QRemoveHT((osi_queue_t **) &osi_sleepers[idx], (osi_queue_t **) &osi_sleepersEnd[idx], &ap->q);
116 if (ap->states & OSI_SLEEPINFO_DELETED) {
117 EnterCriticalSection(&osi_sleepInfoAllocCS);
118 ap->q.nextp = (osi_queue_t *) osi_sleepInfoFreeListp;
119 osi_sleepInfoFreeListp = ap;
120 osi_sleepInfoCount++;
121 LeaveCriticalSection(&osi_sleepInfoAllocCS);
125 /* allocate a new sleep structure from the free list */
126 osi_sleepInfo_t *osi_AllocSleepInfo()
130 EnterCriticalSection(&osi_sleepInfoAllocCS);
131 if (!(ap = osi_sleepInfoFreeListp)) {
132 ap = (osi_sleepInfo_t *) malloc(sizeof(osi_sleepInfo_t));
133 ap->sema = CreateSemaphore(NULL, 0, 65536, (char *) 0);
134 osi_sleepInfoAllocs++;
137 osi_sleepInfoFreeListp = (osi_sleepInfo_t *) ap->q.nextp;
138 osi_sleepInfoCount--;
140 ap->tid = GetCurrentThreadId();
141 ap->states = 0; /* not signalled yet */
142 LeaveCriticalSection(&osi_sleepInfoAllocCS);
/* Begin a run-once section guarded by *argp.
 *
 * Spins on InterlockedExchange(&argp->atomic, 1) until the previous value
 * was 0, i.e. until this thread holds the interlock.  If argp->done is
 * already set, the interlock is released again.
 *
 * NOTE(review): the branch taken when argp->done == 0, the spin body and
 * the return statements are not visible in this excerpt.  The package
 * initialization code treats a zero return as "already initialized" --
 * confirm the first-caller path against the complete file.
 */
147 int osi_Once(osi_once_t *argp)
151 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
155 if (argp->done == 0) {
160 /* otherwise we've already been initialized, so clear lock and return */
161 InterlockedExchange(&argp->atomic, 0);
/* Release the interlock of a run-once guard by atomically storing 0 into
 * argp->atomic.
 */
165 void osi_EndOnce(osi_once_t *argp)
167 InterlockedExchange(&argp->atomic, 0);
/* Report whether a run-once guard has not yet completed.
 *
 * Takes the interlock (spinning on InterlockedExchange until the previous
 * value was 0), samples argp->done, releases the interlock, and returns 0
 * when done was nonzero and 1 otherwise.
 *
 * NOTE(review): local declarations and the spin body are not visible in
 * this excerpt.
 */
170 int osi_TestOnce(osi_once_t *argp)
175 while ((i=InterlockedExchange(&argp->atomic, 1)) != 0) {
179 localDone = argp->done;
182 InterlockedExchange(&argp->atomic, 0);
184 return (localDone? 0 : 1);
187 /* Initialize the package, should be called while single-threaded.
188 * Can be safely called multiple times.
189 * Must be called before any osi package calls.
/* Body of the package initialization routine (its signature line is not
 * visible in this excerpt).
 *
 * Visible steps, in order:
 *  - uses osi_Once on a static guard to detect repeated initialization;
 *  - derives osi_bootTime from the current system time: the FILETIME high
 *    dword is reduced by 28000000 and the 100ns count is divided by
 *    10,000,000 to get seconds;
 *  - allocates the TLS slot osi_SleepSlot;
 *  - initializes every bucket critical section and empties the sleeper
 *    hash table;
 *  - initializes osi_sleepInfoAllocCS and osi_sleepFDCS;
 *  - registers the "sleep" FD type and its formatting info.
 *
 * NOTE(review): no check of the TlsAlloc or osi_RegisterFDType results is
 * visible here, and several lines (declarations, early return, trailing
 * calls) are missing from this excerpt -- confirm against the full file.
 */
194 static osi_once_t once;
195 unsigned long remainder; /* for division output */
199 osi_hyper_t bootTime;
201 /* check to see if already initialized; if so, claim success */
202 if (!osi_Once(&once))
205 /* setup boot time values */
206 GetSystemTime(&sysTime);
207 SystemTimeToFileTime(&sysTime, &fileTime);
209 /* change the base of the time so it won't be negative for a long time */
210 fileTime.dwHighDateTime -= 28000000;
212 bootTime.HighPart = fileTime.dwHighDateTime;
213 bootTime.LowPart = fileTime.dwLowDateTime;
214 /* now, bootTime is in 100 nanosecond units, and we'd really rather
215 * have it in 1 second units, units 10,000,000 times bigger.
218 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
219 osi_bootTime = bootTime.LowPart;
221 /* initialize thread-local storage for sleep Info structures */
222 osi_SleepSlot = TlsAlloc();
227 /* initialize critical regions and semaphores */
228 for(i=0;i<OSI_SLEEPHASHSIZE; i++) {
229 InitializeCriticalSection(&osi_critSec[i]);
230 osi_sleepers[i] = (osi_sleepInfo_t *) NULL;
231 osi_sleepersEnd[i] = (osi_sleepInfo_t *) NULL;
235 InitializeCriticalSection(&osi_sleepInfoAllocCS);
237 /* initialize cookie system */
238 InitializeCriticalSection(&osi_sleepFDCS);
240 /* register the FD type */
241 typep = osi_RegisterFDType("sleep", &osi_sleepFDOps, NULL);
243 /* add formatting info */
244 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 0,
245 "Sleep address", OSI_DBRPC_HEX);
246 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 1,
248 osi_AddFDFormatInfo(typep, OSI_DBRPC_REGIONINT, 2,
249 "States", OSI_DBRPC_HEX);
/* Wait on a turnstile.  Thin wrapper that forwards all arguments to
 * osi_TWaitExt with its final (prepend) argument set to TRUE.
 */
261 void osi_TWait(osi_turnstile_t *turnp, int waitFor, void *patchp, DWORD *tidp, CRITICAL_SECTION *releasep)
263 osi_TWaitExt(turnp, waitFor, patchp, tidp, releasep, TRUE);
/* Block the calling thread on a turnstile until it is signalled.
 *
 * Visible steps: fetch the thread's sleep info from the osi_SleepSlot TLS
 * value (allocating one with osi_AllocSleepInfo and storing it in TLS on
 * the allocation path), record waitFor and patchp in it, queue it on the
 * turnstile at the head (osi_QAddH) or tail (osi_QAddT), release releasep,
 * and wait on the entry's semaphore until WaitForSingleObject returns
 * WAIT_OBJECT_0.  Afterwards the entry must carry
 * OSI_SLEEPINFO_SIGNALLED; it is passed to osi_FreeSleepInfo and releasep
 * is re-entered before returning.
 *
 * releasep is left and re-entered here, so the caller is expected to hold
 * it on entry.
 *
 * NOTE(review): the conditions selecting the allocation path and the
 * head/tail insertion (presumably the prepend argument), the wait loop
 * header, and some field assignments are not visible in this excerpt.
 */
266 void osi_TWaitExt(osi_turnstile_t *turnp, int waitFor, void *patchp, DWORD *tidp, CRITICAL_SECTION *releasep, int prepend)
271 sp = TlsGetValue(osi_SleepSlot);
273 sp = osi_AllocSleepInfo();
274 TlsSetValue(osi_SleepSlot, sp);
280 sp->waitFor = waitFor;
281 sp->value = (LONG_PTR) patchp;
284 osi_QAddH((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
286 osi_QAddT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
287 LeaveCriticalSection(releasep);
289 /* now wait for the signal */
292 code = WaitForSingleObject(sp->sema,
293 /* timeout */ INFINITE);
295 /* if the reason for the wakeup was that we were signalled,
296 * break out, otherwise try again, since the semaphore count is
297 * decreased only when we get WAIT_OBJECT_0 back.
299 if (code == WAIT_OBJECT_0) break;
300 } /* while we're waiting */
302 /* we're the only one who should be looking at or changing this
303 * structure after it gets signalled. Sema sp->sema isn't signalled
304 * any longer after we're back from WaitForSingleObject, so we can
305 * free this element directly.
307 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
309 osi_FreeSleepInfo(sp);
311 /* reobtain, since caller commonly needs it */
312 EnterCriticalSection(releasep);
315 /* must be called with a critical section held that guards the turnstile
316 * structure. We remove the sleepInfo structure from the queue so we don't
317 * wake the guy again, but we don't free it because we're still using the
318 * semaphore until the guy waiting wakes up.
/* Wake one thread waiting on a turnstile.
 *
 * Visible steps: move turnp->lastp back to the predecessor of sp, unlink
 * sp from the turnstile queue, mark it OSI_SLEEPINFO_SIGNALLED, and
 * release its semaphore by one count.  The sleep info is not freed here.
 *
 * NOTE(review): the lines that declare and select sp (and any
 * empty-turnstile check) are not visible in this excerpt; presumably sp is
 * the entry at turnp->lastp -- confirm against the complete file.
 */
320 void osi_TSignal(osi_turnstile_t *turnp)
328 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&sp->q);
329 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
330 sp->states |= OSI_SLEEPINFO_SIGNALLED;
331 ReleaseSemaphore(sp->sema, 1, (long *) 0);
334 /* like TSignal, only wake *everyone* */
335 void osi_TBroadcast(osi_turnstile_t *turnp)
339 while(sp = turnp->lastp) {
340 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&sp->q);
341 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &sp->q);
342 sp->states |= OSI_SLEEPINFO_SIGNALLED;
343 ReleaseSemaphore(sp->sema, 1, (long *) 0);
344 } /* while someone's still asleep */
347 /* special turnstile signal for mutexes and locks. Wakes up only those who
348 * will really be able to lock the lock. The assumption is that everyone who
349 * already can use the lock has already been woken (and is thus not in the
350 * turnstile any longer).
352 * The stillHaveReaders parm is set to 1 if this is a convert from write to read,
353 * indicating that there is still at least one reader, and we should only wake
354 * up other readers. We use it in a tricky manner: we just pretend we already woke
355 * a reader, and that is sufficient to prevent us from waking a writer.
357 * The crit sec. csp is released before the threads are woken, but after they
358 * are removed from the turnstile. It helps ensure that we won't have a spurious
359 * context swap back to us if the release performs a context swap for some reason.
/* Turnstile signal used by mutexes and read/write locks.
 *
 * Visible behavior: wokeReader starts out as stillHaveReaders.  Entries
 * are taken from the tail of the turnstile (turnp->lastp); each chosen
 * entry is unlinked from the turnstile, the lock state it waits on is
 * patched through tsp->value (a writer gets OSI_LOCKFLAG_EXCL set and its
 * tid stored in tidp[0]; a reader's tid is stored in tidp[(*sp)-1] when
 * *sp <= OSI_RWLOCK_THREADS), and the entry is pushed onto a private
 * wakeup list.  csp is then released, and only afterwards is each listed
 * entry marked OSI_SLEEPINFO_SIGNALLED and its semaphore released.  The
 * next pointer is read before ReleaseSemaphore because the sleeper may
 * free the entry as soon as it is woken.
 *
 * csp is released here and not re-entered in the visible lines, so the
 * caller is expected to hold it on entry.
 *
 * NOTE(review): several lines (local declarations, the stop conditions
 * inside the loop, the reader-count update) are not visible in this
 * excerpt -- confirm details against the complete file.
 */
361 void osi_TSignalForMLs(osi_turnstile_t *turnp, int stillHaveReaders, CRITICAL_SECTION *csp)
363 osi_sleepInfo_t *tsp; /* a temp */
364 osi_sleepInfo_t *nsp; /* a temp */
365 osi_queue_t *wakeupListp; /* list of dudes to wakeup after dropping lock */
370 wokeReader = stillHaveReaders;
372 while(tsp = turnp->lastp) {
373 /* look at each sleepInfo until we find someone we're not supposed to
376 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
383 /* otherwise, we will wake this guy. For now, remove from this list
384 * and move to private one, so we can do the wakeup after releasing
387 turnp->lastp = (osi_sleepInfo_t *) osi_QPrev(&tsp->q);
388 osi_QRemoveHT((osi_queue_t **) &turnp->firstp, (osi_queue_t **) &turnp->lastp, &tsp->q);
390 /* do the patching required for lock obtaining */
391 if (tsp->waitFor & OSI_SLEEPINFO_W4WRITE) {
392 cp = (void *) tsp->value;
393 (*cp) |= OSI_LOCKFLAG_EXCL;
394 tsp->tidp[0] = tsp->tid;
396 else if (tsp->waitFor & OSI_SLEEPINFO_W4READ) {
397 sp = (void *) tsp->value;
399 if ((*sp) <= OSI_RWLOCK_THREADS)
400 tsp->tidp[(*sp)-1] = tsp->tid;
403 /* and add to our own list */
404 tsp->q.nextp = wakeupListp;
405 wakeupListp = &tsp->q;
407 /* now if we woke a writer, we're done, since it is pointless
408 * to wake more than one writer.
414 /* hit end, or found someone we're not supposed to wakeup */
416 LeaveCriticalSection(csp);
418 /* finally, wakeup everyone we found. Don't free things since the sleeper
419 * will free the sleepInfo structure.
421 for(tsp = (osi_sleepInfo_t *) wakeupListp; tsp; tsp = nsp) {
422 /* pull this out first, since *tsp *could* get freed immediately
423 * after the ReleaseSemaphore, if a context swap occurs.
425 nsp = (osi_sleepInfo_t *) tsp->q.nextp;
426 tsp->states |= OSI_SLEEPINFO_SIGNALLED;
427 ReleaseSemaphore(tsp->sema, 1, (long *) 0);
431 /* utility function to atomically (with respect to WakeSched)
432 * release an atomic counter spin lock and sleep on an
434 * Called with no locks held.
/* Sleep on sleepValue, releasing releasep once the sleeper is registered.
 *
 * Visible steps: fetch the thread's sleep info from the osi_SleepSlot TLS
 * value (allocating and storing one on the allocation path), record
 * sleepValue in it, and, under the bucket critical section selected by
 * osi_SLEEPHASH(sleepValue), append it to that bucket and set
 * OSI_SLEEPINFO_INHASH.  releasep is released while the bucket lock is
 * still held, then the bucket lock is dropped and the thread waits on its
 * semaphore until WaitForSingleObject returns WAIT_OBJECT_0.  After the
 * wakeup the bucket lock is retaken, the entry is asserted to be
 * OSI_SLEEPINFO_SIGNALLED, and it is passed to osi_FreeSleepInfo.
 *
 * releasep is released here and is not re-entered in the visible lines,
 * so the caller is expected to hold it on entry and not on return.
 *
 * NOTE(review): osi_totalSleeps is incremented after both critical
 * sections have been left, i.e. without a visible lock.  Some lines
 * (declarations, the allocation condition, the wait loop header) are
 * missing from this excerpt.
 */
436 void osi_SleepSpin(LONG_PTR sleepValue, CRITICAL_SECTION *releasep)
441 CRITICAL_SECTION *csp;
443 sp = TlsGetValue(osi_SleepSlot);
445 sp = osi_AllocSleepInfo();
446 TlsSetValue(osi_SleepSlot, sp);
452 sp->value = sleepValue;
453 idx = osi_SLEEPHASH(sleepValue);
454 csp = &osi_critSec[idx];
455 EnterCriticalSection(csp);
456 osi_QAddT((osi_queue_t **) &osi_sleepers[idx], (osi_queue_t **) &osi_sleepersEnd[idx], &sp->q);
457 sp->states |= OSI_SLEEPINFO_INHASH;
458 LeaveCriticalSection(releasep);
459 LeaveCriticalSection(csp);
460 osi_totalSleeps++; /* stats */
463 code = WaitForSingleObject(sp->sema,
464 /* timeout */ INFINITE);
466 /* if the reason for the wakeup was that we were signalled,
467 * break out, otherwise try again, since the semaphore count is
468 * decreased only when we get WAIT_OBJECT_0 back.
470 if (code == WAIT_OBJECT_0) break;
474 EnterCriticalSection(csp);
476 /* must be signalled */
477 osi_assert(sp->states & OSI_SLEEPINFO_SIGNALLED);
479 /* free the sleep structure, must be done under bucket lock
480 * so that we can check reference count and serialize with
481 * those who change it.
483 osi_FreeSleepInfo(sp);
485 LeaveCriticalSection(csp);
488 /* utility function to wakeup someone sleeping in SleepSched */
/* Wake threads sleeping on sleepValue.
 *
 * Under the bucket critical section selected by osi_SLEEPHASH(sleepValue),
 * walks that bucket's sleeper list.  For an entry whose value equals
 * sleepValue and which is neither OSI_SLEEPINFO_DELETED nor already
 * OSI_SLEEPINFO_SIGNALLED, the semaphore is released by one count and the
 * entry is marked OSI_SLEEPINFO_SIGNALLED.  No loop exit after a match is
 * visible in this excerpt.
 *
 * NOTE(review): the declaration of idx and the closing braces are missing
 * from this excerpt.
 */
489 void osi_WakeupSpin(LONG_PTR sleepValue)
492 CRITICAL_SECTION *csp;
493 osi_sleepInfo_t *tsp;
495 idx = osi_SLEEPHASH(sleepValue);
496 csp = &osi_critSec[idx];
497 EnterCriticalSection(csp);
498 for(tsp=osi_sleepers[idx]; tsp; tsp=(osi_sleepInfo_t *) osi_QNext(&tsp->q)) {
499 if ((!(tsp->states & (OSI_SLEEPINFO_DELETED|OSI_SLEEPINFO_SIGNALLED)))
500 && tsp->value == sleepValue) {
501 ReleaseSemaphore(tsp->sema, 1, (long *) 0);
502 tsp->states |= OSI_SLEEPINFO_SIGNALLED;
505 LeaveCriticalSection(csp);
/* Sleep on sleepVal.
 *
 * Enters osi_baseAtomicCS[0] and passes it to osi_SleepSpin as the
 * critical section to release; osi_SleepSpin leaves it once the sleeper
 * has been queued.
 */
508 void osi_Sleep(LONG_PTR sleepVal)
510 CRITICAL_SECTION *csp;
512 /* may as well save some code by using SleepSched again */
513 csp = &osi_baseAtomicCS[0];
514 EnterCriticalSection(csp);
515 osi_SleepSpin(sleepVal, csp);
/* Wake threads sleeping on sleepVal by forwarding to osi_WakeupSpin. */
518 void osi_Wakeup(LONG_PTR sleepVal)
520 /* how do we do osi_Wakeup on a per-lock package type? */
522 osi_WakeupSpin(sleepVal);
/* Create operation for "sleep" file descriptors: allocates an
 * osi_sleepFD_t with malloc and zero-fills it.
 *
 * NOTE(review): no check of the malloc result is visible before the
 * memset; the remainder of the body (how *outpp is set and what is
 * returned) is missing from this excerpt -- confirm against the full file.
 */
525 long osi_SleepFDCreate(osi_fdType_t *fdTypep, osi_fd_t **outpp)
529 cp = (osi_sleepFD_t *)malloc(sizeof(*cp));
530 memset((void *) cp, 0, sizeof(*cp));
/* Close operation for "sleep" file descriptors.
 * NOTE(review): the body is not visible in this excerpt.
 */
539 long osi_SleepFDClose(osi_fd_t *cp)
545 /* called with osi_sleepFDCS locked; returns with same, so that
546 * we know that the sleep info pointed to by the cookie won't change
547 * until the caller releases the lock.
/* Advance a sleep FD cursor to the next sleep info structure.
 *
 * Visible behavior: starting at bucket index 0 and while idx is below
 * OSI_SLEEPHASHSIZE, the bucket critical section osi_critSec[idx] is
 * entered (oidx remembers which one was locked).  If the FD holds no
 * current entry (cp->sip == NULL) the scan starts at the head of bucket
 * idx; otherwise the current entry's refCount is incremented.  The
 * successor is read from sip->q.nextp before sip is handed to
 * osi_ReleaseSleepInfo, and the bucket lock taken as oidx is released.
 *
 * NOTE(review): large parts of the body (how idx advances, how cp->sip is
 * updated, the loop exit) are missing from this excerpt -- confirm against
 * the complete file before relying on this description.
 */
549 void osi_AdvanceSleepFD(osi_sleepFD_t *cp)
551 int idx; /* index we're dealing with */
552 int oidx; /* index we locked */
553 osi_sleepInfo_t *sip;
554 osi_sleepInfo_t *nsip;
556 idx = 0; /* so we go around once safely */
558 while(idx < OSI_SLEEPHASHSIZE) {
559 /* cp->sip should be held */
561 EnterCriticalSection(&osi_critSec[idx]);
562 oidx = idx; /* remember original index; that's the one we locked */
564 /* if there's a sleep info structure in the FD, it should be held; it
565 * is the one we just processed, so we want to move on to the next.
566 * If not, then we want to process the chain in the bucket idx points
569 if ((sip = cp->sip) == NULL) {
570 sip = osi_sleepers[idx];
572 else sip->refCount++;
575 /* it is safe to release the current sleep info guy now
576 * since we hold the bucket lock. Pull next guy out first,
577 * since if sip is deleted, Release will move him into
580 nsip = (osi_sleepInfo_t *) sip->q.nextp;
581 osi_ReleaseSleepInfo(sip);
591 LeaveCriticalSection(&osi_critSec[oidx]);
593 /* now, if we advanced to a new sleep info structure, we're
594 * done, otherwise we continue and look at the next hash bucket
595 * until we're out of them.
/* GetInfo operation for "sleep" file descriptors.
 *
 * Under osi_sleepFDCS, advances the FD with osi_AdvanceSleepFD.  If the FD
 * then refers to a sleep info structure, its value, tid and states are
 * copied into parmsp->idata[0], [1] and [2]; otherwise code is set to
 * OSI_DBRPC_EOF.
 *
 * NOTE(review): the declaration of code, the success-path assignments
 * after idata[2], and the return statement are not visible in this
 * excerpt.
 */
603 long osi_SleepFDGetInfo(osi_fd_t *ifdp, osi_remGetInfoParms_t *parmsp)
605 osi_sleepFD_t *fdp = (osi_sleepFD_t *) ifdp;
606 osi_sleepInfo_t *sip;
609 /* now, grab a mutex serializing all iterations over FDs, so that
610 * if the RPC screws up and sends us two calls on the same FD, we don't
611 * crash and burn advancing the same FD concurrently. Probably paranoia,
612 * but you generally shouldn't trust stuff coming over the network.
614 EnterCriticalSection(&osi_sleepFDCS);
616 /* this next call advances the FD to the next guy, and simultaneously validates
617 * that the info from the network is valid. If it isn't, we do our best to
618 * resynchronize our position, but we might return some info multiple times.
620 osi_AdvanceSleepFD(fdp);
622 /* now copy out info */
623 if (sip = fdp->sip) { /* one '=' */
624 parmsp->idata[0] = sip->value;
625 parmsp->idata[1] = sip->tid;
626 parmsp->idata[2] = sip->states;
631 else code = OSI_DBRPC_EOF;
633 LeaveCriticalSection(&osi_sleepFDCS);
638 /* finally, DLL-specific code for NT */
/* DLL entry routine for NT.
 * NOTE(review): the body is not visible in this excerpt.
 */
639 BOOL APIENTRY DLLMain(HANDLE inst, DWORD why, char *reserved)
644 /* some misc functions for setting hash table sizes */
/* Return 1 if x is prime, 0 otherwise.
 *
 * Uses trial division: 2 is handled specially, then only odd divisors up
 * to the square root of x are tried.  0 and 1 are not prime.
 */
int osi_IsPrime(unsigned long x)
{
    unsigned long c;

    /* 0 and 1 are not prime; 2 and 3 are */
    if (x < 2)
        return 0;
    if (x < 4)
        return 1;

    /* even numbers other than 2 aren't prime */
    if ((x & 1) == 0)
        return 0;

    /* Only divisors up to sqrt(x) need checking.  The bound is written as
     * c <= x / c rather than c * c <= x so that it cannot overflow when x
     * is close to the maximum unsigned long value.
     */
    for (c = 3; c <= x / c; c += 2) {
        /* see if x is divisible by c */
        if ((x % c) == 0)
            return 0;   /* yup, it ain't prime */
    }

    return 1;
}
670 /* return first prime number less than or equal to x */
/* Search downward from x for a prime: the loop counts c from x down while
 * c > 1.
 * NOTE(review): the loop body and the fall-through return are not visible
 * in this excerpt; presumably each candidate is tested with osi_IsPrime --
 * confirm against the complete file.
 */
671 unsigned long osi_PrimeLessThan(unsigned long x) {
674 for(c = x; c > 1; c--) {
683 /* return the # of seconds since some fixed date */
/* Accessor for the package boot time.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns osi_bootTime -- confirm.
 */
684 unsigned long osi_GetBootTime(void)
/* Panic notification hook.
 *
 * notifFunc is the callback invoked by osi_panic with the message, file
 * name and line number; it starts out NULL.  osi_InitPanic installs the
 * callback by storing anotifFunc, which is passed as a void pointer and
 * assigned without a cast.
 */
689 static int (*notifFunc)(char *, char *, long) = NULL;
691 void osi_InitPanic(void *anotifFunc)
693 notifFunc = anotifFunc;
/* Report a fatal condition: passes msgp, filep and line to the installed
 * notification callback and to osi_LogPanic.
 *
 * NOTE(review): notifFunc is initialized to NULL, and no guard around the
 * callback invocation is visible in this excerpt (lines are missing) --
 * confirm that the call is protected by a NULL check.
 */
696 void osi_panic(char *msgp, char *filep, long line)
699 (*notifFunc)(msgp, filep, line);
701 osi_LogPanic(msgp, filep, line);
704 /* get time in seconds since some relatively recent time */
/* Return the current time in seconds on the package's shifted time base.
 *
 * The current system time is converted to a FILETIME, its high dword is
 * reduced by 28000000, and the resulting count of 100ns units is divided
 * by 10,000,000 to give seconds.
 *
 * NOTE(review): two return statements (QuadPart and LowPart) are visible;
 * presumably a preprocessor conditional that is missing from this excerpt
 * selects between them -- confirm against the complete file.
 */
705 time_t osi_Time(void)
709 unsigned long remainder;
710 LARGE_INTEGER bootTime;
712 /* setup boot time values */
713 GetSystemTime(&sysTime);
714 SystemTimeToFileTime(&sysTime, &fileTime);
716 /* change the base of the time so it won't be negative for a long time */
717 fileTime.dwHighDateTime -= 28000000;
719 bootTime.HighPart = fileTime.dwHighDateTime;
720 bootTime.LowPart = fileTime.dwLowDateTime;
721 /* now, bootTime is in 100 nanosecond units, and we'd really rather
722 * have it in 1 second units, units 10,000,000 times bigger.
725 bootTime = ExtendedLargeIntegerDivide(bootTime, 10000000, &remainder);
727 return bootTime.QuadPart;
729 return bootTime.LowPart;
733 /* get time in seconds since some relatively recent time */
734 void osi_GetTime(long *timesp)
738 unsigned long remainder;
739 LARGE_INTEGER bootTime;
741 /* setup boot time values */
742 GetSystemTime(&sysTime);
743 SystemTimeToFileTime(&sysTime, &fileTime);
745 /* change the base of the time so it won't be negative for a long time */
746 fileTime.dwHighDateTime -= 28000000;
748 bootTime.HighPart = fileTime.dwHighDateTime;
749 bootTime.LowPart = fileTime.dwLowDateTime;
750 /* now, bootTime is in 100 nanosecond units, and we'd really rather
751 * have it in 1 microsecond units, units 10 times bigger.
754 bootTime = ExtendedLargeIntegerDivide(bootTime, 10, &remainder);
755 bootTime = ExtendedLargeIntegerDivide(bootTime, 1000000, &remainder);
756 timesp[0] = bootTime.LowPart; /* seconds */
757 timesp[1] = remainder; /* microseconds */