2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include "../afs/param.h"
15 #include "../afs/sysincludes.h" /* Standard vendor system headers */
16 #include "../afs/afsincludes.h" /* Afs-based standard headers */
17 #include "../afs/afs_stats.h" /* afs statistics */
19 #include <sys/adspace.h> /* for vm_att(), vm_det() */
22 static char memZero; /* address of 0 bytes for kmem_alloc */
23 extern int afs_osicred_initialized;
29 /* osi_Init -- do once per kernel installation initialization.
30 * -- On Solaris this is called from modload initialization.
31 * -- On AIX called from afs_config.
32 * -- On HP called from afsc_link.
33 * -- On SGI called from afs_init. */
36 lock_t afs_event_lock;
/* One-time initialization body.  The `once` counter guards against repeat
 * calls; the statement it guards is not visible in this excerpt. */
46 if (once++ > 0) /* just in case */
48 #if defined(AFS_HPUX_ENV)
50 #else /* AFS_HPUX_ENV */
/* Initialize afs_global_lock with whichever locking primitive the platform
 * provides.  Note that the argument order of mutex_init differs between the
 * AFS_SGI62_ENV call and the fallback call further down. */
51 #if defined(AFS_GLOBAL_SUNLOCK)
52 #if defined(AFS_SGI62_ENV)
53 mutex_init(&afs_global_lock, MUTEX_DEFAULT, "afs_global_lock");
54 #elif defined(AFS_OSF_ENV)
55 usimple_lock_init(&afs_global_lock);
56 afs_global_owner = (thread_t)0;
57 #elif defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
58 lockinit(&afs_global_lock, PLOCK, "afs global lock", 0, 0);
59 afs_global_owner = (thread_t)0;
60 #elif defined(AFS_AIX41_ENV)
61 lock_alloc((void*)&afs_global_lock, LOCK_ALLOC_PIN, 1, 1);
62 simple_lock_init((void *)&afs_global_lock);
64 #ifndef AFS_LINUX22_ENV
65 /* Linux initialization in osi directory. Should move the others. */
66 mutex_init(&afs_global_lock, "afs_global_lock", MUTEX_DEFAULT, NULL);
69 /* afs_rxglobal_lock is initialized in rx_Init. */
71 #endif /* AFS_HPUX_ENV */
/* Set up the shared afs_osi_cred only when afs_osicred_initialized is still
 * clear: zero it, take a reference on it, then set the flag. */
73 if ( !afs_osicred_initialized )
75 memset((char *)&afs_osi_cred, 0, sizeof(struct AFS_UCRED));
76 crhold(&afs_osi_cred); /* don't let it evaporate */
77 afs_osicred_initialized = 1;
/* Clear both the pid and the sysid fields of osi_flid. */
80 osi_flid.fl_pid = osi_flid.fl_sysid = 0;
/* Active-use test for a vcache entry.
 *
 * Each platform branch tests avc->opens > 0 together with a platform-specific
 * indicator that the file is mapped or in use as text (the CMAPPED state bit,
 * the VTEXT vnode flag combined with inode_uncache_try(), AFS_VN_MAPPED(), or
 * the plain VTEXT flag).  The visible branches return 1 when the test
 * succeeds; the value returned otherwise is not visible in this excerpt. */
85 register struct vcache *avc; {
86 AFS_STATCNT(osi_Active);
/* Every platform macro is tested with defined(), so the outcome depends only
 * on whether the macro exists and not on what it expands to (a bare
 * "(AFS_LINUX20_ENV)" would be a syntax error if the macro were defined
 * with an empty body). */
87 #if defined(AFS_SUN_ENV) || defined(AFS_AIX_ENV) || defined(AFS_OSF_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_LINUX20_ENV) || defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
88 if ((avc->opens > 0) || (avc->states & CMAPPED)) return 1; /* XXX: Warning, verify this XXX */
90 #if defined(AFS_MACH_ENV)
91 if (avc->opens > 0 || ((avc->v.v_flag & VTEXT) && !inode_uncache_try(avc))) return 1;
93 #if defined(AFS_SGI_ENV)
94 if ((avc->opens > 0) || AFS_VN_MAPPED((struct vnode *)avc))
97 if (avc->opens > 0 || (avc->v.v_flag & VTEXT)) return(1);
99 #endif /* AFS_MACH_ENV */
104 /* this call, unlike osi_FlushText, is supposed to discard caches that may
105 contain invalid information if a file is written remotely, but that may
106 contain valid information that needs to be written back if the file is
107 being written locally. It doesn't subsume osi_FlushText, since the latter
108 function may be needed to flush caches that are invalidated by local writes.
110 avc->pvnLock is already held, avc->lock is guaranteed not to be held (by
/* osi_FlushPages -- hand avc to osi_VM_FlushPages() when its DataVersion is
 * newer than the version recorded in avc->mapDV.
 *
 *   avc   - vcache entry; avc->lock is taken and dropped inside this routine.
 *   credp - credentials passed through unchanged to osi_VM_FlushPages().
 *
 * Nothing is flushed when mapDV already covers the current DataVersion, or
 * when there are execs/writers and afs_DirtyPages() reports dirty pages.
 * The early-exit statements themselves are not visible in this excerpt. */
113 void osi_FlushPages(avc, credp)
114 register struct vcache *avc;
115 struct AFS_UCRED *credp;
/* First pass: evaluate the skip condition under the read lock.
 * NOTE(review): a comment elsewhere in this file lists "avc must be
 * write-locked" as a prerequisite of what appears to be afs_DirtyPages, yet
 * this first call is made under the read lock -- confirm which is intended. */
118 ObtainReadLock(&avc->lock);
119 /* If we've already purged this version, or if we're the ones
120 writing this version, don't flush it (could lose the
121 data we're writing). */
122 if ((hcmp((avc->m.DataVersion), (avc->mapDV)) <= 0) ||
123 ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
124 ReleaseReadLock(&avc->lock);
/* Second pass: swap the read lock for the write lock (lock id 10) and repeat
 * the same test before touching mapDV. */
127 ReleaseReadLock(&avc->lock);
128 ObtainWriteLock(&avc->lock,10);
130 if ((hcmp((avc->m.DataVersion), (avc->mapDV)) <= 0) ||
131 ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
132 ReleaseWriteLock(&avc->lock);
/* mapDV is still zero: record the current DataVersion and drop the lock. */
135 if (hiszero(avc->mapDV)) {
136 hset(avc->mapDV, avc->m.DataVersion);
137 ReleaseWriteLock(&avc->lock);
/* Capture the version being flushed in origDV before the lock is released. */
141 AFS_STATCNT(osi_FlushPages);
142 hset(origDV, avc->m.DataVersion);
143 afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc,
144 ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->m.Length);
/* The VM flush runs without avc->lock held; the write lock is retaken
 * afterwards (lock id 88) only to store origDV into mapDV. */
146 ReleaseWriteLock(&avc->lock);
148 osi_VM_FlushPages(avc, credp);
150 ObtainWriteLock(&avc->lock,88);
152 /* do this last, and to original version, since stores may occur
153 while executing above PUTPAGE call */
154 hset(avc->mapDV, origDV);
155 ReleaseWriteLock(&avc->lock);
158 afs_lock_t afs_ftf; /* flush text lock */
162 /* This call is supposed to flush all caches that might be invalidated
163 * by either a local write operation or a write operation done on
164 * another client. This call may be called repeatedly on the same
165 * version of a file, even while a file is being written, so it
166 * shouldn't do anything that would discard newly written data before
167 * it is written to the file system. */
/* osi_FlushText_really -- flush text-object state for vp.
 *
 * Returns immediately when vp->flushDV already covers vp->m.DataVersion.
 * Otherwise the work is done with the afs_ftf lock write-held (lock id 317):
 * the current DataVersion is captured in fdv, the VTEXT flag on the vnode is
 * examined, and fdv is finally stored into vp->flushDV.  One path hands the
 * whole job to afs_gfs_FlushText(); the preprocessor conditionals that select
 * between the paths are not visible in this excerpt. */
169 void osi_FlushText_really(vp)
170 register struct vcache *vp; {
171 afs_hyper_t fdv; /* version before which we'll flush */
173 AFS_STATCNT(osi_FlushText);
174 /* see if we've already flushed this data version */
175 if (hcmp(vp->m.DataVersion, vp->flushDV) <= 0) return;
/* Local declaration followed by a call: delegate to afs_gfs_FlushText(). */
179 void afs_gfs_FlushText();
180 afs_gfs_FlushText(vp);
/* Serialize on afs_ftf and remember which version is being flushed. */
185 MObtainWriteLock(&afs_ftf,317);
186 hset(fdv, vp->m.DataVersion);
188 /* why this disgusting code below?
189 * xuntext, called by xrele, doesn't notice when it is called
190 * with a freed text object. Sun continually calls xrele or xuntext
191 * without any locking, as long as VTEXT is set on the
192 * corresponding vnode.
193 * But, if the text object is locked when you check the VTEXT
194 * flag, several processes can wait in xuntext, waiting for the
195 * text lock; when the second one finally enters xuntext's
196 * critical region, the text object is already free, but the check
197 * was already done by xuntext's caller.
198 * Even worse, it turns out that xalloc locks the text object
199 * before reading or stating a file via the vnode layer. Thus, we
200 * could end up in getdcache, being asked to bring in a new
201 * version of a file, but the corresponding text object could be
202 * locked. We can't flush the text object without causing
203 * deadlock, so now we just don't try to lock the text object
204 * unless it is guaranteed to work. And we try to flush the text
205 * when we need to a bit more often at the vnode layer. Sun
206 * really blew the vm-cache flushing interface.
209 #if defined (AFS_HPUX_ENV)
210 if (vp->v.v_flag & VTEXT) {
213 if (vp->v.v_flag & VTEXT) { /* still has a text object? */
214 MReleaseWriteLock(&afs_ftf);
220 /* next do the stuff that need not check for deadlock problems */
223 /* finally, record that we've done it */
224 hset(vp->flushDV, fdv);
225 MReleaseWriteLock(&afs_ftf);
227 #endif /* AFS_DEC_ENV */
231 /* I don't really like using xinval() here, because it kills processes
232 * a bit aggressively. Previous incarnations of this functionality
233 * used to use xrele() instead of xinval, and didn't invoke
234 * cacheinval(). But they would panic. So it might be worth looking
235 * into some middle ground...
/* afs_gfs_FlushText -- text flush variant that works on a `gp` object and
 * its GTEXT flag (the declaration and origin of gp are not visible in this
 * excerpt).
 *
 * The routine write-locks afs_ftf (lock id 318), captures vp->m.DataVersion
 * in fdv, and gives up -- releasing afs_ftf -- when the text object reached
 * through gp->g_textp has XLOCK set.  On the path that runs to completion it
 * stores fdv into vp->flushDV before releasing afs_ftf. */
238 afs_gfs_FlushText(vp)
239 register struct vcache *vp; {
240 afs_hyper_t fdv; /* version before which we'll flush */
241 register struct text *xp;
244 MObtainWriteLock(&afs_ftf,318);
245 hset(fdv, vp->m.DataVersion);
249 /* this happens frequently after cores are created. */
250 MReleaseWriteLock(&afs_ftf);
254 if (gp->g_flag & GTEXT) {
256 xp = (struct text *) gp->g_textp ;
257 /* if text object is locked, give up */
258 if (xp && (xp->x_flag & XLOCK)) {
259 MReleaseWriteLock(&afs_ftf);
263 else xp = (struct text *) 0;
265 if (gp->g_flag & GTEXT) {/* still has a text object? */
270 /* next do the stuff that need not check for deadlock problems */
271 /* maybe xinval(gp); here instead of above */
274 /* finally, record that we've done it */
275 hset(vp->flushDV, fdv);
277 MReleaseWriteLock(&afs_ftf);
279 #endif /* AFS_DEC_ENV */
281 #endif /* AFS_TEXT_ENV */
283 /* mask signals in afsds */
284 void afs_osi_MaskSignals(){
285 #ifdef AFS_LINUX22_ENV
290 /* unmask signals in rxk listener */
291 void afs_osi_UnmaskRxkSignals(){
292 #ifdef AFS_LINUX22_ENV
297 /* register rxk listener proc info */
298 void afs_osi_RxkRegister(){
299 #ifdef AFS_LINUX22_ENV
304 /* procedure for making our processes as invisible as we can */
/* afs_osi_Invisible -- per-platform steps applied to the calling process.
 * Under AFS_LINUX22_ENV the visible action is a call to afs_osi_MaskSignals().
 * The other visible variants OR the SSYS bit into p_type or p_flag, call
 * set_system_proc(), or OR P_SYSTEM into p_flag; the conditionals selecting
 * most of them are not visible in this excerpt. */
305 void afs_osi_Invisible() {
306 #ifdef AFS_LINUX22_ENV
307 afs_osi_MaskSignals();
310 u.u_procp->p_type |= SSYS;
313 curproc->p_flag |= SSYS;
316 set_system_proc(u.u_procp);
318 #if defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
319 /* maybe call init_process instead? */
320 current_proc()->p_flag |= P_SYSTEM;
322 #if defined(AFS_SGI_ENV)
324 #endif /* AFS_SGI_ENV */
326 AFS_STATCNT(osi_Invisible);
330 #ifndef AFS_LINUX20_ENV /* Linux version in osi_misc.c */
331 /* set the real time */
333 register osi_timeval_t *atv; {
336 struct timestruc_t t;
338 t.tv_sec = atv->tv_sec;
339 t.tv_nsec = atv->tv_usec * 1000;
340 ksettimer(&t); /* Was -> settimer(TIMEOFDAY, &t); */
347 * To get more than second resolution we can use adjtime. The problem
348 * is that the usecs from the server are wrong (by now) so it isn't
349 * worth complicating the following code.
354 extern int stime(struct stimea *time, rval_t *rvp);
356 sta.time = atv->tv_sec;
360 #if defined(AFS_SGI_ENV)
364 extern int stime(struct stimea *time);
367 sta.time = atv->tv_sec;
371 #if defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
376 /* stolen from kern_time.c */
378 boottime.tv_sec += atv->tv_sec - time.tv_sec;
384 t.tv_sec = atv->tv_sec;
385 t.tv_usec = atv->tv_usec;
386 s = spl7(); time = t; (void) splx(s);
392 s = splclock(); time = *atv; (void) splx(s);
397 logtchg(atv->tv_sec);
399 #endif /* AFS_DARWIN_ENV || AFS_FBSD_ENV */
400 #endif /* AFS_SGI_ENV */
401 #endif /* AFS_SUN55_ENV */
402 #endif /* AFS_SUN5_ENV */
403 #endif /* AFS_AIX32_ENV */
404 AFS_STATCNT(osi_SetTime);
407 #endif /* AFS_LINUX20_ENV */
/* afs_osi_Alloc -- allocate x bytes.
 *
 * A request for 0 bytes returns the address of the file-static memZero byte
 * instead of calling the allocator.  Otherwise the outstanding-allocation
 * statistics are bumped first, and the request goes to osi_linux_alloc()
 * under AFS_LINUX20_ENV or to AFS_KALLOC() elsewhere.  osi_Panic() is called
 * with an out-of-memory message; the condition guarding that call is not
 * visible in this excerpt (presumably a NULL result from AFS_KALLOC). */
410 void *afs_osi_Alloc(size_t x)
412 register struct osimem *tm = NULL;
415 AFS_STATCNT(osi_Alloc);
416 /* 0-length allocs may return NULL ptr from AFS_KALLOC, so we special-case
417 things so that NULL returned iff an error occurred */
418 if (x == 0) return &memZero;
/* Statistics are updated before the allocation is attempted. */
420 AFS_STATS(afs_stats_cmperf.OutStandingAllocs++);
421 AFS_STATS(afs_stats_cmperf.OutStandingMemUsage += x);
422 #ifdef AFS_LINUX20_ENV
423 return osi_linux_alloc(x);
/* `size` is set on a line that is not visible here. */
426 tm = (struct osimem *) AFS_KALLOC(size);
/* NOTE(review): the argument matching %d is on a line not visible here; if it
 * is the size_t parameter, the conversion specifier does not match -- confirm. */
429 osi_Panic("osi_Alloc: Couldn't allocate %d bytes; out of memory!\n",
436 #if defined(AFS_SUN_ENV) || defined(AFS_SGI_ENV)
/* afs_osi_Alloc_NoSleep -- like afs_osi_Alloc but built on
 * AFS_KALLOC_NOSLEEP().  A request for 0 bytes returns the address of the
 * file-static memZero byte.  The call is counted under the osi_Alloc
 * statistic, the same counter afs_osi_Alloc uses.  What is returned after the
 * allocator call is not visible in this excerpt. */
438 void *afs_osi_Alloc_NoSleep(size_t x)
440 register struct osimem *tm;
443 AFS_STATCNT(osi_Alloc);
444 /* 0-length allocs may return NULL ptr from AFS_KALLOC, so we special-case
445 things so that NULL returned iff an error occurred */
446 if (x == 0) return &memZero;
/* The outstanding-allocation statistics are bumped before the allocator is
 * called; whether they are corrected on failure is not visible here. */
449 AFS_STATS(afs_stats_cmperf.OutStandingAllocs++);
450 AFS_STATS(afs_stats_cmperf.OutStandingMemUsage += x);
451 tm = (struct osimem *) AFS_KALLOC_NOSLEEP(size);
455 #endif /* SUN || SGI */
/* afs_osi_Free -- release memory obtained from afs_osi_Alloc.
 *
 *   x     - pointer to release; the memZero sentinel handed out for
 *           zero-length requests is recognized and ignored.
 *   asize - byte count subtracted from the outstanding-memory statistic and
 *           passed to AFS_KFREE().
 *
 * The AFS_LINUX20_ENV release path is not visible in this excerpt. */
457 void afs_osi_Free(void *x, size_t asize)
459 register struct osimem *tm, **lm, *um;
461 AFS_STATCNT(osi_Free);
462 if (x == &memZero) return; /* check for putting memZero back */
464 AFS_STATS(afs_stats_cmperf.OutStandingAllocs--);
465 AFS_STATS(afs_stats_cmperf.OutStandingMemUsage -= asize);
466 #ifdef AFS_LINUX20_ENV
469 AFS_KFREE((struct osimem *)x, asize);
474 /* ? is it moderately likely that there are dirty VM pages associated with
477 * Prereqs: avc must be write-locked
479 * System Dependencies: - *must* support each type of system for which
480 * memory mapped files are supported, even if all
481 * it does is return TRUE;
483 * NB: this routine should err on the side of caution for ProcessFS to work
484 * correctly (or at least, not to introduce worse bugs than already exist)
/* With no execs or writers on the vcache the answer is 0 straight away. */
493 if (avc->execsOrWriters <= 0)
494 return 0; /* can't be many dirty pages here, I guess */
496 #if defined (AFS_AIX32_ENV)
498 /* because of the level of hardware involvement with VM and all the
499 * warnings about "This routine must be called at VMM interrupt
500 * level", I thought it would be safest to disable interrupts while
501 * looking at the software page fault table. */
503 /* convert vm handle into index into array: I think that stoinio is
504 * always zero... Look into this XXX */
505 #define VMHASH(handle) ( \
506 ( ((handle) & ~vmker.stoinio) \
507 ^ ((((handle) & ~vmker.stoinio) & vmker.stoimask) << vmker.stoihash) \
511 unsigned int pagef, pri, index, next;
512 extern struct vmkerdata vmker;
514 index = VMHASH(avc->vmh);
515 if (scb_valid(index)) { /* could almost be an ASSERT */
517 pri = disable_ints();
/* NOTE(review): pagef is declared unsigned int above, so `pagef >= 0` can
 * never be false; this loop can only end through a statement in its body
 * (not visible in this excerpt).  Confirm the intended end-of-list test. */
518 for (pagef = scb_sidlist(index); pagef >= 0; pagef = next) {
/* Fetch the forward link before the frame is examined. */
519 next = pft_sidfwd(pagef);
520 if (pft_modbit(pagef)) { /* has page frame been modified? */
530 #endif /* AFS_AIX32_ENV */
532 #if defined (AFS_SUN_ENV)
/* On AFS_SUN_ENV the page list hanging off the vnode is walked only when the
 * vcache has the CMAPPED state bit set. */
533 if (avc->states & CMAPPED) {
535 for (pg = avc->v.v_s.v_Pages ; pg ; pg = pg->p_vpnext) {
548 * Solaris osi_ReleaseVM should not drop and re-obtain the vcache entry lock.
549 * This leads to bad races when osi_ReleaseVM() is called from
550 * afs_InvalidateAllSegments().
552 * We can do this because Solaris osi_VM_Truncate() doesn't care whether the
553 * vcache entry lock is held or not.
555 * For other platforms, in some cases osi_VM_Truncate() doesn't care, but
556 * there may be cases where it does care. If so, it would be good to fix
557 * them so they don't care. Until then, we assume the worst.
559 * Locking: the vcache entry lock is held. It is dropped and re-obtained.
/* osi_ReleaseVM(avc, acred) -- call osi_VM_Truncate(avc, 0, acred).
 * Two variants are visible: one makes the call with avc->lock left as it is,
 * the other releases the write lock around the call and re-obtains it
 * afterwards (lock id 80).  The conditional selecting between them is not
 * visible in this excerpt; presumably the first is the Solaris variant that
 * the preceding comment describes. */
562 osi_ReleaseVM(avc, acred)
564 struct AFS_UCRED *acred;
/* Variant that keeps the vcache lock held across the truncate. */
568 osi_VM_Truncate(avc, 0, acred);
/* Variant that drops the vcache write lock for the duration of the truncate. */
571 ReleaseWriteLock(&avc->lock);
573 osi_VM_Truncate(avc, 0, acred);
575 ObtainWriteLock(&avc->lock, 80);
582 extern int afs_cold_shutdown;
584 AFS_STATCNT(shutdown_osi);
585 if (afs_cold_shutdown) {
586 LOCK_INIT(&afs_ftf, "afs_ftf");
594 return afs_suser(credp);
602 /* afs_osi_TraverseProcTable() - Walk through the system's process
603 * table, calling afs_GCPAGs_perproc_func() for each process.
606 #if defined(AFS_SUN5_ENV)
/* AFS_SUN5_ENV variant: follow the p_next links starting at practive until a
 * NULL link is reached, passing each entry to afs_GCPAGs_perproc_func().
 * Any locking around the walk is not visible in this excerpt. */
607 void afs_osi_TraverseProcTable()
610 for (prp = practive; prp != NULL; prp = prp->p_next) {
611 afs_GCPAGs_perproc_func(prp);
616 #if defined(AFS_HPUX_ENV)
619 * NOTE: h/proc_private.h gives the process table locking rules
620 * It indicates that access to p_cred must be protected by
622 * mp_mtproc_unlock(p);
624 * The code in sys/pm_prot.c uses pcred_lock() to protect access to
625 * the process creds, and uses mp_mtproc_lock() only for audit-related
626 * changes. To be safe, we use both.
/* AFS_HPUX_ENV variant: with the activeproc_lock and sched_lock spinlocks
 * held, follow the p_fandx index chain through proc[] starting at proc, and
 * pass entries to afs_GCPAGs_perproc_func().  The loop runs until endchain
 * becomes non-zero; the statements inside the p_fandx == 0 test are not
 * visible in this excerpt. */
629 void afs_osi_TraverseProcTable()
634 MP_SPINLOCK(activeproc_lock);
635 MP_SPINLOCK(sched_lock);
639 * Instead of iterating through all of proc[], traverse only
640 * the list of active processes. As an example of this,
641 * see foreach_process() in sys/vm_sched.c.
643 * We hold the locks for the entire scan in order to get a
644 * consistent view of the current set of creds.
647 for(p = proc; endchain == 0; p = &proc[p->p_fandx]) {
648 if (p->p_fandx == 0) {
656 afs_GCPAGs_perproc_func(p);
/* The spinlocks are released in the reverse of the order they were taken. */
661 MP_SPINUNLOCK(sched_lock);
662 MP_SPINUNLOCK(activeproc_lock);
666 #if defined(AFS_SGI_ENV)
669 /* TODO: Fix this later. */
670 static int SGI_ProcScanFunc(void *p, void *arg, int mode)
674 #else /* AFS_SGI65_ENV */
675 static int SGI_ProcScanFunc(proc_t *p, void *arg, int mode)
677 afs_int32 (*perproc_func)(struct proc *) = arg;
679 /* we pass in the function pointer for arg,
680 * mode ==0 for startup call, ==1 for each valid proc,
681 * and ==2 for terminate call.
684 code = perproc_func(p);
688 #endif /* AFS_SGI65_ENV */
690 void afs_osi_TraverseProcTable()
692 procscan(SGI_ProcScanFunc, afs_GCPAGs_perproc_func);
694 #endif /* AFS_SGI_ENV */
696 #if defined(AFS_AIX_ENV)
/* AFS_AIX_ENV variant: walk the process table that starts at v.vb_proc with
 * proc_tbl_lock held, advancing by afs_gcpags_procsize bytes per slot rather
 * than by sizeof(struct proc).  Each slot is tested against the SNONE, SIDL
 * and SEXIT states and sanity-checked before afs_GCPAGs_perproc_func() is
 * called on it.  The loop bound and the statements guarded by the individual
 * tests are not visible in this excerpt. */
697 void afs_osi_TraverseProcTable()
703 * For binary compatibility, on AIX we need to be careful to use the
704 * proper size of a struct proc, even if it is different from what
705 * we were compiled with.
707 if (!afs_gcpags_procsize)
710 simple_lock(&proc_tbl_lock);
711 for (p = (struct proc *)v.vb_proc, i = 0;
713 p = (struct proc *)((char *)p + afs_gcpags_procsize), i++) {
715 if (p->p_stat == SNONE)
717 if (p->p_stat == SIDL)
719 if (p->p_stat == SEXIT)
/* Sanity check: PROCMASK(pid) must equal the slot index; a mismatch is
 * recorded in afs_gcpags as AFS_GCPAGS_EPIDCHECK. */
724 if (PROCMASK(p->p_pid) != i) {
725 afs_gcpags = AFS_GCPAGS_EPIDCHECK;
/* Sanity check: p_nice must lie within [P_NICE_MIN, P_NICE_MAX]; a value
 * outside that range is recorded as AFS_GCPAGS_ENICECHECK. */
731 if ((p->p_nice < P_NICE_MIN) || (P_NICE_MAX < p->p_nice)) {
732 afs_gcpags = AFS_GCPAGS_ENICECHECK;
736 afs_GCPAGs_perproc_func(p);
738 simple_unlock(&proc_tbl_lock);
742 #if defined(AFS_OSF_ENV)
/* AFS_OSF_ENV variant: scan the pid table from pidtab up to, but not
 * including, pidtab + npid, and call afs_GCPAGs_perproc_func() for every
 * entry whose pe_proc is not PROC_NULL.  The declaration of npid and any
 * locking around the scan are not visible in this excerpt. */
743 void afs_osi_TraverseProcTable()
745 struct pid_entry *pe;
747 extern struct pid_entry *pidtab;
/* One-past-the-end pointer used as the loop bound below. */
749 #define pidNPID (pidtab + npid)
754 for (pe = pidtab; pe < pidNPID; ++pe) {
755 if (pe->pe_proc != PROC_NULL)
756 afs_GCPAGs_perproc_func(pe->pe_proc);
762 #if defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
/* AFS_DARWIN_ENV / AFS_FBSD_ENV variant: iterate over allproc through the
 * p_list linkage and call afs_GCPAGs_perproc_func() on each process.  Entries
 * are first tested for the SIDL and SZOMB states and for the P_SYSTEM flag;
 * the statements guarded by those tests are not visible in this excerpt
 * (presumably such entries are skipped). */
763 void afs_osi_TraverseProcTable()
766 LIST_FOREACH(p, &allproc, p_list) {
767 if (p->p_stat == SIDL)
769 if (p->p_stat == SZOMB)
771 if (p->p_flag & P_SYSTEM)
773 afs_GCPAGs_perproc_func(p);
778 /* return a pointer (sometimes a static copy) to the cred for a
780 * subsequent calls may overwrite the previously returned value.
783 #if defined(AFS_SGI65_ENV)
784 const struct AFS_UCRED *afs_osi_proc2cred(AFS_PROC *pr)
788 #elif defined(AFS_HPUX_ENV)
789 const struct AFS_UCRED *afs_osi_proc2cred(proc_t *p)
795 * Cannot use afs_warnuser() here, as the code path
796 * eventually wants to grab sched_lock, which is
802 #elif defined(AFS_AIX_ENV)
804 /* GLOBAL DECLARATIONS */
806 extern int xmattach(); /* fills out cross memory descriptor */
807 extern int xmdetach(); /* decrements reference count to segment */
810 * LOCKS: the caller must do
811 * simple_lock(&proc_tbl_lock);
812 * simple_unlock(&proc_tbl_lock);
813 * around calls to this function.
816 const struct AFS_UCRED *afs_osi_proc2cred(AFS_PROC *pproc)
818 struct AFS_UCRED *pcred = 0;
821 * pointer to process user structure valid in *our*
824 * The user structure for a process is stored in the user
825 * address space (as distinct from the kernel address
826 * space), and so to refer to the user structure of a
827 * different process we must employ special measures.
829 * I followed the example used in the AIX getproc() system
830 * call in bos/kernel/proc/getproc.c
832 struct user *xmem_userp;
834 struct xmem dp; /* ptr to xmem descriptor */
835 int xm; /* xmem result */
842 * The process private segment in which the user
843 * area is located may disappear. We need to increment
844 * its use count. Therefore we
845 * - get the proc_tbl_lock to hold the segment.
846 * - get the p_lock to lockout vm_cleardata.
847 * - vm_att to load the segment register (no check)
848 * - xmattach to bump its use count.
849 * - release the p_lock.
850 * - release the proc_tbl_lock.
851 * - do whatever we need.
852 * - xmdetach to decrement the use count.
853 * - vm_det to free the segment register (no check)
858 /* simple_lock(&proc_tbl_lock); */
859 if (pproc->p_adspace != NULLSEGVAL) {
861 simple_lock(&pproc->p_lock);
863 if (pproc->p_threadcount &&
864 pproc->p_threadlist) {
867 * arbitrarily pick the first thread in pproc
869 struct thread *pproc_thread =
873 * location of 'struct user' in pproc's
876 struct user *pproc_userp =
877 pproc_thread->t_userp;
880 * create a pointer valid in my own address space
884 (struct user *)vm_att(pproc->p_adspace,
887 dp.aspace_id = XMEM_INVAL;
888 xm = xmattach(xmem_userp,
893 simple_unlock(&pproc->p_lock);
895 /* simple_unlock(&proc_tbl_lock); */
896 if (xm == XMEM_SUCC) {
898 static struct AFS_UCRED cred;
901 * What locking should we use to protect access to the user
902 * area? If needed also change the code in AIX/osi_groups.c.
905 /* copy cred to local address space */
906 cred = *xmem_userp->U_cred;
912 vm_det((void *)xmem_userp);
918 #elif defined(AFS_OSF_ENV)
919 const struct AFS_UCRED *afs_osi_proc2cred(AFS_PROC *pr)
921 struct AFS_UCRED *rv=NULL;
927 if((pr->p_stat == SSLEEP) ||
928 (pr->p_stat == SRUN) ||
929 (pr->p_stat == SSTOP))
934 #elif defined(AFS_DARWIN_ENV) || defined(AFS_FBSD_ENV)
/* AFS_DARWIN_ENV / AFS_FBSD_ENV variant of afs_osi_proc2cred.
 *
 * When pr is in the SSLEEP, SRUN or SSTOP state, the uid, group count and
 * group list found through pr->p_cred->pc_ucred are copied into the
 * function-static `cr`.  rv starts out NULL; the assignment to rv and the
 * return statement are not visible in this excerpt.  Because `cr` is static,
 * a later call overwrites what an earlier call stored there. */
935 const struct AFS_UCRED *afs_osi_proc2cred(AFS_PROC *pr)
937 struct AFS_UCRED *rv=NULL;
938 static struct AFS_UCRED cr;
/* Only processes in one of these three states have their cred copied. */
944 if((pr->p_stat == SSLEEP) ||
945 (pr->p_stat == SRUN) ||
946 (pr->p_stat == SSTOP)) {
949 cr.cr_uid=pr->p_cred->pc_ucred->cr_uid;
950 cr.cr_ngroups=pr->p_cred->pc_ucred->cr_ngroups;
951 memcpy(cr.cr_groups, pr->p_cred->pc_ucred->cr_groups, NGROUPS *
960 const struct AFS_UCRED *afs_osi_proc2cred(AFS_PROC *pr)
962 struct AFS_UCRED *rv=NULL;
973 #endif /* AFS_GCPAGS */