2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
15 Institution: The Information Technology Center, Carnegie-Mellon University
19 #include <afs/param.h>
26 #include <sys/param.h>
29 #include <sys/statvfs.h>
30 #endif /* AFS_HAVE_STATVFS */
32 #if !defined(AFS_SGI_ENV)
34 #include <sys/mount.h>
36 #else /* AFS_OSF_ENV */
37 #ifdef AFS_VFSINCL_ENV
40 #include <sys/fs/ufs_fs.h>
44 #else /* AFS_VFSINCL_ENV */
45 #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX22_ENV)
48 #endif /* AFS_VFSINCL_ENV */
49 #endif /* AFS_OSF_ENV */
50 #include <sys/errno.h>
56 #include <sys/lockf.h>
62 #include <checklist.h>
64 #if defined(AFS_SUN_ENV)
69 #include <sys/mnttab.h>
70 #include <sys/mntent.h>
72 #ifdef AFS_LINUX22_ENV
74 #include <sys/statfs.h>
81 #endif /* AFS_SGI_ENV */
82 #endif /* AFS_NT40_ENV */
83 #if defined(AFS_SGI_ENV)
84 #include <sys/errno.h>
92 #include <afs/afsint.h>
94 #include <afs/errors.h>
97 #include <afs/afssyscalls.h>
103 #include "namei_ops.h"
105 #endif /* AFS_NAMEI_ENV */
108 #include "partition.h"
109 #ifdef AFS_PTHREAD_ENV
111 #else /* AFS_PTHREAD_ENV */
112 #include <afs/assert.h>
113 #endif /* AFS_PTHREAD_ENV */
115 #if defined(AFS_HPUX_ENV)
116 #include <sys/types.h>
117 #include <sys/privgrp.h>
118 #endif /* defined(AFS_HPUX_ENV) */
121 #include <jfs/filsys.h>
/* Low-water mark, as a percentage of a partition's totalUsable blocks, used by
 * the free-space tests in VAdjustDiskUsage_r() and VDiskUsage_r(). */
124 int aixlow_water = 8; /* default 8% */
/* Head of the singly linked list (chained through ->next) of the partitions
 * registered by VInitPartition_r(). */
125 struct DiskPartition *DiskPartitionList;
127 #ifdef AFS_SGI_XFS_IOPS_ENV
128 /* Verify that the on disk XFS inodes on the partition are large enough to
129 * hold the AFS attribute. Returns -1 if the attribute can't be set or is
130 * too small to fit in the inode. Returns 0 if the attribute does fit in
133 #include <afs/xfsattrs.h>
/* Probes the partition by setting a scratch AFS_XFS_ATTR attribute on `part`
 * and then examining the file system's extended-attribute data.
 * NOTE(review): several original lines (braces, the declarations of
 * junk/fd/fsx, the return statements) are absent from this listing; the notes
 * below cover only the statements that are shown. */
134 static int VerifyXFSInodeSize(char *part, char *fstype)
/* Byte count of the scratch attribute handed to attr_set(). */
137 int length = SIZEOF_XFS_ATTR_T;
/* strcmp() is non-zero when fstype is anything other than "xfs". */
142 if (strcmp("xfs", fstype))
/* Set the scratch attribute with the ATTR_ROOT flag; on success open the
 * partition and query it with F_FSGETXATTRA. */
145 if (attr_set(part, AFS_XFS_ATTR, &junk, length, ATTR_ROOT) == 0) {
146 if (((fd=open(part, O_RDONLY, 0)) != -1)
147 && (fcntl(fd, F_FSGETXATTRA, &fsx) == 0)) {
/* A non-zero fsx_nextents is what gets reported as "inodes too small". */
149 if (fsx.fsx_nextents) {
150 Log("Partition %s: XFS inodes too small, exiting.\n", part);
151 Log("Run xfs_size_check utility and remake partitions.\n");
/* Remove the scratch attribute again; the result is deliberately discarded. */
159 (void) attr_remove(part, AFS_XFS_ATTR, ATTR_ROOT);
/* Creates the DiskPartition record for `path`, links it into
 * DiskPartitionList and fills in its name, device name, a default f_files and
 * its initial disk usage.
 * NOTE(review): no NULL check of the malloc() result is visible in this
 * listing, and the strcpy() calls are unbounded - confirm the sizes of
 * dp->name and dp->devName against the longest path/devname a caller can
 * pass. */
166 static void VInitPartition_r(char *path, char *devname, Device dev)
168 struct DiskPartition *dp, *op;
169 dp = (struct DiskPartition *) malloc(sizeof (struct DiskPartition));
170 /* Add it to the end, to preserve order when we print statistics */
171 for (op = DiskPartitionList; op; op = op->next) {
/* Direct assignment of the list head; presumably the empty-list case (the
 * surrounding condition is not visible here). */
178 DiskPartitionList = dp;
180 strcpy(dp->name, path);
/* NAMEI builds other than NT use the mount path itself as the device name and
 * derive the device id from it via volutil_GetPartitionID(). */
181 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
182 strcpy(dp->devName, path);
183 dp->device = volutil_GetPartitionID(path);
/* Presumably the alternative branch of the conditional above: keep the
 * caller-supplied device name. */
185 strcpy(dp->devName, devname);
190 dp->f_files = 1; /* just a default value */
/* Only when running as the fileserver: call namei_ViceREADME() on the
 * partition path; its result is discarded. */
191 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
192 if (programType == fileServer)
193 (void) namei_ViceREADME(VPartitionPath(dp));
/* Fill in the usage figures for the new partition. */
195 VSetPartitionDiskUsage_r(dp);
/* Wrapper that forwards its arguments unchanged to VInitPartition_r().
 * NOTE(review): whatever surrounds the call (for example locking) is not
 * visible in this listing. */
198 static void VInitPartition(char *path, char *devname, Device dev)
201 VInitPartition_r(path, devname, dev);
206 /* VAttachPartitions() finds the vice partitions on this server. Calls
207 * VCheckPartition() to do some basic checks on the partition. If the partition
208 * is a valid vice partition, VCheckPartition will add it to the DiskPartition
210 * Returns the number of errors returned by VCheckPartition. An error in
211 * VCheckPartition means that partition is a valid vice partition but the
212 * fileserver should not start because of the error found on that partition.
215 * No specific user space file system checks, since we don't know what
216 * is being used for vice partitions.
218 * Use partition name as devname.
/* Decides whether mount point `part` is to be served as a vice partition and,
 * if so, registers it through VInitPartition(); `devname` is passed through
 * as the device name. The values returned on each path are not visible in
 * this listing; callers in this file test the result for < 0.
 * NOTE(review): salvpath is built with unbounded strcpy()/strcat() into a
 * MAXPATHLEN buffer - confirm that `part` plus "/FORCESALVAGE" always fits.
 * NOTE(review): the FORCESALVAGE stat() writes into the same `status` that
 * the st_fstype/ROOTINO tests read afterwards; unless a line not shown here
 * re-stats `part`, this relies on a failed stat() leaving the buffer
 * untouched - confirm. */
220 int VCheckPartition(part, devname)
226 /* Only keep track of "/vicepx" partitions since it can get hairy
227 * when NFS mounts are involved.. */
228 if (strncmp(part, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE)) {
231 if (stat(part, &status) < 0) {
232 Log("VInitVnodes: Couldn't find file system %s; ignored\n", part);
/* Unless AFS_AIX32_ENV is defined, a fileserver looks for a FORCESALVAGE
 * file directly under the partition root. */
236 #ifndef AFS_AIX32_ENV
237 if (programType == fileServer) {
238 char salvpath[MAXPATHLEN];
239 strcpy(salvpath, part);
240 strcat(salvpath, "/FORCESALVAGE");
241 if (stat(salvpath, &status) == 0) {
242 Log("VInitVnodes: Found %s; aborting\n", salvpath);
/* AFS_SGI_XFS_IOPS_ENV builds additionally run the XFS inode-size probe. */
248 #ifdef AFS_SGI_XFS_IOPS_ENV
249 if (VerifyXFSInodeSize(part, status.st_fstype) < 0)
/* An inode number other than ROOTINO is reported as "not a mounted file
 * system". */
254 if (status.st_ino != ROOTINO) {
255 Log("%s is not a mounted file system; ignored.\n", part);
/* Register the partition, keyed by the st_dev obtained above. */
260 VInitPartition(part, devname, status.st_dev);
264 #endif /* AFS_NT40_ENV */
/* Variant that reads MNTTAB: entries whose fstype is "ufs" and whose option
 * string does not start with "ro,ignore" (first 9 characters compared) are
 * offered to VCheckPartition() as (mount point, special device).
 * What the function returns, and how a negative VCheckPartition() result is
 * counted, is not visible in this listing. */
267 int VAttachPartitions(void)
273 if (!(mntfile = fopen(MNTTAB, "r"))) {
274 Log("Can't open %s\n", MNTTAB);
/* This getmntent() form fills the caller's struct; the loop runs for as long
 * as it returns 0. */
278 while (!getmntent(mntfile, &mnt)) {
279 /* Ignore non ufs or non read/write partitions */
280 if ((strcmp(mnt.mnt_fstype, "ufs") !=0) ||
281 (strncmp(mnt.mnt_mntopts, "ro,ignore",9) ==0))
284 if (VCheckPartition(mnt.mnt_mountp, mnt.mnt_special) < 0 )
/* The fclose() result is deliberately ignored (read-only stream). */
288 (void) fclose(mntfile);
293 #endif /* AFS_SUN5_ENV */
294 #if defined(AFS_SGI_ENV) || (defined(AFS_SUN_ENV) && !defined(AFS_SUN5_ENV)) || defined(AFS_HPUX_ENV)
/* Variant that walks the MOUNTED table with setmntent()/getmntent() and
 * offers every entry carrying the MNTOPT_RW option to VCheckPartition() as
 * (mnt_dir, mnt_fsname). The return value is not visible in this listing. */
295 int VAttachPartitions(void)
299 struct mntent *mntent;
301 if ((mfd = setmntent(MOUNTED, "r")) == NULL) {
302 Log("Problems in getting mount entries(setmntent)\n");
/* Assignment in the condition is intentional: iterate until getmntent()
 * returns NULL. */
305 while (mntent = getmntent(mfd)) {
306 if (!hasmntopt(mntent, MNTOPT_RW)) continue;
308 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
319 * (This function was grabbed from df.c)
323 register struct vmount **vmountpp; /* place to tell where buffer is */
326 register struct vmount *vm;
329 /* set initial size of mntctl buffer to a MAGIC NUMBER */
332 /* try the operation until ok or a fatal error */
334 if ((vm = (struct vmount *)malloc(size)) == NULL) {
335 /* failed getting memory for mount status buf */
336 perror("FATAL ERROR: get_stat malloc failed\n");
341 * perform the QUERY mntctl - if it returns > 0, that is the
342 * number of vmount structures in the buffer. If it returns
343 * -1, an error occurred. If it returned 0, then look in
344 * first word of buffer for needed size.
346 if ((nmounts = mntctl(MCTL_QUERY, size, (caddr_t)vm)) > 0) {
347 /* OK, got it, now return */
351 } else if (nmounts == 0) {
352 /* the buffer wasn't big enough .... */
353 /* .... get required buffer size */
358 /* some other kind of error occurred */
/* Variant that obtains the mount table through getmount() and offers each
 * mount that is not read-only, removable or remote to VCheckPartition(),
 * with the VMT_STUB string as the partition path and the VMT_OBJECT string
 * as the device name. Before that call, mounts whose superblock cannot be
 * read (ReadSuper() < 0) or that IsBigFilesFileSystem() accepts are logged
 * (fileserver only) as ignored.
 * NOTE(review): the loop advances with (int)vmountp + vmt_length; casting a
 * pointer to int truncates it wherever pointers are wider than int - confirm
 * this is only built for ABIs where both have the same width. */
365 int VAttachPartitions(void)
369 struct vmount *vmountp;
371 if ((nmounts = getmount(&vmountp)) <= 0) {
372 Log("Problems in getting # of mount entries(getmount)\n");
/* Records are variable length: step to the next one by the current record's
 * own vmt_length. */
375 for (; nmounts; nmounts--,
376 vmountp = (struct vmount *)((int)vmountp + vmountp->vmt_length)) {
377 char *part = vmt2dataptr(vmountp, VMT_STUB);
379 if (vmountp->vmt_flags & (MNT_READONLY|MNT_REMOVABLE|MNT_REMOTE))
380 continue; /* Ignore any "special" partitions */
384 struct superblock fs;
385 /* The Log statements are non-sequiters in the SalvageLog and don't
386 * even appear in the VolserLog, so restrict them to the FileLog.
388 if (ReadSuper(&fs, vmt2dataptr(vmountp, VMT_OBJECT))<0) {
389 if (programType == fileServer)
390 Log("Can't read superblock for %s, ignoring it.\n", part);
393 if (IsBigFilesFileSystem(&fs)) {
394 if (programType == fileServer)
395 Log("%s is a big files filesystem, ignoring it.\n", part);
401 if (VCheckPartition(part, vmt2dataptr(vmountp, VMT_OBJECT)) < 0 )
/* Variant built on setfsent()/getfsent(): every entry whose fs_type is "rw"
 * is offered to VCheckPartition() as (fs_file, fs_spec). The return value is
 * not visible in this listing. */
409 int VAttachPartitions(void)
414 if (setfsent() < 0) {
415 Log("Error listing filesystems.\n");
/* Assignment in the condition is intentional: iterate until getfsent()
 * returns NULL. */
419 while (fsent = getfsent()) {
420 if (strcmp(fsent->fs_type, "rw") != 0) continue;
422 if (VCheckPartition(fsent->fs_file, fsent->fs_spec) < 0 )
433 #include <sys/stat.h>
436 * validate names in vptab.
/* Validates one vptab entry.
 * Visible checks: vp_name must be VICE_PARTITION_PREFIX followed by one or
 * two characters in 'a'..'z'; for a two-letter suffix the value
 * first*26 + second is computed (the range test applied to it is not visible
 * here); vp_dev must be exactly two characters, 'A'..'Z' followed by ':'.
 * The return statements are not visible in this listing; the caller in this
 * file tests the result with `!`. */
443 int VValidVPTEntry(struct vptab *vpe)
445 int len = strlen(vpe->vp_name);
448 if (len < VICE_PREFIX_SIZE+1 || len > VICE_PREFIX_SIZE + 2) /* prefix + 1 or 2 chars */
450 if (strncmp(vpe->vp_name, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE))
453 for (i=VICE_PREFIX_SIZE; i<len; i++) { /* suffix characters only */
454 if (vpe->vp_name[i] < 'a' || vpe->vp_name[i] > 'z') {
455 Log("Invalid partition name %s in registry, ignoring it.\n",
460 if (len == VICE_PREFIX_SIZE + 2) { /* two-letter suffix */
461 i = (int)(vpe->vp_name[VICE_PREFIX_SIZE]-'a') * 26 +
462 (int)(vpe->vp_name[VICE_PREFIX_SIZE+1]-'a') ;
464 Log("Invalid partition name %s in registry, ignoring it.\n",
470 len = strlen(vpe->vp_dev); /* len is reused for the device string */
471 if (len != 2 || vpe->vp_dev[1] != ':' || vpe->vp_dev[0] < 'A' ||
472 vpe->vp_dev[0] > 'Z') {
473 Log("Invalid device name %s in registry, ignoring it.\n",
/* Variant taking a drive specification: builds the drive's root directory
 * from the first character of partName and uses GetVolumeInformation() to
 * require that the file system name is "NTFS".
 * Return values are not visible in this listing; the caller tests for < 0.
 * NOTE(review): the declaration of volRoot is not visible; the sprintf()
 * below writes four bytes including the terminator - confirm it fits. */
481 int VCheckPartition(char *partName)
488 /* partName is presumed to be of the form "X:" */
489 (void) sprintf(volRoot, "%c:\\", *partName);
/* Only the file system name is consumed below; both DWORD outputs are sent
 * to the same scratch variable. */
491 if (!GetVolumeInformation(volRoot, /* volume root directory */
492 NULL, /* volume name buffer */
493 0, /* volume name size */
494 NULL, /* volume serial number */
495 &dwDummy, /* max component length */
496 &dwDummy, /* file system flags */
497 volFsType, /* file system name */
498 sizeof(volFsType))) {
499 err = GetLastError();
500 Log("VCheckPartition: Failed to get partition information for %s, ignoring it.\n",
505 if (strcmp(volFsType, "NTFS")) { /* non-zero: name is not exactly "NTFS" */
506 Log("VCheckPartition: Partition %s is not an NTFS partition, ignoring it.\n", partName);
/* Variant driven by the vptab iterator (vpt_Start()/vpt_NextEntry()).
 * For each entry: validate it with VValidVPTEntry(); compare the first
 * character of its device against every partition already in
 * DiskPartitionList and, on a match, flag the existing partition
 * PART_DUPLICATE and skip the new one; check the drive with
 * VCheckPartition(); when running as fileserver, look for a FORCESALVAGE
 * file on the drive; finally register the entry with VInitPartition(), using
 * the drive letter's offset from 'A' as the device value.
 * A second pass unlinks the partitions flagged PART_DUPLICATE.
 * The return value is not visible in this listing.
 * NOTE(review): salvpath is filled with unbounded strcpy()/strcat(); vp_dev
 * has passed VValidVPTEntry() (two characters) by then, so the copy is short
 * as long as nothing bypasses that validation - confirm. */
514 int VAttachPartitions(void)
516 struct DiskPartition *partP, *prevP, *nextP;
517 struct vpt_iter iter;
520 if (vpt_Start(&iter)<0) {
521 Log("No partitions to attach.\n");
/* vpt_NextEntry() returning 0 means another entry was delivered. */
525 while (0==vpt_NextEntry(&iter, &entry)) {
526 if (!VValidVPTEntry(&entry)) {
530 /* This test for duplicates relies on the fact that the method
531 * of storing the partition names in the NT registry means the same
532 * partition name will never appear twice in the list.
534 for (partP = DiskPartitionList; partP; partP = partP->next) {
535 if (*partP->devName == *entry.vp_dev) {
536 Log("Same drive (%s) used for both partition %s and partition %s, ignoring both.\n", entry.vp_dev, partP->name, entry.vp_name);
537 partP->flags = PART_DUPLICATE;
538 break; /* Only one entry will ever be in this list. */
541 if (partP) continue; /* found a duplicate */
/* Drive-level check; what follows a negative result is not visible here. */
543 if (VCheckPartition(entry.vp_dev)<0)
545 /* This test allows for manually inserting the FORCESALVAGE flag
546 * and thereby invoking the salvager. scandisk obviously won't be
549 if (programType == fileServer) {
551 char salvpath[MAXPATHLEN];
552 strcpy(salvpath, entry.vp_dev);
553 strcat(salvpath, "\\FORCESALVAGE");
554 if (stat(salvpath, &status) == 0) {
555 Log("VAttachPartitions: Found %s; aborting\n", salvpath);
559 VInitPartition(entry.vp_name, entry.vp_dev, *entry.vp_dev - 'A');
563 /* Run through partition list and clear out the dupes. */
564 prevP = nextP = NULL;
565 for (partP = DiskPartitionList; partP; partP = nextP) {
567 if (partP->flags == PART_DUPLICATE) {
/* Unlink partP, either through its predecessor or by moving the list head;
 * the condition choosing between the two is not visible here. */
569 prevP->next = partP->next;
571 DiskPartitionList = partP->next;
582 #ifdef AFS_LINUX22_ENV
/* Variant that reads the mount table from /proc/mounts, falling back to
 * /etc/mtab, and offers every entry to VCheckPartition() as
 * (mnt_dir, mnt_fsname); no option filtering is visible here. The return
 * value is not visible in this listing. */
583 int VAttachPartitions(void)
587 struct mntent *mntent;
589 if ((mfd = setmntent("/proc/mounts", "r")) == NULL) {
590 if ((mfd = setmntent("/etc/mtab", "r")) == NULL) {
591 Log("Problems in getting mount entries(setmntent)\n");
/* Assignment in the condition is intentional: iterate until getmntent()
 * returns NULL. */
595 while (mntent = getmntent(mfd)) {
596 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
603 #endif /* AFS_LINUX22_ENV */
605 /* This routine is to be called whenever the actual name of the partition
606 * is required. The canonical name is still in part->name.
608 char * VPartitionPath(struct DiskPartition *part)
/* Visible branch: the path handed back is the device name. Any conditional
 * compilation selecting a different field is not shown in this listing. */
611 return part->devName;
617 /* get partition structure, abortp tells us if we should abort on failure */
/* Linear search of DiskPartitionList for an entry whose name equals `name`
 * (exact strcmp match). What happens on a match or a miss, and how abortp is
 * used, lies on lines not visible in this listing. */
618 struct DiskPartition *VGetPartition_r(char *name, int abortp)
620 register struct DiskPartition *dp;
621 for (dp = DiskPartitionList; dp; dp = dp->next) {
622 if (strcmp(dp->name, name) == 0)
/* Wrapper around VGetPartition_r(); the result is kept in retVal, presumably
 * to be returned after steps that are not visible in this listing. */
630 struct DiskPartition *VGetPartition(char *name, int abortp)
632 struct DiskPartition *retVal;
634 retVal = VGetPartition_r(name, abortp);
/* Variant that refreshes dp's usage figures from GetDiskFreeSpaceEx() on the
 * partition path. A failure is reported with printf() rather than Log().
 * NOTE(review): the byte counts are shifted right by 10 and then narrowed to
 * int; a 32-bit int cannot hold the 1K-block count of a volume of 2 TiB or
 * more - confirm whether that matters for supported volumes. */
640 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
642 ULARGE_INTEGER free_user, total, free_total;
643 int ufree, tot, tfree;
645 if (!GetDiskFreeSpaceEx(VPartitionPath(dp), &free_user, &total,
647 printf("Failed to get disk space info for %s, error = %d\n",
648 dp->name, GetLastError());
652 /* Convert to 1K units. */
653 ufree = (int) Int64ShraMod32(free_user.QuadPart, 10);
654 tot = (int) Int64ShraMod32(total.QuadPart, 10);
655 tfree = (int) Int64ShraMod32(free_total.QuadPart, 10);
/* minFree is the gap between the total free figure and the per-user one. */
657 dp->minFree = tfree - ufree; /* only used in VPrintDiskStats_r */
658 dp->totalUsable = tot;
/* Variant that refreshes dp's block counts from statvfs()/statfs() on
 * dp->name and stores dp->f_files, dp->minFree, dp->totalUsable and dp->free.
 * Both a statvfs and a statfs declaration/call (and both f_frsize and
 * f_bsize) appear below; presumably conditional compilation, whose
 * directives are not visible in this listing, selects one of each pair.
 * NOTE(review): the unit conversion uses the integer factors bsize/1024 and
 * 1024/bsize, which are exact only when bsize is a multiple or a divisor of
 * 1024; the test choosing between the two forms is not visible here. */
663 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
666 int fd, totalblks, free, used, availblks, bsize, code;
669 struct statvfs statbuf;
671 struct statfs statbuf;
/* Partitions flagged PART_DONTUPDATE are tested for before anything else. */
674 if (dp->flags & PART_DONTUPDATE)
676 /* Note: we don't bother syncing because it's only an estimate, update
677 is syncing every 30 seconds anyway, we only have to keep the disk
678 approximately 10% from full--you just can't get the stuff in from
679 the net fast enough to worry */
681 code = statvfs(dp->name, &statbuf);
683 code = statfs(dp->name, &statbuf);
686 Log("statfs of %s failed in VSetPartitionDiskUsage (errno = %d)\n", dp->name, errno);
689 if (statbuf.f_blocks == -1) { /* Undefined; skip stats.. */
690 Log("statfs of %s failed in VSetPartitionDiskUsage\n", dp->name);
693 totalblks = statbuf.f_blocks;
694 free = statbuf.f_bfree;
/* f_bfree minus f_bavail is kept as the reserved amount. */
695 reserved = free - statbuf.f_bavail;
697 bsize = statbuf.f_frsize;
699 bsize = statbuf.f_bsize;
701 availblks = totalblks - reserved;
702 dp->f_files = statbuf.f_files; /* max # of files in partition */
704 /* Now free and totalblks are in fragment units, but we want them in
708 free *= (bsize/1024);
709 totalblks *= (bsize / 1024);
710 availblks *= (bsize / 1024 );
711 reserved *= (bsize / 1024 );
714 free /= (1024/bsize);
715 totalblks /= (1024/bsize);
716 availblks /= (1024/bsize);
717 reserved /= (1024/bsize);
719 /* now compute remaining figures */
720 used = totalblks - free;
722 dp->minFree = reserved; /* only used in VPrintDiskStats_r */
723 dp->totalUsable = availblks;
724 dp->free = availblks - used; /* this is exactly f_bavail */
726 #endif /* AFS_NT40_ENV */
/* Wrapper that forwards to VSetPartitionDiskUsage_r(); whatever surrounds
 * the call (for example locking) is not visible in this listing. */
728 void VSetPartitionDiskUsage(register struct DiskPartition *dp)
731 VSetPartitionDiskUsage_r(dp);
/* Re-reads the disk usage of every partition in DiskPartitionList. */
735 void VResetDiskUsage_r(void)
737 struct DiskPartition *dp;
738 for (dp = DiskPartitionList; dp; dp = dp->next) {
739 VSetPartitionDiskUsage_r(dp);
/* Builds without AFS_PTHREAD_ENV do something extra per partition here; the
 * statement itself is not visible in this listing. */
740 #ifndef AFS_PTHREAD_ENV
742 #endif /* !AFS_PTHREAD_ENV */
/* NOTE(review): only the signature is visible in this listing; presumably
 * this pairs with VResetDiskUsage_r() the way the other wrappers in this file
 * pair with their _r routines - TODO confirm. */
746 void VResetDiskUsage(void)
/* Charges `blocks` against the volume's partition (partition->free -= blocks)
 * and against the volume itself (V_diskused(vp) += blocks).
 * Three tests on checkBlocks are visible before that: the free space left
 * after the charge (stored in rem) against aixlow_water percent of
 * totalUsable (stored in minavail), the same remainder against zero, and -
 * when V_maxquota(vp) is non-zero - the resulting usage against that quota.
 * The value stored through `ec` on each path, and any guards around these
 * tests, are on lines not visible in this listing. */
753 void VAdjustDiskUsage_r(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
755 afs_int32 rem, minavail;
757 /* why blocks instead of checkBlocks in the check below? Otherwise, any check
758 for less than BlocksSpare would skip the error-checking path, and we
759 could grow existing files forever, not just for another BlocksSpare
763 if ((rem = vp->partition->free - checkBlocks) <
764 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
766 if (vp->partition->free - checkBlocks < 0)
769 else if (V_maxquota(vp) && V_diskused(vp) + checkBlocks > V_maxquota(vp))
772 vp->partition->free -= blocks;
773 V_diskused(vp) += blocks;
/* Wrapper that forwards its arguments unchanged to VAdjustDiskUsage_r();
 * whatever surrounds the call (for example locking) is not visible in this
 * listing. */
776 void VAdjustDiskUsage(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
779 VAdjustDiskUsage_r(ec, vp, blocks, checkBlocks);
/* Subtracts `blocks` from the free count of the volume's partition. The
 * visible lines first test the remainder (stored in rem) against
 * aixlow_water percent of totalUsable (stored in minavail) and against zero;
 * no quota test and no update of the volume's own usage appear in the
 * visible lines. Return values are not visible in this listing. */
783 int VDiskUsage_r(Volume *vp, afs_int32 blocks)
785 afs_int32 rem, minavail;
788 if ((rem = vp->partition->free - blocks) <
789 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
791 if (vp->partition->free - blocks < 0)
795 vp->partition->free -= blocks;
/* Wrapper around VDiskUsage_r(); the result is kept in retVal, presumably to
 * be returned after steps that are not visible in this listing. */
799 int VDiskUsage(Volume *vp, afs_int32 blocks)
803 retVal = VDiskUsage_r(vp, blocks);
/* Logs a summary for every partition in DiskPartitionList: name, totalUsable
 * and minFree, followed by either the overallocation (-dp->free) or the free
 * block count. The test choosing between the last two messages is not
 * visible in this listing. */
808 void VPrintDiskStats_r(void)
810 struct DiskPartition *dp;
811 for (dp = DiskPartitionList; dp; dp = dp->next) {
812 Log("Partition %s: %d available 1K blocks (minfree=%d), ",
813 dp->name, dp->totalUsable, dp->minFree);
815 Log("overallocated by %d blocks\n", -dp->free);
817 Log("%d free blocks\n", dp->free);
/* NOTE(review): only the signature is visible in this listing; presumably
 * this pairs with VPrintDiskStats_r() the way the other wrappers in this file
 * pair with their _r routines - TODO confirm. */
821 void VPrintDiskStats(void)
829 /* Need a separate lock file on NT, since NT only has mandatory file locks. */
830 #define LOCKFILE "LOCKFILE"
/* Variant that locks a partition through a separate LOCKFILE in the
 * partition path: when dp->lock_fd is still -1 the file is created with
 * CreateFile() and the handle stored in dp->lock_fd, then LockFileEx() is
 * called with LOCKFILE_EXCLUSIVE_LOCK.
 * NOTE(review): the HANDLE is stored in an int field and cast back later;
 * that round trip is lossless only where a HANDLE fits in an int - confirm
 * for 64-bit builds. The declaration of `path` is not visible, so the
 * sprintf() bound cannot be checked from here. */
831 void VLockPartition_r(char *name)
833 struct DiskPartition *dp = VGetPartition_r(name, 0);
837 if (dp->lock_fd == -1) {
840 (void) sprintf(path, "%s\\%s", VPartitionPath(dp), LOCKFILE);
841 dp->lock_fd = (int)CreateFile(path, GENERIC_WRITE,
842 FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
843 CREATE_ALWAYS, FILE_ATTRIBUTE_HIDDEN, NULL);
844 assert (dp->lock_fd != (int)INVALID_HANDLE_VALUE);
/* `lap` is zeroed before the lock call; its declaration and the remaining
 * LockFileEx() arguments are on lines not visible here. */
846 memset((char*)&lap, 0, sizeof(lap));
847 rc = LockFileEx((HANDLE)dp->lock_fd, LOCKFILE_EXCLUSIVE_LOCK,
/* Variant that releases the partition lock with UnlockFileEx() and closes
 * the handle kept in dp->lock_fd. Whether lock_fd is reset afterwards is not
 * visible in this listing. */
853 void VUnlockPartition_r(char *name)
855 register struct DiskPartition *dp = VGetPartition_r(name, 0);
858 if (!dp) return; /* no partition, will fail later */
/* Zeroed `lap` is passed to UnlockFileEx() as its last argument. */
859 memset((char*)&lap, 0, sizeof(lap));
861 UnlockFileEx((HANDLE)dp->lock_fd, 0, 1, 0, &lap);
862 CloseHandle((HANDLE)dp->lock_fd);
865 #else /* AFS_NT40_ENV */
867 #if defined(AFS_HPUX_ENV)
/* Bit widths: of one char, and of an object of `type`. */
868 #define BITS_PER_CHAR (8)
869 #define BITS(type) (sizeof(type) * BITS_PER_CHAR)
/* Index of the int-sized word that contains the PRIV_LOCKRDONLY bit; used by
 * VLockPartition_r() to index priv_mask[]. The "- 1" suggests privilege
 * numbers start at 1 - TODO confirm against <sys/privgrp.h>. */
871 #define LOCKRDONLY_OFFSET ((PRIV_LOCKRDONLY - 1) / BITS(int))
872 #endif /* defined(AFS_HPUX_ENV) */
/* Variant that locks a partition by opening it and locking the descriptor,
 * which is kept in dp->lock_fd. Returns at once when the partition is
 * unknown or dp->lock_fd is already set.
 * The name opened is dp->devName on SUN5/AIX41 builds; the dp->name
 * assignment is presumably the alternative branch of that conditional.
 * On HP-UX the lock is taken with lockf(); if the PRIV_GLOBAL privilege
 * group lacks PRIV_LOCKRDONLY, that privilege is granted before the call and
 * removed again afterwards. Other builds use lockf() (AIX, SUN5) or flock().
 * NOTE(review): getprivgrp(), setprivgrp(), lockf() and flock() are called
 * inside assert(); if assertions are ever compiled out, those calls vanish
 * with them - confirm how assert is defined for this build.
 * NOTE(review): globalMask is assigned only inside the search loop; if no
 * PRIV_GLOBAL entry is found it would be dereferenced unset unless a line
 * not shown here guards against that - confirm. */
874 void VLockPartition_r(char *name)
876 register struct DiskPartition *dp = VGetPartition_r(name, 0);
879 struct timeval pausing;
880 #if defined(AFS_HPUX_ENV)
882 struct privgrp_map privGrpList[PRIV_MAXGRPS];
883 unsigned int *globalMask;
885 #endif /* defined(AFS_HPUX_ENV) */
887 if (!dp) return; /* no partition, will fail later */
888 if (dp->lock_fd != -1) return;
890 #if defined(AFS_SUN5_ENV) || defined(AFS_AIX41_ENV)
891 partitionName = dp->devName;
894 partitionName = dp->name;
/* Up to 25 open() attempts; after a failure, pause via select() with a
 * timeout (tv_usec is 500000; tv_sec is not visible in this listing). */
898 for (retries=25; retries; retries--) {
899 dp->lock_fd = open(partitionName, code);
900 if (dp->lock_fd != -1) break;
902 pausing.tv_usec = 500000;
903 select(0, NULL, NULL, NULL, &pausing);
/* All attempts used up: treated as fatal. */
905 assert(retries != 0);
907 #if defined (AFS_HPUX_ENV)
909 assert(getprivgrp(privGrpList) == 0);
912 * In general, it will difficult and time-consuming ,if not impossible,
913 * to try to find the privgroup to which this process belongs that has the
914 * smallest membership, to minimise the security hole. So, we use the privgrp
915 * to which everybody belongs.
917 /* first, we have to find the global mask */
918 for (globalMaskIndex = 0; globalMaskIndex < PRIV_MAXGRPS;
920 if (privGrpList[globalMaskIndex].priv_groupno == PRIV_GLOBAL) {
921 globalMask = &(privGrpList[globalMaskIndex].
922 priv_mask[LOCKRDONLY_OFFSET]);
927 if (((*globalMask) & privmask(PRIV_LOCKRDONLY)) == 0) {
928 /* allow everybody to set a lock on a read-only file descriptor */
929 (*globalMask) |= privmask(PRIV_LOCKRDONLY);
930 assert(setprivgrp(PRIV_GLOBAL,
931 privGrpList[globalMaskIndex].priv_mask) == 0);
933 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
935 /* remove the privilege granted to everybody to lock a read-only fd */
936 (*globalMask) &= ~(privmask(PRIV_LOCKRDONLY));
937 assert(setprivgrp(PRIV_GLOBAL,
938 privGrpList[globalMaskIndex].priv_mask) == 0);
941 /* in this case, we should be able to do this with impunity, anyway */
942 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
945 assert (lockfRtn != -1);
947 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV)
948 assert (lockf(dp->lock_fd, F_LOCK, 0) != -1);
950 assert (flock(dp->lock_fd, LOCK_EX) == 0);
951 #endif /* defined(AFS_AIX_ENV) */
/* Variant for the descriptor-based lock: looks the partition up and returns
 * early when it is unknown. The actual unlock/close steps are on lines not
 * visible in this listing. */
955 void VUnlockPartition_r(char *name)
957 register struct DiskPartition *dp = VGetPartition_r(name, 0);
958 if (!dp) return; /* no partition, will fail later */
963 #endif /* AFS_NT40_ENV */
/* Wrapper that forwards to VLockPartition_r(); whatever surrounds the call
 * (for example locking of the volume package) is not visible in this
 * listing. */
965 void VLockPartition(char *name)
968 VLockPartition_r(name);
/* Wrapper that forwards to VUnlockPartition_r(); whatever surrounds the call
 * (for example locking of the volume package) is not visible in this
 * listing. */
972 void VUnlockPartition(char *name)
975 VUnlockPartition_r(name);