2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
15 Institution: The Information Technology Center, Carnegie-Mellon University
19 #include <afs/param.h>
26 #include <sys/param.h>
29 #include <sys/statvfs.h>
30 #endif /* AFS_HAVE_STATVFS */
32 #if !defined(AFS_SGI_ENV)
34 #include <sys/mount.h>
36 #else /* AFS_OSF_ENV */
37 #ifdef AFS_VFSINCL_ENV
40 #include <sys/fs/ufs_fs.h>
44 #else /* AFS_VFSINCL_ENV */
45 #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX22_ENV)
48 #endif /* AFS_VFSINCL_ENV */
49 #endif /* AFS_OSF_ENV */
50 #include <sys/errno.h>
56 #include <sys/lockf.h>
62 #include <checklist.h>
64 #if defined(AFS_SUN_ENV)
69 #include <sys/mnttab.h>
70 #include <sys/mntent.h>
72 #ifdef AFS_LINUX22_ENV
74 #include <sys/statfs.h>
81 #endif /* AFS_SGI_ENV */
82 #endif /* AFS_NT40_ENV */
83 #if defined(AFS_SGI_ENV)
84 #include <sys/errno.h>
92 #include <afs/afsint.h>
94 #include <afs/errors.h>
97 #include <afs/afssyscalls.h>
103 #include "namei_ops.h"
105 #endif /* AFS_NAMEI_ENV */
108 #include "partition.h"
109 #ifdef AFS_PTHREAD_ENV
111 #else /* AFS_PTHREAD_ENV */
112 #include <afs/assert.h>
113 #endif /* AFS_PTHREAD_ENV */
115 #if defined(AFS_HPUX_ENV)
116 #include <sys/types.h>
117 #include <sys/privgrp.h>
118 #endif /* defined(AFS_HPUX_ENV) */
121 #include <jfs/filsys.h>
/* Low-water percentage. VAdjustDiskUsage_r() and VDiskUsage_r() compute their
 * minimum-available threshold as (totalUsable * aixlow_water) / 100. */
124 int aixlow_water = 8; /* default 8% */
/* Head of the list of known disk partitions, linked through ->next.
 * VInitPartition_r() adds entries; the lookup, usage-reset and statistics
 * routines walk it from here. */
125 struct DiskPartition *DiskPartitionList;
127 #ifdef AFS_SGI_XFS_IOPS_ENV
128 /* Verify that the on disk XFS inodes on the partition are large enough to
129 * hold the AFS attribute. Returns -1 if the attribute can't be set or is
130 * too small to fit in the inode. Returns 0 if the attribute does fit in
133 #include <afs/xfsattrs.h>
134 static int VerifyXFSInodeSize(char *part, char *fstype)
137 int length = SIZEOF_XFS_ATTR_T;
142 if (strcmp("xfs", fstype))
145 if (attr_set(part, AFS_XFS_ATTR, &junk, length, ATTR_ROOT) == 0) {
146 if (((fd=open(part, O_RDONLY, 0)) != -1)
147 && (fcntl(fd, F_FSGETXATTRA, &fsx) == 0)) {
149 if (fsx.fsx_nextents) {
150 Log("Partition %s: XFS inodes too small, exiting.\n", part);
151 Log("Run xfs_size_check utility and remake partitions.\n");
159 (void) attr_remove(part, AFS_XFS_ATTR, ATTR_ROOT);
166 static void VInitPartition_r(char *path, char *devname, Device dev)
168 struct DiskPartition *dp, *op;
169 dp = (struct DiskPartition *) malloc(sizeof (struct DiskPartition));
170 /* Add it to the end, to preserve order when we print statistics */
171 for (op = DiskPartitionList; op; op = op->next) {
178 DiskPartitionList = dp;
180 strcpy(dp->name, path);
181 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
183 strcpy(dp->devName, devname);
184 #else /* AFS_SUN5_ENV */
185 strcpy(dp->devName, path);
187 dp->device = volutil_GetPartitionID(path);
189 strcpy(dp->devName, devname);
194 dp->f_files = 1; /* just a default value */
195 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
196 if (programType == fileServer)
197 (void) namei_ViceREADME(VPartitionPath(dp));
199 VSetPartitionDiskUsage_r(dp);
202 static void VInitPartition(char *path, char *devname, Device dev)
205 VInitPartition_r(path, devname, dev);
210 /* VAttachPartitions() finds the vice partitions on this server. Calls
211 * VCheckPartition() to do some basic checks on the partition. If the partition
212 * is a valid vice partition, VCheckPartition will add it to the DiskPartition
214 * Returns the number of errors returned by VCheckPartition. An error in
215 * VCheckPartition means that partition is a valid vice partition but the
216 * fileserver should not start because of the error found on that partition.
219 * No specific user space file system checks, since we don't know what
220 * is being used for vice partitions.
222 * Use partition name as devname.
224 int VCheckPartition(part, devname)
230 /* Only keep track of "/vicepx" partitions since it can get hairy
231 * when NFS mounts are involved. */
232 if (strncmp(part, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE)) {
235 if (stat(part, &status) < 0) {
236 Log("VInitVnodes: Couldn't find file system %s; ignored\n", part);
240 #ifndef AFS_AIX32_ENV
241 if (programType == fileServer) {
242 char salvpath[MAXPATHLEN];
243 strcpy(salvpath, part);
244 strcat(salvpath, "/FORCESALVAGE");
245 if (stat(salvpath, &status) == 0) {
246 Log("VInitVnodes: Found %s; aborting\n", salvpath);
252 #ifdef AFS_SGI_XFS_IOPS_ENV
253 if (VerifyXFSInodeSize(part, status.st_fstype) < 0)
258 if (status.st_ino != ROOTINO) {
259 Log("%s is not a mounted file system; ignored.\n", part);
264 VInitPartition(part, devname, status.st_dev);
268 #endif /* AFS_NT40_ENV */
271 int VAttachPartitions(void)
277 if (!(mntfile = fopen(MNTTAB, "r"))) {
278 Log("Can't open %s\n", MNTTAB);
282 while (!getmntent(mntfile, &mnt)) {
283 /* Ignore non ufs or non read/write partitions */
284 if ((strcmp(mnt.mnt_fstype, "ufs") !=0) ||
285 (strncmp(mnt.mnt_mntopts, "ro,ignore",9) ==0))
288 if (VCheckPartition(mnt.mnt_mountp, mnt.mnt_special) < 0 )
292 (void) fclose(mntfile);
297 #endif /* AFS_SUN5_ENV */
298 #if defined(AFS_SGI_ENV) || (defined(AFS_SUN_ENV) && !defined(AFS_SUN5_ENV)) || defined(AFS_HPUX_ENV)
299 int VAttachPartitions(void)
303 struct mntent *mntent;
305 if ((mfd = setmntent(MOUNTED, "r")) == NULL) {
306 Log("Problems in getting mount entries(setmntent)\n");
309 while (mntent = getmntent(mfd)) {
310 if (!hasmntopt(mntent, MNTOPT_RW)) continue;
312 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
323 * (This function was grabbed from df.c)
327 register struct vmount **vmountpp; /* place to tell where buffer is */
330 register struct vmount *vm;
333 /* set initial size of mntctl buffer to a MAGIC NUMBER */
336 /* try the operation until ok or a fatal error */
338 if ((vm = (struct vmount *)malloc(size)) == NULL) {
339 /* failed getting memory for mount status buf */
340 perror("FATAL ERROR: get_stat malloc failed\n");
345 * perform the QUERY mntctl - if it returns > 0, that is the
346 * number of vmount structures in the buffer. If it returns
347 * -1, an error occurred. If it returns 0, then look in
348 * first word of buffer for needed size.
350 if ((nmounts = mntctl(MCTL_QUERY, size, (caddr_t)vm)) > 0) {
351 /* OK, got it, now return */
355 } else if (nmounts == 0) {
356 /* the buffer wasn't big enough .... */
357 /* .... get required buffer size */
362 /* some other kind of error occurred */
369 int VAttachPartitions(void)
373 struct vmount *vmountp;
375 if ((nmounts = getmount(&vmountp)) <= 0) {
376 Log("Problems in getting # of mount entries(getmount)\n");
379 for (; nmounts; nmounts--,
380 vmountp = (struct vmount *)((int)vmountp + vmountp->vmt_length)) {
381 char *part = vmt2dataptr(vmountp, VMT_STUB);
383 if (vmountp->vmt_flags & (MNT_READONLY|MNT_REMOVABLE|MNT_REMOTE))
384 continue; /* Ignore any "special" partitions */
388 struct superblock fs;
389 /* The Log statements are non sequiturs in the SalvageLog and don't
390 * even appear in the VolserLog, so restrict them to the FileLog.
392 if (ReadSuper(&fs, vmt2dataptr(vmountp, VMT_OBJECT))<0) {
393 if (programType == fileServer)
394 Log("Can't read superblock for %s, ignoring it.\n", part);
397 if (IsBigFilesFileSystem(&fs)) {
398 if (programType == fileServer)
399 Log("%s is a big files filesystem, ignoring it.\n", part);
405 if (VCheckPartition(part, vmt2dataptr(vmountp, VMT_OBJECT)) < 0 )
413 int VAttachPartitions(void)
418 if (setfsent() < 0) {
419 Log("Error listing filesystems.\n");
423 while (fsent = getfsent()) {
424 if (strcmp(fsent->fs_type, "rw") != 0) continue;
426 if (VCheckPartition(fsent->fs_file, fsent->fs_spec) < 0 )
437 #include <sys/stat.h>
440 * validate names in vptab.
447 int VValidVPTEntry(struct vptab *vpe)
449 int len = strlen(vpe->vp_name);
452 if (len < VICE_PREFIX_SIZE+1 || len > VICE_PREFIX_SIZE + 2)
454 if (strncmp(vpe->vp_name, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE))
457 for (i=VICE_PREFIX_SIZE; i<len; i++) {
458 if (vpe->vp_name[i] < 'a' || vpe->vp_name[i] > 'z') {
459 Log("Invalid partition name %s in registry, ignoring it.\n",
464 if (len == VICE_PREFIX_SIZE + 2) {
465 i = (int)(vpe->vp_name[VICE_PREFIX_SIZE]-'a') * 26 +
466 (int)(vpe->vp_name[VICE_PREFIX_SIZE+1]-'a') ;
468 Log("Invalid partition name %s in registry, ignoring it.\n",
474 len = strlen(vpe->vp_dev);
475 if (len != 2 || vpe->vp_dev[1] != ':' || vpe->vp_dev[0] < 'A' ||
476 vpe->vp_dev[0] > 'Z') {
477 Log("Invalid device name %s in registry, ignoring it.\n",
485 int VCheckPartition(char *partName)
492 /* partName is presumed to be of the form "X:" */
493 (void) sprintf(volRoot, "%c:\\", *partName);
495 if (!GetVolumeInformation(volRoot, /* volume root directory */
496 NULL, /* volume name buffer */
497 0, /* volume name size */
498 NULL, /* volume serial number */
499 &dwDummy, /* max component length */
500 &dwDummy, /* file system flags */
501 volFsType, /* file system name */
502 sizeof(volFsType))) {
503 err = GetLastError();
504 Log("VCheckPartition: Failed to get partition information for %s, ignoring it.\n",
509 if (strcmp(volFsType, "NTFS")) {
510 Log("VCheckPartition: Partition %s is not an NTFS partition, ignoring it.\n", partName);
518 int VAttachPartitions(void)
520 struct DiskPartition *partP, *prevP, *nextP;
521 struct vpt_iter iter;
524 if (vpt_Start(&iter)<0) {
525 Log("No partitions to attach.\n");
529 while (0==vpt_NextEntry(&iter, &entry)) {
530 if (!VValidVPTEntry(&entry)) {
534 /* This test for duplicates relies on the fact that the method
535 * of storing the partition names in the NT registry means the same
536 * partition name will never appear twice in the list.
538 for (partP = DiskPartitionList; partP; partP = partP->next) {
539 if (*partP->devName == *entry.vp_dev) {
540 Log("Same drive (%s) used for both partition %s and partition %s, ignoring both.\n", entry.vp_dev, partP->name, entry.vp_name);
541 partP->flags = PART_DUPLICATE;
542 break; /* Only one entry will ever be in this list. */
545 if (partP) continue; /* found a duplicate */
547 if (VCheckPartition(entry.vp_dev)<0)
549 /* This test allows for manually inserting the FORCESALVAGE flag
550 * and thereby invoking the salvager. scandisk obviously won't be
553 if (programType == fileServer) {
555 char salvpath[MAXPATHLEN];
556 strcpy(salvpath, entry.vp_dev);
557 strcat(salvpath, "\\FORCESALVAGE");
558 if (stat(salvpath, &status) == 0) {
559 Log("VAttachPartitions: Found %s; aborting\n", salvpath);
563 VInitPartition(entry.vp_name, entry.vp_dev, *entry.vp_dev - 'A');
567 /* Run through partition list and clear out the dupes. */
568 prevP = nextP = NULL;
569 for (partP = DiskPartitionList; partP; partP = nextP) {
571 if (partP->flags == PART_DUPLICATE) {
573 prevP->next = partP->next;
575 DiskPartitionList = partP->next;
586 #ifdef AFS_LINUX22_ENV
587 int VAttachPartitions(void)
591 struct mntent *mntent;
593 if ((mfd = setmntent("/proc/mounts", "r")) == NULL) {
594 if ((mfd = setmntent("/etc/mtab", "r")) == NULL) {
595 Log("Problems in getting mount entries(setmntent)\n");
599 while (mntent = getmntent(mfd)) {
600 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
607 #endif /* AFS_LINUX22_ENV */
609 /* This routine is to be called whenever the actual name of the partition
610 * is required. The canonical name is still in part->name.
612 char * VPartitionPath(struct DiskPartition *part)
615 return part->devName;
621 /* get partition structure, abortp tells us if we should abort on failure */
622 struct DiskPartition *VGetPartition_r(char *name, int abortp)
624 register struct DiskPartition *dp;
625 for (dp = DiskPartitionList; dp; dp = dp->next) {
626 if (strcmp(dp->name, name) == 0)
634 struct DiskPartition *VGetPartition(char *name, int abortp)
636 struct DiskPartition *retVal;
638 retVal = VGetPartition_r(name, abortp);
644 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
646 ULARGE_INTEGER free_user, total, free_total;
647 int ufree, tot, tfree;
649 if (!GetDiskFreeSpaceEx(VPartitionPath(dp), &free_user, &total,
651 printf("Failed to get disk space info for %s, error = %d\n",
652 dp->name, GetLastError());
656 /* Convert to 1K units. */
657 ufree = (int) Int64ShraMod32(free_user.QuadPart, 10);
658 tot = (int) Int64ShraMod32(total.QuadPart, 10);
659 tfree = (int) Int64ShraMod32(free_total.QuadPart, 10);
661 dp->minFree = tfree - ufree; /* only used in VPrintDiskStats_r */
662 dp->totalUsable = tot;
667 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
670 int fd, totalblks, free, used, availblks, bsize, code;
673 struct statvfs statbuf;
675 struct statfs statbuf;
678 if (dp->flags & PART_DONTUPDATE)
680 /* Note: we don't bother syncing because it's only an estimate, update
681 is syncing every 30 seconds anyway, we only have to keep the disk
682 approximately 10% from full--you just can't get the stuff in from
683 the net fast enough to worry */
685 code = statvfs(dp->name, &statbuf);
687 code = statfs(dp->name, &statbuf);
690 Log("statfs of %s failed in VSetPartitionDiskUsage (errno = %d)\n", dp->name, errno);
693 if (statbuf.f_blocks == -1) { /* Undefined; skip stats.. */
694 Log("statfs of %s failed in VSetPartitionDiskUsage\n", dp->name);
697 totalblks = statbuf.f_blocks;
698 free = statbuf.f_bfree;
699 reserved = free - statbuf.f_bavail;
701 bsize = statbuf.f_frsize;
703 bsize = statbuf.f_bsize;
705 availblks = totalblks - reserved;
706 dp->f_files = statbuf.f_files; /* max # of files in partition */
708 /* Now free and totalblks are in fragment units, but we want them in
712 free *= (bsize/1024);
713 totalblks *= (bsize / 1024);
714 availblks *= (bsize / 1024 );
715 reserved *= (bsize / 1024 );
718 free /= (1024/bsize);
719 totalblks /= (1024/bsize);
720 availblks /= (1024/bsize);
721 reserved /= (1024/bsize);
723 /* now compute remaining figures */
724 used = totalblks - free;
726 dp->minFree = reserved; /* only used in VPrintDiskStats_r */
727 dp->totalUsable = availblks;
728 dp->free = availblks - used; /* this is exactly f_bavail */
730 #endif /* AFS_NT40_ENV */
732 void VSetPartitionDiskUsage(register struct DiskPartition *dp)
735 VSetPartitionDiskUsage_r(dp);
739 void VResetDiskUsage_r(void)
741 struct DiskPartition *dp;
742 for (dp = DiskPartitionList; dp; dp = dp->next) {
743 VSetPartitionDiskUsage_r(dp);
744 #ifndef AFS_PTHREAD_ENV
746 #endif /* !AFS_PTHREAD_ENV */
750 void VResetDiskUsage(void)
757 void VAdjustDiskUsage_r(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
759 afs_int32 rem, minavail;
761 /* why blocks instead of checkBlocks in the check below? Otherwise, any check
762 for less than BlocksSpare would skip the error-checking path, and we
763 could grow existing files forever, not just for another BlocksSpare
767 if ((rem = vp->partition->free - checkBlocks) <
768 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
770 if (vp->partition->free - checkBlocks < 0)
773 else if (V_maxquota(vp) && V_diskused(vp) + checkBlocks > V_maxquota(vp))
776 vp->partition->free -= blocks;
777 V_diskused(vp) += blocks;
780 void VAdjustDiskUsage(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
783 VAdjustDiskUsage_r(ec, vp, blocks, checkBlocks);
787 int VDiskUsage_r(Volume *vp, afs_int32 blocks)
789 afs_int32 rem, minavail;
792 if ((rem = vp->partition->free - blocks) <
793 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
795 if (vp->partition->free - blocks < 0)
799 vp->partition->free -= blocks;
803 int VDiskUsage(Volume *vp, afs_int32 blocks)
807 retVal = VDiskUsage_r(vp, blocks);
812 void VPrintDiskStats_r(void)
814 struct DiskPartition *dp;
815 for (dp = DiskPartitionList; dp; dp = dp->next) {
816 Log("Partition %s: %d available 1K blocks (minfree=%d), ",
817 dp->name, dp->totalUsable, dp->minFree);
819 Log("overallocated by %d blocks\n", -dp->free);
821 Log("%d free blocks\n", dp->free);
825 void VPrintDiskStats(void)
833 /* Need a separate lock file on NT, since NT only has mandatory file locks. */
834 #define LOCKFILE "LOCKFILE"
835 void VLockPartition_r(char *name)
837 struct DiskPartition *dp = VGetPartition_r(name, 0);
841 if (dp->lock_fd == -1) {
844 (void) sprintf(path, "%s\\%s", VPartitionPath(dp), LOCKFILE);
845 dp->lock_fd = (int)CreateFile(path, GENERIC_WRITE,
846 FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
847 CREATE_ALWAYS, FILE_ATTRIBUTE_HIDDEN, NULL);
848 assert (dp->lock_fd != (int)INVALID_HANDLE_VALUE);
850 memset((char*)&lap, 0, sizeof(lap));
851 rc = LockFileEx((HANDLE)dp->lock_fd, LOCKFILE_EXCLUSIVE_LOCK,
857 void VUnlockPartition_r(char *name)
859 register struct DiskPartition *dp = VGetPartition_r(name, 0);
862 if (!dp) return; /* no partition, will fail later */
863 memset((char*)&lap, 0, sizeof(lap));
865 UnlockFileEx((HANDLE)dp->lock_fd, 0, 1, 0, &lap);
866 CloseHandle((HANDLE)dp->lock_fd);
869 #else /* AFS_NT40_ENV */
871 #if defined(AFS_HPUX_ENV)
/* Number of bits in one char; used by BITS() below. */
872 #define BITS_PER_CHAR (8)
/* Width of `type` in bits. */
873 #define BITS(type) (sizeof(type) * BITS_PER_CHAR)
/* Index of the int-sized word that VLockPartition_r() selects in a
 * privgrp_map priv_mask[] array when testing or setting PRIV_LOCKRDONLY.
 * NOTE(review): the "- 1" presumably reflects 1-based privilege numbers -
 * confirm against <sys/privgrp.h>. */
875 #define LOCKRDONLY_OFFSET ((PRIV_LOCKRDONLY - 1) / BITS(int))
876 #endif /* defined(AFS_HPUX_ENV) */
878 void VLockPartition_r(char *name)
880 register struct DiskPartition *dp = VGetPartition_r(name, 0);
883 struct timeval pausing;
884 #if defined(AFS_HPUX_ENV)
886 struct privgrp_map privGrpList[PRIV_MAXGRPS];
887 unsigned int *globalMask;
889 #endif /* defined(AFS_HPUX_ENV) */
891 if (!dp) return; /* no partition, will fail later */
892 if (dp->lock_fd != -1) return;
894 #if defined(AFS_SUN5_ENV) || defined(AFS_AIX41_ENV)
895 partitionName = dp->devName;
898 partitionName = dp->name;
902 for (retries=25; retries; retries--) {
903 dp->lock_fd = open(partitionName, code);
904 if (dp->lock_fd != -1) break;
906 pausing.tv_usec = 500000;
907 select(0, NULL, NULL, NULL, &pausing);
909 assert(retries != 0);
911 #if defined (AFS_HPUX_ENV)
913 assert(getprivgrp(privGrpList) == 0);
916 * In general, it will be difficult and time-consuming, if not impossible,
917 * to try to find the privgroup to which this process belongs that has the
918 * smallest membership, to minimise the security hole. So, we use the privgrp
919 * to which everybody belongs.
921 /* first, we have to find the global mask */
922 for (globalMaskIndex = 0; globalMaskIndex < PRIV_MAXGRPS;
924 if (privGrpList[globalMaskIndex].priv_groupno == PRIV_GLOBAL) {
925 globalMask = &(privGrpList[globalMaskIndex].
926 priv_mask[LOCKRDONLY_OFFSET]);
931 if (((*globalMask) & privmask(PRIV_LOCKRDONLY)) == 0) {
932 /* allow everybody to set a lock on a read-only file descriptor */
933 (*globalMask) |= privmask(PRIV_LOCKRDONLY);
934 assert(setprivgrp(PRIV_GLOBAL,
935 privGrpList[globalMaskIndex].priv_mask) == 0);
937 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
939 /* remove the privilege granted to everybody to lock a read-only fd */
940 (*globalMask) &= ~(privmask(PRIV_LOCKRDONLY));
941 assert(setprivgrp(PRIV_GLOBAL,
942 privGrpList[globalMaskIndex].priv_mask) == 0);
945 /* in this case, we should be able to do this with impunity, anyway */
946 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
949 assert (lockfRtn != -1);
951 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV)
952 assert (lockf(dp->lock_fd, F_LOCK, 0) != -1);
954 assert (flock(dp->lock_fd, LOCK_EX) == 0);
955 #endif /* defined(AFS_AIX_ENV) */
959 void VUnlockPartition_r(char *name)
961 register struct DiskPartition *dp = VGetPartition_r(name, 0);
962 if (!dp) return; /* no partition, will fail later */
967 #endif /* AFS_NT40_ENV */
969 void VLockPartition(char *name)
972 VLockPartition_r(name);
976 void VUnlockPartition(char *name)
979 VUnlockPartition_r(name);