2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
15 Institution: The Information Technology Center, Carnegie-Mellon University
19 #include <afs/param.h>
26 #include <sys/param.h>
29 #include <sys/statvfs.h>
30 #endif /* AFS_HAVE_STATVFS */
32 #include <sys/mount.h>
35 #if !defined(AFS_SGI_ENV)
37 #include <sys/mount.h>
39 #else /* AFS_OSF_ENV */
40 #ifdef AFS_VFSINCL_ENV
43 #include <sys/fs/ufs_fs.h>
46 #include <ufs/ufs/dinode.h>
47 #include <ufs/ffs/fs.h>
52 #else /* AFS_VFSINCL_ENV */
53 #if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX22_ENV) && !defined(AFS_DARWIN_ENV)
56 #endif /* AFS_VFSINCL_ENV */
57 #endif /* AFS_OSF_ENV */
58 #include <sys/errno.h>
64 #include <sys/lockf.h>
70 #include <checklist.h>
72 #if defined(AFS_SUN_ENV)
77 #include <sys/mnttab.h>
78 #include <sys/mntent.h>
80 #ifdef AFS_LINUX22_ENV
82 #include <sys/statfs.h>
89 #endif /* AFS_SGI_ENV */
90 #endif /* AFS_NT40_ENV */
91 #if defined(AFS_SGI_ENV)
92 #include <sys/errno.h>
100 #include <afs/afsint.h>
102 #include <afs/errors.h>
105 #include <afs/afssyscalls.h>
111 #include "namei_ops.h"
113 #endif /* AFS_NAMEI_ENV */
116 #include "partition.h"
117 #ifdef AFS_PTHREAD_ENV
119 #else /* AFS_PTHREAD_ENV */
120 #include <afs/assert.h>
121 #endif /* AFS_PTHREAD_ENV */
123 #if defined(AFS_HPUX_ENV)
124 #include <sys/types.h>
125 #include <sys/privgrp.h>
126 #endif /* defined(AFS_HPUX_ENV) */
129 #include <jfs/filsys.h>
132 int aixlow_water = 8; /* default 8% */
133 struct DiskPartition *DiskPartitionList;
135 #ifdef AFS_SGI_XFS_IOPS_ENV
136 /* Verify that the on disk XFS inodes on the partition are large enough to
137 * hold the AFS attribute. Returns -1 if the attribute can't be set or is
138 * too small to fit in the inode. Returns 0 if the attribute does fit in
141 #include <afs/xfsattrs.h>
142 static int VerifyXFSInodeSize(char *part, char *fstype)
145 int length = SIZEOF_XFS_ATTR_T;
150 if (strcmp("xfs", fstype))
153 if (attr_set(part, AFS_XFS_ATTR, &junk, length, ATTR_ROOT) == 0) {
154 if (((fd=open(part, O_RDONLY, 0)) != -1)
155 && (fcntl(fd, F_FSGETXATTRA, &fsx) == 0)) {
157 if (fsx.fsx_nextents) {
158 Log("Partition %s: XFS inodes too small, exiting.\n", part);
159 Log("Run xfs_size_check utility and remake partitions.\n");
167 (void) attr_remove(part, AFS_XFS_ATTR, ATTR_ROOT);
174 static void VInitPartition_r(char *path, char *devname, Device dev)
176 struct DiskPartition *dp, *op;
177 dp = (struct DiskPartition *) malloc(sizeof (struct DiskPartition));
178 /* Add it to the end, to preserve order when we print statistics */
179 for (op = DiskPartitionList; op; op = op->next) {
186 DiskPartitionList = dp;
188 strcpy(dp->name, path);
189 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
191 strcpy(dp->devName, devname);
192 #else /* AFS_SUN5_ENV */
193 strcpy(dp->devName, path);
195 dp->device = volutil_GetPartitionID(path);
197 strcpy(dp->devName, devname);
202 dp->f_files = 1; /* just a default value */
203 #if defined(AFS_NAMEI_ENV) && !defined(AFS_NT40_ENV)
204 if (programType == fileServer)
205 (void) namei_ViceREADME(VPartitionPath(dp));
207 VSetPartitionDiskUsage_r(dp);
/* Wrapper around VInitPartition_r(): passes path, devname and dev through
 * unchanged.
 * NOTE(review): the lines between the signature and the call are not visible
 * here; presumably the volume-package lock is taken around the call, as the
 * _r naming suggests -- confirm against the complete file. */
210 static void VInitPartition(char *path, char *devname, Device dev)
213 VInitPartition_r(path, devname, dev);
218 /* VAttachPartitions() finds the vice partitions on this server. Calls
219 * VCheckPartition() to do some basic checks on the partition. If the partition
220 * is a valid vice partition, VCheckPartition will add it to the DiskPartition
222 * Returns the number of errors returned by VCheckPartition. An error in
223 * VCheckPartition means that partition is a valid vice partition but the
224 * fileserver should not start because of the error found on that partition.
227 * No specific user space file system checks, since we don't know what
228 * is being used for vice partitions.
230 * Use partition name as devname.
232 int VCheckPartition(part, devname)
238 /* Only keep track of "/vicepx" partitions since it can get hairy
239 * when NFS mounts are involved.. */
240 if (strncmp(part, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE)) {
243 if (stat(part, &status) < 0) {
244 Log("VInitVnodes: Couldn't find file system %s; ignored\n", part);
248 #ifndef AFS_AIX32_ENV
249 if (programType == fileServer) {
250 char salvpath[MAXPATHLEN];
251 strcpy(salvpath, part);
252 strcat(salvpath, "/FORCESALVAGE");
253 if (stat(salvpath, &status) == 0) {
254 Log("VInitVnodes: Found %s; aborting\n", salvpath);
260 #ifdef AFS_SGI_XFS_IOPS_ENV
261 if (VerifyXFSInodeSize(part, status.st_fstype) < 0)
266 if (status.st_ino != ROOTINO) {
267 Log("%s is not a mounted file system; ignored.\n", part);
272 VInitPartition(part, devname, status.st_dev);
276 #endif /* AFS_NT40_ENV */
/* VAttachPartitions(), mount-table (MNTTAB) variant.
 * Opens MNTTAB, walks it with getmntent() and offers each surviving entry
 * (mount point, special device) to VCheckPartition().  The stream is closed
 * with fclose() afterwards.
 * NOTE(review): the error counting and the return value are on lines not
 * visible here. */
279 int VAttachPartitions(void)
285 if (!(mntfile = fopen(MNTTAB, "r"))) {
286 Log("Can't open %s\n", MNTTAB);
/* This getmntent() flavour is treated as successful when it returns 0. */
290 while (!getmntent(mntfile, &mnt)) {
291 /* Ignore non ufs or non read/write partitions */
/* Only the first 9 characters of the option string are compared, so an
 * entry is skipped when its options begin with "ro,ignore". */
292 if ((strcmp(mnt.mnt_fstype, "ufs") !=0) ||
293 (strncmp(mnt.mnt_mntopts, "ro,ignore",9) ==0))
296 if (VCheckPartition(mnt.mnt_mountp, mnt.mnt_special) < 0 )
300 (void) fclose(mntfile);
305 #endif /* AFS_SUN5_ENV */
306 #if defined(AFS_SGI_ENV) || (defined(AFS_SUN_ENV) && !defined(AFS_SUN5_ENV)) || defined(AFS_HPUX_ENV)
/* VAttachPartitions(), setmntent()/getmntent() variant reading MOUNTED.
 * Each entry that carries the MNTOPT_RW option is offered to
 * VCheckPartition() as (mount directory, file-system name); entries without
 * that option are skipped.
 * NOTE(review): the error counting, the matching endmntent() call and the
 * return value are on lines not visible here. */
307 int VAttachPartitions(void)
311 struct mntent *mntent;
313 if ((mfd = setmntent(MOUNTED, "r")) == NULL) {
314 Log("Problems in getting mount entries(setmntent)\n");
/* The assignment in the loop condition is intentional: getmntent() yields
 * NULL when the table is exhausted. */
317 while (mntent = getmntent(mfd)) {
318 if (!hasmntopt(mntent, MNTOPT_RW)) continue;
320 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
331 * (This function was grabbed from df.c)
335 register struct vmount **vmountpp; /* place to tell where buffer is */
338 register struct vmount *vm;
341 /* set initial size of mntctl buffer to a MAGIC NUMBER */
344 /* try the operation until ok or a fatal error */
346 if ((vm = (struct vmount *)malloc(size)) == NULL) {
347 /* failed getting memory for mount status buf */
348 perror("FATAL ERROR: get_stat malloc failed\n");
353 * perform the QUERY mntctl - if it returns > 0, that is the
354 * number of vmount structures in the buffer. If it returns
355 -1, an error occurred. If it returned 0, then look in
356 * first word of buffer for needed size.
358 if ((nmounts = mntctl(MCTL_QUERY, size, (caddr_t)vm)) > 0) {
359 /* OK, got it, now return */
363 } else if (nmounts == 0) {
364 /* the buffer wasn't big enough .... */
365 /* .... get required buffer size */
370 /* some other kind of error occurred */
/* VAttachPartitions(), vmount-table variant.
 * Obtains the mount table through getmount() and walks its variable-length
 * struct vmount records.  Mounts flagged read-only, removable or remote are
 * skipped; the visible superblock test skips file systems that cannot be
 * read or that are "big files" file systems.  Remaining entries are offered
 * to VCheckPartition() as (VMT_STUB string, VMT_OBJECT string).
 * NOTE(review): the error counting and the return value are on lines not
 * visible here. */
377 int VAttachPartitions(void)
381 struct vmount *vmountp;
383 if ((nmounts = getmount(&vmountp)) <= 0) {
384 Log("Problems in getting # of mount entries(getmount)\n");
/* Records are vmt_length bytes apart, so the step is done in bytes.
 * NOTE(review): the step casts the pointer to int, which loses the upper
 * bits wherever pointers are wider than int; byte arithmetic through a
 * char pointer would avoid that -- confirm the supported ABIs. */
387 for (; nmounts; nmounts--,
388 vmountp = (struct vmount *)((int)vmountp + vmountp->vmt_length)) {
389 char *part = vmt2dataptr(vmountp, VMT_STUB);
391 if (vmountp->vmt_flags & (MNT_READONLY|MNT_REMOVABLE|MNT_REMOTE))
392 continue; /* Ignore any "special" partitions */
396 struct superblock fs;
397 /* The Log statements are non sequiturs in the SalvageLog and don't
398 * even appear in the VolserLog, so restrict them to the FileLog.
400 if (ReadSuper(&fs, vmt2dataptr(vmountp, VMT_OBJECT))<0) {
401 if (programType == fileServer)
402 Log("Can't read superblock for %s, ignoring it.\n", part);
405 if (IsBigFilesFileSystem(&fs)) {
406 if (programType == fileServer)
407 Log("%s is a big files filesystem, ignoring it.\n", part);
413 if (VCheckPartition(part, vmt2dataptr(vmountp, VMT_OBJECT)) < 0 )
420 #if defined(AFS_DUX40_ENV) || defined(AFS_DARWIN_ENV)
421 int VAttachPartitions(void)
426 if (setfsent() < 0) {
427 Log("Error listing filesystems.\n");
431 while (fsent = getfsent()) {
432 if (strcmp(fsent->fs_type, "rw") != 0) continue;
434 if (VCheckPartition(fsent->fs_file, fsent->fs_spec) < 0 )
445 #include <sys/stat.h>
448 * validate names in vptab.
455 int VValidVPTEntry(struct vptab *vpe)
457 int len = strlen(vpe->vp_name);
460 if (len < VICE_PREFIX_SIZE+1 || len > VICE_PREFIX_SIZE + 2)
462 if (strncmp(vpe->vp_name, VICE_PARTITION_PREFIX, VICE_PREFIX_SIZE))
465 for (i=VICE_PREFIX_SIZE; i<len; i++) {
466 if (vpe->vp_name[i] < 'a' || vpe->vp_name[i] > 'z') {
467 Log("Invalid partition name %s in registry, ignoring it.\n",
472 if (len == VICE_PREFIX_SIZE + 2) {
473 i = (int)(vpe->vp_name[VICE_PREFIX_SIZE]-'a') * 26 +
474 (int)(vpe->vp_name[VICE_PREFIX_SIZE+1]-'a') ;
476 Log("Invalid partition name %s in registry, ignoring it.\n",
482 len = strlen(vpe->vp_dev);
483 if (len != 2 || vpe->vp_dev[1] != ':' || vpe->vp_dev[0] < 'A' ||
484 vpe->vp_dev[0] > 'Z') {
485 Log("Invalid device name %s in registry, ignoring it.\n",
493 int VCheckPartition(char *partName)
500 /* partName is presumed to be of the form "X:" */
501 (void) sprintf(volRoot, "%c:\\", *partName);
503 if (!GetVolumeInformation(volRoot, /* volume root directory */
504 NULL, /* volume name buffer */
505 0, /* volume name size */
506 NULL, /* volume serial number */
507 &dwDummy, /* max component length */
508 &dwDummy, /* file system flags */
509 volFsType, /* file system name */
510 sizeof(volFsType))) {
511 err = GetLastError();
512 Log("VCheckPartition: Failed to get partition information for %s, ignoring it.\n",
517 if (strcmp(volFsType, "NTFS")) {
518 Log("VCheckPartition: Partition %s is not an NTFS partition, ignoring it.\n", partName);
/* VAttachPartitions(), partition-table (vptab/registry) variant.
 * Iterates the table with vpt_Start()/vpt_NextEntry() and, per entry:
 *   - rejects it when VValidVPTEntry() returns zero;
 *   - if an already-listed partition's devName starts with the same
 *     character as the entry's vp_dev, logs the clash and marks the listed
 *     partition PART_DUPLICATE;
 *   - runs VCheckPartition() on the entry's vp_dev;
 *   - in the fileserver, stats vp_dev + "\FORCESALVAGE" and logs when it
 *     exists;
 *   - registers the partition through VInitPartition(), using the device
 *     string's first character minus 'A' as the device number.
 * A final pass unlinks every PART_DUPLICATE node from DiskPartitionList.
 * NOTE(review): salvpath is filled with unbounded strcpy()/strcat(); the
 * source length is assumed to fit in MAXPATHLEN -- confirm.
 * NOTE(review): the error counting, whether unlinked nodes are freed, and
 * the return value are on lines not visible here. */
526 int VAttachPartitions(void)
528 struct DiskPartition *partP, *prevP, *nextP;
529 struct vpt_iter iter;
532 if (vpt_Start(&iter)<0) {
533 Log("No partitions to attach.\n");
537 while (0==vpt_NextEntry(&iter, &entry)) {
538 if (!VValidVPTEntry(&entry)) {
542 /* This test for duplicates relies on the fact that the method
543 * of storing the partition names in the NT registry means the same
544 * partition name will never appear twice in the list.
546 for (partP = DiskPartitionList; partP; partP = partP->next) {
547 if (*partP->devName == *entry.vp_dev) {
548 Log("Same drive (%s) used for both partition %s and partition %s, ignoring both.\n", entry.vp_dev, partP->name, entry.vp_name);
549 partP->flags = PART_DUPLICATE;
550 break; /* Only one entry will ever be in this list. */
553 if (partP) continue; /* found a duplicate */
555 if (VCheckPartition(entry.vp_dev)<0)
557 /* This test allows for manually inserting the FORCESALVAGE flag
558 * and thereby invoking the salvager. scandisk obviously won't be
561 if (programType == fileServer) {
563 char salvpath[MAXPATHLEN];
564 strcpy(salvpath, entry.vp_dev);
565 strcat(salvpath, "\\FORCESALVAGE");
566 if (stat(salvpath, &status) == 0) {
567 Log("VAttachPartitions: Found %s; aborting\n", salvpath);
571 VInitPartition(entry.vp_name, entry.vp_dev, *entry.vp_dev - 'A');
575 /* Run through partition list and clear out the dupes. */
576 prevP = nextP = NULL;
577 for (partP = DiskPartitionList; partP; partP = nextP) {
579 if (partP->flags == PART_DUPLICATE) {
581 prevP->next = partP->next;
583 DiskPartitionList = partP->next;
594 #ifdef AFS_LINUX22_ENV
/* VAttachPartitions(), /proc/mounts variant.
 * Reads the mount list from "/proc/mounts", falling back to "/etc/mtab"
 * when the first cannot be opened, and offers every entry to
 * VCheckPartition() as (mount directory, file-system name).  No mount
 * option filtering is visible in this variant.
 * NOTE(review): the error counting, the matching endmntent() call and the
 * return value are on lines not visible here. */
595 int VAttachPartitions(void)
599 struct mntent *mntent;
601 if ((mfd = setmntent("/proc/mounts", "r")) == NULL) {
602 if ((mfd = setmntent("/etc/mtab", "r")) == NULL) {
603 Log("Problems in getting mount entries(setmntent)\n");
/* The assignment in the loop condition is intentional: getmntent() yields
 * NULL when the list is exhausted. */
607 while (mntent = getmntent(mfd)) {
608 if (VCheckPartition(mntent->mnt_dir, mntent->mnt_fsname) < 0 )
615 #endif /* AFS_LINUX22_ENV */
617 /* This routine is to be called whenever the actual name of the partition
618 * is required. The canonical name is still in part->name.
620 char * VPartitionPath(struct DiskPartition *part)
623 return part->devName;
629 /* get partition structure, abortp tells us if we should abort on failure */
/* Looks a partition up by name: a linear scan of DiskPartitionList using an
 * exact strcmp() match against each node's name field.
 * NOTE(review): what happens on a match, how abortp is honoured when nothing
 * matches, and the return statement are on lines not visible here. */
630 struct DiskPartition *VGetPartition_r(char *name, int abortp)
632 register struct DiskPartition *dp;
633 for (dp = DiskPartitionList; dp; dp = dp->next) {
634 if (strcmp(dp->name, name) == 0)
/* Wrapper around VGetPartition_r(): forwards name and abortp and keeps the
 * result in retVal.
 * NOTE(review): presumably the volume-package lock is held around the call
 * and retVal is returned -- those lines are not visible here. */
642 struct DiskPartition *VGetPartition(char *name, int abortp)
644 struct DiskPartition *retVal;
646 retVal = VGetPartition_r(name, abortp);
/* VSetPartitionDiskUsage_r(), Win32 variant.
 * Queries GetDiskFreeSpaceEx() for the path returned by VPartitionPath(dp)
 * and converts the three byte counts to 1K units with a right shift by 10.
 * Stores the difference between total-free and caller-free space in
 * dp->minFree and the total size in dp->totalUsable.
 * NOTE(review): the 1K counts are narrowed to int, which cannot represent
 * volumes of 2 TiB or more where int is 32 bits -- confirm this is
 * acceptable.
 * NOTE(review): the failure message goes to printf() here, while the rest
 * of the file reports through Log().
 * NOTE(review): the assignment of dp->free is on a line not visible here. */
652 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
654 ULARGE_INTEGER free_user, total, free_total;
655 int ufree, tot, tfree;
657 if (!GetDiskFreeSpaceEx(VPartitionPath(dp), &free_user, &total,
659 printf("Failed to get disk space info for %s, error = %d\n",
660 dp->name, GetLastError());
664 /* Convert to 1K units. */
665 ufree = (int) Int64ShraMod32(free_user.QuadPart, 10);
666 tot = (int) Int64ShraMod32(total.QuadPart, 10);
667 tfree = (int) Int64ShraMod32(free_total.QuadPart, 10);
669 dp->minFree = tfree - ufree; /* only used in VPrintDiskStats_r */
670 dp->totalUsable = tot;
/* VSetPartitionDiskUsage_r(), statvfs()/statfs() variant.
 * Refreshes the cached usage figures of one partition from the file system
 * mounted at dp->name:
 *   reserved  = f_bfree - f_bavail       -> dp->minFree
 *   availblks = f_blocks - reserved      -> dp->totalUsable
 *   availblks - (f_blocks - f_bfree)     -> dp->free
 *   f_files                              -> dp->f_files
 * Block counts are rescaled to 1K units, either multiplying by bsize/1024 or
 * dividing by 1024/bsize; bsize comes from f_frsize or f_bsize.
 * Partitions flagged PART_DONTUPDATE are tested for first.
 * NOTE(review): the preprocessor conditions choosing statvfs vs statfs and
 * f_frsize vs f_bsize, the test choosing the scaling direction, and the
 * early returns are on lines not visible here.
 * NOTE(review): all counters are plain int; very large file systems may
 * overflow them -- confirm the intended size limits. */
675 void VSetPartitionDiskUsage_r(register struct DiskPartition *dp)
678 int fd, totalblks, free, used, availblks, bsize, code;
681 struct statvfs statbuf;
683 struct statfs statbuf;
686 if (dp->flags & PART_DONTUPDATE)
688 /* Note: we don't bother syncing because it's only an estimate, update
689 is syncing every 30 seconds anyway, we only have to keep the disk
690 approximately 10% from full--you just can't get the stuff in from
691 the net fast enough to worry */
693 code = statvfs(dp->name, &statbuf);
695 code = statfs(dp->name, &statbuf);
698 Log("statfs of %s failed in VSetPartitionDiskUsage (errno = %d)\n", dp->name, errno);
701 if (statbuf.f_blocks == -1) { /* Undefined; skip stats.. */
702 Log("statfs of %s failed in VSetPartitionDiskUsage\n", dp->name);
705 totalblks = statbuf.f_blocks;
706 free = statbuf.f_bfree;
707 reserved = free - statbuf.f_bavail;
709 bsize = statbuf.f_frsize;
711 bsize = statbuf.f_bsize;
713 availblks = totalblks - reserved;
714 dp->f_files = statbuf.f_files; /* max # of files in partition */
716 /* Now free and totalblks are in fragment units, but we want them in
720 free *= (bsize/1024);
721 totalblks *= (bsize / 1024);
722 availblks *= (bsize / 1024 );
723 reserved *= (bsize / 1024 );
726 free /= (1024/bsize);
727 totalblks /= (1024/bsize);
728 availblks /= (1024/bsize);
729 reserved /= (1024/bsize);
731 /* now compute remaining figures */
732 used = totalblks - free;
734 dp->minFree = reserved; /* only used in VPrintDiskStats_r */
735 dp->totalUsable = availblks;
736 dp->free = availblks - used; /* this is exactly f_bavail */
738 #endif /* AFS_NT40_ENV */
/* Wrapper around VSetPartitionDiskUsage_r() for the given partition.
 * NOTE(review): presumably the volume-package lock is held around the call;
 * those lines are not visible here. */
740 void VSetPartitionDiskUsage(register struct DiskPartition *dp)
743 VSetPartitionDiskUsage_r(dp);
/* Refreshes the usage figures of every partition on DiskPartitionList by
 * calling VSetPartitionDiskUsage_r() on each node in list order.
 * NOTE(review): an extra statement is compiled into the loop only when
 * AFS_PTHREAD_ENV is not defined; that line is not visible here. */
747 void VResetDiskUsage_r(void)
749 struct DiskPartition *dp;
750 for (dp = DiskPartitionList; dp; dp = dp->next) {
751 VSetPartitionDiskUsage_r(dp);
752 #ifndef AFS_PTHREAD_ENV
754 #endif /* !AFS_PTHREAD_ENV */
758 void VResetDiskUsage(void)
/* Charges `blocks` against both the partition's free count and the volume's
 * V_diskused(), after testing `checkBlocks` against three limits:
 *   1. free - checkBlocks dropping below aixlow_water percent of the
 *      partition's totalUsable;
 *   2. within that case, free - checkBlocks going negative;
 *   3. otherwise, a non-zero V_maxquota() being exceeded by
 *      V_diskused() + checkBlocks.
 * NOTE(review): the values stored through `ec` and any early returns are on
 * lines not visible here.
 * NOTE(review): totalUsable * aixlow_water is multiplied before the division
 * by 100; if totalUsable is a 32-bit count of 1K blocks this can overflow on
 * large partitions -- confirm the field's type. */
765 void VAdjustDiskUsage_r(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
767 afs_int32 rem, minavail;
769 /* why blocks instead of checkBlocks in the check below? Otherwise, any check
770 for less than BlocksSpare would skip the error-checking path, and we
771 could grow existing files forever, not just for another BlocksSpare
775 if ((rem = vp->partition->free - checkBlocks) <
776 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
778 if (vp->partition->free - checkBlocks < 0)
781 else if (V_maxquota(vp) && V_diskused(vp) + checkBlocks > V_maxquota(vp))
784 vp->partition->free -= blocks;
785 V_diskused(vp) += blocks;
788 void VAdjustDiskUsage(Error *ec, Volume *vp, afs_int32 blocks, afs_int32 checkBlocks)
791 VAdjustDiskUsage_r(ec, vp, blocks, checkBlocks);
795 int VDiskUsage_r(Volume *vp, afs_int32 blocks)
797 afs_int32 rem, minavail;
800 if ((rem = vp->partition->free - blocks) <
801 (minavail = (vp->partition->totalUsable * aixlow_water) / 100))
803 if (vp->partition->free - blocks < 0)
807 vp->partition->free -= blocks;
811 int VDiskUsage(Volume *vp, afs_int32 blocks)
815 retVal = VDiskUsage_r(vp, blocks);
820 void VPrintDiskStats_r(void)
822 struct DiskPartition *dp;
823 for (dp = DiskPartitionList; dp; dp = dp->next) {
824 Log("Partition %s: %d available 1K blocks (minfree=%d), ",
825 dp->name, dp->totalUsable, dp->minFree);
827 Log("overallocated by %d blocks\n", -dp->free);
829 Log("%d free blocks\n", dp->free);
833 void VPrintDiskStats(void)
841 /* Need a separate lock file on NT, since NT only has mandatory file locks. */
842 #define LOCKFILE "LOCKFILE"
/* VLockPartition_r(), Win32 variant.
 * Looks the partition up with VGetPartition_r(name, 0).  When no lock handle
 * is cached (lock_fd == -1) it creates, or truncates, a hidden file named
 * LOCKFILE under VPartitionPath(dp) and asserts that CreateFile() succeeded.
 * It then requests an exclusive lock with LockFileEx() using a zeroed `lap`.
 * NOTE(review): the Win32 HANDLE is stored in the int field lock_fd through
 * casts; on 64-bit Windows a HANDLE is pointer sized -- confirm this is
 * safe for the supported targets.
 * NOTE(review): path is built with sprintf(); its declaration and size are
 * not visible here.  Handling of a NULL dp and of the LockFileEx() result
 * are likewise not visible. */
843 void VLockPartition_r(char *name)
845 struct DiskPartition *dp = VGetPartition_r(name, 0);
849 if (dp->lock_fd == -1) {
852 (void) sprintf(path, "%s\\%s", VPartitionPath(dp), LOCKFILE);
853 dp->lock_fd = (int)CreateFile(path, GENERIC_WRITE,
854 FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
855 CREATE_ALWAYS, FILE_ATTRIBUTE_HIDDEN, NULL);
856 assert (dp->lock_fd != (int)INVALID_HANDLE_VALUE);
858 memset((char*)&lap, 0, sizeof(lap));
859 rc = LockFileEx((HANDLE)dp->lock_fd, LOCKFILE_EXCLUSIVE_LOCK,
/* VUnlockPartition_r(), Win32 variant.
 * Looks the partition up with VGetPartition_r(name, 0) and does nothing when
 * it is unknown.  Otherwise releases the lock with UnlockFileEx() using a
 * zeroed `lap` and closes the handle kept in dp->lock_fd.
 * NOTE(review): the return values of UnlockFileEx() and CloseHandle() are
 * not checked in the visible lines; whether lock_fd is reset to -1
 * afterwards is on a line not visible here. */
865 void VUnlockPartition_r(char *name)
867 register struct DiskPartition *dp = VGetPartition_r(name, 0);
870 if (!dp) return; /* no partition, will fail later */
871 memset((char*)&lap, 0, sizeof(lap));
873 UnlockFileEx((HANDLE)dp->lock_fd, 0, 1, 0, &lap);
874 CloseHandle((HANDLE)dp->lock_fd);
877 #else /* AFS_NT40_ENV */
879 #if defined(AFS_HPUX_ENV)
/* Helpers for addressing one privilege bit inside a privilege-group mask.
 * BITS(type) is the width of `type` in bits, taking a char as 8 bits.
 * LOCKRDONLY_OFFSET is the index of the int-sized mask word that holds the
 * PRIV_LOCKRDONLY bit; the "- 1" presumably reflects privilege numbers
 * starting at 1 -- confirm against <sys/privgrp.h>. */
880 #define BITS_PER_CHAR (8)
881 #define BITS(type) (sizeof(type) * BITS_PER_CHAR)
883 #define LOCKRDONLY_OFFSET ((PRIV_LOCKRDONLY - 1) / BITS(int))
884 #endif /* defined(AFS_HPUX_ENV) */
/* VLockPartition_r(), open()/lockf()/flock() variant.
 * Looks the partition up with VGetPartition_r(name, 0); returns at once when
 * the partition is unknown or a lock descriptor is already cached in
 * dp->lock_fd.  Otherwise:
 *   1. picks the object to open: dp->devName under AFS_SUN5_ENV or
 *      AFS_AIX41_ENV, dp->name in the other visible arm;
 *   2. tries open() up to 25 times, sleeping via select() with a 500000
 *      microsecond tv_usec between attempts, and asserts that an attempt
 *      succeeded;
 *   3. takes the lock.  Under AFS_HPUX_ENV the PRIV_LOCKRDONLY bit is
 *      switched on in the PRIV_GLOBAL privilege group around the lockf()
 *      call when it was not already set, and switched off again afterwards.
 *      The remaining visible arms use lockf(F_LOCK) on AIX and SunOS 5 and
 *      flock(LOCK_EX) elsewhere.
 * NOTE(review): getprivgrp(), setprivgrp(), lockf() and flock() are invoked
 * inside assert().  If this assert can be compiled out (as the standard one
 * is under NDEBUG) the calls disappear with it -- confirm the assert in use.
 * NOTE(review): the open() flags (`code`), the seconds field of `pausing`
 * and several declarations are on lines not visible here. */
886 void VLockPartition_r(char *name)
888 register struct DiskPartition *dp = VGetPartition_r(name, 0);
891 struct timeval pausing;
892 #if defined(AFS_HPUX_ENV)
894 struct privgrp_map privGrpList[PRIV_MAXGRPS];
895 unsigned int *globalMask;
897 #endif /* defined(AFS_HPUX_ENV) */
899 if (!dp) return; /* no partition, will fail later */
900 if (dp->lock_fd != -1) return;
902 #if defined(AFS_SUN5_ENV) || defined(AFS_AIX41_ENV)
903 partitionName = dp->devName;
906 partitionName = dp->name;
910 for (retries=25; retries; retries--) {
911 dp->lock_fd = open(partitionName, code);
912 if (dp->lock_fd != -1) break;
914 pausing.tv_usec = 500000;
915 select(0, NULL, NULL, NULL, &pausing);
917 assert(retries != 0);
919 #if defined (AFS_HPUX_ENV)
921 assert(getprivgrp(privGrpList) == 0);
924 * In general, it will difficult and time-consuming ,if not impossible,
925 * to try to find the privgroup to which this process belongs that has the
926 * smallest membership, to minimise the security hole. So, we use the privgrp
927 * to which everybody belongs.
929 /* first, we have to find the global mask */
930 for (globalMaskIndex = 0; globalMaskIndex < PRIV_MAXGRPS;
932 if (privGrpList[globalMaskIndex].priv_groupno == PRIV_GLOBAL) {
933 globalMask = &(privGrpList[globalMaskIndex].
934 priv_mask[LOCKRDONLY_OFFSET]);
939 if (((*globalMask) & privmask(PRIV_LOCKRDONLY)) == 0) {
940 /* allow everybody to set a lock on a read-only file descriptor */
941 (*globalMask) |= privmask(PRIV_LOCKRDONLY);
942 assert(setprivgrp(PRIV_GLOBAL,
943 privGrpList[globalMaskIndex].priv_mask) == 0);
945 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
947 /* remove the privilege granted to everybody to lock a read-only fd */
948 (*globalMask) &= ~(privmask(PRIV_LOCKRDONLY));
949 assert(setprivgrp(PRIV_GLOBAL,
950 privGrpList[globalMaskIndex].priv_mask) == 0);
953 /* in this case, we should be able to do this with impunity, anyway */
954 lockfRtn = lockf(dp->lock_fd, F_LOCK, 0);
957 assert (lockfRtn != -1);
959 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV)
960 assert (lockf(dp->lock_fd, F_LOCK, 0) != -1);
962 assert (flock(dp->lock_fd, LOCK_EX) == 0);
963 #endif /* defined(AFS_AIX_ENV) */
/* VUnlockPartition_r(), non-NT variant.
 * Looks the partition up with VGetPartition_r(name, 0) and returns at once
 * when it is unknown.
 * NOTE(review): the statements that release the lock (presumably closing
 * dp->lock_fd and resetting it to -1) are on lines not visible here. */
967 void VUnlockPartition_r(char *name)
969 register struct DiskPartition *dp = VGetPartition_r(name, 0);
970 if (!dp) return; /* no partition, will fail later */
975 #endif /* AFS_NT40_ENV */
/* Wrapper around VLockPartition_r() for the named partition.
 * NOTE(review): presumably the volume-package lock is held around the call;
 * those lines are not visible here. */
977 void VLockPartition(char *name)
980 VLockPartition_r(name);
/* Wrapper around VUnlockPartition_r() for the named partition.
 * NOTE(review): presumably the volume-package lock is held around the call;
 * those lines are not visible here. */
984 void VUnlockPartition(char *name)
987 VUnlockPartition_r(name);