From: Hartmut Reuter
Date: Tue, 24 Mar 2009 13:36:35 +0000 (+0000)
Subject: DEVEL15-volserver-split-volume-functionality-20090324
X-Git-Tag: openafs-devel-1_5_58~16
X-Git-Url: http://git.openafs.org/?p=openafs.git;a=commitdiff_plain;h=5a2771be8b7f8d933e4822f0098f8db536e9093e

DEVEL15-volserver-split-volume-functionality-20090324

LICENSE IPL10
FIXES 124520

implement the server side of vos split

(cherry picked from commit ba742892e2102cf545157e9c97df10799a58403e)
---

diff --git a/src/dir/dir.c b/src/dir/dir.c
index 33cd8b6..e767c11 100644
--- a/src/dir/dir.c
+++ b/src/dir/dir.c
@@ -86,6 +86,7 @@ extern void *DNew(register afs_int32 *fid, register int page);
 #define LookupOffset afs_dir_LookupOffset
 #define EnumerateDir afs_dir_EnumerateDir
 #define IsEmpty afs_dir_IsEmpty
+#define InverseLookup afs_dir_InverseLookup
 
 #if defined(AFS_DISCON_ENV)
 #define ChangeFid afs_dir_ChangeFid
@@ -546,6 +547,73 @@ FindItem(void *dir, char *ename, unsigned short **previtem)
     }
 }
 
+static struct DirEntry *
+FindFid (void *dir, afs_uint32 vnode, afs_uint32 unique)
+{
+    /* Find a directory entry, given the vnode and uniquifier of an object.
+     * Every hash chain of the directory is walked until an entry with a
+     * matching fid is found.  On success a pointer into a still-held buffer
+     * is returned and the caller has to DRelease() it.  If no entry is
+     * found, or a blob cannot be read, no buffers are left held and a null
+     * pointer is returned instead.
+     */
+    register int i;
+    register struct DirHeader *dhp;
+    register struct DirEntry *tp;
+    register struct DirEntry *next;
+
+    dhp = (struct DirHeader *) DRead(dir,0);
+    if (!dhp)
+        return 0;
+    for (i=0; i<NHASHENT; i++) {
+        if (dhp->hashTable[i] == 0)
+            continue;           /* empty hash chain */
+        tp = GetBlob(dir,(u_short)ntohs(dhp->hashTable[i]));
+        if (!tp) {              /* should not happen */
+            DRelease(dhp, 0);
+            return 0;
+        }
+        while (tp) {
+            if (vnode == ntohl(tp->fid.vnode)
+                && unique == ntohl(tp->fid.vunique)) {
+                DRelease(dhp,0);
+                return tp;
+            }
+            /* Fetch the successor before dropping the current blob, then
+             * release the current blob exactly once, also when the
+             * successor could not be read. */
+            next = tp->next ? GetBlob(dir,(u_short)ntohs(tp->next)) : 0;
+            DRelease(tp,0);
+            tp = next;
+        }
+    }
+    DRelease(dhp,0);
+    return (struct DirEntry *)0;
+}
+
+int
+InverseLookup (void *dir, afs_uint32 vnode, afs_uint32 unique, char *name,
+               afs_uint32 length)
+{
+    /* Look for the name pointing to given vnode and unique in a directory.
+     * On success the name is copied, including its terminating NUL, into
+     * name, which must have room for length bytes.  Returns 0 on success,
+     * ENOENT if no entry matches and E2BIG if the name does not fit.
+     */
+    register struct DirEntry *entry;
+    int code = 0;
+
+    entry = FindFid(dir, vnode, unique);
+    if (!entry)
+        return ENOENT;
+    if (strlen(entry->name) >= length)
+        code = E2BIG;
+    else
+        strcpy(name, entry->name);
+    DRelease(entry, 0);
+    return code;
+}
+
 #if defined(AFS_DISCON_ENV)
 /*!
  * Change an entry fid.
diff --git a/src/tvolser/Makefile.in b/src/tvolser/Makefile.in
index 9dee0d0..1546c7b 100644
--- a/src/tvolser/Makefile.in
+++ b/src/tvolser/Makefile.in
@@ -25,7 +25,7 @@ FSINT=../fsint
 VOLSER=../volser
 RX=../rx
 
-VOLSEROBJS=volmain.o volprocs.o physio.o voltrans.o volerr.o volint.cs.o dumpstuff.o volint.ss.o volint.xdr.o vscommon.o
+VOLSEROBJS=volmain.o volprocs.o physio.o voltrans.o volerr.o volint.cs.o dumpstuff.o volint.ss.o volint.xdr.o vscommon.o vol_split.o
 
 VLSERVEROBJS=vldbint.cs.o vldbint.xdr.o vl_errors.o
 
@@ -69,6 +69,8 @@ rx_pthread.o: ${RX}/rx_pthread.c
 	${COMPILE} -DDPF_FSLOG
 volmain.o: ${VOLSER}/volmain.c
 	${COMPILE}
+vol_split.o: ${VOLSER}/vol_split.c
+	${COMPILE}
 volprocs.o: ${VOLSER}/volprocs.c
 	${COMPILE}
 physio.o: ${VOLSER}/physio.c
diff --git a/src/vol/namei_ops.c b/src/vol/namei_ops.c
index 8e1e140..f530a31 100644
--- a/src/vol/namei_ops.c
+++ b/src/vol/namei_ops.c
@@ -797,6 +797,98 @@ namei_inc(IHandle_t * h, Inode ino, int p1)
     return code;
 }
 
+int
+namei_replace_file_by_hardlink(IHandle_t *hLink, IHandle_t *hTarget)
+{
+    /* Make the file named by hLink a hard link to the file named by hTarget.
+     * A file already present under hLink's name is removed first.  Returns
+     * the result of link(): 0 on success, -1 otherwise.
+     */
+    namei_t nameLink;
+    namei_t nameTarget;
+
+    /* Convert handles to file names. */
+    namei_HandleToName(&nameLink, hLink);
+    namei_HandleToName(&nameTarget, hTarget);
+
+    /* The unlink() result is deliberately ignored: if the old name could
+     * not be removed, the link() below fails and reports the problem. */
+    unlink(nameLink.n_path);
+    return link(nameTarget.n_path, nameLink.n_path);
+}
+
+int
+namei_copy_on_write(IHandle_t *h)
+{
+    /* Give the file behind h private data if it is shared via hard links:
+     * the contents are copied to "<path>-tmp", which then replaces the
+     * original name.  Returns 0 on success or when the link count is 1,
+     * ENAMETOOLONG if the temporary name does not fit, ENOMEM if the copy
+     * buffer cannot be allocated and EIO for every other failure.  A
+     * temporary file created here is removed again on failure.
+     */
+    static const char suffix[] = "-tmp";
+    afs_int32 fd, code = 0;
+    namei_t name;
+    FdHandle_t *fdP;
+    struct afs_stat tstat;
+    char path[259];
+    char *buf;
+    afs_size_t size;
+    afs_int32 tlen;
+
+    namei_HandleToName(&name, h);
+    if (afs_stat(name.n_path, &tstat) < 0)
+        return EIO;
+    if (tstat.st_nlink <= 1)
+        return 0;               /* not shared, nothing to copy */
+
+    /* sizeof(suffix) accounts for the terminating NUL of the new name. */
+    if (strlen(name.n_path) + sizeof(suffix) > sizeof(path))
+        return ENAMETOOLONG;
+    strcpy(path, name.n_path);
+    strcat(path, suffix);
+
+    fdP = IH_OPEN(h);
+    if (!fdP)
+        return EIO;
+    fd = afs_open(path, O_CREAT | O_EXCL | O_TRUNC | O_RDWR, 0);
+    if (fd < 0) {
+        FDH_CLOSE(fdP);
+        return EIO;
+    }
+    buf = malloc(8192);
+    if (!buf) {
+        close(fd);
+        unlink(path);
+        FDH_CLOSE(fdP);
+        return ENOMEM;
+    }
+    size = tstat.st_size;
+    FDH_SEEK(fdP, 0, 0);
+    while (size) {
+        tlen = size > 8192 ? 8192 : size;
+        if (FDH_READ(fdP, buf, tlen) != tlen)
+            break;
+        if (write(fd, buf, tlen) != tlen)
+            break;
+        size -= tlen;
+    }
+    free(buf);
+    FDH_REALLYCLOSE(fdP);
+
+    /* A non-zero size means the copy loop stopped early.  rename() replaces
+     * the old name in one step, so the original is not unlinked beforehand
+     * and stays in place if anything fails. */
+    if (close(fd) < 0 || size)
+        code = EIO;
+    else if (rename(path, name.n_path) < 0)
+        code = EIO;
+    if (code)
+        unlink(path);
+    return code;
+}
+
 /************************************************************************
  * File Name Structure
  ************************************************************************
diff --git a/src/volser/Makefile.in b/src/volser/Makefile.in
index 2bfa088..844eb38 100644
--- a/src/volser/Makefile.in
+++ b/src/volser/Makefile.in
@@ -52,7 +52,7 @@ VOLDUMP_LIBS = \
 VSOBJS=vsprocs.o vsutils.o lockprocs.o volint.xdr.o volerr.o
 
 SOBJS=volmain.o volprocs.o physio.o common.o voltrans.o volerr.o \
-	volint.cs.o dumpstuff.o volint.ss.o volint.xdr.o
+	volint.cs.o dumpstuff.o volint.ss.o volint.xdr.o vol_split.o
 
 all: volserver vos restorevol voldump \
 	${TOP_INCDIR}/afs/volser.h \
@@ -115,6 +115,7 @@ vsprocs.o: vsprocs.c ${VINCLS} ${RINCLS} ${INTINCLS}
 physio.o: physio.c ${VINCLS}
common.o: common.c ${VINCLS} lockprocs.o: lockprocs.c ${VINCLS} ${INTINCLS} ${RINCLS} +vol_split.o: vol_split.c ${VINCLS} ${INTINCLS} ${RINCLS} # # Installation targets @@ -241,4 +242,4 @@ check-splint:: vos.c restorevol.c \ vsprocs.c vsutils.c lockprocs.c volerr.c \ volmain.c volprocs.c physio.c common.c voltrans.c \ - dumpstuff.c + dumpstuff.c vol_split.c diff --git a/src/volser/vol_split.c b/src/volser/vol_split.c new file mode 100644 index 0000000..1b3df2d --- /dev/null +++ b/src/volser/vol_split.c @@ -0,0 +1,886 @@ +/* + * Copyright (c) 2007, Hartmut Reuter, + * RZG, Max-Planck-Institut f. Plasmaphysik. + * All Rights Reserved. + * + */ + +#include +#include + +#include +#include +#ifdef AFS_PTHREAD_ENV +#include +#else /* AFS_PTHREAD_ENV */ +#include +#endif /* AFS_PTHREAD_ENV */ +#ifdef AFS_NT40_ENV +#include +#include +#include +#include +#include +#else +#include +#include +#endif +#ifdef HAVE_STRING_H +#include +#else +#ifdef HAVE_STRINGS_H +#include +#endif +#endif +#include +#include + +#include +#include +#include "nfs.h" +#include "lwp.h" +#include "lock.h" +#include +#include "ihandle.h" +#include "vnode.h" +#include "volume.h" +#include "partition.h" +#include "viceinode.h" +#include "vol.h" +#ifdef AFS_RXOSD_SUPPORT +#include "rxosd.h" +#include "vol_osd.h" +#include "../vol/vol_osd_prototypes.h" +#endif + +#define NEEDED 1 +#define PARENT 2 +#define CHANGEPARENT 4 + +#define NAMEI_VNODEMASK 0x03ffffff +#define NAMEI_TAGMASK 0x7 +#define NAMEI_TAGSHIFT 26 +#define NAMEI_UNIQMASK 0xffffffff +#define NAMEI_UNIQSHIFT 32 + +struct VnodeExtract { + afs_uint32 vN; + afs_uint32 parent; + afs_uint32 flag; +}; + +struct Msg { + struct rx_call * call; + int verbose; + char line[1024]; +}; + +afs_int32 ExtractVnodes(struct Msg *m, Volume *vol, afs_int32 class, + struct VnodeExtract **list, + afs_int32 *length, afs_uint32 where, + struct VnodeDiskObject *vd, + afs_int32 *parent, struct VnodeDiskObject *parentvd) +{ + afs_int32 code = 0; + char 
buf[SIZEOF_LARGEDISKVNODE]; + struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf; + FdHandle_t *fdP = 0; + StreamHandle_t *stream = 0; + struct VnodeClassInfo *vcp = &VnodeClassInfo[class]; + struct VnodeExtract *e; + afs_uint32 size; + afs_uint32 offset; + afs_uint32 vN; + + *length = 0; + if (parent) + *parent = 0; + + fdP = IH_OPEN(vol->vnodeIndex[class].handle); + if (!fdP) { + sprintf(m->line, "Couldn't open %s Index of volume %u\n", + class ? "small":"large", V_id(vol)); + rx_Write(m->call, m->line, strlen(m->line)); + code = EIO; + goto Bad_Extract; + } + size = FDH_SIZE(fdP); + *list = (struct VnodeExtract *) malloc(size / vcp->diskSize + * sizeof(struct VnodeExtract)); + if (!(*list)) { + code = ENOMEM; + goto Bad_Extract; + } + memset(*list, 0, size / vcp->diskSize * sizeof(struct VnodeExtract)); + stream = FDH_FDOPEN(fdP, "r"); + if (!stream) { + sprintf(m->line, "Couldn't stream open %s Index of volume %u\n", + class ? "small":"large", V_id(vol)); + rx_Write(m->call, m->line, strlen(m->line)); + return EIO; + goto Bad_Extract; + } + code = STREAM_SEEK(stream, vcp->diskSize, 0); + if (code) + goto Bad_Extract; + + offset = vcp->diskSize; + e = *list; + while (!STREAM_EOF(stream)) { + afs_int32 vN = (offset >> (vcp->logSize -1)) - 1 + class; + if (STREAM_READ(vnode, vcp->diskSize, 1, stream) == 1) { + if (vnode->type != vNull) { + e->vN = vN; + e->parent = vnode->parent; + if (vN == where && class == vLarge) { + memcpy(vd, vnode, vcp->diskSize); + *parent = vnode->parent; + } + e++; + } + offset += vcp->diskSize; + } + } + *length = (e - *list); + if (class == vLarge) { + if (*parent) { + offset = (*parent + 1 - class) << (vcp->logSize -1); + code = STREAM_SEEK(stream, offset, 0); + if (STREAM_READ(vnode, vcp->diskSize, 1, stream) == 1) + memcpy(parentvd, vnode, vcp->diskSize); + else + code = EIO; + } else { + sprintf(m->line, "SplitVolume: extract didn't see directory %u\n", where); + rx_Write(m->call, m->line, strlen(m->line)); + code = 
ENOENT; + } + } + if (m->verbose) { + sprintf(m->line, "Volume %u has %u %s vnodes in volume %uu\n", + V_parentId(vol), *length, class? "small":"large"); + rx_Write(m->call, m->line, strlen(m->line)); + } + +Bad_Extract: + if (stream) + STREAM_CLOSE(stream); + if (fdP) + FDH_CLOSE(fdP); + if (code) { + free(*list); + *list = 0; + } + return code; +} + +afs_int32 FindVnodes(struct Msg *m, afs_uint32 where, + struct VnodeExtract *list, afs_int32 length, + struct VnodeExtract *dlist, afs_int32 dlength, + afs_int32 *needed, afs_int32 class) +{ + afs_int32 i, j, found = 0; + afs_int32 parent = 0; + + *needed = 0; + for (i=0; iline, + "SplitVolume: directory %u where to start new volume not found\n", + where); + rx_Write(m->call, m->line, strlen(m->line)); + return ENOENT; + } + found = 0; + for (i=0; iline, "SplitVolume: parent directory %u not found\n", + parent); + rx_Write(m->call, m->line, strlen(m->line)); + return ENOENT; + } + } + found = 1; + while (found) { + found = 0; + for (i=0; iverbose) { + sprintf(m->line, "%u %s vnodes will go into the new volume\n", + *needed, class ? 
"small" : "large"); + rx_Write(m->call, m->line, strlen(m->line)); + } + return 0; +} + +afs_int32 copyDir(struct Msg *m, IHandle_t *inh, IHandle_t *outh) +{ + afs_int32 code; + FdHandle_t *infdP, *outfdP; + char *tbuf; + afs_size_t size; + + infdP = IH_OPEN(inh); + if (!infdP) { + sprintf(m->line, "Couldn't open input directory %u.%u.%u\n", + infdP->fd_ih->ih_vid, + (afs_uint32)(infdP->fd_ih->ih_ino & NAMEI_VNODEMASK), + (afs_uint32)(infdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT)); + rx_Write(m->call, m->line, strlen(m->line)); + return EIO; + } + outfdP = IH_OPEN(outh); + if (!outfdP) { + sprintf(m->line, "Couldn't open output directory %u.%u.%u\n", + outfdP->fd_ih->ih_vid, + (afs_uint32)(outfdP->fd_ih->ih_ino & NAMEI_VNODEMASK), + (afs_uint32)(outfdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT)); + rx_Write(m->call, m->line, strlen(m->line)); + FDH_REALLYCLOSE(infdP); + return EIO; + } + tbuf = malloc(2048); + FDH_SEEK(infdP, 0, 0); + FDH_SEEK(outfdP, 0, 0); + size = FDH_SIZE(infdP); + while (size) { + afs_int32 tlen; + tlen = size > 2048 ? 
2048 : size; + if (FDH_READ(infdP, tbuf, tlen) != tlen) { + sprintf(m->line, "Couldn't read directory %u.%u.%u\n", + infdP->fd_ih->ih_vid, + (afs_uint32)(infdP->fd_ih->ih_ino & NAMEI_VNODEMASK), + (afs_uint32)(infdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT)); + rx_Write(m->call, m->line, strlen(m->line)); + FDH_REALLYCLOSE(infdP); + FDH_REALLYCLOSE(outfdP); + free(tbuf); + return EIO; + } + if (FDH_WRITE(outfdP, tbuf, tlen) != tlen) { + sprintf(m->line, "Couldn't write directory %u.%u.%u\n", + outfdP->fd_ih->ih_vid, + (afs_uint32)(outfdP->fd_ih->ih_ino & NAMEI_VNODEMASK), + (afs_uint32)(outfdP->fd_ih->ih_ino >> NAMEI_UNIQSHIFT)); + rx_Write(m->call, m->line, strlen(m->line)); + FDH_REALLYCLOSE(infdP); + FDH_REALLYCLOSE(outfdP); + free(tbuf); + return EIO; + } + size -= tlen; + } + free(tbuf); + FDH_CLOSE(outfdP); + FDH_REALLYCLOSE(infdP); + return 0; +} + +afs_int32 copyVnodes(struct Msg *m, Volume *vol, Volume *newvol, + afs_int32 class, + struct VnodeExtract *list, afs_int32 length, + afs_int32 where, afs_uint64 *blocks, + struct VnodeDiskObject *parVnode) +{ + afs_int32 i, code = 0; + char buf[SIZEOF_LARGEDISKVNODE]; + struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf; + FdHandle_t *fdP = 0; + FdHandle_t *newfdP = 0; + StreamHandle_t *newstream = 0; + struct VnodeClassInfo *vcp = &VnodeClassInfo[class]; + struct VnodeExtract *e; + afs_uint64 size; + afs_uint64 offset; + Inode ino, newino, nearInode; + afs_uint32 newVn; + + fdP = IH_OPEN(vol->vnodeIndex[class].handle); + if (!fdP) { + Log("Couldn't open %s Index of volume %u\n", + class ? "small":"large", V_id(vol)); + code = EIO; + goto Bad_Copy; + } + newfdP = IH_OPEN(newvol->vnodeIndex[class].handle); + if (!newfdP) { + Log("Couldn't open %s Index of volume %u\n", + class ? 
"small":"large", V_id(newvol)); + code = EIO; + goto Bad_Copy; + } + size = FDH_SIZE(fdP); + + for (i=0; iflag) { + afs_uint64 size; + offset = (e->vN + 1 - class) << (vcp->logSize -1); + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_READ(fdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't read in %s Index of volume %u at offset\n", + class ? "small":"large", V_id(vol), offset); + code = EIO; + goto Bad_Copy; + } + if (e->flag & PARENT) { + /* + * do a preventive copy on write for later update + */ + IHandle_t *newh = 0; + IHandle_t *h = 0; + FdHandle_t *infdP = 0; + FdHandle_t *outfdP = 0; + char *tbuf = malloc(2048); + newino = IH_CREATE(V_linkHandle(vol), V_device(vol), + VPartitionPath(V_partition(vol)), + nearInode, V_parentId(vol), + e->vN, vnode->uniquifier, + vnode->dataVersion); + IH_INIT(newh, V_device(vol), V_parentId(vol), newino); + ino = VNDISK_GET_INO(vnode); + IH_INIT(h, V_device(vol), V_parentId(vol), ino); + code = copyDir(m, h, newh); + if (code) + goto Bad_Copy; + /* Now update the vnode and write it back to disk */ + VNDISK_SET_INO(vnode, newino); + vnode->cloned = 0; + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_WRITE(fdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't write in %s Index of volume %u at offset\n", + class ? 
"small":"large", V_id(vol), offset); + code = EIO; + goto Bad_Copy; + } + memcpy(parVnode, vnode, sizeof(struct VnodeDiskObject)); + } + if (e->flag & NEEDED && e->vN != where) { + VNDISK_GET_LEN(size, vnode); + *blocks += (size + 0x3ff) >> 10; + ino = VNDISK_GET_INO(vnode); + if (ino) { + IHandle_t *h, *newh; + IH_INIT(h, vol->device, V_parentId(vol), ino); + if (e->parent == where) + vnode->parent = 1; + newino = IH_CREATE(V_linkHandle(newvol), V_device(newvol), + VPartitionPath(V_partition(newvol)), + nearInode, V_parentId(newvol), + e->vN, vnode->uniquifier, + vnode->dataVersion); + if (!VALID_INO(newino)) { + Log("IH_CREATE failed for %u.%u.%u\n", + V_id(newvol), e->vN, vnode->uniquifier); + code = EIO; + goto Bad_Copy; + } + nearInode = newino; + IH_INIT(newh, newvol->device, V_parentId(newvol), newino); + code = namei_replace_file_by_hardlink(newh, h); + VNDISK_SET_INO(vnode, newino); +#ifdef AFS_RXOSD_SUPPORT + } else { + code = osd_split_objects(vol, newvol, vnode, e->vN); +#endif /* AFS_RXOSD_SUPPORT */ + } + if (code) + goto Bad_Copy; + if (e->flag & CHANGEPARENT) + vnode->parent = 1; /* in new root-directory */ + vnode->cloned = 0; + if (FDH_SEEK(newfdP, offset, 0) != offset + || FDH_WRITE(newfdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't write in %s Index of volume %u to offset\n", + class ? 
"small":"large", V_id(newvol), + offset); + code = EIO; + goto Bad_Copy; + } + } + } + } + /* + * Now copy the root directory from old to new volume + */ + if (class == vLarge) { + IHandle_t *h, *newh; + char buf2[SIZEOF_LARGEDISKVNODE]; + struct VnodeDiskObject *vnode2 = (struct VnodeDiskObject *)&buf2; + afs_uint64 newoffset; + + newoffset = vcp->diskSize; + if (FDH_SEEK(newfdP, newoffset, 0) != newoffset + || FDH_READ(newfdP, vnode2, vcp->diskSize) != vcp->diskSize) { + Log("splitvolume: couldn't read in large Index of new volume %u at offset %u\n", + V_id(newvol), vcp->diskSize); + code = EIO; + goto Bad_Copy; + } + offset = (where + 1 - class) << (vcp->logSize -1); + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_READ(fdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't read in large Index of old volume %u at offset\n", + V_id(vol), offset); + code = EIO; + goto Bad_Copy; + } + VNDISK_GET_LEN(size, vnode); + *blocks += (size + 0x3ff) >> 10; + ino = VNDISK_GET_INO(vnode); + IH_INIT(h, vol->device, V_parentId(vol), ino); + newino = VNDISK_GET_INO(vnode2); + IH_INIT(newh, newvol->device, V_parentId(newvol), newino); + code = copyDir(m, h, newh); + if (code) { + Log("splitvolume: copyDir failed for new root from %u.u.u to %u.1.1\n", + V_id(vol), where, vnode->uniquifier, V_id(newvol)); + code = EIO; + goto Bad_Copy; + } + VNDISK_SET_INO(vnode, newino); + vnode->uniquifier = 1; + vnode->cloned = 0; + vnode->parent = vnode2->parent; + vnode->serverModifyTime = vnode2->serverModifyTime; + if (FDH_SEEK(newfdP, newoffset, 0) != newoffset + || FDH_WRITE(newfdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("splitvolume: couldn't write in large Index of %u at offset %u\n", + V_id(newvol), vcp->diskSize); + code = EIO; + } + } +Bad_Copy: + if (fdP) + FDH_CLOSE(fdP); + if (newfdP) + FDH_CLOSE(newfdP); + return code; +} + +afs_int32 +findName(Volume *vol, struct VnodeDiskObject *vd, afs_uint32 vN, afs_uint32 un, + char *name,afs_int32 length) +{ + afs_int32 
code; + Inode ino; + DirHandle dir; + + ino = VNDISK_GET_INO(vd); + SetSalvageDirHandle(&dir, V_id(vol), V_device(vol), ino); + + code = InverseLookup(&dir, vN, un, name, length); + FidZap(&dir); + return code; +} + +afs_int32 +createMountpoint(Volume *vol, Volume *newvol, struct VnodeDiskObject *parent, + afs_uint32 vN, struct VnodeDiskObject *vd, char *name) +{ + afs_int32 code; + Inode ino, newino; + DirHandle dir; + IHandle_t *h; + struct VnodeDiskObject vnode; + FdHandle_t *fdP, *fdP2; + afs_uint64 offset, size; + afs_int32 class = vSmall; + struct VnodeClassInfo *vcp = &VnodeClassInfo[class]; + AFSFid fid; + struct timeval now; + afs_uint32 newvN; + char symlink[32]; + + TM_GetTimeOfDay(&now, 0); + fdP = IH_OPEN(vol->vnodeIndex[vSmall].handle); + if (!fdP) { + Log("split volume: error opening small vnode index of %u\n", V_id(vol)); + return EIO; + } + offset = vcp->diskSize; + if (FDH_SEEK(fdP, offset, 0) != offset) { + Log("split volume: error seeking in small vnode index of %u\n", V_id(vol)); + return EIO; + } + while (1) { + if (FDH_READ(fdP, &vnode, vcp->diskSize) != vcp->diskSize) + break; + if (vnode.type == vNull) + break; + offset += vcp->diskSize; + } + memset(&vnode, 0, sizeof(vnode)); + vnode.type = vSymlink; + V_nextVnodeUnique(vol)++; + vnode.uniquifier = V_nextVnodeUnique(vol); + vnode.author = vd->author; + vnode.owner = vd->owner; + vnode.group = vd->group; + vnode.modeBits = 0644; + vnode.unixModifyTime = now.tv_sec; + vnode.serverModifyTime = now.tv_sec; + vnode.dataVersion = 1; + vnode.linkCount = 1; + vnode.parent = vN; + + newvN = (offset >> (VnodeClassInfo[class].logSize - 1)) - 1 + class; + newino = IH_CREATE(V_linkHandle(vol), V_device(vol), + VPartitionPath(V_partition(vol)), nearInode, + V_parentId(vol), newvN, vnode.uniquifier, 1); + + IH_INIT(h, V_device(vol), V_parentId(vol), newino); + fdP2 = IH_OPEN(h); + if (!fdP2) { + Log("split volume: couldn't open inode for mountpoint %u.%u.%u\n", + V_id(vol), newvN, vnode.uniquifier); + 
return EIO; + } + FDH_SEEK(fdP2, 0, 0); + sprintf(&symlink, "#%s", V_name(newvol)); + size = strlen(symlink) + 1; + if (FDH_WRITE(fdP2, &symlink, size) != size) { + Log("split volume: couldn't write mountpoint %u.%u.%u\n", + V_id(vol), newvN, vnode.uniquifier); + return EIO; + } + FDH_REALLYCLOSE(fdP2); + IH_RELEASE(h); + VNDISK_SET_INO(&vnode, newino); + VNDISK_SET_LEN(&vnode, size); +#ifndef AFS_RXOSD_SUPPORT + vnode.vnodeMagic = SMALLVNODEMAGIC; +#endif + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_WRITE(fdP, &vnode, vcp->diskSize) != vcp->diskSize) { + Log("split volume: couldn't write vnode for mountpoint %u.%u.%u\n", + V_id(vol), newvN, vnode.uniquifier); + return EIO; + } + FDH_REALLYCLOSE(fdP); + + fid.Volume = V_id(vol); + fid.Vnode = newvN; + fid.Unique = vnode.uniquifier; + + /* + * Now update the parent directory. + */ + + ino = VNDISK_GET_INO(parent); + SetSalvageDirHandle(&dir, V_id(vol), V_device(vol), ino); + + code = Delete(&dir, name); + if (code) { + Log("splitvolume: couldn't delete directory entry for %s in %u.%u.%u, code = %d\n", + name, V_id(vol), vN, parent->uniquifier, code); + return code; + } + code = Create(&dir, name, &fid); + FidZap(&dir); + + class = vLarge; + vcp = &VnodeClassInfo[class]; + fdP = IH_OPEN(vol->vnodeIndex[class].handle); + offset = (vN + 1 - class) << (vcp->logSize -1); + parent->dataVersion++; + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_WRITE(fdP, parent, vcp->diskSize) != vcp->diskSize) { + Log("split volume: couldn't write vnode for parent directory %u.%u.%u\n", + V_id(vol), vN, parent->uniquifier); + return EIO; + } + FDH_REALLYCLOSE(fdP); + return code; +} + +afs_int32 deleteVnodes(Volume *vol, afs_int32 class, + struct VnodeExtract *list, afs_int32 length, + afs_uint64 *blocks) +{ + afs_int32 i, code = 0; + char buf[SIZEOF_LARGEDISKVNODE]; + struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)&buf; + FdHandle_t *fdP = 0; + struct VnodeClassInfo *vcp = &VnodeClassInfo[class]; + struct VnodeExtract 
*e; + afs_uint64 size; + afs_uint64 offset; + Inode ino; + + fdP = IH_OPEN(vol->vnodeIndex[class].handle); + if (!fdP) { + Log("Couldn't open %s Index of volume %u\n", + class ? "small":"large", V_id(vol)); + code = EIO; + goto Bad_Delete; + } + size = FDH_SIZE(fdP); + + for (i=0; iflag & NEEDED) { + afs_uint64 size; + offset = (e->vN + 1 - class) << (vcp->logSize -1); + if (FDH_SEEK(fdP, offset, 0) != offset) { + Log("Couldn't seek in %s Index of volume %u to offset\n", + class ? "small":"large", V_id(vol), offset); + code = EIO; + goto Bad_Delete; + } + if (FDH_READ(fdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't read in %s Index of volume %u at offset\n", + class ? "small":"large", V_id(vol), offset); + code = EIO; + goto Bad_Delete; + } + VNDISK_GET_LEN(size, vnode); + *blocks += (size + 0x3ff) >> 10; + ino = VNDISK_GET_INO(vnode); + if (ino) { + IHandle_t *h, *newh; + IH_INIT(h, vol->device, V_parentId(vol), ino); + IH_DEC(h, ino, V_parentId(vol)); +#ifdef AFS_RXOSD_SUPPORT + } else { + code = osdRemove(vol, vnode, e->vN); +#endif /* AFS_RXOSD_SUPPORT */ + } + memset(vnode, 0, vcp->diskSize); + vnode->type = vNull; + if (FDH_SEEK(fdP, offset, 0) != offset + || FDH_WRITE(fdP, vnode, vcp->diskSize) != vcp->diskSize) { + Log("Couldn't write in %s Index of volume %u to offset\n", + class ? 
"small":"large", V_id(vol), + offset); + } + } + } +Bad_Delete: + if (fdP) + FDH_CLOSE(fdP); + return code; +} +afs_int32 +split_volume(struct rx_call *call, Volume *vol, Volume *newvol, + afs_uint32 where, afs_int32 verbose) +{ + afs_int32 code = 0; + struct VnodeExtract *dirList = 0; + struct VnodeExtract *fileList = 0; + afs_uint64 blocks = 0; + afs_uint32 filesNeeded, dirsNeeded; + afs_uint32 dl, fl; + char buf[SIZEOF_LARGEDISKVNODE]; + char buf2[SIZEOF_LARGEDISKVNODE]; + struct VnodeDiskObject *rootVnode = (struct VnodeDiskObject *)&buf; + struct VnodeDiskObject *parVnode = (struct VnodeDiskObject *)&buf2; + char name[256]; + afs_uint32 parent; + struct Msg *m; + + m = (struct Msg *) malloc(sizeof(struct Msg)); + memset(m, 0, sizeof(struct Msg)); + m->call = call; + m->verbose = verbose; + + /* + * First step: planning + * + * Find out which directories will belong to the new volume + * + */ + if (verbose) { + sprintf(m->line, + "1st step: extract vnode essence from large vnode file\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + + code = ExtractVnodes(m, vol, vLarge, &dirList, &dl, where, rootVnode, + &parent, parVnode); + if (code) { + sprintf(m->line, + "ExtractVnodes failed for %u for directories with code %d\n", + V_id(vol), code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + + if (verbose) { + sprintf(m->line, "2nd step: look for name of vnode %u in directory %u.%u.%u\n", + where, V_id(vol), parent, parVnode->uniquifier); + rx_Write(m->call, m->line, strlen(m->line)); + } + code = findName(vol, parVnode, where, rootVnode->uniquifier, + &name, sizeof(name)); + if (code) { + sprintf(m->line, + "splitvolume: could'nt find name of %u in directory %u.%u.%u.\n", + where, V_id(vol), parent, parVnode->uniquifier); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + if (verbose) { + sprintf(m->line, "name of %u is %s\n", where, name); + rx_Write(m->call, m->line, strlen(m->line)); + } + + if (verbose) { + 
sprintf(m->line, "3rd step: find all directory vnodes belonging to the subtree under %u \"%s\"\n", + where, name); + rx_Write(m->call, m->line, strlen(m->line)); + } + code = FindVnodes(m, where, dirList, dl, dirList, dl, &dirsNeeded, 1); + if (code) { + sprintf(m->line, + "FindVnodes for directories failed with code %d\n", code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + + if (verbose) { + sprintf(m->line, "4th step extract vnode essence from small vnode file\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + code = ExtractVnodes(m, vol, vSmall, &fileList, &fl, where, 0, 0, 0); + if (code) { + sprintf(m->line, + "ExtractVnodes failed for %u for files with code %d\n", + V_id(vol), code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + if (verbose) { + sprintf(m->line, "5th step: find all small vnodes belonging to the subtree under %u \"%s\"\n", + where, name); + rx_Write(m->call, m->line, strlen(m->line)); + } + FindVnodes(m, where, fileList, fl, dirList, dl, &filesNeeded, 0); + + /* + * Third step: create hard links for all files needed + * + */ + + V_destroyMe(newvol) = DESTROY_ME; + V_inService(newvol) = 0; + if (verbose) { + sprintf(m->line, "6th step: create hard links in the AFSIDat tree between files of the old and new volume\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + code = copyVnodes(m, vol, newvol, 1, fileList, fl, where, &blocks, 0); + if (code) { + sprintf(m->line, "copyVnodes for files failed with code %d\n", code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + + /* + * Forth step: create hard links for all directories and copy + * split directory to new root directory + */ + + if (verbose) { + sprintf(m->line, "7th step: create hard links in the AFSIDat tree between directories of the old and new volume and make dir %u to new volume's root directory.\n", + where); + rx_Write(m->call, m->line, strlen(m->line)); + } + code = copyVnodes(m, vol, newvol, 0, dirList, 
dl, where, &blocks, parVnode); + if (code) { + sprintf(m->line, "copyVnodes for directories failed with code %d\n", code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + + /* + * Finalize new volume + * + */ + if (verbose) { + sprintf(m->line, "8th step: write new volume's metadata to disk\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + + V_diskused(newvol) = blocks; + /*V_osdFlag(newvol) = V_osdFlag(vol);*/ + V_filecount(newvol) = filesNeeded + dirsNeeded; + V_destroyMe(newvol) = 0; + V_maxquota(newvol) = V_maxquota(vol); + V_uniquifier(newvol) = V_uniquifier(vol); + V_inService(newvol) = 1; + VUpdateVolume(&code, newvol); + + /* + * Sixth step: change directory entry in old volume: + * rename old tree and create mount point for new volume. + */ + if (verbose) { + sprintf(m->line, "9th step: create mountpoint \"%s\" for new volume in old volume's directory %u.\n", name, parent); + rx_Write(m->call, m->line, strlen(m->line)); + } + + code = createMountpoint(vol, newvol, parVnode, parent, rootVnode, name); + if (code) { + sprintf(m->line, "createMountpoint failed with code %d\n", code); + rx_Write(m->call, m->line, strlen(m->line)); + return code; + } + /* + * Now both volumes should be ready and consistent, but the old volume + * contains still the vnodes and data we transferred into the new one. + * Delete orphaned vnodes and data. 
+ */ + + blocks = 0; + if (verbose) { + sprintf(m->line, "10th step: delete large vnodes belonging to subtree in the old volume.\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + deleteVnodes(vol, vLarge, dirList, dl, &blocks); + if (verbose) { + sprintf(m->line, "11th step: delete small vnodes belonging to subtree in the old volume.\n"); + rx_Write(m->call, m->line, strlen(m->line)); + } + deleteVnodes(vol, vSmall, fileList, fl, &blocks); + V_diskused(vol) -= blocks; + V_filecount(vol) -= (filesNeeded + dirsNeeded + 1); + VUpdateVolume(&code, vol); + + sprintf(m->line, "Finished!\n"); + rx_Write(m->call, m->line, strlen(m->line)); + m->line[0] = 0; + m->line[1] = 0; + m->line[2] = 0; + m->line[3] = 0; + rx_Write(m->call, m->line, 4); + free(m); + return code; +} diff --git a/src/volser/volint.xg b/src/volser/volint.xg index 5125f3a..9ce8b6f 100644 --- a/src/volser/volint.xg +++ b/src/volser/volint.xg @@ -51,6 +51,15 @@ statindex 16 #define VOLGETSIZE 65537 #define VOLDUMPV2 65538 #define VOLDISKPART64 65539 +#define VOLOSDSUPPORT 65540 +#define VOLTRAVERSE 65541 +#define VOLWIPECAND 65542 +#define OLDVOLSALVAGE 65543 +#define VOLARCHCANDold 65544 +#define VOLSALVAGE 65545 +#define VOLLISTOBJECTS 65546 +#define VOLSPLIT 65547 +#define VOLARCHCAND 65548 /* Bits for flags for DumpV2 */ %#define VOLDUMPV2_OMITDIRS 1 @@ -434,3 +443,10 @@ proc PartitionInfo64( IN string name<>, OUT struct diskPartition64 *partition ) = VOLDISKPART64; + +proc SplitVolume ( + IN afs_uint32 vid, + IN afs_uint32 new, + IN afs_uint32 where, + IN afs_int32 verbose +) split = VOLSPLIT; diff --git a/src/volser/volprocs.c b/src/volser/volprocs.c index 9d0024c..5dc071f 100644 --- a/src/volser/volprocs.c +++ b/src/volser/volprocs.c @@ -2918,6 +2918,87 @@ SAFSVolGetSize(struct rx_call *acid, afs_int32 fromTrans, afs_int32 fromDate, return code; } +afs_int32 +SAFSVolSplitVolume(struct rx_call *acall, afs_uint32 vid, afs_uint32 new, + afs_uint32 where, afs_int32 verbose) +{ + afs_int32 code, 
code2; + Volume *vol=0, *newvol=0; + struct volser_trans *tt = 0, *tt2 = 0; + char caller[MAXKTCNAMELEN]; + char line[128]; + + if (!afsconf_SuperUser(tdir, acall, caller)) + return EPERM; + + vol = VAttachVolume(&code, vid, V_VOLUPD); + if (!vol) { + if (!code) + code = ENOENT; + return code; + } + newvol = VAttachVolume(&code, new, V_VOLUPD); + if (!newvol) { + VDetachVolume(&code2, vol); + if (!code) + code = ENOENT; + return code; + } + if (V_device(vol) != V_device(newvol) + || V_uniquifier(newvol) != 2) { + if (V_device(vol) != V_device(newvol)) { + sprintf(line, "Volumes %u and %u are not in the same partition, aborted.\n", + vid, new); + rx_Write(acall, line, strlen(line)); + } + if (V_uniquifier(newvol) != 2) { + sprintf(line, "Volume %u is not freshly created, aborted.\n", new); + rx_Write(acall, line, strlen(line)); + } + line[0] = 0; + rx_Write(acall, line, 1); + VDetachVolume(&code2, vol); + VDetachVolume(&code2, newvol); + return EINVAL; + } + tt = NewTrans(vid, V_device(vol)); + if (!tt) { + sprintf(line, "Couldn't create transaction for %u, aborted.\n", vid); + rx_Write(acall, line, strlen(line)); + line[0] = 0; + rx_Write(acall, line, 1); + VDetachVolume(&code2, vol); + VDetachVolume(&code2, newvol); + return VOLSERVOLBUSY; + } + tt->iflags = ITBusy; + tt->vflags = 0; + strcpy(tt->lastProcName, "SplitVolume"); + + tt2 = NewTrans(new, V_device(newvol)); + if (!tt2) { + sprintf(line, "Couldn't create transaction for %u, aborted.\n", new); + rx_Write(acall, line, strlen(line)); + line[0] = 0; + rx_Write(acall, line, 1); + DeleteTrans(tt, 1); + VDetachVolume(&code2, vol); + VDetachVolume(&code2, newvol); + return VOLSERVOLBUSY; + } + tt2->iflags = ITBusy; + tt2->vflags = 0; + strcpy(tt2->lastProcName, "SplitVolume"); + + code = split_volume(acall, vol, newvol, where, verbose); + + VDetachVolume(&code2, vol); + DeleteTrans(tt, 1); + VDetachVolume(&code2, newvol); + DeleteTrans(tt2, 1); + return code; +} + /* GetPartName - map partid (a decimal number) 
into pname (a string) * Since for NT we actually want to return the drive name, we map through the * partition struct.