2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
16 /* Clone a volume. Assumes the new volume is already created */
18 #include <afsconfig.h>
19 #include <afs/param.h>
23 #include <sys/types.h>
25 #ifdef AFS_PTHREAD_ENV
27 #else /* AFS_PTHREAD_ENV */
28 #include <afs/assert.h>
29 #endif /* AFS_PTHREAD_ENV */
44 #include <afs/afsint.h>
48 #include <afs/afssyscalls.h>
52 #include "partition.h"
53 #include "viceinode.h"
/* Hook variable: a pointer to a function returning int, initialised to 0.
 * None of the code visible in this listing assigns or calls it. */
55 int (*vol_PollProc)() = 0; /* someone must init this */
/* Store `code` in the enclosing function's local variable `error` and jump to
 * that function's `error_exit` label; both must exist wherever this is used.
 * The expansion is a bare brace block, so `if (x) ERROR_EXIT(e); else ...`
 * would not parse -- every use visible here is a plain `if` with no `else`. */
57 #define ERROR_EXIT(code) {error = code; goto error_exit;}
59 /* parameters for idec call - this could just be an IHandle_t, but leaving
60 * open the possibility of decrementing the special files as well.
/* Capacity of one batch of queued inodes: it sizes the data[] array below and
 * is the "batch is full" threshold tested by ci_AddItem. */
67 #define CLONE_MAXITEMS 100
/* NOTE(review): the struct headers and the `nitems` counter used by the ci_*
 * routines are not visible in this listing.  The field grouping described
 * below is taken from how those routines access the fields (ti->next,
 * ti->data, ah->first, ah->last) -- confirm against the full file. */
/* Batch field: link to the next batch; ci_Apply and ci_Destroy follow it. */
69 struct clone_items *next;
/* Batch field: the inode numbers held by this batch. */
71 Inode data[CLONE_MAXITEMS];
/* List-head field: first batch; ci_Apply and ci_Destroy start walking here. */
75 struct clone_items *first;
/* List-head field: last batch; ci_AddItem tests it for existence and room. */
76 struct clone_items *last;
/* Queue one inode number (`aino`) on the list headed by `ah`.
 *
 * Old-style (K&R) definition with no declared return type, so the function
 * returns int implicitly.  A new batch is allocated with malloc when the list
 * has no last batch yet or when the last batch already holds CLONE_MAXITEMS
 * entries; the value is then stored in the next free slot of the batch that
 * `ti` points to.
 *
 * NOTE(review): the embedded numbering skips several original lines inside
 * this function, so the declaration of `aino`, part of the branching around
 * the allocation, and any return statement are not visible here. */
82 static ci_AddItem(ah, aino)
84 struct clone_head *ah; {
85 register struct clone_items *ti;
87 /* if no last elt (first call) or last item full, get a new one */
88 if ((!ah->last) || ah->last->nitems >= CLONE_MAXITEMS) {
/* NOTE(review): no NULL check of the malloc result is visible before `ti` is
 * dereferenced on the next visible line -- confirm against the full file. */
89 ti = (struct clone_items *) malloc(sizeof(struct clone_items));
91 ti->next = (struct clone_items *) 0;
97 /* first dude in the list */
98 ah->first = ah->last = ti;
103 /* now ti points to the end of the list, to a clone_item with room
104 * for at least one more element. Add it.
106 ti->data[ti->nitems++] = aino;
110 /* initialize a clone header: zero-fill the whole structure, so that the
 * `last` field ci_AddItem tests and the `first` field ci_Apply/ci_Destroy
 * start from both begin as 0.  Declared to return int; the return statement
 * is not visible in this listing. */
111 int ci_InitHead(struct clone_head *ah)
113 memset(ah, 0, sizeof(*ah));
117 /* apply a function to all dudes in the set */
/* Walks the batches from ah->first through each `next` link and, for every one
 * of the `nitems` entries in a batch, calls aproc(entry, arock).  `arock` is
 * passed through untouched as the callback's second argument.  The callback's
 * result is not captured on the visible call line.  Declared to return int;
 * the declaration of `i` and the return statement are not visible here. */
118 int ci_Apply(struct clone_head *ah, int (*aproc)(), char *arock)
120 register struct clone_items *ti;
123 for(ti=ah->first; ti; ti=ti->next) {
124 for(i=0; i<ti->nitems; i++) {
125 (*aproc)(ti->data[i], arock);
131 /* free all dudes in the list */
/* Walks the batches starting at ah->first.  The successor pointer is copied
 * into `ni` at the top of each iteration and the loop advances through `ni`,
 * so the walk does not read `ti` again after the current batch is dealt with.
 * NOTE(review): the statement that actually releases `ti`, and the return
 * statement of this int function, are not visible in this listing. */
132 int ci_Destroy(struct clone_head *ah)
134 register struct clone_items *ti, *ni;
136 for(ti=ah->first; ti; ti=ni) {
137 ni = ti->next; /* guard against freeing */
/* Per-inode callback handed to ci_Apply by DoCloneIndex: invokes IH_DEC on the
 * queued inode `adata`, using the handle (`h`) and volume id (`vol`) carried
 * in the rock `aparm`.  Old-style (K&R) definition with an implicit int return
 * type; the declaration of `adata` and any return statement are not visible
 * in this listing. */
143 static IDecProc(adata, aparm)
145 struct clone_rock *aparm; {
146 IH_DEC(aparm->h, adata, aparm->vol);
/* Clone one vnode index file of the read-write volume `rwvp` into the clone
 * volume `clvp`.  `class` selects which index (it subscripts VnodeClassInfo[]
 * and each volume's vnodeIndex[]); `reclone` is non-zero when an existing
 * clone is being refreshed.
 *
 * Outline of what the visible lines do:
 *   - open the RW index ("r+" if the RW volume is writeable, "r" otherwise),
 *     the clone index for output, and the clone index again for input;
 *   - read the RW index one vcp->diskSize-sized record at a time; for every
 *     non-null vnode, check its magic, bump the link count of its inode with
 *     IH_INC unless the clone already refers to the same inode, and write the
 *     record to the clone index;
 *   - collect clone inodes that are no longer referenced on `decHead` instead
 *     of decrementing them on the spot;
 *   - close all streams and handles, reopen the clone index (re-using the
 *     `rwFd` variable for it), truncate it to `offset` when recloning without
 *     error, and finally run IDecProc over everything queued on `decHead`.
 *
 * Failures are recorded in `error` (EIO for open/seek/stream problems, -1 for
 * a vnode magic mismatch).
 *
 * NOTE(review): this listing omits many original lines of this function,
 * among them the `clonefailed` and `error_exit` labels, several conditionals,
 * the declaration of `class`, and the return statement.  The comments added
 * here describe only what is visible. */
151 afs_int32 DoCloneIndex(rwvp, clvp, class, reclone)
152 Volume *rwvp; /* The RW volume */
153 Volume *clvp; /* The cloned volume */
155 int reclone; /* Whether to reclone or not */
157 afs_int32 code, error=0;
158 FdHandle_t *rwFd=0, *clFdIn=0, *clFdOut=0;
159 StreamHandle_t *rwfile=0, *clfilein=0, *clfileout=0;
160 IHandle_t *rwH=0, *clHin=0, *clHout=0;
/* Two raw record buffers of SIZEOF_LARGEDISKVNODE bytes each; rwvnode and
 * clvnode below are typed views onto them for the RW and clone records. */
161 char buf[SIZEOF_LARGEDISKVNODE], dbuf[SIZEOF_LARGEDISKVNODE];
162 struct VnodeDiskObject *rwvnode = (struct VnodeDiskObject *) buf;
163 struct VnodeDiskObject *clvnode = (struct VnodeDiskObject *) dbuf;
164 Inode rwinode, clinode;
165 struct clone_head decHead;
166 struct clone_rock decRock;
167 afs_int32 offset, dircloned, inodeinced;
/* Per-class parameters used throughout: record size (diskSize) and magic. */
169 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
170 int ReadWriteOriginal = VolumeWriteable(rwvp);
171 struct DiskPartition *partition = rwvp->partition;
172 Device device = rwvp->device;
174 /* Open the RW volume's index file and seek to beginning */
175 IH_COPY(rwH, rwvp->vnodeIndex[class].handle);
177 if (!rwFd) ERROR_EXIT(EIO);
/* Opened for update only when the source volume is writeable, because
 * directory vnodes are rewritten in place further down in that case. */
178 rwfile = FDH_FDOPEN(rwFd, ReadWriteOriginal? "r+":"r");
179 if (!rwfile) ERROR_EXIT(EIO);
/* The seek target is vcp->diskSize, not 0: the first record-sized slot of the
 * file is skipped (presumably an index header -- TODO confirm). */
180 STREAM_SEEK(rwfile, vcp->diskSize, 0); /* Will fail if no vnodes */
182 /* Open the clone volume's index file and seek to beginning */
183 IH_COPY(clHout, clvp->vnodeIndex[class].handle);
184 clFdOut = IH_OPEN(clHout);
185 if (!clFdOut) ERROR_EXIT(EIO);
186 clfileout = FDH_FDOPEN(clFdOut, "a");
187 if (!clfileout) ERROR_EXIT(EIO);
188 code = STREAM_SEEK(clfileout, vcp->diskSize, 0);
189 if (code) ERROR_EXIT(EIO);
191 /* If recloning, open the new volume's index; this time for
192 * reading. We never read anything that we're simultaneously
193 * writing, so this all works.
196 IH_COPY(clHin, clvp->vnodeIndex[class].handle);
197 clFdIn = IH_OPEN(clHin);
198 if (!clFdIn) ERROR_EXIT(EIO);
199 clfilein = FDH_FDOPEN(clFdIn, "r");
200 if (!clfilein) ERROR_EXIT(EIO);
201 STREAM_SEEK(clfilein, vcp->diskSize, 0); /* Will fail if no vnodes */
204 /* Initialize list of inodes to nuke */
205 ci_InitHead(&decHead);
/* The rock carries the link handle and parent volume id that IDecProc later
 * passes to IH_DEC for every queued inode. */
206 decRock.h = V_linkHandle(rwvp);
207 decRock.vol = V_parentId(rwvp);
209 /* Read each vnode in the old volume's index file */
/* `offset` is the file position of the record read in this iteration; it
 * starts at diskSize to match the initial seek and advances by one record. */
210 for (offset=vcp->diskSize;
211 STREAM_READ(rwvnode,vcp->diskSize,1,rwfile) == 1;
212 offset+=vcp->diskSize) {
213 dircloned = inodeinced = 0;
215 /* If we are recloning the volume, read the corresponding vnode
216 * from the clone and determine its inode number.
218 if ( reclone && !STREAM_EOF(clfilein) &&
219 (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1) ) {
220 clinode = VNDISK_GET_INO(clvnode);
225 if (rwvnode->type != vNull) {
226 if (rwvnode->vnodeMagic != vcp->magic) ERROR_EXIT(-1);
227 rwinode = VNDISK_GET_INO(rwvnode);
229 /* Increment the inode if not already */
230 if (clinode && (clinode == rwinode)) {
231 clinode = 0; /* already cloned - don't delete later */
232 } else if (rwinode) {
/* NOTE(review): the link-count increment is performed inside the assert()
 * argument.  If assert() expands to nothing in some build (as the standard
 * <assert.h> macro does under NDEBUG), the increment disappears with it --
 * confirm which assert definition is in effect. */
233 assert(IH_INC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) != -1);
237 /* If a directory, mark vnode in old volume as cloned */
238 if ((rwvnode->type == vDirectory) && ReadWriteOriginal) {
241 * It is my firmly held belief that immediately after
242 * copy-on-write, the two directories can be identical.
243 * If the new copy is changed (presumably, that is the very
244 * next thing that will happen) then the dataVersion will
247 /* NOTE: the dataVersion++ is incredibly important!!!.
248 This will cause the inode created by the file server
249 on copy-on-write to be stamped with a dataVersion bigger
250 than the current one. The salvager will then do the
252 rwvnode->dataVersion++;
255 code = STREAM_SEEK(rwfile, offset, 0);
256 if (code == -1) goto clonefailed;
257 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
258 if (code != 1) goto clonefailed;
260 code = STREAM_SEEK(rwfile, offset + vcp->diskSize, 0);
261 if (code == -1) goto clonefailed;
263 rwvnode->dataVersion--; /* Really needs to be set to the value in the inode,
264 for the read-only volume */
269 /* Overwrite the vnode entry in the clone volume */
271 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, clfileout);
274 /* Couldn't clone, go back and decrement the inode's link count */
/* NOTE(review): same caveat as the IH_INC above -- the decrement itself is
 * the assert() argument. */
276 assert(IH_DEC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) != -1);
278 /* And if the directory was marked clone, unmark it */
/* Best effort: the rewrite is attempted only if the seek succeeds, and the
 * result of the write is not checked. */
281 if (STREAM_SEEK(rwfile, offset, 0) != -1)
282 STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
287 /* Removal of the old cloned inode */
289 ci_AddItem(&decHead, clinode); /* just queue it */
/* Any error latched on the clone output stream fails the clone with EIO. */
294 if (STREAM_ERROR(clfileout)) ERROR_EXIT(EIO);
296 /* Clean out any junk at end of clone file */
/* Starting at `offset`, read whatever records remain in the old clone index
 * and queue the inode of every non-null vnode that has one. */
298 STREAM_SEEK(clfilein, offset, 0);
299 while (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1) {
300 if (clvnode->type != vNull && VNDISK_GET_INO(clvnode) != 0) {
301 ci_AddItem(&decHead, VNDISK_GET_INO(clvnode));
307 /* come here to finish up. If code is non-zero, we've already run into problems,
308 * and shouldn't do the idecs.
311 if (rwfile) STREAM_CLOSE(rwfile);
312 if (clfilein) STREAM_CLOSE(clfilein);
313 if (clfileout) STREAM_CLOSE(clfileout);
315 if (rwFd) FDH_CLOSE(rwFd);
316 if (clFdIn) FDH_CLOSE(clFdIn);
317 if (clFdOut) FDH_CLOSE(clFdOut);
319 if (rwH) IH_RELEASE(rwH);
320 if (clHout) IH_RELEASE(clHout);
321 if (clHin) IH_RELEASE(clHin);
323 /* Next, we sync the disk. We have to reopen in case we're truncating,
324 * since we were using stdio above, and don't know when the buffers
325 * would otherwise be flushed. There's no stdio fftruncate call.
327 rwFd = IH_OPEN(clvp->vnodeIndex[class].handle);
329 if (!error) error = EIO;
332 /* If doing a reclone, we're keeping the clone. We need to
333 * truncate the file to offset bytes.
335 if (reclone && !error) {
336 error = FDH_TRUNC(rwFd, offset);
343 /* Now finally do the idec's. At this point, all potential
344 * references have been cleaned up and sent to the disk
345 * (see above fclose and fsync). No matter what happens, we
346 * no longer need to keep these references around.
348 code = ci_Apply(&decHead, IDecProc, (char *) &decRock);
349 if (!error) error = code;
350 ci_Destroy(&decHead);
/* Entry point that forwards its four arguments, unchanged and in the same
 * order, to CloneVolume_r.  Old-style (K&R) definition with no declared
 * return type.  The declaration of `error` and whatever surrounds the call in
 * the original (the embedded numbering skips lines on both sides of it) are
 * not visible in this listing. */
356 CloneVolume(error, original, new, old)
358 Volume *original, *new, *old;
361 CloneVolume_r(error, original, new, old);
366 CloneVolume_r(rerror, original, new, old)
368 Volume *original, *new, *old;
370 afs_int32 code, error=0;
374 reclone = ((new == old) ? 1 : 0);
376 code = DoCloneIndex(original, new, vLarge, reclone);
377 if (code) ERROR_EXIT(code);
378 code = DoCloneIndex(original, new, vSmall, reclone);
379 if (code) ERROR_EXIT(code);
381 code = CopyVolumeHeader_r(&V_disk(original), &V_disk(new));
382 if (code) ERROR_EXIT(code);