2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
16 /* Clone a volume. Assumes the new volume is already created */
18 #include <afsconfig.h>
19 #include <afs/param.h>
29 #include <afs/afsint.h>
30 #include <afs/afssyscalls.h>
38 #include "partition.h"
39 #include "viceinode.h"
40 #include "vol_prototypes.h"
/*
 * File-scope declarations for the clone code.
 *
 * NOTE(review): this listing is incomplete.  The body of ERROR_EXIT and the
 * struct headers that own the member lines below are not visible here.
 *
 * vol_PollProc   - function pointer, initialised to 0; per its own comment
 *                  some other code is expected to set it.
 * CLONE_MAXITEMS - number of Inode slots in the data[] array of one list node.
 * next, data     - members of a list node (used as struct clone_items by
 *                  ci_AddItem).
 * first, last    - members of the list head (used as struct clone_head by
 *                  ci_AddItem and ci_Apply).
 * CloneVolume    - prototype of the entry point defined at the end of the file.
 */
43 int (*vol_PollProc) (void) = 0; /* someone must init this */
45 #define ERROR_EXIT(code) do { \
50 /* parameters for idec call - this could just be an IHandle_t, but leaving
51 * open the possibility of decrementing the special files as well.
58 #define CLONE_MAXITEMS 100
60 struct clone_items *next;
62 Inode data[CLONE_MAXITEMS];
66 struct clone_items *first;
67 struct clone_items *last;
70 void CloneVolume(Error *, Volume *, Volume *, Volume *);
/*
 * ci_AddItem - append the inode aino to the list headed by ah.
 *
 * A new clone_items node is obtained with malloc when the head has no last
 * node yet or when the last node already holds CLONE_MAXITEMS entries.  The
 * visible failure path logs the problem and calls osi_Panic.  The inode is
 * then stored in the next free slot of the node ti and the nitems count of
 * that node is incremented.
 *
 * NOTE(review): lines are missing from this listing (return type, braces,
 * the allocation check, the linking of a new node behind an existing last
 * node, and the branch that reuses ah->last), so those steps are not
 * documented here.
 */
73 ci_AddItem(struct clone_head *ah, Inode aino)
75 struct clone_items *ti;
77 /* if no last elt (first call) or last item full, get a new one */
78 if ((!ah->last) || ah->last->nitems >= CLONE_MAXITEMS) {
79 ti = malloc(sizeof(struct clone_items));
81 Log("ci_AddItem: malloc failed\n");
82 osi_Panic("ci_AddItem: malloc failed\n");
85 ti->next = (struct clone_items *)0;
90 /* first dude in the list */
91 ah->first = ah->last = ti;
96 /* now ti points to the end of the list, to a clone_item with room
97 * for at least one more element. Add it.
99 ti->data[ti->nitems++] = aino;
/*
 * ci_InitHead - zero-fill the whole clone_head structure pointed to by ah.
 * DoCloneIndex calls this before anything is queued on the list.
 *
 * NOTE(review): the return type and any return statement are not visible
 * in this listing.
 */
103 /* initialize a clone header */
105 ci_InitHead(struct clone_head *ah)
107 memset(ah, 0, sizeof(*ah));
/*
 * ci_Apply - call aproc(inode, arock) once for every inode stored in the
 * list headed by ah.
 *
 * Nodes are visited from ah->first along the next pointers, and within a
 * node the slots 0 .. nitems-1 are visited in order.  The value returned by
 * aproc is discarded.
 *
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this listing.  DoCloneIndex assigns the result of this function
 * to a status variable, so it does return a value.
 */
111 /* apply a function to all dudes in the set */
113 ci_Apply(struct clone_head *ah, int (*aproc) (Inode, void *), void *arock)
115 struct clone_items *ti;
118 for (ti = ah->first; ti; ti = ni) {
119 for (i = 0; i < ti->nitems; i++) {
120 (*aproc) (ti->data[i], arock);
/*
 * ci_Destroy - walk the list headed by ah from ah->first to the end.
 *
 * The next pointer of each node is copied into ni before the loop advances,
 * so the walk never reads a node after it has been dealt with.
 *
 * NOTE(review): the statement that releases each node (presumably a free of
 * ti) and the return are not visible in this listing - confirm against the
 * full file.
 */
126 /* free all dudes in the list */
128 ci_Destroy(struct clone_head *ah)
130 struct clone_items *ti, *ni;
132 for (ti = ah->first; ti; ti = ni) {
133 ni = ti->next; /* guard against freeing */
/*
 * IDecProc - per-inode callback handed to ci_Apply by DoCloneIndex.
 *
 * adata  inode taken from the queued list
 * arock  pointer to a struct clone_rock; its h and vol members are passed
 *        to IH_DEC together with the inode.  DoCloneIndex fills them from
 *        V_linkHandle and V_parentId of the read-write volume.
 *
 * The result of IH_DEC is not examined on the visible lines.
 * NOTE(review): the end of the function, including its return value, is
 * not visible in this listing.
 */
140 IDecProc(Inode adata, void *arock)
142 struct clone_rock *aparm = (struct clone_rock *)arock;
143 IH_DEC(aparm->h, adata, aparm->vol);
/*
 * DoCloneIndex - clone one vnode index of rwvp into clvp.
 *
 * NOTE(review): this listing is missing lines (some declarations, most
 * error checks, braces, the exit label and the return statement).  Only
 * what the visible lines show is described here.
 *
 * rwvp     volume whose index for the given class is read record by record
 * clvp     clone volume whose index for the given class is written
 * class    selects the VnodeClassInfo entry (record size and magic)
 * reclone  non-zero: the existing index of clvp is also opened for reading,
 *          its inode numbers are compared with those of rwvp, and the
 *          index is truncated to offset at the end
 *
 * For each record read from rwvp:
 *   - for a vnode whose type is not vNull, diskused grows by nBlocks() of
 *     the vnode length;
 *   - a non-zero inode gets IH_INC unless the old clone record already
 *     names the same inode;
 *   - a directory has dataVersion incremented for the copy written back to
 *     the index of rwvp and decremented again before the record is written
 *     to the clone stream (ReadWriteOriginal is fixed at 1 in this function);
 *   - an old clone inode that is no longer wanted is queued on decHead.
 *
 * After the loop, leftover records at the end of the old clone index are
 * scanned and their non-zero inodes queued as well.  The queued inodes are
 * handed to IDecProc through ci_Apply only after the streams are closed and
 * the clone index has been FDH_SYNCed, and the queue is then destroyed.
 *
 * V_filecount and V_diskused of rwvp are overwritten when the recomputed
 * values are positive.  With class equal to vLarge the counters start from
 * zero, otherwise from the current values of the volume.
 */
149 DoCloneIndex(Volume * rwvp, Volume * clvp, VnodeClass class, int reclone)
151 afs_int32 code, error = 0;
152 FdHandle_t *rwFd = 0, *clFdIn = 0, *clFdOut = 0;
153 StreamHandle_t *rwfile = 0, *clfilein = 0, *clfileout = 0;
154 IHandle_t *rwH = 0, *clHin = 0, *clHout = 0;
155 char buf[SIZEOF_LARGEDISKVNODE], dbuf[SIZEOF_LARGEDISKVNODE];
156 struct VnodeDiskObject *rwvnode = (struct VnodeDiskObject *)buf;
157 struct VnodeDiskObject *clvnode = (struct VnodeDiskObject *)dbuf;
160 struct clone_head decHead;
161 struct clone_rock decRock;
162 afs_foff_t offset = 0;
163 afs_int32 dircloned, inodeinced;
164 afs_int32 filecount = 0, diskused = 0;
167 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
169 * The fileserver's -readonly switch should make this false, but we
170 * have no useful way to know in the volserver.
171 * This doesn't make client data mutable.
173 int ReadWriteOriginal = 1;
175 /* Correct number of files in volume: this assumes indexes are always
176 cloned starting with vLarge */
177 if (ReadWriteOriginal && class != vLarge) {
178 filecount = V_filecount(rwvp);
179 diskused = V_diskused(rwvp);
182 /* Initialize list of inodes to nuke - must do this before any calls
183 * to ERROR_EXIT, as the error handler requires an initialised list
185 ci_InitHead(&decHead);
186 decRock.h = V_linkHandle(rwvp);
187 decRock.vol = V_parentId(rwvp);
189 /* Open the RW volume's index file and seek to beginning */
190 IH_COPY(rwH, rwvp->vnodeIndex[class].handle);
194 rwfile = FDH_FDOPEN(rwFd, ReadWriteOriginal ? "r+" : "r");
197 STREAM_ASEEK(rwfile, vcp->diskSize); /* Will fail if no vnodes */
199 /* Open the clone volume's index file and seek to beginning */
200 IH_COPY(clHout, clvp->vnodeIndex[class].handle);
201 clFdOut = IH_OPEN(clHout);
204 clfileout = FDH_FDOPEN(clFdOut, "a");
207 code = STREAM_ASEEK(clfileout, vcp->diskSize);
211 /* If recloning, open the new volume's index; this time for
212 * reading. We never read anything that we're simultaneously
213 * writing, so this all works.
216 IH_COPY(clHin, clvp->vnodeIndex[class].handle);
217 clFdIn = IH_OPEN(clHin);
220 clfilein = FDH_FDOPEN(clFdIn, "r");
223 STREAM_ASEEK(clfilein, vcp->diskSize); /* Will fail if no vnodes */
226 /* Read each vnode in the old volume's index file */
227 for (offset = vcp->diskSize;
228 STREAM_READ(rwvnode, vcp->diskSize, 1, rwfile) == 1;
229 offset += vcp->diskSize) {
230 dircloned = inodeinced = 0;
232 /* If we are recloning the volume, read the corresponding vnode
233 * from the clone and determine its inode number.
235 if (reclone && !STREAM_EOF(clfilein)
236 && (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1)) {
237 clinode = VNDISK_GET_INO(clvnode);
242 if (rwvnode->type != vNull) {
245 if (rwvnode->vnodeMagic != vcp->magic)
247 rwinode = VNDISK_GET_INO(rwvnode);
249 VNDISK_GET_LEN(ll, rwvnode);
250 diskused += nBlocks(ll);
252 /* Increment the inode if not already */
253 if (clinode && (clinode == rwinode)) {
254 clinode = 0; /* already cloned - don't delete later */
255 } else if (rwinode) {
256 if (IH_INC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) ==
258 Log("IH_INC failed: %"AFS_PTR_FMT", %s, %u errno %d\n",
259 V_linkHandle(rwvp), PrintInode(stmp, rwinode),
260 V_parentId(rwvp), errno);
267 /* If a directory, mark vnode in old volume as cloned */
268 if ((rwvnode->type == vDirectory) && ReadWriteOriginal) {
271 * It is my firmly held belief that immediately after
272 * copy-on-write, the two directories can be identical.
273 * If the new copy is changed (presumably, that is the very
274 * next thing that will happen) then the dataVersion will
277 /* NOTE: the dataVersion++ is incredibly important!!!.
278 * This will cause the inode created by the file server
279 * on copy-on-write to be stamped with a dataVersion bigger
280 * than the current one. The salvager will then do the
282 rwvnode->dataVersion++;
285 code = STREAM_ASEEK(rwfile, offset);
288 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
292 code = STREAM_ASEEK(rwfile, offset + vcp->diskSize);
296 rwvnode->dataVersion--; /* Really needs to be set to the value in the inode,
297 * for the read-only volume */
302 /* Overwrite the vnode entry in the clone volume */
304 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, clfileout);
307 /* Couldn't clone, go back and decrement the inode's link count */
309 if (IH_DEC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) ==
311 Log("IH_DEC failed: %"AFS_PTR_FMT", %s, %u errno %d\n",
312 V_linkHandle(rwvp), PrintInode(stmp, rwinode),
313 V_parentId(rwvp), errno);
318 /* And if the directory was marked clone, unmark it */
321 if (STREAM_ASEEK(rwfile, offset) != -1)
322 (void)STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
327 /* Removal of the old cloned inode */
329 ci_AddItem(&decHead, clinode); /* just queue it */
334 if (STREAM_ERROR(clfileout))
337 /* Clean out any junk at end of clone file */
339 STREAM_ASEEK(clfilein, offset);
340 while (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1) {
341 if (clvnode->type != vNull && VNDISK_GET_INO(clvnode) != 0) {
342 ci_AddItem(&decHead, VNDISK_GET_INO(clvnode));
348 /* come here to finish up. If code is non-zero, we've already run into problems,
349 * and shouldn't do the idecs.
353 STREAM_CLOSE(rwfile);
355 STREAM_CLOSE(clfilein);
357 STREAM_CLOSE(clfileout);
373 /* Next, we sync the disk. We have to reopen in case we're truncating,
374 * since we were using stdio above, and don't know when the buffers
375 * would otherwise be flushed. There's no stdio fftruncate call.
377 rwFd = IH_OPEN(clvp->vnodeIndex[class].handle);
383 /* If doing a reclone, we're keeping the clone. We need to
384 * truncate the file to offset bytes.
386 if (reclone && !error) {
387 error = FDH_TRUNC(rwFd, offset);
390 (void)FDH_SYNC(rwFd);
394 /* Now finally do the idec's. At this point, all potential
395 * references have been cleaned up and sent to the disk
396 * (see above fclose and fsync). No matter what happens, we
397 * no longer need to keep these references around.
399 code = ci_Apply(&decHead, IDecProc, (char *)&decRock);
402 ci_Destroy(&decHead);
404 if (ReadWriteOriginal && filecount > 0)
405 V_filecount(rwvp) = filecount;
406 if (ReadWriteOriginal && diskused > 0)
407 V_diskused(rwvp) = diskused;
/*
 * CloneVolume - clone the volume original into the volume new.
 *
 * rerror    error output parameter; how it is filled in is not visible in
 *           this listing
 * original  volume being cloned
 * new       volume that receives the clone
 * old       when this is the same pointer as new, the call is treated as a
 *           reclone (reclone = 1), otherwise reclone = 0
 *
 * The large vnode index is cloned first, then the small one, both through
 * DoCloneIndex.  The filecount and diskused values of original are sampled
 * before the two calls, and a log line is written when either differs
 * afterwards.  Finally CopyVolumeHeader is called with the disk header of
 * original and the disk header of new.
 *
 * NOTE(review): the checks on the status values returned by the calls and
 * the end of the function are not visible in this listing.
 */
412 CloneVolume(Error * rerror, Volume * original, Volume * new, Volume * old)
414 afs_int32 code, error = 0;
416 afs_int32 filecount = V_filecount(original), diskused = V_diskused(original);
419 reclone = ((new == old) ? 1 : 0);
421 code = DoCloneIndex(original, new, vLarge, reclone);
424 code = DoCloneIndex(original, new, vSmall, reclone);
427 if (filecount != V_filecount(original) || diskused != V_diskused(original))
428 Log("Clone %u: filecount %d -> %d diskused %d -> %d\n",
429 V_id(original), filecount, V_filecount(original), diskused, V_diskused(original));
431 code = CopyVolumeHeader(&V_disk(original), &V_disk(new));