Magellan Linux

Contents of /trunk/kernel26-magellan/patches-2.6.21-r15/0153-2.6.21-unionfs-2.1.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 388 - (show annotations) (download)
Thu Oct 25 23:11:24 2007 UTC (16 years, 6 months ago) by niro
File size: 318435 byte(s)
-added 2.6.21-r15

1 diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
2 index 5717858..2ef035e 100644
3 --- a/Documentation/filesystems/00-INDEX
4 +++ b/Documentation/filesystems/00-INDEX
5 @@ -84,6 +84,8 @@ udf.txt
6 - info and mount options for the UDF filesystem.
7 ufs.txt
8 - info on the ufs filesystem.
9 +unionfs/
10 + - info on the unionfs filesystem
11 vfat.txt
12 - info on using the VFAT filesystem used in Windows NT and Windows 95
13 vfs.txt
14 diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
15 new file mode 100644
16 index 0000000..96fdf67
17 --- /dev/null
18 +++ b/Documentation/filesystems/unionfs/00-INDEX
19 @@ -0,0 +1,10 @@
20 +00-INDEX
21 + - this file.
22 +concepts.txt
23 + - A brief introduction of concepts.
24 +issues.txt
25 + - A summary of known issues with unionfs.
26 +rename.txt
27 + - Information regarding rename operations.
28 +usage.txt
29 + - Usage information and examples.
30 diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
31 new file mode 100644
32 index 0000000..eb74aac
33 --- /dev/null
34 +++ b/Documentation/filesystems/unionfs/concepts.txt
35 @@ -0,0 +1,181 @@
36 +Unionfs 2.0 CONCEPTS:
37 +=====================
38 +
39 +This file describes the concepts needed by a namespace unification file
40 +system.
41 +
42 +
43 +Branch Priority:
44 +================
45 +
46 +Each branch is assigned a unique priority - starting from 0 (highest
47 +priority). No two branches can have the same priority.
48 +
49 +
50 +Branch Mode:
51 +============
52 +
53 +Each branch is assigned a mode - read-write or read-only. This allows
54 +directories on media mounted read-write to be used in a read-only manner.
55 +
56 +
57 +Whiteouts:
58 +==========
59 +
60 +A whiteout removes a file name from the namespace. Whiteouts are needed when
61 +one attempts to remove a file on a read-only branch.
62 +
63 +Suppose we have a two-branch union, where branch 0 is read-write and branch
64 +1 is read-only. And a file 'foo' on branch 1:
65 +
66 +./b0/
67 +./b1/
68 +./b1/foo
69 +
70 +The unified view would simply be:
71 +
72 +./union/
73 +./union/foo
74 +
75 +Since 'foo' is stored on a read-only branch, it cannot be removed. A
76 +whiteout is used to remove the name 'foo' from the unified namespace. Again,
77 +since branch 1 is read-only, the whiteout cannot be created there. So, we
78 +try on a higher priority (lower numerically) branch and create the whiteout
79 +there.
80 +
81 +./b0/
82 +./b0/.wh.foo
83 +./b1/
84 +./b1/foo
85 +
86 +Later, when Unionfs traverses branches (due to lookup or readdir), it
87 +eliminates 'foo' from the namespace (as well as the whiteout itself.)
88 +
89 +
90 +Duplicate Elimination:
91 +======================
92 +
93 +It is possible for files on different branches to have the same name.
94 +Unionfs then has to select which instance of the file to show to the user.
95 +Given the fact that each branch has a priority associated with it, the
96 +simplest solution is to take the instance from the highest priority
97 +(numerically lowest value) and "hide" the others.
98 +
99 +
100 +Copyup:
101 +=======
102 +
103 +When a change is made to the contents of a file's data or meta-data, they
104 +have to be stored somewhere. The best way is to create a copy of the
105 +original file on a branch that is writable, and then redirect the write
106 +through to this copy. The copy must be made on a higher priority branch so
107 +that lookup and readdir return this newer "version" of the file rather than
108 +the original (see duplicate elimination).
109 +
110 +
111 +Cache Coherency:
112 +================
113 +
114 +Unionfs users often want to be able to modify files and directories directly
115 +on the lower branches, and have those changes be visible at the Unionfs
116 +level. This means that data (e.g., pages) and meta-data (dentries, inodes,
117 +open files, etc.) have to be synchronized between the upper and lower
118 +layers. In other words, the newest changes from a layer below have to be
119 +propagated to the Unionfs layer above. If the two layers are not in sync, a
120 +cache incoherency ensues, which could lead to application failures and even
121 +oopses. The Linux kernel, however, has a rather limited set of mechanisms
122 +to ensure this inter-layer cache coherency---so Unionfs has to do most of
123 +the hard work on its own.
124 +
125 +Maintaining Invariants:
126 +
127 +The way Unionfs ensures cache coherency is as follows. At each entry point
128 +to a Unionfs file system method, we call a utility function to validate the
129 +primary objects of this method. Generally, we call unionfs_file_revalidate
130 +on open files, and __unionfs_d_revalidate_chain on dentries (which also
131 +validates inodes). These utility functions check to see whether the upper
132 +Unionfs object is in sync with any of the lower objects that it represents.
133 +The checks we perform include whether the Unionfs superblock has a newer
134 +generation number, or if any of the lower objects mtime's or ctime's are
135 +newer. (Note: generation numbers change when branch-management commands are
136 +issued, so in a way, maintaining cache coherency is also very important for
137 +branch-management.) If indeed we determine that any Unionfs object is no
138 +longer in sync with its lower counterparts, then we rebuild that object
139 +similarly to how we do so for branch-management.
140 +
141 +While rebuilding Unionfs's objects, we also purge any page mappings and
142 +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
143 +ensure that Unionfs will re-get the newer data from the lower branches. We
144 +perform this purging only if the Unionfs operation in question is a reading
145 +operation; if Unionfs is performing a data writing operation (e.g., ->write,
146 +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
147 +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
148 +considered more authoritative anyway, as they are newer and will overwrite
149 +any lower pages.
150 +
151 +Unionfs maintains the following important invariant regarding mtime's,
152 +ctime's, and atime's: the upper inode object's times are the max() of all of
153 +the lower ones. For non-directory objects, there's only one object below,
154 +so the mapping is simple; for directory objects, there could be multiple
155 +lower objects and we have to sync up with the newest one of all the lower
156 +ones. This invariant is important to maintain, especially for directories
157 +(besides, we need this to be POSIX compliant). A union could comprise
158 +multiple writable branches, each of which could change. If we don't reflect
159 +the newest possible mtime/ctime, some applications could fail. For example,
160 +NFSv2/v3 exports check for newer directory mtimes on the server to determine
161 +if the client-side attribute cache should be purged.
162 +
163 +To maintain these important invariants, of course, Unionfs carefully
164 +synchronizes upper and lower times in various places. For example, if we
165 +copy-up a file to a top-level branch, the parent directory where the file
166 +was copied up to will now have a new mtime: so after a successful copy-up,
167 +we sync up with the new top-level branch's parent directory mtime.
168 +
169 +Implementation:
170 +
171 +This cache-coherency implementation is efficient because it defers any
172 +synchronizing between the upper and lower layers until absolutely needed.
173 +Consider, for example, a common situation where users perform a lot of lower
174 +changes, such as untarring a whole package. While these take place,
175 +typically the user doesn't access the files via Unionfs; only after the
176 +lower changes are done, does the user try to access the lower files. With
177 +our cache-coherency implementation, the entirety of the changes to the lower
178 +branches will not result in a single CPU cycle spent at the Unionfs level
179 +until the user invokes a system call that goes through Unionfs.
180 +
181 +We have considered two alternate cache-coherency designs. (1) Using the
182 +dentry/inode notify functionality to register interest in finding out about
183 +any lower changes. This is a somewhat limited and also a heavy-handed
184 +approach which could result in many notifications to the Unionfs layer upon
185 +each small change at the lower layer (imagine a file being modified multiple
186 +times in rapid succession). (2) Rewriting the VFS to support explicit
187 +callbacks from lower objects to upper objects. We began exploring such an
188 +implementation, but found it to be very complicated--it would have resulted
189 +in massive VFS/MM changes which are unlikely to be accepted by the LKML
190 +community. We therefore believe that our current cache-coherency design and
191 +implementation represent the best approach at this time.
192 +
193 +Limitations:
194 +
195 +Our implementation works as long as a user process causes Unionfs to be
196 +called, directly or indirectly, even if only to do ->d_revalidate; at that
197 +point we will have purged the current Unionfs data, and the
198 +process will see the new data. For example, a process that continually
199 +re-reads the same file's data will see the NEW data as soon as the lower
200 +file has changed, upon the next read(2) syscall (even if the file is still
201 +open!). However, this doesn't work when the process re-reads the open file's
202 +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
203 +it). Once we respond to ->readpage(s), then the kernel maps the page into
204 +the process's address space and there doesn't appear to be a way to force
205 +the kernel to invalidate those pages/mappings, and force the process to
206 +re-issue ->readpage. If there's a way to invalidate active mappings and
207 +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
208 +the trick).
209 +
210 +Our current Unionfs code has to perform many file-revalidation calls. It
211 +would be really nice if the VFS would export an optional file system hook
212 +->file_revalidate (similarly to dentry->d_revalidate) that will be called
213 +before each VFS op that has a "struct file" in it.
214 +
215 +
216 +For more information, see <http://unionfs.filesystems.org/>.
217 diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
218 new file mode 100644
219 index 0000000..3644fea
220 --- /dev/null
221 +++ b/Documentation/filesystems/unionfs/issues.txt
222 @@ -0,0 +1,15 @@
223 +KNOWN Unionfs 2.0 ISSUES:
224 +=========================
225 +
226 +1. The NFS server returns -EACCES for read-only exports, instead of -EROFS.
227 + This means we can't reliably detect a read-only NFS export.
228 +
229 +2. Unionfs should not use lookup_one_len() on the underlying f/s as it
230 + confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
231 + lower file-system; this eliminates part of the problem. The remaining
232 + calls to lookup_one_len may need to be changed to pass an intent. We are
233 + currently introducing VFS changes to fs/namei.c's do_path_lookup() to
234 + allow proper file lookup and opening in stackable file systems.
235 +
236 +
237 +For more information, see <http://unionfs.filesystems.org/>.
238 diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
239 new file mode 100644
240 index 0000000..e20bb82
241 --- /dev/null
242 +++ b/Documentation/filesystems/unionfs/rename.txt
243 @@ -0,0 +1,31 @@
244 +Rename is a complex beast. The following table shows which rename(2) operations
245 +should succeed and which should fail.
246 +
247 +o: success
248 +E: error (either unionfs or vfs)
249 +X: EXDEV
250 +
251 +none = file does not exist
252 +file = file is a file
253 +dir = file is an empty directory
254 +child= file is a non-empty directory
255 +wh = file is a directory containing only whiteouts; this makes it logically
256 + empty
257 +
258 + none file dir child wh
259 +file o o E E E
260 +dir o E o E o
261 +child X E X E X
262 +wh o E o E o
263 +
264 +
265 +Renaming directories:
266 +=====================
267 +
268 +Whenever an empty (either physically or logically) directory is being renamed,
269 +the following sequence of events should take place:
270 +
271 +1) Remove whiteouts from both source and destination directory
272 +2) Rename source to destination
273 +3) Make destination opaque to prevent anything under it from showing up
274 +
275 diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
276 new file mode 100644
277 index 0000000..c035243
278 --- /dev/null
279 +++ b/Documentation/filesystems/unionfs/usage.txt
280 @@ -0,0 +1,97 @@
281 +Unionfs is a stackable unification file system, which can appear to merge
282 +the contents of several directories (branches), while keeping their physical
283 +content separate. Unionfs is useful for unified source tree management,
284 +merged contents of split CD-ROM, merged separate software package
285 +directories, data grids, and more. Unionfs allows any mix of read-only and
286 +read-write branches, as well as insertion and deletion of branches anywhere
287 +in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
288 +duplicates, partial-error conditions, and more.
289 +
290 +# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
291 +
292 +The available branch-option for the mount command is:
293 +
294 + dirs=branch[=ro|=rw][:...]
295 +
296 +specifies a colon-separated list of the directories that compose the union.
297 +Directories that come earlier in the list have a higher precedence than
298 +those which come later. Additionally, read-only or read-write permissions of
299 +the branch can be specified by appending =ro or =rw (default) to each
300 +directory.
301 +
302 +Syntax:
303 +
304 + dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
305 +
306 +Example:
307 +
308 + dirs=/writable_branch=rw:/read-only_branch=ro
309 +
310 +
311 +DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
312 +======================================
313 +
314 +You can remount a union and change its overall mode, or reconfigure the
315 +branches, as follows.
316 +
317 +To downgrade a union from read-write to read-only:
318 +
319 +# mount -t unionfs -o remount,ro none MOUNTPOINT
320 +
321 +To upgrade a union from read-only to read-write:
322 +
323 +# mount -t unionfs -o remount,rw none MOUNTPOINT
324 +
325 +To delete a branch /foo, regardless where it is in the current union:
326 +
327 +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
328 +
329 +To insert (add) a branch /foo before /bar:
330 +
331 +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
332 +
333 +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
334 +
335 +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
336 +
337 +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
338 +new highest-priority branch), you can use the above syntax, or use a short
339 +hand version as follows:
340 +
341 +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
342 +
343 +To append a branch to the very end (new lowest-priority branch):
344 +
345 +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
346 +
347 +To append a branch to the very end (new lowest-priority branch), in
348 +read-only mode:
349 +
350 +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
351 +
352 +Finally, to change the mode of one existing branch, say /foo, from read-only
353 +to read-write, and change /bar from read-write to read-only:
354 +
355 +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
356 +
357 +
358 +CACHE CONSISTENCY
359 +=================
360 +
361 +If you modify any file on any of the lower branches directly, while there is
362 +a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
363 +to purge its caches and re-get the objects. To do that, you have to
364 +increment the generation number of the superblock using the following
365 +command:
366 +
367 +# mount -t unionfs -o remount,incgen none MOUNTPOINT
368 +
369 +Note that the older way of incrementing the generation number, using an
370 +ioctl, is no longer supported in Unionfs 2.0. Ioctls in general are not
371 +encouraged. Plus, an ioctl is a per-file concept, whereas the generation
372 +number is a per-file-system concept. Worse, such an ioctl requires an open
373 +file, which then has to be invalidated by the very nature of the generation
374 +number increase (read: the old generation increase ioctl was pretty racy).
375 +
376 +
377 +For more information, see <http://unionfs.filesystems.org/>.
378 diff --git a/MAINTAINERS b/MAINTAINERS
379 index 277877a..d694ced 100644
380 --- a/MAINTAINERS
381 +++ b/MAINTAINERS
382 @@ -3364,6 +3364,15 @@ L: linux-kernel@vger.kernel.org
383 W: http://www.kernel.dk
384 S: Maintained
385
386 +UNIONFS
387 +P: Erez Zadok
388 +M: ezk@cs.sunysb.edu
389 +P: Josef "Jeff" Sipek
390 +M: jsipek@cs.sunysb.edu
391 +L: unionfs@filesystems.org
392 +W: http://unionfs.filesystems.org
393 +S: Maintained
394 +
395 USB ACM DRIVER
396 P: Oliver Neukum
397 M: oliver@neukum.name
398 diff --git a/fs/Kconfig b/fs/Kconfig
399 index 3c4886b..ac4dc6d 100644
400 --- a/fs/Kconfig
401 +++ b/fs/Kconfig
402 @@ -1034,6 +1034,41 @@ config CONFIGFS_FS
403
404 endmenu
405
406 +menu "Layered filesystems"
407 +
408 +config ECRYPT_FS
409 + tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
410 + depends on EXPERIMENTAL && KEYS && CRYPTO && NET
411 + help
412 + Encrypted filesystem that operates on the VFS layer. See
413 + <file:Documentation/ecryptfs.txt> to learn more about
414 + eCryptfs. Userspace components are required and can be
415 + obtained from <http://ecryptfs.sf.net>.
416 +
417 + To compile this file system support as a module, choose M here: the
418 + module will be called ecryptfs.
419 +
420 +config UNION_FS
421 + tristate "Union file system (EXPERIMENTAL)"
422 + depends on EXPERIMENTAL
423 + help
424 + Unionfs is a stackable unification file system, which appears to
425 + merge the contents of several directories (branches), while keeping
426 + their physical content separate.
427 +
428 + See <http://unionfs.filesystems.org> for details
429 +
430 +config UNION_FS_XATTR
431 + bool "Unionfs extended attributes"
432 + depends on UNION_FS
433 + help
434 + Extended attributes are name:value pairs associated with inodes by
435 + the kernel or by users (see the attr(5) manual page).
436 +
437 + If unsure, say N.
438 +
439 +endmenu
440 +
441 menu "Miscellaneous filesystems"
442
443 config ADFS_FS
444 @@ -1086,18 +1121,6 @@ config AFFS_FS
445 To compile this file system support as a module, choose M here: the
446 module will be called affs. If unsure, say N.
447
448 -config ECRYPT_FS
449 - tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
450 - depends on EXPERIMENTAL && KEYS && CRYPTO && NET
451 - help
452 - Encrypted filesystem that operates on the VFS layer. See
453 - <file:Documentation/ecryptfs.txt> to learn more about
454 - eCryptfs. Userspace components are required and can be
455 - obtained from <http://ecryptfs.sf.net>.
456 -
457 - To compile this file system support as a module, choose M here: the
458 - module will be called ecryptfs.
459 -
460 config HFS_FS
461 tristate "Apple Macintosh file system support (EXPERIMENTAL)"
462 depends on BLOCK && EXPERIMENTAL
463 diff --git a/fs/Makefile b/fs/Makefile
464 index 9edf411..b490b1a 100644
465 --- a/fs/Makefile
466 +++ b/fs/Makefile
467 @@ -114,3 +114,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
468 obj-$(CONFIG_DEBUG_FS) += debugfs/
469 obj-$(CONFIG_OCFS2_FS) += ocfs2/
470 obj-$(CONFIG_GFS2_FS) += gfs2/
471 +obj-$(CONFIG_UNION_FS) += unionfs/
472 diff --git a/fs/drop_caches.c b/fs/drop_caches.c
473 index 03ea769..6a7aa05 100644
474 --- a/fs/drop_caches.c
475 +++ b/fs/drop_caches.c
476 @@ -3,6 +3,7 @@
477 */
478
479 #include <linux/kernel.h>
480 +#include <linux/module.h>
481 #include <linux/mm.h>
482 #include <linux/fs.h>
483 #include <linux/writeback.h>
484 @@ -12,7 +13,7 @@
485 /* A global variable is a bit ugly, but it keeps the code simple */
486 int sysctl_drop_caches;
487
488 -static void drop_pagecache_sb(struct super_block *sb)
489 +void drop_pagecache_sb(struct super_block *sb)
490 {
491 struct inode *inode;
492
493 @@ -24,6 +25,7 @@ static void drop_pagecache_sb(struct super_block *sb)
494 }
495 spin_unlock(&inode_lock);
496 }
497 +EXPORT_SYMBOL(drop_pagecache_sb);
498
499 void drop_pagecache(void)
500 {
501 diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
502 index cb20b96..a8c1686 100644
503 --- a/fs/ecryptfs/dentry.c
504 +++ b/fs/ecryptfs/dentry.c
505 @@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
506 struct inode *lower_inode =
507 ecryptfs_inode_to_lower(dentry->d_inode);
508
509 - fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
510 + fsstack_copy_attr_all(dentry->d_inode, lower_inode);
511 }
512 out:
513 return rc;
514 diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
515 index 1548be2..d37cc12 100644
516 --- a/fs/ecryptfs/inode.c
517 +++ b/fs/ecryptfs/inode.c
518 @@ -280,7 +280,9 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
519 int rc = 0;
520 struct dentry *lower_dir_dentry;
521 struct dentry *lower_dentry;
522 + struct dentry *dentry_save;
523 struct vfsmount *lower_mnt;
524 + struct vfsmount *mnt_save;
525 char *encoded_name;
526 unsigned int encoded_namelen;
527 struct ecryptfs_crypt_stat *crypt_stat = NULL;
528 @@ -308,9 +310,13 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
529 }
530 ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
531 "= [%d]\n", encoded_name, encoded_namelen);
532 - lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
533 - encoded_namelen - 1);
534 + dentry_save = nd->dentry;
535 + mnt_save = nd->mnt;
536 + lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry,
537 + (encoded_namelen - 1), nd);
538 kfree(encoded_name);
539 + nd->mnt = mnt_save;
540 + nd->dentry = dentry_save;
541 if (IS_ERR(lower_dentry)) {
542 ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
543 rc = PTR_ERR(lower_dentry);
544 @@ -597,9 +603,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
545 lower_new_dir_dentry->d_inode, lower_new_dentry);
546 if (rc)
547 goto out_lock;
548 - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
549 + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
550 if (new_dir != old_dir)
551 - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
552 + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
553 out_lock:
554 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
555 dput(lower_new_dentry->d_parent);
556 @@ -892,7 +898,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
557 }
558 rc = notify_change(lower_dentry, ia);
559 out:
560 - fsstack_copy_attr_all(inode, lower_inode, NULL);
561 + fsstack_copy_attr_all(inode, lower_inode);
562 return rc;
563 }
564
565 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
566 index fc4a3a2..07c3a58 100644
567 --- a/fs/ecryptfs/main.c
568 +++ b/fs/ecryptfs/main.c
569 @@ -151,7 +151,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
570 d_add(dentry, inode);
571 else
572 d_instantiate(dentry, inode);
573 - fsstack_copy_attr_all(inode, lower_inode, NULL);
574 + fsstack_copy_attr_all(inode, lower_inode);
575 /* This size will be overwritten for real files w/ headers and
576 * other metadata */
577 fsstack_copy_inode_size(inode, lower_inode);
578 diff --git a/fs/namei.c b/fs/namei.c
579 index ee60cc4..436e9fa 100644
580 --- a/fs/namei.c
581 +++ b/fs/namei.c
582 @@ -1125,6 +1125,10 @@ static int fastcall do_path_lookup(int dfd, const char *name,
583 nd->mnt = mntget(fs->rootmnt);
584 nd->dentry = dget(fs->root);
585 read_unlock(&fs->lock);
586 + } else if (flags & LOOKUP_ONE) {
587 + /* nd->mnt and nd->dentry already set, just grab references */
588 + mntget(nd->mnt);
589 + dget(nd->dentry);
590 } else if (dfd == AT_FDCWD) {
591 read_lock(&fs->lock);
592 nd->mnt = mntget(fs->pwdmnt);
593 @@ -1293,29 +1297,37 @@ static struct dentry *lookup_hash(struct nameidata *nd)
594 }
595
596 /* SMP-safe */
597 -struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
598 +static inline int __lookup_one_len(const char *name, struct qstr *this, struct dentry *base, int len)
599 {
600 unsigned long hash;
601 - struct qstr this;
602 unsigned int c;
603
604 - this.name = name;
605 - this.len = len;
606 + this->name = name;
607 + this->len = len;
608 if (!len)
609 - goto access;
610 + return -EACCES;
611
612 hash = init_name_hash();
613 while (len--) {
614 c = *(const unsigned char *)name++;
615 if (c == '/' || c == '\0')
616 - goto access;
617 + return -EACCES;
618 hash = partial_name_hash(c, hash);
619 }
620 - this.hash = end_name_hash(hash);
621 + this->hash = end_name_hash(hash);
622 + return 0;
623 +}
624 +
625 +struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
626 + int len, struct nameidata *nd)
627 +{
628 + int err;
629 + struct qstr this;
630
631 - return __lookup_hash(&this, base, NULL);
632 -access:
633 - return ERR_PTR(-EACCES);
634 + err = __lookup_one_len(name, &this, base, len);
635 + if (err)
636 + return ERR_PTR(err);
637 + return __lookup_hash(&this, base, nd);
638 }
639
640 /*
641 @@ -2758,7 +2770,7 @@ EXPORT_SYMBOL(follow_up);
642 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
643 EXPORT_SYMBOL(getname);
644 EXPORT_SYMBOL(lock_rename);
645 -EXPORT_SYMBOL(lookup_one_len);
646 +EXPORT_SYMBOL(lookup_one_len_nd);
647 EXPORT_SYMBOL(page_follow_link_light);
648 EXPORT_SYMBOL(page_put_link);
649 EXPORT_SYMBOL(page_readlink);
650 diff --git a/fs/stack.c b/fs/stack.c
651 index 67716f6..56fd0df 100644
652 --- a/fs/stack.c
653 +++ b/fs/stack.c
654 @@ -1,8 +1,20 @@
655 +/*
656 + * Copyright (c) 2003-2007 Erez Zadok
657 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
658 + * Copyright (c) 2003-2007 Stony Brook University
659 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
660 + *
661 + * This program is free software; you can redistribute it and/or modify
662 + * it under the terms of the GNU General Public License version 2 as
663 + * published by the Free Software Foundation.
664 + */
665 +
666 #include <linux/module.h>
667 #include <linux/fs.h>
668 #include <linux/fs_stack.h>
669
670 -/* does _NOT_ require i_mutex to be held.
671 +/*
672 + * does _NOT_ require i_mutex to be held.
673 *
674 * This function cannot be inlined since i_size_{read,write} is rather
675 * heavy-weight on 32-bit systems
676 @@ -14,11 +26,11 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
677 }
678 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
679
680 -/* copy all attributes; get_nlinks is optional way to override the i_nlink
681 +/*
682 + * copy all attributes from src to dest, i_nlink included, by plain
683 * copying
684 */
685 -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
686 - int (*get_nlinks)(struct inode *))
687 +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
688 {
689 dest->i_mode = src->i_mode;
690 dest->i_uid = src->i_uid;
691 @@ -29,14 +41,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
692 dest->i_ctime = src->i_ctime;
693 dest->i_blkbits = src->i_blkbits;
694 dest->i_flags = src->i_flags;
695 -
696 - /*
697 - * Update the nlinks AFTER updating the above fields, because the
698 - * get_links callback may depend on them.
699 - */
700 - if (!get_nlinks)
701 - dest->i_nlink = src->i_nlink;
702 - else
703 - dest->i_nlink = (*get_nlinks)(dest);
704 + dest->i_nlink = src->i_nlink;
705 }
706 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
707 diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
708 new file mode 100644
709 index 0000000..020b505
710 --- /dev/null
711 +++ b/fs/unionfs/Makefile
712 @@ -0,0 +1,24 @@
713 +UNIONFS_VERSION="2.1 (for 2.6.21.6)"
714 +
715 +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
716 +
717 +obj-$(CONFIG_UNION_FS) += unionfs.o
718 +
719 +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
720 + rdstate.o copyup.o dirhelper.o rename.o \
721 + unlink.o lookup.o commonfops.o dirfops.o sioq.o \
722 + mmap.o
723 +
724 +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
725 +
726 +# If you want debugging output, please uncomment the following line
727 +# or put your options in a separate file in linux-x.y.z/fs/unionfs/local.mk
728 +#CONFIG_UNIONFS_DEBUG=y
729 +
730 +# Allow users to override debug options in a separate file
731 +-include fs/unionfs/local.mk
732 +
733 +ifeq ($(CONFIG_UNIONFS_DEBUG),y)
734 +unionfs-y += debug.o
735 +EXTRA_CFLAGS += -DUNIONFS_DEBUG=1
736 +endif
737 diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
738 new file mode 100644
739 index 0000000..d77608e
740 --- /dev/null
741 +++ b/fs/unionfs/commonfops.c
742 @@ -0,0 +1,837 @@
743 +/*
744 + * Copyright (c) 2003-2007 Erez Zadok
745 + * Copyright (c) 2003-2006 Charles P. Wright
746 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
747 + * Copyright (c) 2005-2006 Junjiro Okajima
748 + * Copyright (c) 2005 Arun M. Krishnakumar
749 + * Copyright (c) 2004-2006 David P. Quigley
750 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
751 + * Copyright (c) 2003 Puja Gupta
752 + * Copyright (c) 2003 Harikesavan Krishnan
753 + * Copyright (c) 2003-2007 Stony Brook University
754 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
755 + *
756 + * This program is free software; you can redistribute it and/or modify
757 + * it under the terms of the GNU General Public License version 2 as
758 + * published by the Free Software Foundation.
759 + */
760 +
761 +#include "union.h"
762 +
763 +/*
764 + * Copy up the file from branch bstart to branch bindex under a temp name,
765 + * '.unionfs<original inode#><counter>' (obviously stolen from NFS's silly
766 + * rename), then unlink the lower dentry. Returns 0 or the failing step's error.
767 + */
768 +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
769 + int bstart, int bindex)
770 +{
771 + static unsigned int counter; /* shared across calls; bumped per attempt */
772 + const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2; /* hex digits */
773 + const int countersize = sizeof(counter) * 2; /* hex digits */
774 + const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
775 + char name[nlen + 1];
776 + int err;
777 + struct dentry *tmp_dentry = NULL;
778 + struct dentry *lower_dentry;
779 + struct dentry *lower_dir_dentry = NULL;
780 +
781 + lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
782 +
783 + sprintf(name, ".unionfs%*.*lx",
784 + i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
785 +
786 + /*
787 + * Loop, looking for an unused temp name to copyup to.
788 + *
789 + * It's somewhat silly that we look for a free temp name in the
790 + * source branch (bstart) instead of the dest branch (bindex), where
791 + * the final name will be created. We _will_ catch it if somehow
792 + * the name exists in the dest branch (copyup fails with -EEXIST and
793 + * we retry), but it'd be nice to catch it sooner rather than later.
794 + */
795 +retry:
796 + tmp_dentry = NULL;
797 + do {
798 + char *suffix = name + nlen - countersize;
799 +
800 + dput(tmp_dentry);
801 + counter++;
802 + sprintf(suffix, "%*.*x", countersize, countersize, counter);
803 +
804 + printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n",
805 + dentry->d_name.name, name);
806 +
807 + tmp_dentry = lookup_one_len(name, lower_dentry->d_parent,
808 + nlen);
809 + if (IS_ERR(tmp_dentry)) {
810 + err = PTR_ERR(tmp_dentry);
811 + goto out;
812 + }
813 + /* don't dput here because of do-while condition eval order */
814 + } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
815 + dput(tmp_dentry);
816 +
817 + err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
818 + bindex, file->f_path.dentry->d_inode->i_size);
819 + if (err) {
820 + if (err == -EEXIST)
821 + goto retry;
822 + goto out;
823 + }
824 +
825 + /* bring it to the same state as an unlinked file */
826 + lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
827 + if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
828 + atomic_inc(&lower_dentry->d_inode->i_count);
829 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
830 + lower_dentry->d_inode);
831 + }
832 + lower_dir_dentry = lock_parent(lower_dentry);
833 + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
834 + unlock_dir(lower_dir_dentry);
835 +
836 +out:
837 + if (!err)
838 + unionfs_check_dentry(dentry);
839 + return err;
840 +}
841 +
842 +/*
843 + * Put all references held by the upper struct file (branchput + fput for each
844 + * open lower file), then free the lower_files and saved_branch_ids arrays.
845 + */
846 +static void cleanup_file(struct file *file)
847 +{
848 + int bindex, bstart, bend;
849 + struct file **lower_files;
850 + struct file *lower_file;
851 + struct super_block *sb = file->f_path.dentry->d_sb;
852 +
853 + lower_files = UNIONFS_F(file)->lower_files;
854 + bstart = fbstart(file);
855 + bend = fbend(file);
856 +
857 + for (bindex = bstart; bindex <= bend; bindex++) {
858 + int i; /* holds (possibly) updated branch index */
859 + int old_bid;
860 +
861 + lower_file = unionfs_lower_file_idx(file, bindex);
862 + if (!lower_file)
863 + continue;
864 +
865 + /*
866 + * Find new index of matching branch with an open
867 + * file, since branches could have been added or
868 + * deleted causing the one with open files to shift.
869 + */
870 + old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
871 + i = branch_id_to_idx(sb, old_bid);
872 + if (i < 0) {
873 + printk(KERN_ERR "unionfs: no superblock for "
874 + "file %p\n", file);
875 + continue; /* NOTE(review): lower_file is not fput on this path - confirm intended */
876 + }
877 +
878 + /* decrement count of open files */
879 + branchput(sb, i);
880 + /*
881 + * fput will perform an mntput for us on the correct branch.
882 + * Although we're using the file's old branch configuration,
883 + * bindex, which is the old index, correctly points to the
884 + * right branch in the file's branch list. In other words,
885 + * we're going to mntput the correct branch even if branches
886 + * have been added/removed.
887 + */
888 + fput(lower_file);
889 + UNIONFS_F(file)->lower_files[bindex] = NULL;
890 + UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
891 + }
892 +
893 + UNIONFS_F(file)->lower_files = NULL;
894 + kfree(lower_files);
895 + kfree(UNIONFS_F(file)->saved_branch_ids);
896 + /* set to NULL because caller needs to know if to kfree on error */
897 + UNIONFS_F(file)->saved_branch_ids = NULL;
898 +}
899 +
900 +/* open a lower file in every branch (dbstart..dbend) that has a lower dentry; returns 0, or the first dentry_open error */
901 +static int open_all_files(struct file *file)
902 +{
903 + int bindex, bstart, bend, err = 0;
904 + struct file *lower_file;
905 + struct dentry *lower_dentry;
906 + struct dentry *dentry = file->f_path.dentry;
907 + struct super_block *sb = dentry->d_sb;
908 +
909 + bstart = dbstart(dentry);
910 + bend = dbend(dentry);
911 +
912 + for (bindex = bstart; bindex <= bend; bindex++) {
913 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
914 + if (!lower_dentry)
915 + continue;
916 +
917 + dget(lower_dentry);
918 + unionfs_mntget(dentry, bindex);
919 + branchget(sb, bindex);
920 +
921 + lower_file =
922 + dentry_open(lower_dentry,
923 + unionfs_lower_mnt_idx(dentry, bindex),
924 + file->f_flags);
925 + if (IS_ERR(lower_file)) {
926 + err = PTR_ERR(lower_file); /* NOTE(review): the branchget above is not undone here - confirm */
927 + goto out;
928 + } else
929 + unionfs_set_lower_file_idx(file, bindex, lower_file);
930 + }
931 +out:
932 + return err;
933 +}
934 +
935 +/* open the highest priority lower file for a given upper file, or copy the file up when willwrite is set and it is open for writing on a read-only branch */
936 +static int open_highest_file(struct file *file, int willwrite)
937 +{
938 + int bindex, bstart, bend, err = 0;
939 + struct file *lower_file;
940 + struct dentry *lower_dentry;
941 + struct dentry *dentry = file->f_path.dentry;
942 + struct inode *parent_inode = dentry->d_parent->d_inode;
943 + struct super_block *sb = dentry->d_sb;
944 + loff_t inode_size = dentry->d_inode->i_size; /* loff_t as in do_delayed_copyup; size_t can truncate on 32-bit */
945 +
946 + bstart = dbstart(dentry);
947 + bend = dbend(dentry);
948 +
949 + lower_dentry = unionfs_lower_dentry(dentry);
950 + if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
951 + for (bindex = bstart - 1; bindex >= 0; bindex--) { /* try branches left of bstart until one copyup succeeds */
952 + err = copyup_file(parent_inode, file, bstart, bindex,
953 + inode_size);
954 + if (!err)
955 + break;
956 + }
957 + atomic_set(&UNIONFS_F(file)->generation,
958 + atomic_read(&UNIONFS_I(dentry->d_inode)->
959 + generation));
960 + goto out; /* err is the result of the last copyup attempt */
961 + }
962 +
963 + dget(lower_dentry);
964 + unionfs_mntget(dentry, bstart);
965 + lower_file = dentry_open(lower_dentry,
966 + unionfs_lower_mnt_idx(dentry, bstart),
967 + file->f_flags);
968 + if (IS_ERR(lower_file)) {
969 + err = PTR_ERR(lower_file);
970 + goto out;
971 + }
972 + branchget(sb, bstart);
973 + unionfs_set_lower_file(file, lower_file);
974 + /* Fix up the position. */
975 + lower_file->f_pos = file->f_pos;
976 +
977 + memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state)); /* carry over readahead state */
978 +out:
979 + return err;
980 +}
981 +
982 +/* perform a delayed copyup of a read-write file on a read-only branch; returns 0 or the error of the last copyup attempt */
983 +static int do_delayed_copyup(struct file *file)
984 +{
985 + int bindex, bstart, bend, err = 0;
986 + struct dentry *dentry = file->f_path.dentry;
987 + struct inode *parent_inode = dentry->d_parent->d_inode;
988 + loff_t inode_size = dentry->d_inode->i_size;
989 +
990 + bstart = fbstart(file);
991 + bend = fbend(file);
992 +
993 + BUG_ON(!S_ISREG(dentry->d_inode->i_mode)); /* only regular files are handled here */
994 +
995 + unionfs_check_file(file);
996 + unionfs_check_dentry(dentry);
997 + for (bindex = bstart - 1; bindex >= 0; bindex--) { /* try each branch left of bstart until one succeeds */
998 + if (!d_deleted(dentry))
999 + err = copyup_file(parent_inode, file, bstart,
1000 + bindex, inode_size);
1001 + else
1002 + err = copyup_deleted_file(file, dentry, bstart,
1003 + bindex);
1004 +
1005 + if (!err)
1006 + break;
1007 + }
1008 + if (err || (bstart <= fbstart(file))) /* nothing to release unless fbstart moved left of bstart */
1009 + goto out;
1010 + bend = fbend(file);
1011 + for (bindex = bstart; bindex <= bend; bindex++) { /* drop lower file/mnt/inode/dentry of the old branches */
1012 + if (unionfs_lower_file_idx(file, bindex)) {
1013 + branchput(dentry->d_sb, bindex);
1014 + fput(unionfs_lower_file_idx(file, bindex));
1015 + unionfs_set_lower_file_idx(file, bindex, NULL);
1016 + }
1017 + if (unionfs_lower_mnt_idx(dentry, bindex)) {
1018 + unionfs_mntput(dentry, bindex);
1019 + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1020 + }
1021 + if (unionfs_lower_dentry_idx(dentry, bindex)) {
1022 + BUG_ON(!dentry->d_inode);
1023 + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1024 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1025 + NULL);
1026 + dput(unionfs_lower_dentry_idx(dentry, bindex));
1027 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1028 + }
1029 + }
1030 + /* for reg file, we only open it "once" */
1031 + fbend(file) = fbstart(file);
1032 + set_dbend(dentry, dbstart(dentry));
1033 + ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1034 +
1035 +out:
1036 + unionfs_check_file(file);
1037 + unionfs_check_dentry(dentry);
1038 + return err;
1039 +}
1040 +
1041 +/*
1042 + * Revalidate the struct file; reopen its lower file(s) if they are stale
1043 + * @file: file to revalidate
1044 + * @willwrite: 1 if caller may cause changes to the file; 0 otherwise.
1045 + */
1046 +int unionfs_file_revalidate(struct file *file, int willwrite)
1047 +{
1048 + struct super_block *sb;
1049 + struct dentry *dentry;
1050 + int sbgen, fgen, dgen;
1051 + int bstart, bend;
1052 + int size;
1053 + int err = 0;
1054 +
1055 + dentry = file->f_path.dentry;
1056 + unionfs_lock_dentry(dentry);
1057 + sb = dentry->d_sb;
1058 +
1059 + /*
1060 + * First revalidate the dentry inside struct file,
1061 + * but not unhashed dentries.
1062 + */
1063 + if (!d_deleted(dentry) &&
1064 + !__unionfs_d_revalidate_chain(dentry, NULL, willwrite)) {
1065 + err = -ESTALE;
1066 + goto out_nofree;
1067 + }
1068 +
1069 + sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1070 + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1071 + fgen = atomic_read(&UNIONFS_F(file)->generation);
1072 +
1073 + BUG_ON(sbgen > dgen);
1074 +
1075 + /*
1076 + * There are two cases we are interested in. The first is if the
1077 + * generation is lower than the super-block. The second is if
1078 + * someone has copied up this file from underneath us, we also need
1079 + * to refresh things.
1080 + */
1081 + if (!d_deleted(dentry) &&
1082 + (sbgen > fgen || dbstart(dentry) != fbstart(file))) {
1083 + int orig_brid = /* save orig branch ID */
1084 + UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1085 +
1086 + /* First we throw out the existing files. */
1087 + cleanup_file(file);
1088 +
1089 + /* Now we reopen the file(s) as in unionfs_open. */
1090 + bstart = fbstart(file) = dbstart(dentry);
1091 + bend = fbend(file) = dbend(dentry);
1092 +
1093 + size = sizeof(struct file *) * sbmax(sb);
1094 + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1095 + if (!UNIONFS_F(file)->lower_files) {
1096 + err = -ENOMEM;
1097 + goto out;
1098 + }
1099 + size = sizeof(int) * sbmax(sb);
1100 + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1101 + if (!UNIONFS_F(file)->saved_branch_ids) {
1102 + err = -ENOMEM;
1103 + goto out;
1104 + }
1105 +
1106 + if (S_ISDIR(dentry->d_inode->i_mode)) {
1107 + /* We need to open all the files. */
1108 + err = open_all_files(file);
1109 + if (err)
1110 + goto out;
1111 + } else {
1112 + int new_brid;
1113 + /* We only open the highest priority branch. */
1114 + err = open_highest_file(file, willwrite);
1115 + if (err)
1116 + goto out;
1117 + new_brid = UNIONFS_F(file)->
1118 + saved_branch_ids[fbstart(file)];
1119 + if (new_brid != orig_brid && sbgen > fgen) {
1120 + /*
1121 + * If we re-opened the file on a different
1122 + * branch than the original one, and this
1123 + * was due to a new branch inserted, then
1124 + * update the mnt counts of the old and new
1125 + * branches accordingly.
1126 + */
1127 + unionfs_mntget(dentry, bstart); /* new branch */
1128 + unionfs_mntput(sb->s_root, /* orig branch */
1129 + branch_id_to_idx(sb, orig_brid));
1130 + }
1131 + }
1132 + atomic_set(&UNIONFS_F(file)->generation,
1133 + atomic_read(&UNIONFS_I(dentry->d_inode)->
1134 + generation));
1135 + }
1136 +
1137 + /* Copyup on the first write to a file on a readonly branch. */
1138 + if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1139 + !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1140 + is_robranch(dentry)) {
1141 + printk(KERN_DEBUG "unionfs: do delay copyup of \"%s\"\n",
1142 + dentry->d_name.name);
1143 + err = do_delayed_copyup(file);
1144 + }
1145 +
1146 +out:
1147 + if (err) { /* NOTE(review): also reached when do_delayed_copyup fails; the pointers freed below are not reset to NULL - confirm no later use or double free */
1148 + kfree(UNIONFS_F(file)->lower_files);
1149 + kfree(UNIONFS_F(file)->saved_branch_ids);
1150 + }
1151 +out_nofree:
1152 + if (!err)
1153 + unionfs_check_file(file);
1154 + unionfs_unlock_dentry(dentry);
1155 + return err;
1156 +}
1157 +
1158 +/* unionfs_open helper function: open a directory in every branch (dbstart..dbend) that has a lower dentry */
1159 +static int __open_dir(struct inode *inode, struct file *file)
1160 +{
1161 + struct dentry *lower_dentry;
1162 + struct file *lower_file;
1163 + int bindex, bstart, bend;
1164 +
1165 + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1166 + bend = fbend(file) = dbend(file->f_path.dentry);
1167 +
1168 + for (bindex = bstart; bindex <= bend; bindex++) {
1169 + lower_dentry =
1170 + unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1171 + if (!lower_dentry)
1172 + continue;
1173 +
1174 + dget(lower_dentry);
1175 + unionfs_mntget(file->f_path.dentry, bindex);
1176 + lower_file = dentry_open(lower_dentry,
1177 + unionfs_lower_mnt_idx(file->f_path.dentry,
1178 + bindex),
1179 + file->f_flags);
1180 + if (IS_ERR(lower_file))
1181 + return PTR_ERR(lower_file); /* files opened so far are released by unionfs_open's error path */
1182 +
1183 + unionfs_set_lower_file_idx(file, bindex, lower_file);
1184 +
1185 + /*
1186 + * The branchget goes after the open, because otherwise
1187 + * we would miss the reference on release.
1188 + */
1189 + branchget(inode->i_sb, bindex);
1190 + }
1191 +
1192 + return 0;
1193 +}
1194 +
1195 +/* unionfs_open helper function: open a file on its first (dbstart) branch, or copy it up for O_TRUNC on a read-only branch */
1196 +static int __open_file(struct inode *inode, struct file *file)
1197 +{
1198 + struct dentry *lower_dentry;
1199 + struct file *lower_file;
1200 + int lower_flags;
1201 + int bindex, bstart, bend;
1202 +
1203 + lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1204 + lower_flags = file->f_flags;
1205 +
1206 + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1207 + bend = fbend(file) = dbend(file->f_path.dentry);
1208 +
1209 + /*
1210 + * If the lower file exists on a read-only branch, either copy it up
1211 + * now (O_TRUNC) or open it without write flags and defer the copyup.
1212 + */
1213 + if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1214 + /*
1215 + * if the open will change the file, copy it up otherwise
1216 + * defer it.
1217 + */
1218 + if (lower_flags & O_TRUNC) {
1219 + int size = 0; /* truncating, so copy up zero bytes of data */
1220 + int err = -EROFS; /* returned if there is no branch left of bstart to try */
1221 +
1222 + /* copyup the file */
1223 + for (bindex = bstart - 1; bindex >= 0; bindex--) {
1224 + err = copyup_file(
1225 + file->f_path.dentry->d_parent->d_inode,
1226 + file, bstart, bindex, size);
1227 + if (!err)
1228 + break;
1229 + }
1230 + return err;
1231 + } else
1232 + lower_flags &= ~(OPEN_WRITE_FLAGS);
1233 + }
1234 +
1235 + dget(lower_dentry);
1236 +
1237 + /*
1238 + * dentry_open will decrement mnt refcnt if err.
1239 + * otherwise fput() will do an mntput() for us upon file close.
1240 + */
1241 + unionfs_mntget(file->f_path.dentry, bstart);
1242 + lower_file =
1243 + dentry_open(lower_dentry,
1244 + unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1245 + lower_flags);
1246 + if (IS_ERR(lower_file))
1247 + return PTR_ERR(lower_file);
1248 +
1249 + unionfs_set_lower_file(file, lower_file);
1250 + branchget(inode->i_sb, bstart);
1251 +
1252 + return 0;
1253 +}
1254 +
1255 +int unionfs_open(struct inode *inode, struct file *file)
1256 +{
1257 + int err = 0;
1258 + struct file *lower_file = NULL;
1259 + struct dentry *dentry = NULL;
1260 + int bindex = 0, bstart = 0, bend = 0;
1261 + int size;
1262 +
1263 + unionfs_read_lock(inode->i_sb);
1264 +
1265 + file->private_data =
1266 + kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1267 + if (!UNIONFS_F(file)) {
1268 + err = -ENOMEM;
1269 + goto out_nofree;
1270 + }
1271 + fbstart(file) = -1; /* no lower files opened yet */
1272 + fbend(file) = -1;
1273 + atomic_set(&UNIONFS_F(file)->generation,
1274 + atomic_read(&UNIONFS_I(inode)->generation));
1275 +
1276 + size = sizeof(struct file *) * sbmax(inode->i_sb); /* one slot per possible branch */
1277 + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1278 + if (!UNIONFS_F(file)->lower_files) {
1279 + err = -ENOMEM;
1280 + goto out;
1281 + }
1282 + size = sizeof(int) * sbmax(inode->i_sb);
1283 + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1284 + if (!UNIONFS_F(file)->saved_branch_ids) {
1285 + err = -ENOMEM;
1286 + goto out;
1287 + }
1288 +
1289 + dentry = file->f_path.dentry;
1290 + unionfs_lock_dentry(dentry);
1291 +
1292 + bstart = fbstart(file) = dbstart(dentry);
1293 + bend = fbend(file) = dbend(dentry);
1294 +
1295 + /* increment, so that we can flush appropriately */
1296 + atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
1297 +
1298 + /*
1299 + * open all directories and make the unionfs file struct point to
1300 + * these lower file structs
1301 + */
1302 + if (S_ISDIR(inode->i_mode))
1303 + err = __open_dir(inode, file); /* open a dir */
1304 + else
1305 + err = __open_file(inode, file); /* open a file */
1306 +
1307 + /* on error: undo the totalopens increment, then branchput and fput every lower file that was opened */
1308 + if (err) {
1309 + atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
1310 + for (bindex = bstart; bindex <= bend; bindex++) {
1311 + lower_file = unionfs_lower_file_idx(file, bindex);
1312 + if (!lower_file)
1313 + continue;
1314 +
1315 + branchput(file->f_path.dentry->d_sb, bindex);
1316 + /* fput calls dput for lower_dentry */
1317 + fput(lower_file);
1318 + }
1319 + }
1320 +
1321 + unionfs_unlock_dentry(dentry);
1322 +
1323 +out:
1324 + if (err) {
1325 + kfree(UNIONFS_F(file)->lower_files);
1326 + kfree(UNIONFS_F(file)->saved_branch_ids);
1327 + kfree(UNIONFS_F(file)); /* NOTE(review): file->private_data is not reset after this free - confirm nothing reads it after a failed open */
1328 + }
1329 +out_nofree:
1330 + unionfs_read_unlock(inode->i_sb);
1331 + unionfs_check_inode(inode);
1332 + if (!err) {
1333 + unionfs_check_file(file);
1334 + unionfs_check_dentry(file->f_path.dentry->d_parent);
1335 + }
1336 + return err;
1337 +}
1338 +
1339 +/*
1340 + * release all lower object references & free the file info structure
1341 + * (returns the unionfs_file_revalidate error, without freeing, if that fails)
1342 + * No need to grab sb info's rwsem.
1343 + */
1344 +int unionfs_file_release(struct inode *inode, struct file *file)
1345 +{
1346 + struct file *lower_file = NULL;
1347 + struct unionfs_file_info *fileinfo;
1348 + struct unionfs_inode_info *inodeinfo;
1349 + struct super_block *sb = inode->i_sb;
1350 + int bindex, bstart, bend;
1351 + int fgen, err = 0;
1352 +
1353 + unionfs_read_lock(sb);
1354 + /*
1355 + * Yes, we have to revalidate this file even if it's being released.
1356 + * This is important for open-but-unlinked files, as well as mmap
1357 + * support.
1358 + */
1359 + if ((err = unionfs_file_revalidate(file, 1)))
1360 + goto out;
1361 + unionfs_check_file(file);
1362 + fileinfo = UNIONFS_F(file);
1363 + BUG_ON(file->f_path.dentry->d_inode != inode);
1364 + inodeinfo = UNIONFS_I(inode);
1365 +
1366 + /* fput all the lower files */
1367 + fgen = atomic_read(&fileinfo->generation); /* NOTE(review): fgen is not used afterwards in this function */
1368 + bstart = fbstart(file);
1369 + bend = fbend(file);
1370 +
1371 + for (bindex = bstart; bindex <= bend; bindex++) {
1372 + lower_file = unionfs_lower_file_idx(file, bindex);
1373 +
1374 + if (lower_file) {
1375 + fput(lower_file);
1376 + branchput(sb, bindex);
1377 + }
1378 + }
1379 + kfree(fileinfo->lower_files);
1380 + kfree(fileinfo->saved_branch_ids);
1381 +
1382 + if (fileinfo->rdstate) { /* hand the readdir state over to the inode's readdircache list */
1383 + fileinfo->rdstate->access = jiffies;
1384 + printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
1385 + "%u [%d.%lld]\n",
1386 + fileinfo->rdstate->cookie,
1387 + fileinfo->rdstate->bindex,
1388 + (long long)fileinfo->rdstate->dirpos);
1389 + spin_lock(&inodeinfo->rdlock);
1390 + inodeinfo->rdcount++;
1391 + list_add_tail(&fileinfo->rdstate->cache,
1392 + &inodeinfo->readdircache);
1393 + mark_inode_dirty(inode);
1394 + spin_unlock(&inodeinfo->rdlock);
1395 + fileinfo->rdstate = NULL;
1396 + }
1397 + kfree(fileinfo);
1398 +
1399 +out:
1400 + unionfs_read_unlock(sb);
1401 + return err;
1402 +}
1403 +
1404 +/* pass the ioctl to the lower fs; returns -ENOTTY if there is no usable lower file or it has no ioctl method */
1405 +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1406 +{
1407 + struct file *lower_file;
1408 + int err;
1409 +
1410 + lower_file = unionfs_lower_file(file);
1411 + err = -ENOTTY;
1412 + if (!lower_file || !lower_file->f_op) /* check before lower_file is handed to the security hook */
1413 + goto out;
1414 +
1415 + err = security_file_ioctl(lower_file, cmd, arg);
1416 + if (err)
1417 + goto out;
1418 + err = -ENOTTY; /* result when the lower fs has neither ioctl method */
1419 + if (lower_file->f_op->unlocked_ioctl) {
1420 + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1421 + } else if (lower_file->f_op->ioctl) {
1422 + lock_kernel(); /* the ->ioctl method is called under the big kernel lock */
1423 + err = lower_file->f_op->ioctl(lower_file->f_path.dentry->d_inode,
1424 + lower_file, cmd, arg);
1425 + unlock_kernel();
1426 + }
1427 +
1428 +out:
1429 + return err;
1430 +}
1431 +
1432 +/*
1433 + * return to user-space the branch indices containing the file in question
1434 + * (as an fd_set copied to arg); returns bend on success or a negative error
1435 + * We use fd_set and therefore we are limited to the number of the branches
1436 + * to FD_SETSIZE, which is currently 1024 - plenty for most people
1437 + */
1438 +static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
1439 + unsigned long arg)
1440 +{
1441 + int err = 0;
1442 + fd_set branchlist;
1443 + int bstart = 0, bend = 0, bindex = 0;
1444 + int orig_bstart, orig_bend;
1445 + struct dentry *dentry, *lower_dentry;
1446 + struct vfsmount *mnt;
1447 +
1448 + dentry = file->f_path.dentry;
1449 + unionfs_lock_dentry(dentry);
1450 + orig_bstart = dbstart(dentry); /* saved so the range can be restored below */
1451 + orig_bend = dbend(dentry);
1452 + if ((err = unionfs_partial_lookup(dentry)))
1453 + goto out;
1454 + bstart = dbstart(dentry);
1455 + bend = dbend(dentry);
1456 +
1457 + FD_ZERO(&branchlist);
1458 +
1459 + for (bindex = bstart; bindex <= bend; bindex++) {
1460 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1461 + if (!lower_dentry)
1462 + continue;
1463 + if (lower_dentry->d_inode) /* positive lower dentry: the file exists in this branch */
1464 + FD_SET(bindex, &branchlist);
1465 + /* purge any lower objects after partial_lookup */
1466 + if (bindex < orig_bstart || bindex > orig_bend) {
1467 + dput(lower_dentry);
1468 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1469 + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1470 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1471 + NULL);
1472 + mnt = unionfs_lower_mnt_idx(dentry, bindex);
1473 + if (!mnt)
1474 + continue;
1475 + unionfs_mntput(dentry, bindex);
1476 + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1477 + }
1478 + }
1479 + /* restore original dentry's offsets */
1480 + set_dbstart(dentry, orig_bstart);
1481 + set_dbend(dentry, orig_bend);
1482 + ibstart(dentry->d_inode) = orig_bstart;
1483 + ibend(dentry->d_inode) = orig_bend;
1484 +
1485 + err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1486 + if (err) /* any uncopied bytes are reported as -EFAULT */
1487 + err = -EFAULT;
1488 +
1489 +out:
1490 + unionfs_unlock_dentry(dentry);
1491 + return err < 0 ? err : bend;
1492 +}
1493 +
1494 +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1495 +{
1496 + long err;
1497 +
1498 + unionfs_read_lock(file->f_path.dentry->d_sb);
1499 +
1500 + if ((err = unionfs_file_revalidate(file, 1))) /* revalidate before touching any lower file */
1501 + goto out;
1502 +
1503 + /* check if asked for local commands */
1504 + switch (cmd) {
1505 + case UNIONFS_IOCTL_INCGEN:
1506 + /* Increment the superblock generation count */
1507 + printk(KERN_WARNING "unionfs: incgen ioctl deprecated; "
1508 + "use \"-o remount,incgen\"\n");
1509 + err = -ENOSYS; /* no longer implemented as an ioctl */
1510 + break;
1511 +
1512 + case UNIONFS_IOCTL_QUERYFILE:
1513 + /* Return list of branches containing the given file */
1514 + err = unionfs_ioctl_queryfile(file, cmd, arg);
1515 + break;
1516 +
1517 + default:
1518 + /* pass the ioctl down */
1519 + err = do_ioctl(file, cmd, arg);
1520 + break;
1521 + }
1522 +
1523 +out:
1524 + unionfs_read_unlock(file->f_path.dentry->d_sb);
1525 + unionfs_check_file(file);
1526 + return err;
1527 +}
1528 +
1529 +int unionfs_flush(struct file *file, fl_owner_t id)
1530 +{
1531 + int err = 0;
1532 + struct file *lower_file = NULL;
1533 + struct dentry *dentry = file->f_path.dentry;
1534 + int bindex, bstart, bend;
1535 +
1536 + unionfs_read_lock(dentry->d_sb);
1537 +
1538 + if ((err = unionfs_file_revalidate(file, 1)))
1539 + goto out;
1540 + unionfs_check_file(file);
1541 +
1542 + if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens))
1543 + goto out; /* only the flush that drops totalopens to zero goes on to the lower files */
1544 +
1545 + unionfs_lock_dentry(dentry);
1546 +
1547 + bstart = fbstart(file);
1548 + bend = fbend(file);
1549 + for (bindex = bstart; bindex <= bend; bindex++) {
1550 + lower_file = unionfs_lower_file_idx(file, bindex);
1551 +
1552 + if (lower_file && lower_file->f_op &&
1553 + lower_file->f_op->flush) {
1554 + err = lower_file->f_op->flush(lower_file, id);
1555 + if (err)
1556 + goto out_lock;
1557 +
1558 + /* if the unionfs dentry is deleted (d_deleted), drop this branch's lower dentry */
1559 + if (d_deleted(dentry)) {
1560 + dput(unionfs_lower_dentry_idx(dentry, bindex));
1561 + unionfs_set_lower_dentry_idx(dentry, bindex,
1562 + NULL);
1563 + }
1564 + }
1565 +
1566 + }
1567 +
1568 + /* on success, update our times */
1569 + unionfs_copy_attr_times(dentry->d_inode);
1570 + /* parent time could have changed too (async) */
1571 + unionfs_copy_attr_times(dentry->d_parent->d_inode);
1572 +
1573 +out_lock:
1574 + unionfs_unlock_dentry(dentry);
1575 +out:
1576 + unionfs_read_unlock(dentry->d_sb);
1577 + unionfs_check_file(file);
1578 + return err;
1579 +}
1580 diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1581 new file mode 100644
1582 index 0000000..fb7a2de
1583 --- /dev/null
1584 +++ b/fs/unionfs/copyup.c
1585 @@ -0,0 +1,880 @@
1586 +/*
1587 + * Copyright (c) 2003-2007 Erez Zadok
1588 + * Copyright (c) 2003-2006 Charles P. Wright
1589 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1590 + * Copyright (c) 2005-2006 Junjiro Okajima
1591 + * Copyright (c) 2005 Arun M. Krishnakumar
1592 + * Copyright (c) 2004-2006 David P. Quigley
1593 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1594 + * Copyright (c) 2003 Puja Gupta
1595 + * Copyright (c) 2003 Harikesavan Krishnan
1596 + * Copyright (c) 2003-2007 Stony Brook University
1597 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
1598 + *
1599 + * This program is free software; you can redistribute it and/or modify
1600 + * it under the terms of the GNU General Public License version 2 as
1601 + * published by the Free Software Foundation.
1602 + */
1603 +
1604 +#include "union.h"
1605 +
1606 +/*
1607 + * For detailed explanation of copyup see:
1608 + * Documentation/filesystems/unionfs/concepts.txt
1609 + */
1610 +
1611 +#ifdef CONFIG_UNION_FS_XATTR
1612 +/* copyup all extended attrs for a given dentry */
1613 +static int copyup_xattrs(struct dentry *old_lower_dentry,
1614 + struct dentry *new_lower_dentry)
1615 +{
1616 + int err = 0;
1617 + ssize_t list_size = -1;
1618 + char *name_list = NULL;
1619 + char *attr_value = NULL;
1620 + char *name_list_orig = NULL;
1621 +
1622 + /* query the actual size of the xattr list */
1623 + list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1624 + if (list_size <= 0) {
1625 + err = list_size;
1626 + goto out;
1627 + }
1628 +
1629 + /* allocate space for the actual list */
1630 + name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1631 + if (!name_list || IS_ERR(name_list)) {
1632 + err = PTR_ERR(name_list);
1633 + goto out;
1634 + }
1635 +
1636 + name_list_orig = name_list; /* save for kfree at end */
1637 +
1638 + /* now get the actual xattr list of the source file */
1639 + list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1640 + if (list_size <= 0) {
1641 + err = list_size;
1642 + goto out;
1643 + }
1644 +
1645 + /* allocate space to hold each xattr's value */
1646 + attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1647 + if (!attr_value || IS_ERR(attr_value)) {
1648 + err = PTR_ERR(name_list);
1649 + goto out;
1650 + }
1651 +
1652 + /* in a loop, get and set each xattr from src to dst file */
1653 + while (*name_list) {
1654 + ssize_t size;
1655 +
1656 + /* Lock here since vfs_getxattr doesn't lock for us */
1657 + mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1658 + size = vfs_getxattr(old_lower_dentry, name_list,
1659 + attr_value, XATTR_SIZE_MAX);
1660 + mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1661 + if (size < 0) {
1662 + err = size;
1663 + goto out;
1664 + }
1665 + if (size > XATTR_SIZE_MAX) {
1666 + err = -E2BIG;
1667 + goto out;
1668 + }
1669 + /* Don't lock here since vfs_setxattr does it for us. */
1670 + err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1671 + size, 0);
1672 + if (err < 0)
1673 + goto out;
1674 + name_list += strlen(name_list) + 1;
1675 + }
1676 +out:
1677 + if (name_list_orig)
1678 + kfree(name_list_orig);
1679 + if (attr_value)
1680 + kfree(attr_value);
1681 + /*
1682 + * Ignore if xattr isn't supported. Also ignore EPERM because that
1683 + * requires CAP_SYS_ADMIN for security.* xattrs, but copyup happens
1684 + * as normal users.
1685 + */
1686 + if (err == -ENOTSUPP || err == -EOPNOTSUPP || err == -EPERM)
1687 + err = 0;
1688 + return err;
1689 +}
1690 +#endif /* CONFIG_UNION_FS_XATTR */
1691 +
1692 +/*
1693 + * Copy times and uid/gid, then the mode, from the old lower inode to the new lower dentry.
1694 + *
1695 + * Handle file systems which may not support certain options. For example
1696 + * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1697 + * errors, rather than propagating them up, which results in copyup errors
1698 + * and errors returned back to users.
1699 + */
1700 +static int copyup_permissions(struct super_block *sb,
1701 + struct dentry *old_lower_dentry,
1702 + struct dentry *new_lower_dentry)
1703 +{
1704 + struct inode *i = old_lower_dentry->d_inode;
1705 + struct iattr newattrs;
1706 + int err;
1707 +
1708 + newattrs.ia_atime = i->i_atime;
1709 + newattrs.ia_mtime = i->i_mtime;
1710 + newattrs.ia_ctime = i->i_ctime;
1711 + newattrs.ia_gid = i->i_gid;
1712 + newattrs.ia_uid = i->i_uid;
1713 + newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1714 + ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1715 + ATTR_GID | ATTR_UID;
1716 + err = notify_change(new_lower_dentry, &newattrs); /* first pass: times and ownership */
1717 + if (err)
1718 + goto out;
1719 +
1720 + /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1721 + newattrs.ia_mode = i->i_mode;
1722 + newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1723 + err = notify_change(new_lower_dentry, &newattrs); /* second pass: mode only */
1724 + if (err == -EOPNOTSUPP &&
1725 + S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1726 + printk(KERN_WARNING
1727 + "unionfs: changing \"%s\" symlink mode unsupported\n",
1728 + new_lower_dentry->d_name.name);
1729 + err = 0;
1730 + }
1731 +
1732 +out:
1733 + return err;
1734 +}
1735 +
1736 +/*
1737 + * create the new device/file/directory/symlink via run_sioq (helper chosen by
1738 + * the old inode's type) - use copyup_permission to copyup times, and mode
1739 + *
1740 + * if the object being copied up is a regular file, the file is only created,
1741 + * the contents have to be copied up separately
1742 + */
1743 +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1744 + struct dentry *new_lower_dentry,
1745 + struct dentry *new_lower_parent_dentry,
1746 + char *symbuf)
1747 +{
1748 + int err = 0;
1749 + umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1750 + struct sioq_args args;
1751 +
1752 + if (S_ISDIR(old_mode)) {
1753 + args.mkdir.parent = new_lower_parent_dentry->d_inode;
1754 + args.mkdir.dentry = new_lower_dentry;
1755 + args.mkdir.mode = old_mode;
1756 +
1757 + run_sioq(__unionfs_mkdir, &args);
1758 + err = args.err; /* result is read back from args after run_sioq returns */
1759 + } else if (S_ISLNK(old_mode)) {
1760 + args.symlink.parent = new_lower_parent_dentry->d_inode;
1761 + args.symlink.dentry = new_lower_dentry;
1762 + args.symlink.symbuf = symbuf; /* symbuf is only used for symlinks */
1763 + args.symlink.mode = old_mode;
1764 +
1765 + run_sioq(__unionfs_symlink, &args);
1766 + err = args.err;
1767 + } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1768 + S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1769 + args.mknod.parent = new_lower_parent_dentry->d_inode;
1770 + args.mknod.dentry = new_lower_dentry;
1771 + args.mknod.mode = old_mode;
1772 + args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1773 +
1774 + run_sioq(__unionfs_mknod, &args);
1775 + err = args.err;
1776 + } else if (S_ISREG(old_mode)) {
1777 + args.create.parent = new_lower_parent_dentry->d_inode;
1778 + args.create.dentry = new_lower_dentry;
1779 + args.create.mode = old_mode;
1780 + args.create.nd = NULL;
1781 +
1782 + run_sioq(__unionfs_create, &args);
1783 + err = args.err;
1784 + } else {
1785 + printk(KERN_ERR "unionfs: unknown inode type %d\n",
1786 + old_mode);
1787 + BUG(); /* no other inode type is expected here */
1788 + }
1789 +
1790 + return err;
1791 +}
1792 +
1793 +static int __copyup_reg_data(struct dentry *dentry,
1794 + struct dentry *new_lower_dentry, int new_bindex,
1795 + struct dentry *old_lower_dentry, int old_bindex,
1796 + struct file **copyup_file, loff_t len)
1797 +{
1798 + struct super_block *sb = dentry->d_sb;
1799 + struct file *input_file;
1800 + struct file *output_file;
1801 + struct vfsmount *output_mnt;
1802 + mm_segment_t old_fs;
1803 + char *buf = NULL;
1804 + ssize_t read_bytes, write_bytes;
1805 + loff_t size;
1806 + int err = 0;
1807 +
1808 + /* open old file */
1809 + unionfs_mntget(dentry, old_bindex);
1810 + branchget(sb, old_bindex);
1811 + /* dentry_open calls dput and mntput if it returns an error */
1812 + input_file = dentry_open(old_lower_dentry,
1813 + unionfs_lower_mnt_idx(dentry, old_bindex),
1814 + O_RDONLY | O_LARGEFILE);
1815 + if (IS_ERR(input_file)) {
1816 + dput(old_lower_dentry);
1817 + err = PTR_ERR(input_file);
1818 + goto out;
1819 + }
1820 + if (!input_file->f_op || !input_file->f_op->read) {
1821 + err = -EINVAL;
1822 + goto out_close_in;
1823 + }
1824 +
1825 + /* open new file */
1826 + dget(new_lower_dentry);
1827 + output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1828 + branchget(sb, new_bindex);
1829 + output_file = dentry_open(new_lower_dentry, output_mnt,
1830 + O_RDWR | O_LARGEFILE);
1831 + if (IS_ERR(output_file)) {
1832 + err = PTR_ERR(output_file);
1833 + goto out_close_in2;
1834 + }
1835 + if (!output_file->f_op || !output_file->f_op->write) {
1836 + err = -EINVAL;
1837 + goto out_close_out;
1838 + }
1839 +
1840 + /* allocating a buffer */
1841 + buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1842 + if (!buf) {
1843 + err = -ENOMEM;
1844 + goto out_close_out;
1845 + }
1846 +
1847 + input_file->f_pos = 0;
1848 + output_file->f_pos = 0;
1849 +
1850 + old_fs = get_fs();
1851 + set_fs(KERNEL_DS);
1852 +
1853 + size = len;
1854 + err = 0;
1855 + do {
1856 + if (len >= PAGE_SIZE)
1857 + size = PAGE_SIZE;
1858 + else if ((len < PAGE_SIZE) && (len > 0))
1859 + size = len;
1860 +
1861 + len -= PAGE_SIZE;
1862 +
1863 + read_bytes =
1864 + input_file->f_op->read(input_file,
1865 + (char __user *)buf, size,
1866 + &input_file->f_pos);
1867 + if (read_bytes <= 0) {
1868 + err = read_bytes;
1869 + break;
1870 + }
1871 +
1872 + write_bytes =
1873 + output_file->f_op->write(output_file,
1874 + (char __user *)buf,
1875 + read_bytes,
1876 + &output_file->f_pos);
1877 + if ((write_bytes < 0) || (write_bytes < read_bytes)) {
1878 + err = write_bytes;
1879 + break;
1880 + }
1881 + } while ((read_bytes > 0) && (len > 0));
1882 +
1883 + set_fs(old_fs);
1884 +
1885 + kfree(buf);
1886 +
1887 + if (!err)
1888 + err = output_file->f_op->fsync(output_file,
1889 + new_lower_dentry, 0);
1890 +
1891 + if (err)
1892 + goto out_close_out;
1893 +
1894 + if (copyup_file) {
1895 + *copyup_file = output_file;
1896 + goto out_close_in;
1897 + }
1898 +
1899 +out_close_out:
1900 + fput(output_file);
1901 +
1902 +out_close_in2:
1903 + branchput(sb, new_bindex);
1904 +
1905 +out_close_in:
1906 + fput(input_file);
1907 +
1908 +out:
1909 + branchput(sb, old_bindex);
1910 +
1911 + return err;
1912 +}
1913 +
1914 +/*
1915 + * dput the lower references for old and new dentry & clear a lower dentry
1916 + * pointer
1917 + */
1918 +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
1919 + int old_bstart, int old_bend,
1920 + struct dentry *new_lower_dentry, int new_bindex)
1921 +{
1922 + /* get rid of the lower dentry and all its traces */
1923 + unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
1924 + set_dbstart(dentry, old_bstart);
1925 + set_dbend(dentry, old_bend);
1926 +
1927 + dput(new_lower_dentry);
1928 + dput(old_lower_dentry);
1929 +}
1930 +
1931 +/*
1932 + * Copy up a dentry to a file of specified name.
1933 + *
1934 + * @dir: used to pull the ->i_sb to access other branches
1935 + * @dentry: the non-negative dentry whose lower_inode we should copy
1936 + * @bstart: the branch of the lower_inode to copy from
1937 + * @new_bindex: the branch to create the new file in
1938 + * @name: the name of the file to create
1939 + * @namelen: length of @name
1940 + * @copyup_file: the "struct file" to return (optional)
1941 + * @len: how many bytes to copy-up?
1942 + */
1943 +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
1944 + int new_bindex, const char *name, int namelen,
1945 + struct file **copyup_file, loff_t len)
1946 +{
1947 + struct dentry *new_lower_dentry;
1948 + struct dentry *old_lower_dentry = NULL;
1949 + struct super_block *sb;
1950 + int err = 0;
1951 + int old_bindex;
1952 + int old_bstart;
1953 + int old_bend;
1954 + struct dentry *new_lower_parent_dentry = NULL;
1955 + mm_segment_t oldfs;
1956 + char *symbuf = NULL;
1957 +
1958 + verify_locked(dentry);
1959 +
1960 + old_bindex = bstart;
1961 + old_bstart = dbstart(dentry);
1962 + old_bend = dbend(dentry);
1963 +
1964 + BUG_ON(new_bindex < 0);
1965 + BUG_ON(new_bindex >= old_bindex);
1966 +
1967 + sb = dir->i_sb;
1968 +
1969 + if ((err = is_robranch_super(sb, new_bindex)))
1970 + goto out;
1971 +
1972 + /* Create the directory structure above this dentry. */
1973 + new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
1974 + if (IS_ERR(new_lower_dentry)) {
1975 + err = PTR_ERR(new_lower_dentry);
1976 + goto out;
1977 + }
1978 +
1979 + old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
1980 + /* we conditionally dput this old_lower_dentry at end of function */
1981 + dget(old_lower_dentry);
1982 +
1983 + /* For symlinks, we must read the link before we lock the directory. */
1984 + if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
1985 +
1986 + symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
1987 + if (!symbuf) {
1988 + __clear(dentry, old_lower_dentry,
1989 + old_bstart, old_bend,
1990 + new_lower_dentry, new_bindex);
1991 + err = -ENOMEM;
1992 + goto out_free;
1993 + }
1994 +
1995 + oldfs = get_fs();
1996 + set_fs(KERNEL_DS);
1997 + err = old_lower_dentry->d_inode->i_op->readlink(
1998 + old_lower_dentry,
1999 + (char __user *)symbuf,
2000 + PATH_MAX);
2001 + set_fs(oldfs);
2002 + if (err < 0) {
2003 + __clear(dentry, old_lower_dentry,
2004 + old_bstart, old_bend,
2005 + new_lower_dentry, new_bindex);
2006 + goto out_free;
2007 + }
2008 + symbuf[err] = '\0';
2009 + }
2010 +
2011 + /* Now we lock the parent, and create the object in the new branch. */
2012 + new_lower_parent_dentry = lock_parent(new_lower_dentry);
2013 +
2014 + /* create the new inode */
2015 + err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2016 + new_lower_parent_dentry, symbuf);
2017 +
2018 + if (err) {
2019 + __clear(dentry, old_lower_dentry,
2020 + old_bstart, old_bend,
2021 + new_lower_dentry, new_bindex);
2022 + goto out_unlock;
2023 + }
2024 +
2025 + /* We actually copyup the file here. */
2026 + if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2027 + err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2028 + old_lower_dentry, old_bindex,
2029 + copyup_file, len);
2030 + if (err)
2031 + goto out_unlink;
2032 +
2033 + /* Set permissions. */
2034 + if ((err = copyup_permissions(sb, old_lower_dentry,
2035 + new_lower_dentry)))
2036 + goto out_unlink;
2037 +
2038 +#ifdef CONFIG_UNION_FS_XATTR
2039 + /* Selinux uses extended attributes for permissions. */
2040 + if ((err = copyup_xattrs(old_lower_dentry, new_lower_dentry)))
2041 + goto out_unlink;
2042 +#endif /* CONFIG_UNION_FS_XATTR */
2043 +
2044 + /* do not allow files getting deleted to be re-interposed */
2045 + if (!d_deleted(dentry))
2046 + unionfs_reinterpose(dentry);
2047 +
2048 + goto out_unlock;
2049 +
2050 +out_unlink:
2051 + /*
2052 + * copyup failed, because we possibly ran out of space or
2053 + * quota, or something else happened so let's unlink; we don't
2054 + * really care about the return value of vfs_unlink
2055 + */
2056 + vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2057 +
2058 + if (copyup_file) {
2059 + /* need to close the file */
2060 +
2061 + fput(*copyup_file);
2062 + branchput(sb, new_bindex);
2063 + }
2064 +
2065 + /*
2066 + * TODO: should we reset the error to something like -EIO?
2067 + *
2068 + * If we don't reset, the user may get some nonsensical errors, but
2069 + * on the other hand, if we reset to EIO, we guarantee that the user
2070 + * will get a "confusing" error message.
2071 + */
2072 +
2073 +out_unlock:
2074 + unlock_dir(new_lower_parent_dentry);
2075 +
2076 +out_free:
2077 + /*
2078 + * If old_lower_dentry was a directory, we need to dput it. If it
2079 + * was a file, then it was already dput indirectly by other
2080 + * functions we call above which operate on regular files.
2081 + */
2082 + if (old_lower_dentry && old_lower_dentry->d_inode &&
2083 + (S_ISDIR(old_lower_dentry->d_inode->i_mode) ||
2084 + S_ISLNK(old_lower_dentry->d_inode->i_mode)))
2085 + dput(old_lower_dentry);
2086 + kfree(symbuf);
2087 +
2088 + if (err)
2089 + goto out;
2090 + if (!S_ISDIR(dentry->d_inode->i_mode)) {
2091 + unionfs_purge_extras(dentry);
2092 + if (!unionfs_lower_inode(dentry->d_inode)) {
2093 + /*
2094 + * If we got here, then we copied up to an
2095 + * unlinked-open file, whose name is .unionfsXXXXX.
2096 + */
2097 + struct inode *inode = new_lower_dentry->d_inode;
2098 + atomic_inc(&inode->i_count);
2099 + unionfs_set_lower_inode_idx(dentry->d_inode,
2100 + ibstart(dentry->d_inode),
2101 + inode);
2102 + }
2103 + }
2104 + unionfs_inherit_mnt(dentry);
2105 + /* sync inode times from copied-up inode to our inode */
2106 + unionfs_copy_attr_times(dentry->d_inode);
2107 + unionfs_check_inode(dir);
2108 + unionfs_check_dentry(dentry);
2109 +out:
2110 + return err;
2111 +}
2112 +
2113 +/*
2114 + * This function creates a copy of a file represented by 'file' which
2115 + * currently resides in branch 'bstart' to branch 'new_bindex.' The copy
2116 + * will be named "name".
2117 + */
2118 +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2119 + int bstart, int new_bindex, loff_t len)
2120 +{
2121 + int err = 0;
2122 + struct file *output_file = NULL;
2123 +
2124 + err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2125 + name, strlen(name), &output_file, len);
2126 + if (!err) {
2127 + fbstart(file) = new_bindex;
2128 + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2129 + }
2130 +
2131 + return err;
2132 +}
2133 +
2134 +/*
2135 + * This function creates a copy of a file represented by 'file' which
2136 + * currently resides in branch 'bstart' to branch 'new_bindex'.
2137 + */
2138 +int copyup_file(struct inode *dir, struct file *file, int bstart,
2139 + int new_bindex, loff_t len)
2140 +{
2141 + int err = 0;
2142 + struct file *output_file = NULL;
2143 + struct dentry *dentry = file->f_path.dentry;
2144 +
2145 + err = copyup_dentry(dir, dentry, bstart, new_bindex,
2146 + dentry->d_name.name, dentry->d_name.len,
2147 + &output_file, len);
2148 + if (!err) {
2149 + fbstart(file) = new_bindex;
2150 + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2151 + }
2152 +
2153 + return err;
2154 +}
2155 +
2156 +/* purge a dentry's lower-branch states (dput/mntput, etc.) */
2157 +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2158 + int old_bstart, int old_bend)
2159 +{
2160 + int loop_start;
2161 + int loop_end;
2162 + int new_bstart = -1;
2163 + int new_bend = -1;
2164 + int i;
2165 +
2166 + loop_start = min(old_bstart, bindex);
2167 + loop_end = max(old_bend, bindex);
2168 +
2169 + /*
2170 + * This loop sets the bstart and bend for the new dentry by
2171 + * traversing from left to right. It also dputs all negative
2172 + * dentries except bindex
2173 + */
2174 + for (i = loop_start; i <= loop_end; i++) {
2175 + if (!unionfs_lower_dentry_idx(dentry, i))
2176 + continue;
2177 +
2178 + if (i == bindex) {
2179 + new_bend = i;
2180 + if (new_bstart < 0)
2181 + new_bstart = i;
2182 + continue;
2183 + }
2184 +
2185 + if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2186 + dput(unionfs_lower_dentry_idx(dentry, i));
2187 + unionfs_set_lower_dentry_idx(dentry, i, NULL);
2188 +
2189 + unionfs_mntput(dentry, i);
2190 + unionfs_set_lower_mnt_idx(dentry, i, NULL);
2191 + } else {
2192 + if (new_bstart < 0)
2193 + new_bstart = i;
2194 + new_bend = i;
2195 + }
2196 + }
2197 +
2198 + if (new_bstart < 0)
2199 + new_bstart = bindex;
2200 + if (new_bend < 0)
2201 + new_bend = bindex;
2202 + set_dbstart(dentry, new_bstart);
2203 + set_dbend(dentry, new_bend);
2204 +
2205 +}
2206 +
2207 +/* set lower inode ptr and update bstart & bend if necessary */
2208 +static void __set_inode(struct dentry *upper, struct dentry *lower,
2209 + int bindex)
2210 +{
2211 + unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2212 + igrab(lower->d_inode));
2213 + if (likely(ibstart(upper->d_inode) > bindex))
2214 + ibstart(upper->d_inode) = bindex;
2215 + if (likely(ibend(upper->d_inode) < bindex))
2216 + ibend(upper->d_inode) = bindex;
2217 +
2218 +}
2219 +
/*
 * Store lower as the lower dentry of upper at bindex, then widen upper's
 * bstart/bend so that the range covers bindex.
 */
static void __set_dentry(struct dentry *upper, struct dentry *lower,
			 int bindex)
{
	unionfs_set_lower_dentry_idx(upper, bindex, lower);

	/* extend the left edge of the range if bindex lies before it */
	if (likely(dbstart(upper) > bindex))
		set_dbstart(upper, bindex);

	/* extend the right edge of the range if bindex lies after it */
	if (likely(dbend(upper) < bindex))
		set_dbend(upper, bindex);
}
2230 +
2231 +/*
2232 + * This function replicates the directory structure up-to given dentry
2233 + * in the bindex branch.
2234 + */
2235 +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2236 + const char *name, int bindex)
2237 +{
2238 + int err;
2239 + struct dentry *child_dentry;
2240 + struct dentry *parent_dentry;
2241 + struct dentry *lower_parent_dentry = NULL;
2242 + struct dentry *lower_dentry = NULL;
2243 + const char *childname;
2244 + unsigned int childnamelen;
2245 + int nr_dentry;
2246 + int count = 0;
2247 + int old_bstart;
2248 + int old_bend;
2249 + struct dentry **path = NULL;
2250 + struct super_block *sb;
2251 +
2252 + verify_locked(dentry);
2253 +
2254 + if ((err = is_robranch_super(dir->i_sb, bindex))) {
2255 + lower_dentry = ERR_PTR(err);
2256 + goto out;
2257 + }
2258 +
2259 + old_bstart = dbstart(dentry);
2260 + old_bend = dbend(dentry);
2261 +
2262 + lower_dentry = ERR_PTR(-ENOMEM);
2263 +
2264 + /* There is no sense allocating any less than the minimum. */
2265 + nr_dentry = 1;
2266 + path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2267 + if (!path)
2268 + goto out;
2269 +
2270 + /* assume the negative dentry of unionfs as the parent dentry */
2271 + parent_dentry = dentry;
2272 +
2273 + /*
2274 + * This loop finds the first parent that exists in the given branch.
2275 + * We start building the directory structure from there. At the end
2276 + * of the loop, the following should hold:
2277 + * - child_dentry is the first nonexistent child
2278 + * - parent_dentry is the first existent parent
2279 + * - path[0] is the = deepest child
2280 + * - path[count] is the first child to create
2281 + */
2282 + do {
2283 + child_dentry = parent_dentry;
2284 +
2285 + /* find the parent directory dentry in unionfs */
2286 + parent_dentry = child_dentry->d_parent;
2287 + unionfs_lock_dentry(parent_dentry);
2288 +
2289 + /* find out the lower_parent_dentry in the given branch */
2290 + lower_parent_dentry =
2291 + unionfs_lower_dentry_idx(parent_dentry, bindex);
2292 +
2293 + /* grow path table */
2294 + if (count == nr_dentry) {
2295 + void *p;
2296 +
2297 + nr_dentry *= 2;
2298 + p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2299 + GFP_KERNEL);
2300 + if (!p) {
2301 + lower_dentry = ERR_PTR(-ENOMEM);
2302 + goto out;
2303 + }
2304 + path = p;
2305 + }
2306 +
2307 + /* store the child dentry */
2308 + path[count++] = child_dentry;
2309 + } while (!lower_parent_dentry);
2310 + count--;
2311 +
2312 + sb = dentry->d_sb;
2313 +
2314 + /*
2315 + * This code goes between the begin/end labels and basically
2316 + * emulates a while(child_dentry != dentry), only cleaner and
2317 + * shorter than what would be a much longer while loop.
2318 + */
2319 +begin:
2320 + /* get lower parent dir in the current branch */
2321 + lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2322 + unionfs_unlock_dentry(parent_dentry);
2323 +
2324 + /* init the values to lookup */
2325 + childname = child_dentry->d_name.name;
2326 + childnamelen = child_dentry->d_name.len;
2327 +
2328 + if (child_dentry != dentry) {
2329 + /* lookup child in the underlying file system */
2330 + lower_dentry = lookup_one_len(childname, lower_parent_dentry,
2331 + childnamelen);
2332 + if (IS_ERR(lower_dentry))
2333 + goto out;
2334 + } else {
2335 + /*
2336 + * Is the name a whiteout of the child name ? lookup the
2337 + * whiteout child in the underlying file system
2338 + */
2339 + lower_dentry = lookup_one_len(name, lower_parent_dentry,
2340 + strlen(name));
2341 + if (IS_ERR(lower_dentry))
2342 + goto out;
2343 +
2344 + /* Replace the current dentry (if any) with the new one */
2345 + dput(unionfs_lower_dentry_idx(dentry, bindex));
2346 + unionfs_set_lower_dentry_idx(dentry, bindex,
2347 + lower_dentry);
2348 +
2349 + __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2350 + goto out;
2351 + }
2352 +
2353 + if (lower_dentry->d_inode) {
2354 + /*
2355 + * since this already exists we dput to avoid
2356 + * multiple references on the same dentry
2357 + */
2358 + dput(lower_dentry);
2359 + } else {
2360 + struct sioq_args args;
2361 +
2362 + /* it's a negative dentry, create a new dir */
2363 + lower_parent_dentry = lock_parent(lower_dentry);
2364 +
2365 + args.mkdir.parent = lower_parent_dentry->d_inode;
2366 + args.mkdir.dentry = lower_dentry;
2367 + args.mkdir.mode = child_dentry->d_inode->i_mode;
2368 +
2369 + run_sioq(__unionfs_mkdir, &args);
2370 + err = args.err;
2371 +
2372 + if (!err)
2373 + err = copyup_permissions(dir->i_sb, child_dentry,
2374 + lower_dentry);
2375 + unlock_dir(lower_parent_dentry);
2376 + if (err) {
2377 + struct inode *inode = lower_dentry->d_inode;
2378 + /*
2379 + * If we get here, it means that we created a new
2380 + * dentry+inode, but copying permissions failed.
2381 + * Therefore, we should delete this inode and dput
2382 + * the dentry so as not to leave cruft behind.
2383 + */
2384 + if (lower_dentry->d_op && lower_dentry->d_op->d_iput)
2385 + lower_dentry->d_op->d_iput(lower_dentry,
2386 + inode);
2387 + else
2388 + iput(inode);
2389 + lower_dentry->d_inode = NULL;
2390 + dput(lower_dentry);
2391 + lower_dentry = ERR_PTR(err);
2392 + goto out;
2393 + }
2394 +
2395 + }
2396 +
2397 + __set_inode(child_dentry, lower_dentry, bindex);
2398 + __set_dentry(child_dentry, lower_dentry, bindex);
2399 + /*
2400 + * update times of this dentry, but also the parent, because if
2401 + * we changed, the parent may have changed too.
2402 + */
2403 + unionfs_copy_attr_times(parent_dentry->d_inode);
2404 + unionfs_copy_attr_times(child_dentry->d_inode);
2405 +
2406 + parent_dentry = child_dentry;
2407 + child_dentry = path[--count];
2408 + goto begin;
2409 +out:
2410 + /* cleanup any leftover locks from the do/while loop above */
2411 + if (IS_ERR(lower_dentry))
2412 + while (count)
2413 + unionfs_unlock_dentry(path[count--]);
2414 + kfree(path);
2415 + return lower_dentry;
2416 +}
2417 +
2418 +/* set lower mnt of dentry+parents to the first parent node that has an mnt */
2419 +void unionfs_inherit_mnt(struct dentry *dentry)
2420 +{
2421 + struct dentry *parent, *hasone;
2422 + int bindex = dbstart(dentry);
2423 +
2424 + if (unionfs_lower_mnt_idx(dentry, bindex))
2425 + return;
2426 + hasone = dentry->d_parent;
2427 + /* this loop should stop at root dentry */
2428 + while (!unionfs_lower_mnt_idx(hasone, bindex)) {
2429 + hasone = hasone->d_parent;
2430 + }
2431 + parent = dentry;
2432 + while (!unionfs_lower_mnt_idx(parent, bindex)) {
2433 + unionfs_set_lower_mnt_idx(parent, bindex,
2434 + unionfs_mntget(hasone, bindex));
2435 + parent = parent->d_parent;
2436 + }
2437 +}
2438 +
2439 +/*
2440 + * Regular files should have only one lower object(s). On copyup, we may
2441 + * have leftover objects from previous branches. So purge all such extra
2442 + * objects and keep only the most recent, leftmost, copied-up one.
2443 + */
2444 +void unionfs_purge_extras(struct dentry *dentry)
2445 +{
2446 + int bindex;
2447 +
2448 + BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2449 + for (bindex=dbstart(dentry)+1; bindex<=dbend(dentry); bindex++) {
2450 + if (unionfs_lower_mnt_idx(dentry, bindex)) {
2451 + unionfs_mntput(dentry, bindex);
2452 + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
2453 + }
2454 + if (unionfs_lower_dentry_idx(dentry, bindex)) {
2455 + dput(unionfs_lower_dentry_idx(dentry, bindex));
2456 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
2457 + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
2458 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
2459 + NULL);
2460 + }
2461 + }
2462 + bindex = dbstart(dentry);
2463 + set_dbend(dentry, bindex);
2464 + ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bindex;
2465 +}
2466 diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2467 new file mode 100644
2468 index 0000000..94f0e84
2469 --- /dev/null
2470 +++ b/fs/unionfs/debug.c
2471 @@ -0,0 +1,494 @@
2472 +/*
2473 + * Copyright (c) 2003-2007 Erez Zadok
2474 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2475 + * Copyright (c) 2003-2007 Stony Brook University
2476 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2477 + *
2478 + * This program is free software; you can redistribute it and/or modify
2479 + * it under the terms of the GNU General Public License version 2 as
2480 + * published by the Free Software Foundation.
2481 + */
2482 +
2483 +#include "union.h"
2484 +
2485 +/*
2486 + * Helper debugging functions for maintainers (and for users to report
2487 + * useful information back to maintainers).
2488 + */
2489 +
/*
 * Print the calling location once per check function.  Expects the
 * enclosing function to provide fname, fxn and line, plus an int
 * printed_caller that starts at 0 and is set once the line is printed.
 */
#define PRINT_CALLER()							\
	do {								\
		if (!printed_caller) {					\
			printk("PC:%s:%s:%d\n", fname, fxn, line);	\
			printed_caller = 1;				\
		}							\
	} while (0)
2498 +
2499 +/*
2500 + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2501 + * the fan-out of various Unionfs objects. We check that no lower objects
2502 + * exist outside the start/end branch range; that all objects within are
2503 + * non-NULL (with some allowed exceptions); that for every lower file
2504 + * there's a lower dentry+inode; that the start/end ranges match for all
2505 + * corresponding lower objects; that open files/symlinks have only one lower
2506 + * objects, but directories can have several; and more.
2507 + */
2508 +void __unionfs_check_inode(const struct inode *inode,
2509 + const char *fname, const char *fxn, int line)
2510 +{
2511 + int bindex;
2512 + int istart, iend;
2513 + struct inode *lower_inode;
2514 + struct super_block *sb;
2515 + int printed_caller = 0;
2516 +
2517 + /* for inodes now */
2518 + BUG_ON(!inode);
2519 + sb = inode->i_sb;
2520 + istart = ibstart(inode);
2521 + iend = ibend(inode);
2522 + if (istart > iend) {
2523 + PRINT_CALLER();
2524 + printk(" Ci0: inode=%p istart/end=%d:%d\n",
2525 + inode, istart, iend);
2526 + }
2527 + if ((istart == -1 && iend != -1) ||
2528 + (istart != -1 && iend == -1)) {
2529 + PRINT_CALLER();
2530 + printk(" Ci1: inode=%p istart/end=%d:%d\n",
2531 + inode, istart, iend);
2532 + }
2533 + if (!S_ISDIR(inode->i_mode)) {
2534 + if (iend != istart) {
2535 + PRINT_CALLER();
2536 + printk(" Ci2: inode=%p istart=%d iend=%d\n",
2537 + inode, istart, iend);
2538 + }
2539 + }
2540 +
2541 + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2542 + if (!UNIONFS_I(inode)) {
2543 + PRINT_CALLER();
2544 + printk(" Ci3: no inode_info %p\n", inode);
2545 + return;
2546 + }
2547 + if (!UNIONFS_I(inode)->lower_inodes) {
2548 + PRINT_CALLER();
2549 + printk(" Ci4: no lower_inodes %p\n", inode);
2550 + return;
2551 + }
2552 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2553 + if (lower_inode) {
2554 + if (bindex < istart || bindex > iend) {
2555 + PRINT_CALLER();
2556 + printk(" Ci5: inode/linode=%p:%p bindex=%d "
2557 + "istart/end=%d:%d\n", inode,
2558 + lower_inode, bindex, istart, iend);
2559 + } else if ((int)lower_inode == 0x5a5a5a5a) {
2560 + /* freed inode! */
2561 + PRINT_CALLER();
2562 + printk(" Ci6: inode/linode=%p:%p bindex=%d "
2563 + "istart/end=%d:%d\n", inode,
2564 + lower_inode, bindex, istart, iend);
2565 + }
2566 + } else { /* lower_inode == NULL */
2567 + if (bindex >= istart && bindex <= iend) {
2568 + /*
2569 + * directories can have NULL lower inodes in
2570 + * b/t start/end, but NOT if at the
2571 + * start/end range.
2572 + */
2573 + if (!(S_ISDIR(inode->i_mode) &&
2574 + bindex > istart && bindex < iend)) {
2575 + PRINT_CALLER();
2576 + printk(" Ci7: inode/linode=%p:%p "
2577 + "bindex=%d istart/end=%d:%d\n",
2578 + inode, lower_inode, bindex,
2579 + istart, iend);
2580 + }
2581 + }
2582 + }
2583 + }
2584 +}
2585 +
2586 +void __unionfs_check_dentry(const struct dentry *dentry,
2587 + const char *fname, const char *fxn, int line)
2588 +{
2589 + int bindex;
2590 + int dstart, dend, istart, iend;
2591 + struct dentry *lower_dentry;
2592 + struct inode *inode, *lower_inode;
2593 + struct super_block *sb;
2594 + struct vfsmount *lower_mnt;
2595 + int printed_caller = 0;
2596 +
2597 + BUG_ON(!dentry);
2598 + sb = dentry->d_sb;
2599 + inode = dentry->d_inode;
2600 + dstart = dbstart(dentry);
2601 + dend = dbend(dentry);
2602 + BUG_ON(dstart > dend);
2603 +
2604 + if ((dstart == -1 && dend != -1) ||
2605 + (dstart != -1 && dend == -1)) {
2606 + PRINT_CALLER();
2607 + printk(" CD0: dentry=%p dstart/end=%d:%d\n",
2608 + dentry, dstart, dend);
2609 + }
2610 + /*
2611 + * check for NULL dentries inside the start/end range, or
2612 + * non-NULL dentries outside the start/end range.
2613 + */
2614 + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2615 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2616 + if (lower_dentry) {
2617 + if (bindex < dstart || bindex > dend) {
2618 + PRINT_CALLER();
2619 + printk(" CD1: dentry/lower=%p:%p(%p) "
2620 + "bindex=%d dstart/end=%d:%d\n",
2621 + dentry, lower_dentry,
2622 + (lower_dentry ? lower_dentry->d_inode :
2623 + (void *) 0xffffffff),
2624 + bindex, dstart, dend);
2625 + }
2626 + } else { /* lower_dentry == NULL */
2627 + if (bindex >= dstart && bindex <= dend) {
2628 + /*
2629 + * Directories can have NULL lower inodes in
2630 + * b/t start/end, but NOT if at the
2631 + * start/end range. Ignore this rule,
2632 + * however, if this is a NULL dentry or a
2633 + * deleted dentry.
2634 + */
2635 + if (!d_deleted((struct dentry *) dentry) &&
2636 + inode &&
2637 + !(inode && S_ISDIR(inode->i_mode) &&
2638 + bindex > dstart && bindex < dend)) {
2639 + PRINT_CALLER();
2640 + printk(" CD2: dentry/lower=%p:%p(%p) "
2641 + "bindex=%d dstart/end=%d:%d\n",
2642 + dentry, lower_dentry,
2643 + (lower_dentry ?
2644 + lower_dentry->d_inode :
2645 + (void *) 0xffffffff),
2646 + bindex, dstart, dend);
2647 + }
2648 + }
2649 + }
2650 + }
2651 +
2652 + /* check for vfsmounts same as for dentries */
2653 + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2654 + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2655 + if (lower_mnt) {
2656 + if (bindex < dstart || bindex > dend) {
2657 + PRINT_CALLER();
2658 + printk(" CM0: dentry/lmnt=%p:%p bindex=%d "
2659 + "dstart/end=%d:%d\n", dentry,
2660 + lower_mnt, bindex, dstart, dend);
2661 + }
2662 + } else { /* lower_mnt == NULL */
2663 + if (bindex >= dstart && bindex <= dend) {
2664 + /*
2665 + * Directories can have NULL lower inodes in
2666 + * b/t start/end, but NOT if at the
2667 + * start/end range. Ignore this rule,
2668 + * however, if this is a NULL dentry.
2669 + */
2670 + if (inode &&
2671 + !(inode && S_ISDIR(inode->i_mode) &&
2672 + bindex > dstart && bindex < dend)) {
2673 + PRINT_CALLER();
2674 + printk(" CM1: dentry/lmnt=%p:%p "
2675 + "bindex=%d dstart/end=%d:%d\n",
2676 + dentry, lower_mnt, bindex,
2677 + dstart, dend);
2678 + }
2679 + }
2680 + }
2681 + }
2682 +
2683 + /* for inodes now */
2684 + if (!inode)
2685 + return;
2686 + istart = ibstart(inode);
2687 + iend = ibend(inode);
2688 + BUG_ON(istart > iend);
2689 + if ((istart == -1 && iend != -1) ||
2690 + (istart != -1 && iend == -1)) {
2691 + PRINT_CALLER();
2692 + printk(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2693 + dentry, inode, istart, iend);
2694 + }
2695 + if (istart != dstart) {
2696 + PRINT_CALLER();
2697 + printk(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2698 + dentry, inode, istart, dstart);
2699 + }
2700 + if (iend != dend) {
2701 + PRINT_CALLER();
2702 + printk(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2703 + dentry, inode, iend, dend);
2704 + }
2705 +
2706 + if (!S_ISDIR(inode->i_mode)) {
2707 + if (dend != dstart) {
2708 + PRINT_CALLER();
2709 + printk(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2710 + dentry, inode, dstart, dend);
2711 + }
2712 + if (iend != istart) {
2713 + PRINT_CALLER();
2714 + printk(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2715 + dentry, inode, istart, iend);
2716 + }
2717 + }
2718 +
2719 + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2720 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2721 + if (lower_inode) {
2722 + if (bindex < istart || bindex > iend) {
2723 + PRINT_CALLER();
2724 + printk(" CI5: dentry/linode=%p:%p bindex=%d "
2725 + "istart/end=%d:%d\n", dentry,
2726 + lower_inode, bindex, istart, iend);
2727 + } else if ((int)lower_inode == 0x5a5a5a5a) {
2728 + /* freed inode! */
2729 + PRINT_CALLER();
2730 + printk(" CI6: dentry/linode=%p:%p bindex=%d "
2731 + "istart/end=%d:%d\n", dentry,
2732 + lower_inode, bindex, istart, iend);
2733 + }
2734 + } else { /* lower_inode == NULL */
2735 + if (bindex >= istart && bindex <= iend) {
2736 + /*
2737 + * directories can have NULL lower inodes in
2738 + * b/t start/end, but NOT if at the
2739 + * start/end range.
2740 + */
2741 + if (!(S_ISDIR(inode->i_mode) &&
2742 + bindex > istart && bindex < iend)) {
2743 + PRINT_CALLER();
2744 + printk(" CI7: dentry/linode=%p:%p "
2745 + "bindex=%d istart/end=%d:%d\n",
2746 + dentry, lower_inode, bindex,
2747 + istart, iend);
2748 + }
2749 + }
2750 + }
2751 + }
2752 +
2753 + /*
2754 + * If it's a directory, then intermediate objects b/t start/end can
2755 + * be NULL. But, check that all three are NULL: lower dentry, mnt,
2756 + * and inode.
2757 + */
2758 + if (S_ISDIR(inode->i_mode))
2759 + for (bindex = dstart+1; bindex < dend; bindex++) {
2760 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2761 + lower_dentry = unionfs_lower_dentry_idx(dentry,
2762 + bindex);
2763 + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2764 + if (!((lower_inode && lower_dentry && lower_mnt) ||
2765 + (!lower_inode && !lower_dentry && !lower_mnt))) {
2766 + PRINT_CALLER();
2767 + printk(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2768 + "bindex=%d dstart/end=%d:%d\n",
2769 + lower_mnt, lower_dentry, lower_inode,
2770 + bindex, dstart, dend);
2771 + }
2772 + }
2773 + /* check if lower inode is newer than upper one (it shouldn't) */
2774 + if (is_newer_lower(dentry)) {
2775 + PRINT_CALLER();
2776 + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2777 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2778 + if (!lower_inode)
2779 + continue;
2780 + printk(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2781 + "ctime/lctime=%lu.%lu/%lu.%lu\n",
2782 + bindex,
2783 + inode->i_mtime.tv_sec,
2784 + inode->i_mtime.tv_nsec,
2785 + lower_inode->i_mtime.tv_sec,
2786 + lower_inode->i_mtime.tv_nsec,
2787 + inode->i_ctime.tv_sec,
2788 + inode->i_ctime.tv_nsec,
2789 + lower_inode->i_ctime.tv_sec,
2790 + lower_inode->i_ctime.tv_nsec);
2791 + }
2792 + }
2793 +}
2794 +
2795 +void __unionfs_check_file(const struct file *file,
2796 + const char *fname, const char *fxn, int line)
2797 +{
2798 + int bindex;
2799 + int dstart, dend, fstart, fend;
2800 + struct dentry *dentry;
2801 + struct file *lower_file;
2802 + struct inode *inode;
2803 + struct super_block *sb;
2804 + int printed_caller = 0;
2805 +
2806 + BUG_ON(!file);
2807 + dentry = file->f_path.dentry;
2808 + sb = dentry->d_sb;
2809 + dstart = dbstart(dentry);
2810 + dend = dbend(dentry);
2811 + BUG_ON(dstart > dend);
2812 + fstart = fbstart(file);
2813 + fend = fbend(file);
2814 + BUG_ON(fstart > fend);
2815 +
2816 + if ((fstart == -1 && fend != -1) ||
2817 + (fstart != -1 && fend == -1)) {
2818 + PRINT_CALLER();
2819 + printk(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2820 + file, dentry, fstart, fend);
2821 + }
2822 + if (fstart != dstart) {
2823 + PRINT_CALLER();
2824 + printk(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2825 + file, dentry, fstart, dstart);
2826 + }
2827 + if (fend != dend) {
2828 + PRINT_CALLER();
2829 + printk(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2830 + file, dentry, fend, dend);
2831 + }
2832 + inode = dentry->d_inode;
2833 + if (!S_ISDIR(inode->i_mode)) {
2834 + if (fend != fstart) {
2835 + PRINT_CALLER();
2836 + printk(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2837 + file, inode, fstart, fend);
2838 + }
2839 + if (dend != dstart) {
2840 + PRINT_CALLER();
2841 + printk(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
2842 + file, dentry, dstart, dend);
2843 + }
2844 + }
2845 +
2846 + /*
2847 + * check for NULL dentries inside the start/end range, or
2848 + * non-NULL dentries outside the start/end range.
2849 + */
2850 + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2851 + lower_file = unionfs_lower_file_idx(file, bindex);
2852 + if (lower_file) {
2853 + if (bindex < fstart || bindex > fend) {
2854 + PRINT_CALLER();
2855 + printk(" CF5: file/lower=%p:%p bindex=%d "
2856 + "fstart/end=%d:%d\n",
2857 + file, lower_file, bindex, fstart, fend);
2858 + }
2859 + } else { /* lower_file == NULL */
2860 + if (bindex >= fstart && bindex <= fend) {
2861 + /*
2862 + * directories can have NULL lower inodes in
2863 + * b/t start/end, but NOT if at the
2864 + * start/end range.
2865 + */
2866 + if (!(S_ISDIR(inode->i_mode) &&
2867 + bindex > fstart && bindex < fend)) {
2868 + PRINT_CALLER();
2869 + printk(" CF6: file/lower=%p:%p "
2870 + "bindex=%d fstart/end=%d:%d\n",
2871 + file, lower_file, bindex,
2872 + fstart, fend);
2873 + }
2874 + }
2875 + }
2876 + }
2877 +
2878 + __unionfs_check_dentry(dentry,fname,fxn,line);
2879 +}
2880 +
2881 +/* useful to track vfsmount leaks that could cause EBUSY on unmount */
2882 +void __show_branch_counts(const struct super_block *sb,
2883 + const char *file, const char *fxn, int line)
2884 +{
2885 + int i;
2886 + struct vfsmount *mnt;
2887 +
2888 + printk("BC:");
2889 + for (i=0; i<sbmax(sb); i++) {
2890 + if (sb->s_root)
2891 + mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
2892 + else
2893 + mnt = NULL;
2894 + printk("%d:", (mnt ? atomic_read(&mnt->mnt_count) : -99));
2895 + }
2896 + printk("%s:%s:%d\n",file,fxn,line);
2897 +}
2898 +
2899 +void __show_inode_times(const struct inode *inode,
2900 + const char *file, const char *fxn, int line)
2901 +{
2902 + struct inode *lower_inode;
2903 + int bindex;
2904 +
2905 + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2906 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2907 + if (!lower_inode)
2908 + continue;
2909 + printk("IT(%lu:%d): ", inode->i_ino, bindex);
2910 + printk("%s:%s:%d ",file,fxn,line);
2911 + printk("um=%lu/%lu lm=%lu/%lu ",
2912 + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2913 + lower_inode->i_mtime.tv_sec,
2914 + lower_inode->i_mtime.tv_nsec);
2915 + printk("uc=%lu/%lu lc=%lu/%lu\n",
2916 + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2917 + lower_inode->i_ctime.tv_sec,
2918 + lower_inode->i_ctime.tv_nsec);
2919 + }
2920 +}
2921 +
2922 +void __show_dinode_times(const struct dentry *dentry,
2923 + const char *file, const char *fxn, int line)
2924 +{
2925 + struct inode *inode = dentry->d_inode;
2926 + struct inode *lower_inode;
2927 + int bindex;
2928 +
2929 + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2930 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2931 + if (!lower_inode)
2932 + continue;
2933 + printk("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino, bindex);
2934 + printk("%s:%s:%d ",file,fxn,line);
2935 + printk("um=%lu/%lu lm=%lu/%lu ",
2936 + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2937 + lower_inode->i_mtime.tv_sec,
2938 + lower_inode->i_mtime.tv_nsec);
2939 + printk("uc=%lu/%lu lc=%lu/%lu\n",
2940 + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2941 + lower_inode->i_ctime.tv_sec,
2942 + lower_inode->i_ctime.tv_nsec);
2943 + }
2944 +}
2945 +
2946 +void __show_inode_counts(const struct inode *inode,
2947 + const char *file, const char *fxn, int line)
2948 +{
2949 + struct inode *lower_inode;
2950 + int bindex;
2951 +
2952 + if (!inode) {
2953 + printk("SiC: Null inode\n");
2954 + return;
2955 + }
2956 + for (bindex=sbstart(inode->i_sb); bindex <= sbend(inode->i_sb); bindex++) {
2957 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2958 + if (!lower_inode)
2959 + continue;
2960 + printk("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
2961 + atomic_read(&(inode)->i_count));
2962 + printk("lc=%d ", atomic_read(&(lower_inode)->i_count));
2963 + printk("%s:%s:%d\n",file,fxn,line);
2964 + }
2965 +}
2966 diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
2967 new file mode 100644
2968 index 0000000..f3c1258
2969 --- /dev/null
2970 +++ b/fs/unionfs/dentry.c
2971 @@ -0,0 +1,480 @@
2972 +/*
2973 + * Copyright (c) 2003-2007 Erez Zadok
2974 + * Copyright (c) 2003-2006 Charles P. Wright
2975 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2976 + * Copyright (c) 2005-2006 Junjiro Okajima
2977 + * Copyright (c) 2005 Arun M. Krishnakumar
2978 + * Copyright (c) 2004-2006 David P. Quigley
2979 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
2980 + * Copyright (c) 2003 Puja Gupta
2981 + * Copyright (c) 2003 Harikesavan Krishnan
2982 + * Copyright (c) 2003-2007 Stony Brook University
2983 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2984 + *
2985 + * This program is free software; you can redistribute it and/or modify
2986 + * it under the terms of the GNU General Public License version 2 as
2987 + * published by the Free Software Foundation.
2988 + */
2989 +
2990 +#include "union.h"
2991 +
2992 +/*
2993 + * Revalidate a single dentry.
2994 + * Assume that dentry's info node is locked.
2995 + * Assume that parent(s) are all valid already, but
2996 + * the child may not yet be valid.
2997 + * Returns 1 if valid, 0 otherwise.
2998 + */
2999 +static int __unionfs_d_revalidate_one(struct dentry *dentry,
3000 + struct nameidata *nd)
3001 +{
3002 + int valid = 1; /* default is valid (1); invalid is 0. */
3003 + struct dentry *lower_dentry;
3004 + int bindex, bstart, bend;
3005 + int sbgen, dgen;
3006 + int positive = 0;
3007 + int locked = 0;
3008 + int interpose_flag;
3009 + struct nameidata lowernd; /* TODO: be gentler to the stack */
3010 +
3011 + if (nd)
3012 + memcpy(&lowernd, nd, sizeof(struct nameidata));
3013 + else
3014 + memset(&lowernd, 0, sizeof(struct nameidata));
3015 +
3016 + verify_locked(dentry);
3017 +
3018 + /* if the dentry is unhashed, do NOT revalidate */
3019 + if (d_deleted(dentry)) {
3020 + printk(KERN_DEBUG "unionfs: unhashed dentry being "
3021 + "revalidated: %*s\n",
3022 + dentry->d_name.len, dentry->d_name.name);
3023 + goto out;
3024 + }
3025 +
3026 + BUG_ON(dbstart(dentry) == -1);
3027 + if (dentry->d_inode)
3028 + positive = 1;
3029 + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3030 + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3031 + /*
3032 + * If we are working on an unconnected dentry, then there is no
3033 + * revalidation to be done, because this file does not exist within
3034 + * the namespace, and Unionfs operates on the namespace, not data.
3035 + */
3036 + if (sbgen != dgen) {
3037 + struct dentry *result;
3038 + int pdgen;
3039 +
3040 + /* The root entry should always be valid */
3041 + BUG_ON(IS_ROOT(dentry));
3042 +
3043 + /* We can't work correctly if our parent isn't valid. */
3044 + pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
3045 + BUG_ON(pdgen != sbgen); /* should never happen here */
3046 +
3047 + /* Free the pointers for our inodes and this dentry. */
3048 + bstart = dbstart(dentry);
3049 + bend = dbend(dentry);
3050 + if (bstart >= 0) {
3051 + struct dentry *lower_dentry;
3052 + for (bindex = bstart; bindex <= bend; bindex++) {
3053 + lower_dentry =
3054 + unionfs_lower_dentry_idx(dentry,
3055 + bindex);
3056 + dput(lower_dentry);
3057 + }
3058 + }
3059 + set_dbstart(dentry, -1);
3060 + set_dbend(dentry, -1);
3061 +
3062 + interpose_flag = INTERPOSE_REVAL_NEG;
3063 + if (positive) {
3064 + interpose_flag = INTERPOSE_REVAL;
3065 + /*
3066 + * During BRM, the VFS could already hold a lock on
3067 + * a file being read, so don't lock it again
3068 + * (deadlock), but if you lock it in this function,
3069 + * then release it here too.
3070 + */
3071 + if (!mutex_is_locked(&dentry->d_inode->i_mutex)) {
3072 + mutex_lock(&dentry->d_inode->i_mutex);
3073 + locked = 1;
3074 + }
3075 +
3076 + bstart = ibstart(dentry->d_inode);
3077 + bend = ibend(dentry->d_inode);
3078 + if (bstart >= 0) {
3079 + struct inode *lower_inode;
3080 + for (bindex = bstart; bindex <= bend;
3081 + bindex++) {
3082 + lower_inode =
3083 + unionfs_lower_inode_idx(
3084 + dentry->d_inode,
3085 + bindex);
3086 + iput(lower_inode);
3087 + }
3088 + }
3089 + kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
3090 + UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
3091 + ibstart(dentry->d_inode) = -1;
3092 + ibend(dentry->d_inode) = -1;
3093 + if (locked)
3094 + mutex_unlock(&dentry->d_inode->i_mutex);
3095 + }
3096 +
3097 + result = unionfs_lookup_backend(dentry, &lowernd,
3098 + interpose_flag);
3099 + if (result) {
3100 + if (IS_ERR(result)) {
3101 + valid = 0;
3102 + goto out;
3103 + }
3104 + /*
3105 + * current unionfs_lookup_backend() doesn't return
3106 + * a valid dentry
3107 + */
3108 + dput(dentry);
3109 + dentry = result;
3110 + }
3111 +
3112 + if (positive && UNIONFS_I(dentry->d_inode)->stale) {
3113 + make_bad_inode(dentry->d_inode);
3114 + d_drop(dentry);
3115 + valid = 0;
3116 + goto out;
3117 + }
3118 + goto out;
3119 + }
3120 +
3121 + /* The revalidation must occur across all branches */
3122 + bstart = dbstart(dentry);
3123 + bend = dbend(dentry);
3124 + BUG_ON(bstart == -1);
3125 + for (bindex = bstart; bindex <= bend; bindex++) {
3126 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3127 + if (!lower_dentry || !lower_dentry->d_op
3128 + || !lower_dentry->d_op->d_revalidate)
3129 + continue;
3130 + if (!lower_dentry->d_op->d_revalidate(lower_dentry,
3131 + &lowernd))
3132 + valid = 0;
3133 + }
3134 +
3135 + if (!dentry->d_inode)
3136 + valid = 0;
3137 +
3138 + if (valid) {
3139 + /*
3140 + * If we get here, and we copy the meta-data from the lower
3141 + * inode to our inode, then it is vital that we have already
3142 + * purged all unionfs-level file data. We do that in the
3143 + * caller (__unionfs_d_revalidate_chain) by calling
3144 + * purge_inode_data.
3145 + */
3146 + unionfs_copy_attr_all(dentry->d_inode,
3147 + unionfs_lower_inode(dentry->d_inode));
3148 + fsstack_copy_inode_size(dentry->d_inode,
3149 + unionfs_lower_inode(dentry->d_inode));
3150 + }
3151 +
3152 +out:
3153 + return valid;
3154 +}
3155 +
3156 +/*
3157 + * Determine if the lower inode objects have changed from below the unionfs
3158 + * inode. Return 1 if changed, 0 otherwise.
3159 + */
3160 +int is_newer_lower(const struct dentry *dentry)
3161 +{
3162 + int bindex;
3163 + struct inode *inode;
3164 + struct inode *lower_inode;
3165 +
3166 + /* ignore if we're called on semi-initialized dentries/inodes */
3167 + if (!dentry || !UNIONFS_D(dentry))
3168 + return 0;
3169 + inode = dentry->d_inode;
3170 + if (!inode || !UNIONFS_I(inode) ||
3171 + ibstart(inode) < 0 || ibend(inode) < 0)
3172 + return 0;
3173 +
3174 + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3175 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
3176 + if (!lower_inode)
3177 + continue;
3178 + /*
3179 + * We may want to apply other tests to determine if the
3180 + * lower inode's data has changed, but checking for changed
3181 + * ctime and mtime on the lower inode should be enough.
3182 + */
3183 + if (timespec_compare(&inode->i_mtime,
3184 + &lower_inode->i_mtime) < 0) {
3185 + printk("unionfs: new lower inode mtime "
3186 + "(bindex=%d, name=%s)\n", bindex,
3187 + dentry->d_name.name);
3188 + show_dinode_times(dentry);
3189 + return 1; /* mtime changed! */
3190 + }
3191 + if (timespec_compare(&inode->i_ctime,
3192 + &lower_inode->i_ctime) < 0) {
3193 + printk("unionfs: new lower inode ctime "
3194 + "(bindex=%d, name=%s)\n", bindex,
3195 + dentry->d_name.name);
3196 + show_dinode_times(dentry);
3197 + return 1; /* ctime changed! */
3198 + }
3199 + }
3200 + return 0; /* default: lower is not newer */
3201 +}
3202 +
3203 +/*
3204 + * Purge/remove/unmap all date pages of a unionfs inode. This is called
3205 + * when the lower inode has changed, and we have to force processes to get
3206 + * the new data.
3207 + *
3208 + * XXX: Our implementation works in that as long as a user process will have
3209 + * caused Unionfs to be called, directly or indirectly, even to just do
3210 + * ->d_revalidate; then we will have purged the current Unionfs data and the
3211 + * process will see the new data. For example, a process that continually
3212 + * re-reads the same file's data will see the NEW data as soon as the lower
3213 + * file had changed, upon the next read(2) syscall (even if the file is
3214 + * still open!) However, this doesn't work when the process re-reads the
3215 + * open file's data via mmap(2) (unless the user unmaps/closes the file and
3216 + * remaps/reopens it). Once we respond to ->readpage(s), then the kernel
3217 + * maps the page into the process's address space and there doesn't appear
3218 + * to be a way to force the kernel to invalidate those pages/mappings, and
3219 + * force the process to re-issue ->readpage. If there's a way to invalidate
3220 + * active mappings and force a ->readpage, let us know please
3221 + * (invalidate_inode_pages2 doesn't do the trick).
3222 + */
3223 +static inline void purge_inode_data(struct dentry *dentry)
3224 +{
3225 + /* remove all non-private mappings */
3226 + unmap_mapping_range(dentry->d_inode->i_mapping, 0, 0, 0);
3227 +
3228 + if (dentry->d_inode->i_data.nrpages)
3229 + truncate_inode_pages(&dentry->d_inode->i_data, 0);
3230 +}
3231 +
3232 +/*
3233 + * Revalidate a parent chain of dentries, then the actual node.
3234 + * Assumes that dentry is locked, but will lock all parents if/when needed.
3235 + *
3236 + * If 'willwrite' is 1, and the lower inode times are not in sync, then
3237 + * *don't* purge_inode_data, as it could deadlock if ->write calls us and we
3238 + * try to truncate a locked page. Besides, if unionfs is about to write
3239 + * data to a file, then there's the data unionfs is about to write is more
3240 + * authoritative than what's below, therefore we can safely overwrite the
3241 + * lower inode times and data.
3242 + */
3243 +int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
3244 + int willwrite)
3245 +{
3246 + int valid = 0; /* default is invalid (0); valid is 1. */
3247 + struct dentry **chain = NULL; /* chain of dentries to reval */
3248 + int chain_len = 0;
3249 + struct dentry *dtmp;
3250 + int sbgen, dgen, i;
3251 + int saved_bstart, saved_bend, bindex;
3252 +
3253 + /* find length of chain needed to revalidate */
3254 + /* XXX: should I grab some global (dcache?) lock? */
3255 + chain_len = 0;
3256 + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3257 + dtmp = dentry->d_parent;
3258 + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3259 + /* XXX: should we check if is_newer_lower all the way up? */
3260 + if (is_newer_lower(dtmp)) {
3261 + /*
3262 + * Special case: the root dentry's generation number must
3263 + * always be valid, but its lower inode times don't have to
3264 + * be, so sync up the times only.
3265 + */
3266 + if (IS_ROOT(dtmp))
3267 + unionfs_copy_attr_times(dtmp->d_inode);
3268 + else {
3269 + /*
3270 + * reset generation number to zero, guaranteed to be
3271 + * "old"
3272 + */
3273 + dgen = 0;
3274 + atomic_set(&UNIONFS_D(dtmp)->generation, dgen);
3275 + }
3276 + purge_inode_data(dtmp);
3277 + }
3278 + while (sbgen != dgen) {
3279 + /* The root entry should always be valid */
3280 + BUG_ON(IS_ROOT(dtmp));
3281 + chain_len++;
3282 + dtmp = dtmp->d_parent;
3283 + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3284 + }
3285 + if (chain_len == 0)
3286 + goto out_this; /* shortcut if parents are OK */
3287 +
3288 + /*
3289 + * Allocate array of dentries to reval. We could use linked lists,
3290 + * but the number of entries we need to alloc here is often small,
3291 + * and short lived, so locality will be better.
3292 + */
3293 + chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
3294 + if (!chain) {
3295 + printk("unionfs: no more memory in %s\n", __FUNCTION__);
3296 + goto out;
3297 + }
3298 +
3299 + /*
3300 + * lock all dentries in chain, in child to parent order.
3301 + * if failed, then sleep for a little, then retry.
3302 + */
3303 + dtmp = dentry->d_parent;
3304 + for (i=chain_len-1; i>=0; i--) {
3305 + chain[i] = dget(dtmp);
3306 + dtmp = dtmp->d_parent;
3307 + }
3308 +
3309 + /*
3310 + * call __unionfs_d_revalidate_one() on each dentry, but in parent
3311 + * to child order.
3312 + */
3313 + for (i=0; i<chain_len; i++) {
3314 + unionfs_lock_dentry(chain[i]);
3315 + saved_bstart = dbstart(chain[i]);
3316 + saved_bend = dbend(chain[i]);
3317 + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3318 + dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
3319 +
3320 + valid = __unionfs_d_revalidate_one(chain[i], nd);
3321 + /* XXX: is this the correct mntput condition?! */
3322 + if (valid && chain_len > 0 &&
3323 + sbgen != dgen && chain[i]->d_inode &&
3324 + S_ISDIR(chain[i]->d_inode->i_mode)) {
3325 + for (bindex = saved_bstart; bindex <= saved_bend;
3326 + bindex++)
3327 + unionfs_mntput(chain[i], bindex);
3328 + }
3329 + unionfs_unlock_dentry(chain[i]);
3330 +
3331 + if (!valid)
3332 + goto out_free;
3333 + }
3334 +
3335 +
3336 +out_this:
3337 + /* finally, lock this dentry and revalidate it */
3338 + verify_locked(dentry);
3339 + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3340 + if (is_newer_lower(dentry)) {
3341 + /* root dentry special case as aforementioned */
3342 + if (IS_ROOT(dentry))
3343 + unionfs_copy_attr_times(dentry->d_inode);
3344 + else {
3345 + /*
3346 + * reset generation number to zero, guaranteed to be
3347 + * "old"
3348 + */
3349 + dgen = 0;
3350 + atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3351 + }
3352 + if (!willwrite)
3353 + purge_inode_data(dentry);
3354 + }
3355 + valid = __unionfs_d_revalidate_one(dentry, nd);
3356 +
3357 + /*
3358 + * If __unionfs_d_revalidate_one() succeeded above, then it will
3359 + * have incremented the refcnt of the mnt's, but also the branch
3360 + * indices of the dentry will have been updated (to take into
3361 + * account any branch insertions/deletion. So the current
3362 + * dbstart/dbend match the current, and new, indices of the mnts
3363 + * which __unionfs_d_revalidate_one has incremented. Note: the "if"
3364 + * test below does not depend on whether chain_len was 0 or greater.
3365 + */
3366 + if (valid && sbgen != dgen)
3367 + for (bindex = dbstart(dentry);
3368 + bindex <= dbend(dentry);
3369 + bindex++)
3370 + unionfs_mntput(dentry, bindex);
3371 +
3372 +out_free:
3373 + /* unlock/dput all dentries in chain and return status */
3374 + if (chain_len > 0) {
3375 + for (i=0; i<chain_len; i++)
3376 + dput(chain[i]);
3377 + kfree(chain);
3378 + }
3379 +out:
3380 + return valid;
3381 +}
3382 +
3383 +static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
3384 +{
3385 + int err;
3386 +
3387 + unionfs_read_lock(dentry->d_sb);
3388 +
3389 + unionfs_lock_dentry(dentry);
3390 + err = __unionfs_d_revalidate_chain(dentry, nd, 0);
3391 + unionfs_unlock_dentry(dentry);
3392 + unionfs_check_dentry(dentry);
3393 +
3394 + unionfs_read_unlock(dentry->d_sb);
3395 +
3396 + return err;
3397 +}
3398 +
3399 +/*
3400 + * At this point no one can reference this dentry, so we don't have to be
3401 + * careful about concurrent access.
3402 + */
3403 +static void unionfs_d_release(struct dentry *dentry)
3404 +{
3405 + int bindex, bstart, bend;
3406 +
3407 + unionfs_read_lock(dentry->d_sb);
3408 +
3409 + unionfs_check_dentry(dentry);
3410 + /* this could be a negative dentry, so check first */
3411 + if (!UNIONFS_D(dentry)) {
3412 + printk(KERN_DEBUG "unionfs: dentry without private data: %.*s",
3413 + dentry->d_name.len, dentry->d_name.name);
3414 + goto out;
3415 + } else if (dbstart(dentry) < 0) {
3416 + /* this is due to a failed lookup */
3417 + printk(KERN_DEBUG "unionfs: dentry without lower "
3418 + "dentries: %.*s",
3419 + dentry->d_name.len, dentry->d_name.name);
3420 + goto out_free;
3421 + }
3422 +
3423 + /* Release all the lower dentries */
3424 + bstart = dbstart(dentry);
3425 + bend = dbend(dentry);
3426 + for (bindex = bstart; bindex <= bend; bindex++) {
3427 + dput(unionfs_lower_dentry_idx(dentry, bindex));
3428 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3429 + /* NULL lower mnt is ok if this is a negative dentry */
3430 + if (!dentry->d_inode && !unionfs_lower_mnt_idx(dentry,bindex))
3431 + continue;
3432 + unionfs_mntput(dentry, bindex);
3433 + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3434 + }
3435 + /* free private data (unionfs_dentry_info) here */
3436 + kfree(UNIONFS_D(dentry)->lower_paths);
3437 + UNIONFS_D(dentry)->lower_paths = NULL;
3438 +
3439 +out_free:
3440 + /* No need to unlock it, because it is disappeared. */
3441 + free_dentry_private_data(dentry);
3442 +
3443 +out:
3444 + unionfs_read_unlock(dentry->d_sb);
3445 + return;
3446 +}
3447 +
3448 +struct dentry_operations unionfs_dops = {
3449 + .d_revalidate = unionfs_d_revalidate,
3450 + .d_release = unionfs_d_release,
3451 +};
3452 diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3453 new file mode 100644
3454 index 0000000..980f125
3455 --- /dev/null
3456 +++ b/fs/unionfs/dirfops.c
3457 @@ -0,0 +1,278 @@
3458 +/*
3459 + * Copyright (c) 2003-2007 Erez Zadok
3460 + * Copyright (c) 2003-2006 Charles P. Wright
3461 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3462 + * Copyright (c) 2005-2006 Junjiro Okajima
3463 + * Copyright (c) 2005 Arun M. Krishnakumar
3464 + * Copyright (c) 2004-2006 David P. Quigley
3465 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3466 + * Copyright (c) 2003 Puja Gupta
3467 + * Copyright (c) 2003 Harikesavan Krishnan
3468 + * Copyright (c) 2003-2007 Stony Brook University
3469 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3470 + *
3471 + * This program is free software; you can redistribute it and/or modify
3472 + * it under the terms of the GNU General Public License version 2 as
3473 + * published by the Free Software Foundation.
3474 + */
3475 +
3476 +#include "union.h"
3477 +
3478 +/* Make sure our rdstate is playing by the rules. */
3479 +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3480 +{
3481 + BUG_ON(rdstate->offset >= DIREOF);
3482 + BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3483 +}
3484 +
3485 +struct unionfs_getdents_callback {
3486 + struct unionfs_dir_state *rdstate;
3487 + void *dirent;
3488 + int entries_written;
3489 + int filldir_called;
3490 + int filldir_error;
3491 + filldir_t filldir;
3492 + struct super_block *sb;
3493 +};
3494 +
3495 +/* based on generic filldir in fs/readir.c */
3496 +static int unionfs_filldir(void *dirent, const char *name, int namelen,
3497 + loff_t offset, u64 ino, unsigned int d_type)
3498 +{
3499 + struct unionfs_getdents_callback *buf = dirent;
3500 + struct filldir_node *found = NULL;
3501 + int err = 0;
3502 + int is_wh_entry = 0;
3503 +
3504 + buf->filldir_called++;
3505 +
3506 + if ((namelen > UNIONFS_WHLEN) &&
3507 + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3508 + name += UNIONFS_WHLEN;
3509 + namelen -= UNIONFS_WHLEN;
3510 + is_wh_entry = 1;
3511 + }
3512 +
3513 + found = find_filldir_node(buf->rdstate, name, namelen);
3514 +
3515 + if (found)
3516 + goto out;
3517 +
3518 + /* if 'name' isn't a whiteout, filldir it. */
3519 + if (!is_wh_entry) {
3520 + off_t pos = rdstate2offset(buf->rdstate);
3521 + u64 unionfs_ino = ino;
3522 +
3523 + if (!err) {
3524 + err = buf->filldir(buf->dirent, name, namelen, pos,
3525 + unionfs_ino, d_type);
3526 + buf->rdstate->offset++;
3527 + verify_rdstate_offset(buf->rdstate);
3528 + }
3529 + }
3530 + /*
3531 + * If we did fill it, stuff it in our hash, otherwise return an
3532 + * error.
3533 + */
3534 + if (err) {
3535 + buf->filldir_error = err;
3536 + goto out;
3537 + }
3538 + buf->entries_written++;
3539 + if ((err = add_filldir_node(buf->rdstate, name, namelen,
3540 + buf->rdstate->bindex, is_wh_entry)))
3541 + buf->filldir_error = err;
3542 +
3543 +out:
3544 + return err;
3545 +}
3546 +
3547 +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3548 +{
3549 + int err = 0;
3550 + struct file *lower_file = NULL;
3551 + struct inode *inode = NULL;
3552 + struct unionfs_getdents_callback buf;
3553 + struct unionfs_dir_state *uds;
3554 + int bend;
3555 + loff_t offset;
3556 +
3557 + unionfs_read_lock(file->f_path.dentry->d_sb);
3558 +
3559 + if ((err = unionfs_file_revalidate(file, 0)))
3560 + goto out;
3561 +
3562 + inode = file->f_path.dentry->d_inode;
3563 +
3564 + uds = UNIONFS_F(file)->rdstate;
3565 + if (!uds) {
3566 + if (file->f_pos == DIREOF) {
3567 + goto out;
3568 + } else if (file->f_pos > 0) {
3569 + uds = find_rdstate(inode, file->f_pos);
3570 + if (!uds) {
3571 + err = -ESTALE;
3572 + goto out;
3573 + }
3574 + UNIONFS_F(file)->rdstate = uds;
3575 + } else {
3576 + init_rdstate(file);
3577 + uds = UNIONFS_F(file)->rdstate;
3578 + }
3579 + }
3580 + bend = fbend(file);
3581 +
3582 + while (uds->bindex <= bend) {
3583 + lower_file = unionfs_lower_file_idx(file, uds->bindex);
3584 + if (!lower_file) {
3585 + uds->bindex++;
3586 + uds->dirpos = 0;
3587 + continue;
3588 + }
3589 +
3590 + /* prepare callback buffer */
3591 + buf.filldir_called = 0;
3592 + buf.filldir_error = 0;
3593 + buf.entries_written = 0;
3594 + buf.dirent = dirent;
3595 + buf.filldir = filldir;
3596 + buf.rdstate = uds;
3597 + buf.sb = inode->i_sb;
3598 +
3599 + /* Read starting from where we last left off. */
3600 + offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3601 + if (offset < 0) {
3602 + err = offset;
3603 + goto out;
3604 + }
3605 + err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3606 +
3607 + /* Save the position for when we continue. */
3608 + offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3609 + if (offset < 0) {
3610 + err = offset;
3611 + goto out;
3612 + }
3613 + uds->dirpos = offset;
3614 +
3615 + /* Copy the atime. */
3616 + fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode);
3617 +
3618 + if (err < 0)
3619 + goto out;
3620 +
3621 + if (buf.filldir_error)
3622 + break;
3623 +
3624 + if (!buf.entries_written) {
3625 + uds->bindex++;
3626 + uds->dirpos = 0;
3627 + }
3628 + }
3629 +
3630 + if (!buf.filldir_error && uds->bindex >= bend) {
3631 + /* Save the number of hash entries for next time. */
3632 + UNIONFS_I(inode)->hashsize = uds->hashentries;
3633 + free_rdstate(uds);
3634 + UNIONFS_F(file)->rdstate = NULL;
3635 + file->f_pos = DIREOF;
3636 + } else
3637 + file->f_pos = rdstate2offset(uds);
3638 +
3639 +out:
3640 + unionfs_read_unlock(file->f_path.dentry->d_sb);
3641 + return err;
3642 +}
3643 +
3644 +/*
3645 + * This is not meant to be a generic repositioning function. If you do
3646 + * things that aren't supported, then we return EINVAL.
3647 + *
3648 + * What is allowed:
3649 + * (1) seeking to the same position that you are currently at
3650 + * This really has no effect, but returns where you are.
3651 + * (2) seeking to the beginning of the file
3652 + * This throws out all state, and lets you begin again.
3653 + */
3654 +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3655 +{
3656 + struct unionfs_dir_state *rdstate;
3657 + loff_t err;
3658 +
3659 + unionfs_read_lock(file->f_path.dentry->d_sb);
3660 +
3661 + if ((err = unionfs_file_revalidate(file, 0)))
3662 + goto out;
3663 +
3664 + rdstate = UNIONFS_F(file)->rdstate;
3665 +
3666 + /*
3667 + * we let users seek to their current position, but not anywhere
3668 + * else.
3669 + */
3670 + if (!offset) {
3671 + switch (origin) {
3672 + case SEEK_SET:
3673 + if (rdstate) {
3674 + free_rdstate(rdstate);
3675 + UNIONFS_F(file)->rdstate = NULL;
3676 + }
3677 + init_rdstate(file);
3678 + err = 0;
3679 + break;
3680 + case SEEK_CUR:
3681 + err = file->f_pos;
3682 + break;
3683 + case SEEK_END:
3684 + /* Unsupported, because we would break everything. */
3685 + err = -EINVAL;
3686 + break;
3687 + }
3688 + } else {
3689 + switch (origin) {
3690 + case SEEK_SET:
3691 + if (rdstate) {
3692 + if (offset == rdstate2offset(rdstate))
3693 + err = offset;
3694 + else if (file->f_pos == DIREOF)
3695 + err = DIREOF;
3696 + else
3697 + err = -EINVAL;
3698 + } else {
3699 + rdstate = find_rdstate(file->f_path.dentry->d_inode,
3700 + offset);
3701 + if (rdstate) {
3702 + UNIONFS_F(file)->rdstate = rdstate;
3703 + err = rdstate->offset;
3704 + } else
3705 + err = -EINVAL;
3706 + }
3707 + break;
3708 + case SEEK_CUR:
3709 + case SEEK_END:
3710 + /* Unsupported, because we would break everything. */
3711 + err = -EINVAL;
3712 + break;
3713 + }
3714 + }
3715 +
3716 +out:
3717 + unionfs_read_unlock(file->f_path.dentry->d_sb);
3718 + return err;
3719 +}
3720 +
3721 +/*
3722 + * Trimmed directory options, we shouldn't pass everything down since
3723 + * we don't want to operate on partial directories.
3724 + */
3725 +struct file_operations unionfs_dir_fops = {
3726 + .llseek = unionfs_dir_llseek,
3727 + .read = generic_read_dir,
3728 + .readdir = unionfs_readdir,
3729 + .unlocked_ioctl = unionfs_ioctl,
3730 + .open = unionfs_open,
3731 + .release = unionfs_file_release,
3732 + .flush = unionfs_flush,
3733 + .fsync = unionfs_fsync,
3734 + .fasync = unionfs_fasync,
3735 +};
3736 diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3737 new file mode 100644
3738 index 0000000..a72f711
3739 --- /dev/null
3740 +++ b/fs/unionfs/dirhelper.c
3741 @@ -0,0 +1,271 @@
3742 +/*
3743 + * Copyright (c) 2003-2007 Erez Zadok
3744 + * Copyright (c) 2003-2006 Charles P. Wright
3745 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3746 + * Copyright (c) 2005-2006 Junjiro Okajima
3747 + * Copyright (c) 2005 Arun M. Krishnakumar
3748 + * Copyright (c) 2004-2006 David P. Quigley
3749 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3750 + * Copyright (c) 2003 Puja Gupta
3751 + * Copyright (c) 2003 Harikesavan Krishnan
3752 + * Copyright (c) 2003-2007 Stony Brook University
3753 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3754 + *
3755 + * This program is free software; you can redistribute it and/or modify
3756 + * it under the terms of the GNU General Public License version 2 as
3757 + * published by the Free Software Foundation.
3758 + */
3759 +
3760 +#include "union.h"
3761 +
3762 +/*
3763 + * Unlink every whiteout that @namelist records for branch @bindex of the
3764 + * directory @dentry (rmdir helper).  Returns 0, or the first error hit.
3765 + * The lower directory inode should be locked by the caller.
3766 + */
3767 +int do_delete_whiteouts(struct dentry *dentry, int bindex,
3768 + struct unionfs_dir_state *namelist)
3769 +{
3770 + int err = 0;
3771 + struct dentry *lower_dir_dentry = NULL;
3772 + struct dentry *lower_dentry;
3773 + char *name = NULL, *p;
3774 + struct inode *lower_dir;
3775 + int i;
3776 + struct list_head *pos;
3777 + struct filldir_node *cursor;
3778 +
3779 + /* Find out lower parent dentry */
3780 + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3781 + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3782 + lower_dir = lower_dir_dentry->d_inode;
3783 + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3784 +
3785 + err = -ENOMEM; /* reported if __getname() fails below */
3786 + name = __getname();
3787 + if (!name)
3788 + goto out;
3789 + strcpy(name, UNIONFS_WHPFX); /* buffer always starts with the prefix */
3790 + p = name + UNIONFS_WHLEN; /* each entry name is copied in at p */
3791 +
3792 + err = 0;
3793 + for (i = 0; !err && i < namelist->size; i++) { /* walk every list */
3794 + list_for_each(pos, &namelist->list[i]) {
3795 + cursor =
3796 + list_entry(pos, struct filldir_node,
3797 + file_list);
3798 + /* Only operate on whiteouts in this branch. */
3799 + if (cursor->bindex != bindex)
3800 + continue;
3801 + if (!cursor->whiteout)
3802 + continue;
3803 +
3804 + strcpy(p, cursor->name); /* name = prefix + entry name */
3805 + lower_dentry =
3806 + lookup_one_len(name, lower_dir_dentry,
3807 + cursor->namelen +
3808 + UNIONFS_WHLEN);
3809 + if (IS_ERR(lower_dentry)) {
3810 + err = PTR_ERR(lower_dentry);
3811 + break;
3812 + }
3813 + if (lower_dentry->d_inode) /* unlink only if it exists */
3814 + err = vfs_unlink(lower_dir, lower_dentry);
3815 + dput(lower_dentry); /* drop the lookup reference */
3816 + if (err)
3817 + break;
3818 + }
3819 + }
3820 +
3821 + __putname(name);
3822 +
3823 + /* After all of the removals, we should copy the attributes once. */
3824 + fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
3825 +
3826 +out:
3827 + return err;
3828 +}
3829 +
3830 +/* rmdir helper: delete whiteouts of @dentry in branch @bindex, via sioq if needed */
3831 +int delete_whiteouts(struct dentry *dentry, int bindex,
3832 + struct unionfs_dir_state *namelist)
3833 +{
3834 + int err;
3835 + struct super_block *sb;
3836 + struct dentry *lower_dir_dentry;
3837 + struct inode *lower_dir;
3838 + struct sioq_args args;
3839 +
3840 + sb = dentry->d_sb;
3841 +
3842 + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3843 + BUG_ON(bindex < dbstart(dentry)); /* bindex must lie within the */
3844 + BUG_ON(bindex > dbend(dentry)); /* dentry's branch range */
3845 + err = is_robranch_super(sb, bindex); /* non-zero: bail out */
3846 + if (err)
3847 + goto out;
3848 +
3849 + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3850 + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3851 + lower_dir = lower_dir_dentry->d_inode;
3852 + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3853 +
3854 + mutex_lock(&lower_dir->i_mutex); /* held across the whole deletion */
3855 + if (!permission(lower_dir, MAY_WRITE | MAY_EXEC, NULL)) /* 0: allowed */
3856 + err = do_delete_whiteouts(dentry, bindex, namelist);
3857 + else { /* otherwise run __delete_whiteouts through run_sioq() */
3858 + args.deletewh.namelist = namelist;
3859 + args.deletewh.dentry = dentry;
3860 + args.deletewh.bindex = bindex;
3861 + run_sioq(__delete_whiteouts, &args);
3862 + err = args.err; /* result is passed back in args */
3863 + }
3864 + mutex_unlock(&lower_dir->i_mutex);
3865 +
3866 +out:
3867 + return err;
3868 +}
3869 +
3870 +#define RD_NONE 0
3871 +#define RD_CHECK_EMPTY 1
3872 +/* The callback structure for check_empty. */
3873 +struct unionfs_rdutil_callback {
3874 + int err;
3875 + int filldir_called;
3876 + struct unionfs_dir_state *rdstate;
3877 + int mode;
3878 +};
3879 +
3880 +/* This filldir function makes sure only whiteouts exist within a directory. */
3881 +static int readdir_util_callback(void *dirent, const char *name, int namelen,
3882 + loff_t offset, u64 ino, unsigned int d_type)
3883 +{
3884 + int err = 0;
3885 + struct unionfs_rdutil_callback *buf = dirent; /* our state, not a dirent */
3886 + int whiteout = 0;
3887 + struct filldir_node *found;
3888 +
3889 + buf->filldir_called = 1; /* check_empty() keeps reading while this is set */
3890 +
3891 + if (name[0] == '.' && (namelen == 1 ||
3892 + (name[1] == '.' && namelen == 2)))
3893 + goto out; /* "." and ".." never make a directory non-empty */
3894 +
3895 + if (namelen > UNIONFS_WHLEN &&
3896 + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3897 + namelen -= UNIONFS_WHLEN; /* strip the whiteout prefix ... */
3898 + name += UNIONFS_WHLEN;
3899 + whiteout = 1; /* ... but remember that it was one */
3900 + }
3901 +
3902 + found = find_filldir_node(buf->rdstate, name, namelen);
3903 + /* If it was found in the table there was a previous whiteout. */
3904 + if (found)
3905 + goto out;
3906 +
3907 + /*
3908 + * if it wasn't found and isn't a whiteout, the directory isn't
3909 + * empty.
3910 + */
3911 + err = -ENOTEMPTY;
3912 + if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
3913 + goto out;
3914 +
3915 + err = add_filldir_node(buf->rdstate, name, namelen,
3916 + buf->rdstate->bindex, whiteout);
3917 +
3918 +out:
3919 + buf->err = err; /* also reported to the caller through the callback state */
3920 + return err;
3921 +}
3922 +
3923 +/* Is a directory logically empty? */
3924 +int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
3925 +{
3926 + int err = 0;
3927 + struct dentry *lower_dentry = NULL;
3928 + struct super_block *sb;
3929 + struct file *lower_file;
3930 + struct unionfs_rdutil_callback *buf = NULL;
3931 + int bindex, bstart, bend, bopaque;
3932 +
3933 + sb = dentry->d_sb;
3934 +
3935 +
3936 + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3937 +
3938 + if ((err = unionfs_partial_lookup(dentry)))
3939 + goto out;
3940 +
3941 + bstart = dbstart(dentry);
3942 + bend = dbend(dentry);
3943 + bopaque = dbopaque(dentry);
3944 + if (0 <= bopaque && bopaque < bend)
3945 + bend = bopaque;
3946 +
3947 + buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
3948 + if (!buf) {
3949 + err = -ENOMEM;
3950 + goto out;
3951 + }
3952 + buf->err = 0;
3953 + buf->mode = RD_CHECK_EMPTY;
3954 + buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
3955 + if (!buf->rdstate) {
3956 + err = -ENOMEM;
3957 + goto out;
3958 + }
3959 +
3960 + /* Process the lower directories with rdutil_callback as a filldir. */
3961 + for (bindex = bstart; bindex <= bend; bindex++) {
3962 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3963 + if (!lower_dentry)
3964 + continue;
3965 + if (!lower_dentry->d_inode)
3966 + continue;
3967 + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
3968 + continue;
3969 +
3970 + dget(lower_dentry);
3971 + unionfs_mntget(dentry, bindex);
3972 + branchget(sb, bindex);
3973 + lower_file =
3974 + dentry_open(lower_dentry,
3975 + unionfs_lower_mnt_idx(dentry, bindex),
3976 + O_RDONLY);
3977 + if (IS_ERR(lower_file)) {
3978 + err = PTR_ERR(lower_file);
3979 + dput(lower_dentry);
3980 + branchput(sb, bindex);
3981 + goto out;
3982 + }
3983 +
3984 + do {
3985 + buf->filldir_called = 0;
3986 + buf->rdstate->bindex = bindex;
3987 + err = vfs_readdir(lower_file,
3988 + readdir_util_callback, buf);
3989 + if (buf->err)
3990 + err = buf->err;
3991 + } while ((err >= 0) && buf->filldir_called);
3992 +
3993 + /* fput calls dput for lower_dentry */
3994 + fput(lower_file);
3995 + branchput(sb, bindex);
3996 +
3997 + if (err < 0)
3998 + goto out;
3999 + }
4000 +
4001 +out:
4002 + if (buf) {
4003 + if (namelist && !err)
4004 + *namelist = buf->rdstate;
4005 + else if (buf->rdstate)
4006 + free_rdstate(buf->rdstate);
4007 + kfree(buf);
4008 + }
4009 +
4010 +
4011 + return err;
4012 +}
4013 diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4014 new file mode 100644
4015 index 0000000..e7407f0
4016 --- /dev/null
4017 +++ b/fs/unionfs/fanout.h
4018 @@ -0,0 +1,318 @@
4019 +/*
4020 + * Copyright (c) 2003-2007 Erez Zadok
4021 + * Copyright (c) 2003-2006 Charles P. Wright
4022 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4023 + * Copyright (c) 2005 Arun M. Krishnakumar
4024 + * Copyright (c) 2004-2006 David P. Quigley
4025 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4026 + * Copyright (c) 2003 Puja Gupta
4027 + * Copyright (c) 2003 Harikesavan Krishnan
4028 + * Copyright (c) 2003-2007 Stony Brook University
4029 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4030 + *
4031 + * This program is free software; you can redistribute it and/or modify
4032 + * it under the terms of the GNU General Public License version 2 as
4033 + * published by the Free Software Foundation.
4034 + */
4035 +
4036 +#ifndef _FANOUT_H_
4037 +#define _FANOUT_H_
4038 +
4039 +/*
4040 + * Inode to private data
4041 + *
4042 + * Since we use containers and the struct inode is _inside_ the
4043 + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4044 + * inode pointer), return a valid non-NULL pointer.
4045 + */
4046 +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4047 +{
4048 + return container_of(inode, struct unionfs_inode_info, vfs_inode);
4049 +}
4050 +
4051 +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4052 +#define ibend(ino) (UNIONFS_I(ino)->bend)
4053 +
4054 +/* Superblock to private data */
4055 +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4056 +#define sbstart(sb) 0
4057 +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4058 +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4059 +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4060 +
4061 +/* File to private Data */
4062 +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4063 +#define fbstart(file) (UNIONFS_F(file)->bstart)
4064 +#define fbend(file) (UNIONFS_F(file)->bend)
4065 +
4066 +/* helpers to manipulate branch IDs stored in our superblock */
4067 +static inline int branch_id(struct super_block *sb, int index)
4068 +{
4069 + return UNIONFS_SB(sb)->data[index].branch_id;
4070 +}
4071 +
4072 +static inline void set_branch_id(struct super_block *sb, int index, int val)
4073 +{
4074 + UNIONFS_SB(sb)->data[index].branch_id = val;
4075 +}
4076 +
4077 +static inline void new_branch_id(struct super_block *sb, int index)
4078 +{
4079 + set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4080 +}
4081 +
4082 +/*
4083 + * Find new index of matching branch with an existing superblock of a known
4084 + * (possibly old) id. This is needed because branches could have been
4085 + * added/deleted causing the branches of any open files to shift.
4086 + *
4087 + * @sb: the new superblock which may have new/different branch IDs
4088 + * @id: the old/existing id we're looking for
4089 + * Returns index of newly found branch (0 or greater), -1 otherwise.
4090 + */
4091 +static inline int branch_id_to_idx(struct super_block *sb, int id)
4092 +{
4093 + int i;
4094 + for (i = 0; i < sbmax(sb); i++) {
4095 + if (branch_id(sb, i) == id)
4096 + return i;
4097 + }
4098 + /* in the non-ODF code, this should really never happen */
4099 + printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4100 + return -1;
4101 +}
4102 +
4103 +/* File to lower file. */
4104 +static inline struct file *unionfs_lower_file(const struct file *f)
4105 +{
4106 + return UNIONFS_F(f)->lower_files[fbstart(f)];
4107 +}
4108 +
4109 +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4110 + int index)
4111 +{
4112 + return UNIONFS_F(f)->lower_files[index];
4113 +}
4114 +
4115 +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4116 + struct file *val)
4117 +{
4118 + UNIONFS_F(f)->lower_files[index] = val;
4119 + /* save branch ID (may be redundant?) */
4120 + UNIONFS_F(f)->saved_branch_ids[index] =
4121 + branch_id((f)->f_dentry->d_sb, index);
4122 +}
4123 +
4124 +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4125 +{
4126 + unionfs_set_lower_file_idx((f), fbstart(f), (val));
4127 +}
4128 +
4129 +/* Inode to lower inode. */
4130 +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4131 +{
4132 + return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4133 +}
4134 +
4135 +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4136 + int index)
4137 +{
4138 + return UNIONFS_I(i)->lower_inodes[index];
4139 +}
4140 +
4141 +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4142 + struct inode *val)
4143 +{
4144 + UNIONFS_I(i)->lower_inodes[index] = val;
4145 +}
4146 +
4147 +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4148 +{
4149 + UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4150 +}
4151 +
4152 +/* Superblock to lower superblock. */
4153 +static inline struct super_block *unionfs_lower_super(
4154 + const struct super_block *sb)
4155 +{
4156 + return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4157 +}
4158 +
4159 +static inline struct super_block *unionfs_lower_super_idx(
4160 + const struct super_block *sb,
4161 + int index)
4162 +{
4163 + return UNIONFS_SB(sb)->data[index].sb;
4164 +}
4165 +
4166 +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4167 + int index,
4168 + struct super_block *val)
4169 +{
4170 + UNIONFS_SB(sb)->data[index].sb = val;
4171 +}
4172 +
4173 +static inline void unionfs_set_lower_super(struct super_block *sb,
4174 + struct super_block *val)
4175 +{
4176 + UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4177 +}
4178 +
4179 +/* Branch count macros. */
4180 +static inline int branch_count(const struct super_block *sb, int index)
4181 +{
4182 + return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4183 +}
4184 +
4185 +static inline void set_branch_count(struct super_block *sb, int index, int val)
4186 +{
4187 + atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4188 +}
4189 +
4190 +static inline void branchget(struct super_block *sb, int index)
4191 +{
4192 + atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4193 +}
4194 +
4195 +static inline void branchput(struct super_block *sb, int index)
4196 +{
4197 + atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4198 +}
4199 +
4200 +/* Dentry macros */
4201 +static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
4202 +{
4203 + return dent->d_fsdata;
4204 +}
4205 +
4206 +static inline int dbstart(const struct dentry *dent)
4207 +{
4208 + return UNIONFS_D(dent)->bstart;
4209 +}
4210 +
4211 +static inline void set_dbstart(struct dentry *dent, int val)
4212 +{
4213 + UNIONFS_D(dent)->bstart = val;
4214 +}
4215 +
4216 +static inline int dbend(const struct dentry *dent)
4217 +{
4218 + return UNIONFS_D(dent)->bend;
4219 +}
4220 +
4221 +static inline void set_dbend(struct dentry *dent, int val)
4222 +{
4223 + UNIONFS_D(dent)->bend = val;
4224 +}
4225 +
4226 +static inline int dbopaque(const struct dentry *dent)
4227 +{
4228 + return UNIONFS_D(dent)->bopaque;
4229 +}
4230 +
4231 +static inline void set_dbopaque(struct dentry *dent, int val)
4232 +{
4233 + UNIONFS_D(dent)->bopaque = val;
4234 +}
4235 +
4236 +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4237 + struct dentry *val)
4238 +{
4239 + UNIONFS_D(dent)->lower_paths[index].dentry = val;
4240 +}
4241 +
4242 +static inline struct dentry *unionfs_lower_dentry_idx(
4243 + const struct dentry *dent,
4244 + int index)
4245 +{
4246 + return UNIONFS_D(dent)->lower_paths[index].dentry;
4247 +}
4248 +
4249 +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4250 +{
4251 + return unionfs_lower_dentry_idx(dent, dbstart(dent));
4252 +}
4253 +
4254 +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4255 + struct vfsmount *mnt)
4256 +{
4257 + UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4258 +}
4259 +
4260 +static inline struct vfsmount *unionfs_lower_mnt_idx(
4261 + const struct dentry *dent,
4262 + int index)
4263 +{
4264 + return UNIONFS_D(dent)->lower_paths[index].mnt;
4265 +}
4266 +
4267 +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4268 +{
4269 + return unionfs_lower_mnt_idx(dent, dbstart(dent));
4270 +}
4271 +
4272 +/* Macros for locking a dentry. */
4273 +static inline void unionfs_lock_dentry(struct dentry *d)
4274 +{
4275 + mutex_lock(&UNIONFS_D(d)->lock);
4276 +}
4277 +
4278 +static inline void unionfs_unlock_dentry(struct dentry *d)
4279 +{
4280 + mutex_unlock(&UNIONFS_D(d)->lock);
4281 +}
4282 +
4283 +static inline void verify_locked(struct dentry *d)
4284 +{
4285 + BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4286 +}
4287 +
4288 +/* copy a/m/ctime from the lower branch with the newest times */
4289 +static inline void unionfs_copy_attr_times(struct inode *upper)
4290 +{
4291 + int bindex;
4292 + struct inode *lower;
4293 +
4294 + if (!upper)
4295 + return;
4296 + for (bindex=ibstart(upper); bindex <= ibend(upper); bindex++) {
4297 + lower = unionfs_lower_inode_idx(upper, bindex);
4298 + if (!lower)
4299 + continue; /* not all lower dir objects may exist */
4300 + if (timespec_compare(&upper->i_mtime, &lower->i_mtime) < 0)
4301 + upper->i_mtime = lower->i_mtime;
4302 + if (timespec_compare(&upper->i_ctime, &lower->i_ctime) < 0)
4303 + upper->i_ctime = lower->i_ctime;
4304 + if (timespec_compare(&upper->i_atime, &lower->i_atime) < 0)
4305 + upper->i_atime = lower->i_atime;
4306 + /* XXX: should we notify_change on our upper inode? */
4307 + }
4308 +}
4309 +
4310 +/*
4311 + * A unionfs/fanout version of fsstack_copy_attr_all. Uses
4312 + * unionfs_get_nlinks to properly calculate the number of links to a file.
4313 + * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
4314 + * important if the lower inode is a directory type)
4315 + */
4316 +static inline void unionfs_copy_attr_all(struct inode *dest,
4317 + const struct inode *src)
4318 +{
4319 + dest->i_mode = src->i_mode;
4320 + dest->i_uid = src->i_uid;
4321 + dest->i_gid = src->i_gid;
4322 + dest->i_rdev = src->i_rdev;
4323 +
4324 + unionfs_copy_attr_times(dest);
4325 +
4326 + dest->i_blkbits = src->i_blkbits;
4327 + dest->i_flags = src->i_flags;
4328 +
4329 + /*
4330 + * Update the nlinks AFTER updating the above fields, because the
4331 + * get_links callback may depend on them.
4332 + */
4333 + dest->i_nlink = unionfs_get_nlinks(dest);
4334 +}
4335 +
4336 +#endif /* not _FANOUT_H */
4337 diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4338 new file mode 100644
4339 index 0000000..3f6b2d0
4340 --- /dev/null
4341 +++ b/fs/unionfs/file.c
4342 @@ -0,0 +1,250 @@
4343 +/*
4344 + * Copyright (c) 2003-2007 Erez Zadok
4345 + * Copyright (c) 2003-2006 Charles P. Wright
4346 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4347 + * Copyright (c) 2005-2006 Junjiro Okajima
4348 + * Copyright (c) 2005 Arun M. Krishnakumar
4349 + * Copyright (c) 2004-2006 David P. Quigley
4350 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4351 + * Copyright (c) 2003 Puja Gupta
4352 + * Copyright (c) 2003 Harikesavan Krishnan
4353 + * Copyright (c) 2003-2007 Stony Brook University
4354 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4355 + *
4356 + * This program is free software; you can redistribute it and/or modify
4357 + * it under the terms of the GNU General Public License version 2 as
4358 + * published by the Free Software Foundation.
4359 + */
4360 +
4361 +#include "union.h"
4362 +
4363 +static ssize_t unionfs_read(struct file *file, char __user *buf,
4364 + size_t count, loff_t *ppos)
4365 +{
4366 + int err;
4367 +
4368 + unionfs_read_lock(file->f_path.dentry->d_sb);
4369 + if ((err = unionfs_file_revalidate(file, 0)))
4370 + goto out;
4371 + unionfs_check_file(file);
4372 +
4373 + err = do_sync_read(file, buf, count, ppos);
4374 +
4375 + if (err >= 0)
4376 + touch_atime(unionfs_lower_mnt(file->f_path.dentry),
4377 + unionfs_lower_dentry(file->f_path.dentry));
4378 +
4379 +out:
4380 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4381 + unionfs_check_file(file);
4382 + return err;
4383 +}
4384 +
4385 +static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
4386 + unsigned long nr_segs, loff_t pos)
4387 +{
4388 + int err = 0;
4389 + struct file *file = iocb->ki_filp;
4390 +
4391 + unionfs_read_lock(file->f_path.dentry->d_sb);
4392 + if ((err = unionfs_file_revalidate(file, 0)))
4393 + goto out;
4394 + unionfs_check_file(file);
4395 +
4396 + err = generic_file_aio_read(iocb, iov, nr_segs, pos);
4397 +
4398 + if (err == -EIOCBQUEUED)
4399 + err = wait_on_sync_kiocb(iocb);
4400 +
4401 + if (err >= 0)
4402 + touch_atime(unionfs_lower_mnt(file->f_path.dentry),
4403 + unionfs_lower_dentry(file->f_path.dentry));
4404 +
4405 +out:
4406 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4407 + unionfs_check_file(file);
4408 + return err;
4409 +}
4410 +
4411 +static ssize_t unionfs_write(struct file *file, const char __user *buf,
4412 + size_t count, loff_t *ppos)
4413 +{
4414 + int err = 0;
4415 +
4416 + unionfs_read_lock(file->f_path.dentry->d_sb);
4417 + if ((err = unionfs_file_revalidate(file, 1)))
4418 + goto out;
4419 + unionfs_check_file(file);
4420 +
4421 + err = do_sync_write(file, buf, count, ppos);
4422 + /* update our inode times upon a successful lower write */
4423 + if (err >= 0) {
4424 + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
4425 + unionfs_check_file(file);
4426 + }
4427 +
4428 +out:
4429 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4430 + return err;
4431 +}
4432 +
4433 +static int unionfs_file_readdir(struct file *file, void *dirent,
4434 + filldir_t filldir)
4435 +{
4436 + return -ENOTDIR;
4437 +}
4438 +
4439 +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4440 +{
4441 + int err = 0;
4442 + int willwrite;
4443 + struct file *lower_file;
4444 +
4445 + unionfs_read_lock(file->f_path.dentry->d_sb);
4446 +
4447 + /* This might be deferred to mmap's writepage */
4448 + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4449 + if ((err = unionfs_file_revalidate(file, willwrite)))
4450 + goto out;
4451 + unionfs_check_file(file);
4452 +
4453 + /*
4454 + * File systems which do not implement ->writepage may use
4455 + * generic_file_readonly_mmap as their ->mmap op. If you call
4456 + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4457 + * But we cannot call the lower ->mmap op, so we can't tell that
4458 + * writeable mappings won't work. Therefore, our only choice is to
4459 + * check if the lower file system supports the ->writepage, and if
4460 + * not, return EINVAL (the same error that
4461 + * generic_file_readonly_mmap returns in that case).
4462 + */
4463 + lower_file = unionfs_lower_file(file);
4464 + if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4465 + err = -EINVAL;
4466 + printk("unionfs: branch %d file system does not support "
4467 + "writeable mmap\n", fbstart(file));
4468 + } else {
4469 + err = generic_file_mmap(file, vma);
4470 + if (err)
4471 + printk("unionfs: generic_file_mmap failed %d\n", err);
4472 + }
4473 +
4474 +out:
4475 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4476 + if (!err) {
4477 + /* copyup could cause parent dir times to change */
4478 + unionfs_copy_attr_times(file->f_path.dentry->d_parent->d_inode);
4479 + unionfs_check_file(file);
4480 + unionfs_check_dentry(file->f_path.dentry->d_parent);
4481 + }
4482 + return err;
4483 +}
4484 +
4485 +int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync)
4486 +{
4487 + int bindex, bstart, bend;
4488 + struct file *lower_file;
4489 + struct dentry *lower_dentry;
4490 + struct inode *lower_inode, *inode;
4491 + int err = -EINVAL;
4492 +
4493 + unionfs_read_lock(file->f_path.dentry->d_sb);
4494 + if ((err = unionfs_file_revalidate(file, 1)))
4495 + goto out;
4496 + unionfs_check_file(file);
4497 +
4498 + bstart = fbstart(file);
4499 + bend = fbend(file);
4500 + if (bstart < 0 || bend < 0)
4501 + goto out;
4502 +
4503 + inode = dentry->d_inode;
4504 + if (!inode) {
4505 + printk(KERN_ERR
4506 + "unionfs: null lower inode in unionfs_fsync\n");
4507 + goto out;
4508 + }
4509 + for (bindex = bstart; bindex <= bend; bindex++) {
4510 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4511 + if (!lower_inode || !lower_inode->i_fop->fsync)
4512 + continue;
4513 + lower_file = unionfs_lower_file_idx(file, bindex);
4514 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4515 + mutex_lock(&lower_inode->i_mutex);
4516 + err = lower_inode->i_fop->fsync(lower_file,
4517 + lower_dentry,
4518 + datasync);
4519 + mutex_unlock(&lower_inode->i_mutex);
4520 + if (err)
4521 + goto out;
4522 + }
4523 +
4524 + unionfs_copy_attr_times(inode);
4525 +
4526 +out:
4527 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4528 + unionfs_check_file(file);
4529 + return err;
4530 +}
4531 +
4532 +int unionfs_fasync(int fd, struct file *file, int flag)
4533 +{
4534 + int bindex, bstart, bend;
4535 + struct file *lower_file;
4536 + struct dentry *dentry;
4537 + struct inode *lower_inode, *inode;
4538 + int err = 0;
4539 +
4540 + unionfs_read_lock(file->f_path.dentry->d_sb);
4541 + if ((err = unionfs_file_revalidate(file, 1)))
4542 + goto out;
4543 + unionfs_check_file(file);
4544 +
4545 + bstart = fbstart(file);
4546 + bend = fbend(file);
4547 + if (bstart < 0 || bend < 0)
4548 + goto out;
4549 +
4550 + dentry = file->f_path.dentry;
4551 + inode = dentry->d_inode;
4552 + if (!inode) {
4553 + printk(KERN_ERR
4554 + "unionfs: null lower inode in unionfs_fasync\n");
4555 + goto out;
4556 + }
4557 + for (bindex = bstart; bindex <= bend; bindex++) {
4558 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4559 + if (!lower_inode || !lower_inode->i_fop->fasync)
4560 + continue;
4561 + lower_file = unionfs_lower_file_idx(file, bindex);
4562 + mutex_lock(&lower_inode->i_mutex);
4563 + err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4564 + mutex_unlock(&lower_inode->i_mutex);
4565 + if (err)
4566 + goto out;
4567 + }
4568 +
4569 + unionfs_copy_attr_times(inode);
4570 +
4571 +out:
4572 + unionfs_read_unlock(file->f_path.dentry->d_sb);
4573 + unionfs_check_file(file);
4574 + return err;
4575 +}
4576 +
4577 +struct file_operations unionfs_main_fops = {
4578 + .llseek = generic_file_llseek,
4579 + .read = unionfs_read,
4580 + .aio_read = unionfs_aio_read,
4581 + .write = unionfs_write,
4582 + .aio_write = generic_file_aio_write,
4583 + .readdir = unionfs_file_readdir,
4584 + .unlocked_ioctl = unionfs_ioctl,
4585 + .mmap = unionfs_mmap,
4586 + .open = unionfs_open,
4587 + .flush = unionfs_flush,
4588 + .release = unionfs_file_release,
4589 + .fsync = unionfs_fsync,
4590 + .fasync = unionfs_fasync,
4591 + .sendfile = generic_file_sendfile,
4592 +};
4593 diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4594 new file mode 100644
4595 index 0000000..c772fbd
4596 --- /dev/null
4597 +++ b/fs/unionfs/inode.c
4598 @@ -0,0 +1,1219 @@
4599 +/*
4600 + * Copyright (c) 2003-2007 Erez Zadok
4601 + * Copyright (c) 2003-2006 Charles P. Wright
4602 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4603 + * Copyright (c) 2005-2006 Junjiro Okajima
4604 + * Copyright (c) 2005 Arun M. Krishnakumar
4605 + * Copyright (c) 2004-2006 David P. Quigley
4606 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4607 + * Copyright (c) 2003 Puja Gupta
4608 + * Copyright (c) 2003 Harikesavan Krishnan
4609 + * Copyright (c) 2003-2007 Stony Brook University
4610 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4611 + *
4612 + * This program is free software; you can redistribute it and/or modify
4613 + * it under the terms of the GNU General Public License version 2 as
4614 + * published by the Free Software Foundation.
4615 + */
4616 +
4617 +#include "union.h"
4618 +
4619 +static int unionfs_create(struct inode *parent, struct dentry *dentry,
4620 + int mode, struct nameidata *nd)
4621 +{
4622 + int err = 0;
4623 + struct dentry *lower_dentry = NULL;
4624 + struct dentry *wh_dentry = NULL;
4625 + struct dentry *new_lower_dentry;
4626 + struct dentry *lower_parent_dentry = NULL;
4627 + int bindex = 0, bstart;
4628 + char *name = NULL; /* whiteout name; kfree()d at out: */
4629 + int valid = 0;
4630 +
4631 + unionfs_read_lock(dentry->d_sb);
4632 + unionfs_lock_dentry(dentry);
4633 +
4634 + unionfs_lock_dentry(dentry->d_parent);
4635 + valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, 0);
4636 + unionfs_unlock_dentry(dentry->d_parent);
4637 + if (!valid) {
4638 + err = -ESTALE; /* same as what real_lookup does */
4639 + goto out;
4640 + }
4641 + valid = __unionfs_d_revalidate_chain(dentry, nd, 0);
4642 + /*
4643 + * It's only a bug if this dentry was not negative and couldn't be
4644 + * revalidated (shouldn't happen).
4645 + */
4646 + BUG_ON(!valid && dentry->d_inode);
4647 +
4648 + /* We start out in the leftmost branch. */
4649 + bstart = dbstart(dentry);
4650 + lower_dentry = unionfs_lower_dentry(dentry);
4651 +
4652 + /*
4653 + * check if whiteout exists in this branch, i.e. lookup .wh.foo
4654 + * first.
4655 + */
4656 + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
4657 + if (IS_ERR(name)) {
4658 + err = PTR_ERR(name);
4659 + name = NULL; /* never hand an ERR_PTR to kfree() at out: */
4660 + goto out;
4661 + }
4662 + wh_dentry = lookup_one_len(name, lower_dentry->d_parent,
4663 + dentry->d_name.len + UNIONFS_WHLEN);
4664 + if (IS_ERR(wh_dentry)) {
4665 + err = PTR_ERR(wh_dentry);
4666 + wh_dentry = NULL; /* so the dput() at out: is a no-op */
4667 + goto out;
4668 + }
4669 +
4670 + if (wh_dentry->d_inode) {
4671 + /*
4672 + * .wh.foo has been found.
4673 + * First truncate it and then rename it to foo (hence having
4674 + * the same overall effect as a normal create).
4675 + */
4676 + struct dentry *lower_dir_dentry;
4677 + struct iattr newattrs;
4678 +
4679 + mutex_lock(&wh_dentry->d_inode->i_mutex);
4680 + newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME
4681 + | ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE
4682 + | ATTR_KILL_SUID | ATTR_KILL_SGID;
4683 +
4684 + newattrs.ia_mode = mode & ~current->fs->umask;
4685 + newattrs.ia_uid = current->fsuid;
4686 + newattrs.ia_gid = current->fsgid;
4687 +
4688 + if (wh_dentry->d_inode->i_size != 0) {
4689 + newattrs.ia_valid |= ATTR_SIZE;
4690 + newattrs.ia_size = 0;
4691 + }
4692 +
4693 + err = notify_change(wh_dentry, &newattrs);
4694 +
4695 + mutex_unlock(&wh_dentry->d_inode->i_mutex);
4696 +
4697 + if (err)
4698 + printk(KERN_WARNING "unionfs: %s:%d: notify_change "
4699 + "failed: %d, ignoring..\n",
4700 + __FILE__, __LINE__, err);
4701 +
4702 + new_lower_dentry = unionfs_lower_dentry(dentry);
4703 + dget(new_lower_dentry);
4704 +
4705 + lower_dir_dentry = dget_parent(wh_dentry);
4706 + lock_rename(lower_dir_dentry, lower_dir_dentry); /* same dir both sides */
4707 +
4708 + if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
4709 + err = vfs_rename(lower_dir_dentry->d_inode,
4710 + wh_dentry,
4711 + lower_dir_dentry->d_inode,
4712 + new_lower_dentry);
4713 + }
4714 + if (!err) {
4715 + fsstack_copy_attr_times(parent,
4716 + new_lower_dentry->d_parent->
4717 + d_inode);
4718 + fsstack_copy_inode_size(parent,
4719 + new_lower_dentry->d_parent->
4720 + d_inode);
4721 + parent->i_nlink = unionfs_get_nlinks(parent);
4722 + }
4723 +
4724 + unlock_rename(lower_dir_dentry, lower_dir_dentry);
4725 + dput(lower_dir_dentry);
4726 +
4727 + dput(new_lower_dentry);
4728 +
4729 + if (err) {
4730 + /* exit if the error returned was NOT -EROFS */
4731 + if (!IS_COPYUP_ERR(err))
4732 + goto out;
4733 + /*
4734 + * We were not able to create the file in this
4735 + * branch, so we try to create it one branch to the
4736 + * left
4737 + */
4738 + bstart--;
4739 + } else {
4740 + /*
4741 + * reset the unionfs dentry to point to the .wh.foo
4742 + * entry.
4743 + */
4744 +
4745 + /* Discard any old reference. */
4746 + dput(unionfs_lower_dentry(dentry));
4747 +
4748 + /* Trade one reference to another. */
4749 + unionfs_set_lower_dentry_idx(dentry, bstart,
4750 + wh_dentry);
4751 + wh_dentry = NULL;
4752 +
4753 + /*
4754 + * Only INTERPOSE_LOOKUP can return a value other
4755 + * than 0 on err.
4756 + */
4757 + err = PTR_ERR(unionfs_interpose(dentry,
4758 + parent->i_sb, 0));
4759 + goto out;
4760 + }
4761 + }
4762 +
4763 + for (bindex = bstart; bindex >= 0; bindex--) {
4764 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4765 + if (!lower_dentry) {
4766 + /*
4767 + * if lower_dentry is NULL, create the entire
4768 + * dentry directory structure in branch 'bindex'.
4769 + * lower_dentry will NOT be null when bindex == bstart
4770 + * because lookup passed as a negative unionfs dentry
4771 + * pointing to a lone negative underlying dentry.
4772 + */
4773 + lower_dentry = create_parents(parent, dentry,
4774 + dentry->d_name.name,
4775 + bindex);
4776 + if (!lower_dentry || IS_ERR(lower_dentry)) {
4777 + if (IS_ERR(lower_dentry))
4778 + err = PTR_ERR(lower_dentry);
4779 + continue;
4780 + }
4781 + }
4782 +
4783 + lower_parent_dentry = lock_parent(lower_dentry);
4784 + if (IS_ERR(lower_parent_dentry)) {
4785 + err = PTR_ERR(lower_parent_dentry);
4786 + goto out;
4787 + }
4788 + /* We shouldn't create things in a read-only branch. */
4789 + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
4790 + err = vfs_create(lower_parent_dentry->d_inode,
4791 + lower_dentry, mode, nd);
4792 +
4793 + if (err || !lower_dentry->d_inode) {
4794 + unlock_dir(lower_parent_dentry);
4795 +
4796 + /* break out of for loop if the error wasn't -EROFS */
4797 + if (!IS_COPYUP_ERR(err))
4798 + break;
4799 + } else {
4800 + /*
4801 + * Only INTERPOSE_LOOKUP can return a value other
4802 + * than 0 on err.
4803 + */
4804 + err = PTR_ERR(unionfs_interpose(dentry,
4805 + parent->i_sb, 0));
4806 + if (!err) {
4807 + unionfs_copy_attr_times(parent);
4808 + fsstack_copy_inode_size(parent,
4809 + lower_parent_dentry->
4810 + d_inode);
4811 + /* update no. of links on parent directory */
4812 + parent->i_nlink = unionfs_get_nlinks(parent);
4813 + }
4814 + unlock_dir(lower_parent_dentry);
4815 + break;
4816 + }
4817 + }
4818 +
4819 +out:
4820 + dput(wh_dentry);
4821 + kfree(name);
4822 +
4823 + if (!err)
4824 + unionfs_inherit_mnt(dentry);
4825 + unionfs_unlock_dentry(dentry);
4826 + unionfs_read_unlock(dentry->d_sb);
4827 +
4828 + unionfs_check_inode(parent);
4829 + if (!err)
4830 + unionfs_check_dentry(dentry->d_parent);
4831 + unionfs_check_dentry(dentry);
4832 + return err;
4833 +}
4834 +
4835 +/*
4836 + * unionfs_lookup is the only special function which takes a dentry, yet we
4837 + * do NOT want to call __unionfs_d_revalidate_chain because by definition,
4838 + * we don't have a valid dentry here yet. Returns unionfs_lookup_backend()'s result.
4839 + */
4840 +static struct dentry *unionfs_lookup(struct inode *parent,
4841 + struct dentry *dentry,
4842 + struct nameidata *nd)
4843 +{
4844 + struct path path_save; /* snapshot of nd->dentry/nd->mnt; only touched when nd != NULL */
4845 + struct dentry *ret;
4846 +
4847 + unionfs_read_lock(dentry->d_sb); /* released just before returning */
4848 +
4849 + /* save the dentry & vfsmnt from namei */
4850 + if (nd) {
4851 + path_save.dentry = nd->dentry;
4852 + path_save.mnt = nd->mnt;
4853 + }
4854 +
4855 + /* The locking is done by unionfs_lookup_backend. */
4856 + ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
4857 +
4858 + /* restore the dentry & vfsmnt in namei */
4859 + if (nd) {
4860 + nd->dentry = path_save.dentry;
4861 + nd->mnt = path_save.mnt;
4862 + }
4863 + if (!IS_ERR(ret)) {
4864 + if (ret)
4865 + dentry = ret; /* backend handed back a different dentry; use it below */
4866 + /* parent times may have changed */
4867 + unionfs_copy_attr_times(dentry->d_parent->d_inode);
4868 + }
4869 +
4870 + unionfs_check_inode(parent);
4871 + unionfs_check_dentry(dentry);
4872 + unionfs_check_dentry(dentry->d_parent);
4873 + unionfs_read_unlock(dentry->d_sb);
4874 +
4875 + return ret; /* NULL, an ERR_PTR, or the replacement dentry */
4876 +}
4877 +/* Hard-link old_dentry as new_dentry in dir; returns 0 or a negative errno. */
4878 +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
4879 + struct dentry *new_dentry)
4880 +{
4881 + int err = 0;
4882 + struct dentry *lower_old_dentry = NULL;
4883 + struct dentry *lower_new_dentry = NULL;
4884 + struct dentry *lower_dir_dentry = NULL;
4885 + struct dentry *whiteout_dentry;
4886 + char *name = NULL; /* whiteout name; kfree()d at out, so keep it NULL or valid */
4887 +
4888 + unionfs_read_lock(old_dentry->d_sb);
4889 + unionfs_double_lock_dentry(new_dentry, old_dentry);
4890 +
4891 + if (!__unionfs_d_revalidate_chain(old_dentry, NULL, 0)) {
4892 + err = -ESTALE;
4893 + goto out;
4894 + }
4895 + if (new_dentry->d_inode &&
4896 + !__unionfs_d_revalidate_chain(new_dentry, NULL, 0)) {
4897 + err = -ESTALE;
4898 + goto out;
4899 + }
4900 +
4901 + lower_new_dentry = unionfs_lower_dentry(new_dentry);
4902 +
4903 + /*
4904 + * check if whiteout exists in the branch of new dentry, i.e. lookup
4905 + * .wh.foo first. If present, delete it
4906 + */
4907 + name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
4908 + if (IS_ERR(name)) {
4909 + err = PTR_ERR(name);
4910 + name = NULL; /* never hand an ERR_PTR to kfree() at out */
4911 + goto out;
4912 + }
4913 + whiteout_dentry = lookup_one_len(name, lower_new_dentry->d_parent,
4914 + new_dentry->d_name.len +
4915 + UNIONFS_WHLEN);
4916 + if (IS_ERR(whiteout_dentry)) {
4917 + err = PTR_ERR(whiteout_dentry);
4918 + goto out;
4919 + }
4920 +
4921 + if (!whiteout_dentry->d_inode) {
4922 + dput(whiteout_dentry);
4923 + whiteout_dentry = NULL;
4924 + } else {
4925 + /* found a .wh.foo entry, unlink it and then call vfs_link() */
4926 + lower_dir_dentry = lock_parent(whiteout_dentry);
4927 + err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
4928 + if (!err)
4929 + err = vfs_unlink(lower_dir_dentry->d_inode,
4930 + whiteout_dentry);
4931 +
4932 + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
4933 + dir->i_nlink = unionfs_get_nlinks(dir);
4934 + unlock_dir(lower_dir_dentry);
4935 + lower_dir_dentry = NULL;
4936 + dput(whiteout_dentry);
4937 + if (err)
4938 + goto out;
4939 + }
4940 +
4941 + if (dbstart(old_dentry) != dbstart(new_dentry)) {
4942 + lower_new_dentry = create_parents(dir, new_dentry,
4943 + new_dentry->d_name.name,
4944 + dbstart(old_dentry));
4945 + err = PTR_ERR(lower_new_dentry); /* only meaningful for an ERR_PTR; reassigned below otherwise */
4946 + if (IS_COPYUP_ERR(err))
4947 + goto docopyup;
4948 + if (!lower_new_dentry || IS_ERR(lower_new_dentry))
4949 + goto out;
4950 + }
4951 + lower_new_dentry = unionfs_lower_dentry(new_dentry);
4952 + lower_old_dentry = unionfs_lower_dentry(old_dentry);
4953 +
4954 + BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
4955 + lower_dir_dentry = lock_parent(lower_new_dentry);
4956 + if (!(err = is_robranch(old_dentry)))
4957 + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
4958 + lower_new_dentry);
4959 + unlock_dir(lower_dir_dentry);
4960 +
4961 +docopyup:
4962 + if (IS_COPYUP_ERR(err)) {
4963 + int old_bstart = dbstart(old_dentry);
4964 + int bindex;
4965 +
4966 + for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
4967 + err = copyup_dentry(old_dentry->d_parent->d_inode,
4968 + old_dentry, old_bstart,
4969 + bindex, old_dentry->d_name.name,
4970 + old_dentry->d_name.len, NULL,
4971 + old_dentry->d_inode->i_size);
4972 + if (!err) {
4973 + lower_new_dentry =
4974 + create_parents(dir, new_dentry,
4975 + new_dentry->d_name.name, bindex);
4976 + if (IS_ERR(lower_new_dentry)) {
4977 + /* don't lock or dereference an ERR_PTR */
4978 + err = PTR_ERR(lower_new_dentry);
4979 + goto out;
4980 + }
4981 + lower_old_dentry =
4982 + unionfs_lower_dentry(old_dentry);
4983 + lower_dir_dentry = lock_parent(lower_new_dentry);
4984 + err = vfs_link(lower_old_dentry,
4985 + lower_dir_dentry->d_inode, lower_new_dentry);
4986 + unlock_dir(lower_dir_dentry);
4987 + goto check_link;
4988 + }
4989 + }
4990 + goto out;
4991 + }
4992 +
4993 +check_link:
4994 + if (err || !lower_new_dentry->d_inode)
4995 + goto out;
4996 +
4997 + /* It's a hard link, so use the same inode */
4998 + new_dentry->d_inode = igrab(old_dentry->d_inode);
4999 + d_instantiate(new_dentry, new_dentry->d_inode);
5000 + unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5001 + fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5002 +
5003 + /* propagate number of hard-links */
5004 + old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5005 + /* new dentry's ctime may have changed due to hard-link counts */
5006 + unionfs_copy_attr_times(new_dentry->d_inode);
5007 +
5008 +out:
5009 + if (!new_dentry->d_inode)
5010 + d_drop(new_dentry); /* nothing was instantiated: unhash the negative dentry */
5011 +
5012 + kfree(name);
5013 + if (!err)
5014 + unionfs_inherit_mnt(new_dentry);
5015 +
5016 + unionfs_unlock_dentry(new_dentry);
5017 + unionfs_unlock_dentry(old_dentry);
5018 +
5019 + unionfs_check_inode(dir);
5020 + unionfs_check_dentry(new_dentry);
5021 + unionfs_check_dentry(old_dentry);
5022 + unionfs_read_unlock(old_dentry->d_sb);
5023 +
5024 + return err;
5025 +}
5025 +/* Create symlink dentry -> symname in dir; returns 0 or a negative errno. */
5026 +static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5027 + const char *symname)
5028 +{
5029 + int err = 0;
5030 + struct dentry *lower_dentry = NULL;
5031 + struct dentry *whiteout_dentry = NULL;
5032 + struct dentry *lower_dir_dentry = NULL;
5033 + umode_t mode;
5034 + int bindex = 0, bstart;
5035 + char *name = NULL; /* whiteout name; kfree()d at out, so keep it NULL or valid */
5036 +
5037 + unionfs_read_lock(dentry->d_sb);
5038 + unionfs_lock_dentry(dentry);
5039 +
5040 + if (dentry->d_inode &&
5041 + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5042 + err = -ESTALE;
5043 + goto out;
5044 + }
5045 +
5046 + /* We start out in the leftmost branch. */
5047 + bstart = dbstart(dentry);
5048 +
5049 + lower_dentry = unionfs_lower_dentry(dentry);
5050 +
5051 + /*
5052 + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5053 + * first. If present, delete it
5054 + */
5055 + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5056 + if (IS_ERR(name)) {
5057 + err = PTR_ERR(name);
5058 + name = NULL; /* never hand an ERR_PTR to kfree() at out */
5059 + goto out;
5060 + }
5061 + whiteout_dentry =
5062 + lookup_one_len(name, lower_dentry->d_parent,
5063 + dentry->d_name.len + UNIONFS_WHLEN);
5064 + if (IS_ERR(whiteout_dentry)) {
5065 + err = PTR_ERR(whiteout_dentry);
5066 + goto out;
5067 + }
5068 +
5069 + if (!whiteout_dentry->d_inode) {
5070 + dput(whiteout_dentry);
5071 + whiteout_dentry = NULL;
5072 + } else {
5073 + /*
5074 + * found a .wh.foo entry, unlink it and then call
5075 + * vfs_symlink().
5076 + */
5077 + lower_dir_dentry = lock_parent(whiteout_dentry);
5078 +
5079 + if (!(err = is_robranch_super(dentry->d_sb, bstart)))
5080 + err = vfs_unlink(lower_dir_dentry->d_inode,
5081 + whiteout_dentry);
5082 + dput(whiteout_dentry);
5083 +
5084 + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5085 + /* propagate number of hard-links */
5086 + dir->i_nlink = unionfs_get_nlinks(dir);
5087 +
5088 + unlock_dir(lower_dir_dentry);
5089 +
5090 + if (err) {
5091 + /* exit if the error returned was NOT -EROFS */
5092 + if (!IS_COPYUP_ERR(err))
5093 + goto out;
5094 + /*
5095 + * should now try to create the symlink in another
5096 + * branch.
5097 + */
5098 + bstart--;
5099 + }
5100 + }
5101 +
5102 + /*
5103 + * deleted whiteout if it was present, now do a normal vfs_symlink()
5104 + * with possible recursive directory creation
5105 + */
5106 + for (bindex = bstart; bindex >= 0; bindex--) {
5107 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5108 + if (!lower_dentry) {
5109 + /*
5110 + * if lower_dentry is NULL, create the entire
5111 + * dentry directory structure in branch 'bindex'.
5112 + * lower_dentry will NOT be null when bindex ==
5113 + * bstart because lookup passed as a negative
5114 + * unionfs dentry pointing to a lone negative
5115 + * underlying dentry
5116 + */
5117 + lower_dentry = create_parents(dir, dentry,
5118 + dentry->d_name.name,
5119 + bindex);
5120 + if (!lower_dentry || IS_ERR(lower_dentry)) {
5121 + if (IS_ERR(lower_dentry))
5122 + err = PTR_ERR(lower_dentry);
5123 +
5124 + printk(KERN_DEBUG "unionfs: lower dentry "
5125 + "NULL (or error) for bindex = %d\n",
5126 + bindex);
5127 + continue;
5128 + }
5129 + }
5130 +
5131 + lower_dir_dentry = lock_parent(lower_dentry);
5132 +
5133 + if (!(err = is_robranch_super(dentry->d_sb, bindex))) {
5134 + mode = S_IALLUGO;
5135 + err =
5136 + vfs_symlink(lower_dir_dentry->d_inode,
5137 + lower_dentry, symname, mode);
5138 + }
5139 + unlock_dir(lower_dir_dentry); /* NOTE(review): lower_dir_dentry->d_inode is still read below */
5140 +
5141 + if (err || !lower_dentry->d_inode) {
5142 + /*
5143 + * break out of for loop if error returned was NOT
5144 + * -EROFS.
5145 + */
5146 + if (!IS_COPYUP_ERR(err))
5147 + break;
5148 + } else {
5149 + /*
5150 + * Only INTERPOSE_LOOKUP can return a value other
5151 + * than 0 on err.
5152 + */
5153 + err = PTR_ERR(unionfs_interpose(dentry,
5154 + dir->i_sb, 0));
5155 + if (!err) {
5156 + fsstack_copy_attr_times(dir,
5157 + lower_dir_dentry->
5158 + d_inode);
5159 + fsstack_copy_inode_size(dir,
5160 + lower_dir_dentry->
5161 + d_inode);
5162 + /*
5163 + * update number of links on parent
5164 + * directory.
5165 + */
5166 + dir->i_nlink = unionfs_get_nlinks(dir);
5167 + }
5168 + break;
5169 + }
5170 + }
5171 +
5172 +out:
5173 + if (!dentry->d_inode)
5174 + d_drop(dentry); /* nothing was instantiated: unhash the negative dentry */
5175 +
5176 + kfree(name);
5177 + if (!err)
5178 + unionfs_inherit_mnt(dentry);
5179 + unionfs_unlock_dentry(dentry);
5180 +
5181 + unionfs_check_inode(dir);
5182 + unionfs_check_dentry(dentry);
5183 + unionfs_read_unlock(dentry->d_sb);
5184 +
5185 + return err;
5186 +}
5187 +/* Create directory dentry (mode) under parent; returns 0 or a negative errno. */
5188 +static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
5189 +{
5190 + int err = 0;
5191 + struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5192 + struct dentry *lower_parent_dentry = NULL;
5193 + int bindex = 0, bstart;
5194 + char *name = NULL; /* whiteout name; kfree()d at out, so keep it NULL or valid */
5195 + int whiteout_unlinked = 0; /* NOTE(review): assigned below but never read here */
5196 + struct sioq_args args;
5197 +
5198 + unionfs_read_lock(dentry->d_sb);
5199 + unionfs_lock_dentry(dentry);
5200 +
5201 + if (dentry->d_inode &&
5202 + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5203 + err = -ESTALE;
5204 + goto out;
5205 + }
5206 +
5207 + bstart = dbstart(dentry);
5208 +
5209 + lower_dentry = unionfs_lower_dentry(dentry);
5210 +
5211 + /*
5212 + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5213 + * first.
5214 + */
5215 + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5216 + if (IS_ERR(name)) {
5217 + err = PTR_ERR(name);
5218 + name = NULL; /* never hand an ERR_PTR to kfree() at out */
5219 + goto out;
5220 + }
5221 + whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5222 + dentry->d_name.len + UNIONFS_WHLEN);
5223 + if (IS_ERR(whiteout_dentry)) {
5224 + err = PTR_ERR(whiteout_dentry);
5225 + goto out;
5226 + }
5227 +
5228 + if (!whiteout_dentry->d_inode) {
5229 + dput(whiteout_dentry);
5230 + whiteout_dentry = NULL;
5231 + } else {
5232 + lower_parent_dentry = lock_parent(whiteout_dentry);
5233 +
5234 + /* found a .wh.foo entry, remove it then do vfs_mkdir */
5235 + if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
5236 + args.unlink.parent = lower_parent_dentry->d_inode;
5237 + args.unlink.dentry = whiteout_dentry;
5238 + run_sioq(__unionfs_unlink, &args);
5239 + err = args.err;
5240 + }
5241 + dput(whiteout_dentry);
5242 +
5243 + unlock_dir(lower_parent_dentry);
5244 +
5245 + if (err) {
5246 + /* exit if the error returned was NOT -EROFS */
5247 + if (!IS_COPYUP_ERR(err))
5248 + goto out;
5249 + bstart--;
5250 + } else
5251 + whiteout_unlinked = 1;
5252 + }
5253 +
5254 + for (bindex = bstart; bindex >= 0; bindex--) {
5255 + int i, bend = dbend(dentry);
5256 +
5257 + err = is_robranch_super(dentry->d_sb, bindex);
5258 + if (err) /* keep the errno so an all-read-only walk doesn't return 0 */
5259 + continue;
5260 +
5261 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5262 + if (!lower_dentry) {
5263 + lower_dentry = create_parents(parent, dentry,
5264 + dentry->d_name.name, bindex);
5265 + if (!lower_dentry || IS_ERR(lower_dentry)) {
5266 + if (IS_ERR(lower_dentry))
5267 + err = PTR_ERR(lower_dentry);
5268 + printk(KERN_DEBUG "unionfs: lower dentry "
5269 + " NULL for bindex = %d\n", bindex);
5270 + continue;
5271 + }
5272 + }
5273 + lower_parent_dentry = lock_parent(lower_dentry);
5274 +
5275 + if (IS_ERR(lower_parent_dentry)) {
5276 + err = PTR_ERR(lower_parent_dentry);
5277 + goto out;
5278 + }
5279 +
5280 + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5281 + mode);
5282 +
5283 + unlock_dir(lower_parent_dentry);
5284 +
5285 + /* did the mkdir succeed? */
5286 + if (err)
5287 + break;
5288 +
5289 + for (i = bindex + 1; i <= bend; i++) { /* bend is inclusive: drop that slot too */
5290 + if (unionfs_lower_dentry_idx(dentry, i)) {
5291 + dput(unionfs_lower_dentry_idx(dentry, i));
5292 + unionfs_set_lower_dentry_idx(dentry, i, NULL);
5293 + }
5294 + }
5295 + set_dbend(dentry, bindex);
5296 +
5297 + /*
5298 + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5299 + * err.
5300 + */
5301 + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5302 + if (err)
5303 + break; /* report this error; don't let make_dir_opaque() mask it */
5304 +
5305 + unionfs_copy_attr_times(parent);
5306 + fsstack_copy_inode_size(parent, lower_parent_dentry->d_inode);
5307 + /* update number of links on parent directory */
5308 + parent->i_nlink = unionfs_get_nlinks(parent);
5309 +
5310 + /* create the .wh.__dir_opaque marker for the new directory */
5311 + err = make_dir_opaque(dentry, dbstart(dentry));
5312 + if (err) {
5313 + printk(KERN_ERR "unionfs: mkdir: error creating "
5314 + ".wh.__dir_opaque: %d\n", err);
5315 + goto out;
5316 + }
5317 +
5318 + /* we are done! */
5319 + break;
5320 + }
5321 +
5322 +out:
5323 + if (!dentry->d_inode)
5324 + d_drop(dentry); /* nothing was instantiated: unhash the negative dentry */
5325 +
5326 + kfree(name);
5327 +
5328 + if (!err)
5329 + unionfs_copy_attr_times(dentry->d_inode);
5330 + unionfs_unlock_dentry(dentry);
5331 + unionfs_check_inode(parent);
5332 + unionfs_check_dentry(dentry);
5333 + unionfs_read_unlock(dentry->d_sb);
5334 +
5335 + return err;
5336 +}
5337 +/* Create special file dentry (mode, dev) in dir; returns 0 or a negative errno. */
5338 +static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5339 + dev_t dev)
5340 +{
5341 + int err = 0;
5342 + struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5343 + struct dentry *lower_parent_dentry = NULL;
5344 + int bindex = 0, bstart;
5345 + char *name = NULL; /* whiteout name; kfree()d at out, so keep it NULL or valid */
5346 + int whiteout_unlinked = 0; /* NOTE(review): assigned below but never read here */
5347 +
5348 + unionfs_read_lock(dentry->d_sb);
5349 + unionfs_lock_dentry(dentry);
5350 +
5351 + if (dentry->d_inode &&
5352 + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5353 + err = -ESTALE;
5354 + goto out;
5355 + }
5356 +
5357 + bstart = dbstart(dentry);
5358 +
5359 + lower_dentry = unionfs_lower_dentry(dentry);
5360 +
5361 + /*
5362 + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5363 + * first.
5364 + */
5365 + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5366 + if (IS_ERR(name)) {
5367 + err = PTR_ERR(name);
5368 + name = NULL; /* never hand an ERR_PTR to kfree() at out */
5369 + goto out;
5370 + }
5371 + whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5372 + dentry->d_name.len + UNIONFS_WHLEN);
5373 + if (IS_ERR(whiteout_dentry)) {
5374 + err = PTR_ERR(whiteout_dentry);
5375 + goto out;
5376 + }
5377 +
5378 + if (!whiteout_dentry->d_inode) {
5379 + dput(whiteout_dentry);
5380 + whiteout_dentry = NULL;
5381 + } else {
5382 + /* found .wh.foo, unlink it */
5383 + lower_parent_dentry = lock_parent(whiteout_dentry);
5384 +
5385 + /* found a .wh.foo entry, remove it then do vfs_mknod */
5386 + if (!(err = is_robranch_super(dentry->d_sb, bstart)))
5387 + err = vfs_unlink(lower_parent_dentry->d_inode,
5388 + whiteout_dentry);
5389 + dput(whiteout_dentry);
5390 +
5391 + unlock_dir(lower_parent_dentry);
5392 +
5393 + if (err) {
5394 + if (!IS_COPYUP_ERR(err))
5395 + goto out;
5396 + bstart--;
5397 + } else
5398 + whiteout_unlinked = 1;
5399 + }
5400 +
5401 + for (bindex = bstart; bindex >= 0; bindex--) {
5402 + err = is_robranch_super(dentry->d_sb, bindex);
5403 + if (err) /* keep the errno so an all-read-only walk doesn't return 0 */
5404 + continue;
5405 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5406 + if (!lower_dentry) {
5407 + lower_dentry = create_parents(dir, dentry,
5408 + dentry->d_name.name, bindex);
5409 + if (!lower_dentry || IS_ERR(lower_dentry)) {
5410 + err = PTR_ERR(lower_dentry); /* 0 when NULL, as in the sibling ops */
5411 + printk(KERN_DEBUG "unionfs: failed to create "
5412 + "parents on %d, err = %ld\n",
5413 + bindex, PTR_ERR(lower_dentry));
5414 + continue;
5415 + }
5416 + }
5417 +
5418 + lower_parent_dentry = lock_parent(lower_dentry);
5419 + if (IS_ERR(lower_parent_dentry)) {
5420 + err = PTR_ERR(lower_parent_dentry);
5421 + goto out;
5422 + }
5423 +
5424 + err = vfs_mknod(lower_parent_dentry->d_inode,
5425 + lower_dentry, mode, dev);
5426 +
5427 + if (err) {
5428 + unlock_dir(lower_parent_dentry);
5429 + break;
5430 + }
5431 +
5432 + /*
5433 + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5434 + * err.
5435 + */
5436 + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5437 + if (!err) {
5438 + fsstack_copy_attr_times(dir,
5439 + lower_parent_dentry->d_inode);
5440 + fsstack_copy_inode_size(dir,
5441 + lower_parent_dentry->d_inode);
5442 + /* update number of links on parent directory */
5443 + dir->i_nlink = unionfs_get_nlinks(dir);
5444 + }
5445 + unlock_dir(lower_parent_dentry);
5446 +
5447 + break;
5448 + }
5449 +
5450 +out:
5451 + if (!dentry->d_inode)
5452 + d_drop(dentry); /* nothing was instantiated: unhash the negative dentry */
5453 +
5454 + kfree(name);
5455 +
5456 + if (!err)
5457 + unionfs_inherit_mnt(dentry);
5458 + unionfs_unlock_dentry(dentry);
5459 +
5460 + unionfs_check_inode(dir);
5461 + unionfs_check_dentry(dentry);
5462 + unionfs_read_unlock(dentry->d_sb);
5463 +
5464 + return err;
5465 +}
5466 +/* Forward readlink to the lower dentry's ->readlink; returns its result or a negative errno. */
5467 +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5468 + int bufsiz)
5469 +{
5470 + int err;
5471 + struct dentry *lower_dentry;
5472 +
5473 + unionfs_read_lock(dentry->d_sb);
5474 + unionfs_lock_dentry(dentry);
5475 +
5476 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5477 + err = -ESTALE;
5478 + goto out;
5479 + }
5480 +
5481 + lower_dentry = unionfs_lower_dentry(dentry);
5482 +
5483 + if (!lower_dentry->d_inode->i_op ||
5484 + !lower_dentry->d_inode->i_op->readlink) {
5485 + err = -EINVAL; /* lower inode has no readlink method */
5486 + goto out;
5487 + }
5488 +
5489 + err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5490 + buf, bufsiz);
5491 + if (err > 0) /* something was read: refresh our atime from the lower inode */
5492 + fsstack_copy_attr_atime(dentry->d_inode,
5493 + lower_dentry->d_inode);
5494 +
5495 +out:
5496 + unionfs_unlock_dentry(dentry);
5497 + unionfs_check_dentry(dentry);
5498 + unionfs_read_unlock(dentry->d_sb);
5499 +
5500 + return err;
5501 +}
5502 +
5503 +/*
5504 + * unionfs_follow_link takes a dentry, but it is simple. It only needs to
5505 + * allocate some memory and then call our ->readlink method. Our
5506 + * unionfs_readlink *does* lock our dentry and revalidate the dentry.
5507 + * Therefore, we do not have to lock our dentry here, to prevent a deadlock;
5508 + * nor do we need to revalidate it either. It is safe to not lock our
5509 + * dentry here, nor revalidate it, because unionfs_follow_link does not do
5510 + * anything (prior to calling ->readlink) which could become inconsistent
5511 + * due to branch management.
5512 + */
5513 +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5514 +{
5515 + char *buf;
5516 + int len = PAGE_SIZE, err; /* len: size of buf, including room for the NUL */
5517 + mm_segment_t old_fs;
5518 +
5519 + unionfs_read_lock(dentry->d_sb);
5520 +
5521 + /* This is freed by the put_link method assuming a successful call. */
5522 + buf = kmalloc(len, GFP_KERNEL);
5523 + if (!buf) {
5524 + err = -ENOMEM;
5525 + goto out;
5526 + }
5527 +
5528 + /* read the symlink (kernel buffer, hence KERNEL_DS), leaving one byte for the NUL */
5529 + old_fs = get_fs();
5530 + set_fs(KERNEL_DS);
5531 + err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len - 1);
5532 + set_fs(old_fs);
5533 + if (err < 0) {
5534 + kfree(buf);
5535 + buf = NULL;
5536 + goto out;
5537 + }
5538 + buf[err] = 0; /* err <= len - 1, so this stays inside buf */
5539 + nd_set_link(nd, buf);
5540 + err = 0;
5541 +
5542 +out:
5543 + unionfs_check_dentry(dentry);
5544 + unionfs_read_unlock(dentry->d_sb);
5545 + return ERR_PTR(err); /* ERR_PTR(0), i.e. NULL, on success */
5546 +}
5547 +
5548 +/* Frees the link buffer stored in nd. FIXME: We may not have to lock here */
5549 +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5550 + void *cookie)
5551 +{
5552 + unionfs_read_lock(dentry->d_sb);
5553 +
5554 + unionfs_lock_dentry(dentry);
5555 + if (!__unionfs_d_revalidate_chain(dentry, nd, 0)) /* failure is only logged */
5556 + printk("unionfs: put_link failed to revalidate dentry\n");
5557 + unionfs_unlock_dentry(dentry);
5558 +
5559 + unionfs_check_dentry(dentry);
5560 + kfree(nd_get_link(nd)); /* cookie is not used; the buffer comes from nd */
5561 + unionfs_read_unlock(dentry->d_sb);
5562 +}
5563 +
5564 +/*
5565 + * Basically copied from the kernel vfs permission(), but we've changed
5566 + * the following:
5567 + * (1) the IS_RDONLY check is only applied to branch 0 (bindex == 0), and
5568 + * (2) We return 0 (success) if the non-leftmost branch is mounted
5569 + * readonly, to allow copyup to work.
5570 + * (3) we do call security_inode_permission, and therefore security inside
5571 + * SELinux, etc. are performed.
5572 + */
5573 +static int inode_permission(struct super_block *sb, struct inode *inode, int mask,
5574 + struct nameidata *nd, int bindex) /* bindex: branch index the caller checks inode for */
5575 +{
5576 + int retval, submask;
5577 +
5578 + if (mask & MAY_WRITE) {
5579 + umode_t mode = inode->i_mode;
5580 + /* The first branch is allowed to be really readonly. */
5581 + if (bindex == 0 &&
5582 + IS_RDONLY(inode) &&
5583 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5584 + return -EROFS;
5585 + /*
5586 + * Nobody gets write access to an immutable file.
5587 + */
5588 + if (IS_IMMUTABLE(inode))
5589 + return -EACCES;
5590 + /*
5591 + * For all other branches than the first one, we ignore
5592 + * EROFS or if the branch is mounted as readonly, to let
5593 + * copyup take place.
5594 + */
5595 + if (bindex > 0 &&
5596 + is_robranch_super(sb, bindex) &&
5597 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5598 + return 0; /* note: skips the permission and security checks below */
5599 + }
5600 +
5601 + /* Ordinary permission routines do not understand MAY_APPEND. */
5602 + submask = mask & ~MAY_APPEND;
5603 + if (inode->i_op && inode->i_op->permission)
5604 + retval = inode->i_op->permission(inode, submask, nd);
5605 + else
5606 + retval = generic_permission(inode, submask, NULL);
5607 +
5608 + if (retval && retval != -EROFS) /* ignore EROFS */
5609 + return retval;
5610 +
5611 + retval = security_inode_permission(inode, mask, nd); /* full mask, MAY_APPEND included */
5612 + return ((retval == -EROFS) ? 0 : retval); /* ignore EROFS */
5613 +}
5614 +
5615 +/*
5616 + * Don't grab the superblock read-lock in unionfs_permission, which prevents
5617 + * a deadlock with the branch-management "add branch" code (which grabbed
5618 + * the write lock). It is safe to not grab the read lock here, because even
5619 + * with branch management taking place, there is no chance that
5620 + * unionfs_permission, or anything it calls, will use stale branch
5621 + * information. Returns 0 if access is allowed, else a negative errno.
5622 + */
5623 +static int unionfs_permission(struct inode *inode, int mask,
5624 + struct nameidata *nd)
5625 +{
5626 + struct inode *lower_inode = NULL;
5627 + int err = 0;
5628 + int bindex, bstart, bend;
5629 + const int is_file = !S_ISDIR(inode->i_mode); /* true for anything that is not a directory */
5630 + const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); /* write without read */
5631 +
5632 + bstart = ibstart(inode);
5633 + bend = ibend(inode);
5634 + if (bstart < 0 || bend < 0) {
5635 + /*
5636 + * With branch-management, we can get a stale inode here.
5637 + * If so, we return ESTALE back to link_path_walk, which
5638 + * would discard the dcache entry and re-lookup the
5639 + * dentry+inode. This should be equivalent to issuing
5640 + * __unionfs_d_revalidate_chain on nd.dentry here.
5641 + */
5642 + err = -ESTALE; /* force revalidate */
5643 + goto out;
5644 + }
5645 +
5646 + for (bindex = bstart; bindex <= bend; bindex++) { /* bend is inclusive */
5647 + lower_inode = unionfs_lower_inode_idx(inode, bindex);
5648 + if (!lower_inode)
5649 + continue;
5650 +
5651 + /*
5652 + * check the condition for D-F-D underlying files/directories,
5653 + * we don't have to check for files, if we are checking for
5654 + * directories.
5655 + */
5656 + if (!is_file && !S_ISDIR(lower_inode->i_mode))
5657 + continue;
5658 +
5659 + /*
5660 + * We use our own special version of permission, such that
5661 + * only the first branch returns -EROFS.
5662 + */
5663 + err = inode_permission(inode->i_sb, lower_inode, mask, nd, bindex);
5664 +
5665 + /*
5666 + * The permissions are an intersection of the overall directory
5667 + * permissions, so we fail if one fails.
5668 + */
5669 + if (err)
5670 + goto out;
5671 +
5672 + /* only the leftmost file matters. */
5673 + if (is_file || write_mask) {
5674 + if (is_file && write_mask) {
5675 + err = get_write_access(lower_inode); /* probe only: dropped again right away */
5676 + if (!err)
5677 + put_write_access(lower_inode);
5678 + }
5679 + break;
5680 + }
5681 + }
5682 + /* sync times which may have changed (asynchronously) below */
5683 + unionfs_copy_attr_times(inode);
5684 +
5685 +out:
5686 + unionfs_check_inode(inode);
5687 + return err;
5688 +}
5689 +/* Apply ia to dentry's lower object, copying it up first if its branch is read-only. */
5690 +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5691 +{
5692 + int err = 0;
5693 + struct dentry *lower_dentry;
5694 + struct inode *inode = NULL;
5695 + struct inode *lower_inode = NULL;
5696 + int bstart, bend, bindex;
5697 + int i;
5698 + int copyup = 0; /* set to 1 once a copyup to a lower-numbered branch succeeded */
5699 +
5700 + unionfs_read_lock(dentry->d_sb);
5701 + unionfs_lock_dentry(dentry);
5702 +
5703 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5704 + err = -ESTALE;
5705 + goto out;
5706 + }
5707 +
5708 + bstart = dbstart(dentry);
5709 + bend = dbend(dentry);
5710 + inode = dentry->d_inode;
5711 +
5712 + for (bindex = bstart; (bindex <= bend) || (bindex == bstart);
5713 + bindex++) { /* every path that reaches notify_change() leaves the loop */
5714 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5715 + if (!lower_dentry)
5716 + continue;
5717 + BUG_ON(lower_dentry->d_inode == NULL);
5718 +
5719 + /* If the file is on a read only branch */
5720 + if (is_robranch_super(dentry->d_sb, bindex)
5721 + || IS_RDONLY(lower_dentry->d_inode)) {
5722 + if (copyup || (bindex != bstart))
5723 + continue;
5724 + /* Only if it's the leftmost file, copyup the file */
5725 + for (i = bstart - 1; i >= 0; i--) {
5726 + loff_t size = dentry->d_inode->i_size;
5727 + if (ia->ia_valid & ATTR_SIZE)
5728 + size = ia->ia_size; /* copy up using the requested new size */
5729 + err = copyup_dentry(dentry->d_parent->d_inode,
5730 + dentry, bstart, i,
5731 + dentry->d_name.name,
5732 + dentry->d_name.len,
5733 + NULL, size);
5734 +
5735 + if (!err) {
5736 + copyup = 1;
5737 + lower_dentry =
5738 + unionfs_lower_dentry(dentry);
5739 + break;
5740 + }
5741 + /*
5742 + * if error is in the leftmost branch, pass
5743 + * it up.
5744 + */
5745 + if (i == 0)
5746 + goto out;
5747 + }
5748 +
5749 + }
5750 + err = notify_change(lower_dentry, ia);
5751 + if (err)
5752 + goto out;
5753 + break;
5754 + }
5755 +
5756 + /* for mmap */
5757 + if (ia->ia_valid & ATTR_SIZE) {
5758 + if (ia->ia_size != i_size_read(inode)) {
5759 + err = vmtruncate(inode, ia->ia_size);
5760 + if (err) /* logged; err is still returned to the caller */
5761 + printk("unionfs_setattr: vmtruncate failed\n");
5762 + }
5763 + }
5764 +
5765 + /* get the size from the first lower inode */
5766 + lower_inode = unionfs_lower_inode(inode);
5767 + unionfs_copy_attr_all(inode, lower_inode);
5768 + fsstack_copy_inode_size(inode, lower_inode);
5769 + /* if setattr succeeded, then parent dir may have changed */
5770 + unionfs_copy_attr_times(dentry->d_parent->d_inode);
5771 +out:
5772 + unionfs_unlock_dentry(dentry);
5773 + unionfs_check_dentry(dentry);
5774 + unionfs_check_dentry(dentry->d_parent);
5775 + unionfs_read_unlock(dentry->d_sb);
5776 +
5777 + return err;
5778 +}
5779 +
5780 +struct inode_operations unionfs_symlink_iops = { /* method table with the link-specific ops */
5781 + .readlink = unionfs_readlink,
5782 + .permission = unionfs_permission,
5783 + .follow_link = unionfs_follow_link,
5784 + .setattr = unionfs_setattr,
5785 + .put_link = unionfs_put_link, /* frees the buffer follow_link allocated */
5786 +};
5787 +
5788 +struct inode_operations unionfs_dir_iops = { /* method table with the namespace (create/remove/rename) ops */
5789 + .create = unionfs_create,
5790 + .lookup = unionfs_lookup,
5791 + .link = unionfs_link,
5792 + .unlink = unionfs_unlink,
5793 + .symlink = unionfs_symlink,
5794 + .mkdir = unionfs_mkdir,
5795 + .rmdir = unionfs_rmdir,
5796 + .mknod = unionfs_mknod,
5797 + .rename = unionfs_rename,
5798 + .permission = unionfs_permission,
5799 + .setattr = unionfs_setattr,
5800 +#ifdef CONFIG_UNION_FS_XATTR
5801 + .setxattr = unionfs_setxattr, /* xattr ops are only wired up with CONFIG_UNION_FS_XATTR */
5802 + .getxattr = unionfs_getxattr,
5803 + .removexattr = unionfs_removexattr,
5804 + .listxattr = unionfs_listxattr,
5805 +#endif /* CONFIG_UNION_FS_XATTR */
5806 +};
5807 +
5808 +struct inode_operations unionfs_main_iops = { /* method table with only permission/setattr (+ optional xattr) */
5809 + .permission = unionfs_permission,
5810 + .setattr = unionfs_setattr,
5811 +#ifdef CONFIG_UNION_FS_XATTR
5812 + .setxattr = unionfs_setxattr, /* xattr ops are only wired up with CONFIG_UNION_FS_XATTR */
5813 + .getxattr = unionfs_getxattr,
5814 + .removexattr = unionfs_removexattr,
5815 + .listxattr = unionfs_listxattr,
5816 +#endif /* CONFIG_UNION_FS_XATTR */
5817 +};
5818 diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5819 new file mode 100644
5820 index 0000000..da991b3
5821 --- /dev/null
5822 +++ b/fs/unionfs/lookup.c
5823 @@ -0,0 +1,577 @@
5824 +/*
5825 + * Copyright (c) 2003-2007 Erez Zadok
5826 + * Copyright (c) 2003-2006 Charles P. Wright
5827 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5828 + * Copyright (c) 2005-2006 Junjiro Okajima
5829 + * Copyright (c) 2005 Arun M. Krishnakumar
5830 + * Copyright (c) 2004-2006 David P. Quigley
5831 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5832 + * Copyright (c) 2003 Puja Gupta
5833 + * Copyright (c) 2003 Harikesavan Krishnan
5834 + * Copyright (c) 2003-2007 Stony Brook University
5835 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
5836 + *
5837 + * This program is free software; you can redistribute it and/or modify
5838 + * it under the terms of the GNU General Public License version 2 as
5839 + * published by the Free Software Foundation.
5840 + */
5841 +
5842 +#include "union.h"
5843 +
5844 +static int realloc_dentry_private_data(struct dentry *dentry); /* forward declaration */
5845 +
5846 +/* A name is valid unless it starts with the whiteout or opaque-marker prefix. */
5847 +static int is_validname(const char *name)
5848 +{
5849 + const size_t wh_len = UNIONFS_WHLEN;
5850 + const size_t opq_len = sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1;
5851 +
5852 + /* 1 only when both prefix comparisons report a mismatch */
5853 + return strncmp(name, UNIONFS_WHPFX, wh_len) != 0 &&
5854 + strncmp(name, UNIONFS_DIR_OPAQUE_NAME, opq_len) != 0;
5855 +}
5856 +
5857 +/* The rest of these are utility functions for lookup. is_opaque_dir looks up UNIONFS_DIR_OPAQUE in branch bindex of dentry. */
5858 +static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
5859 +{
5860 + int err = 0; /* result: 1 = marker present, 0 = absent, <0 = errno from the lookup */
5861 + struct dentry *lower_dentry;
5862 + struct dentry *wh_lower_dentry;
5863 + struct inode *lower_inode;
5864 + struct sioq_args args;
5865 +
5866 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5867 + lower_inode = lower_dentry->d_inode;
5868 +
5869 + BUG_ON(!S_ISDIR(lower_inode->i_mode));
5870 +
5871 + mutex_lock(&lower_inode->i_mutex); /* held across the marker lookup */
5872 +
5873 + if (!permission(lower_inode, MAY_EXEC, NULL)) /* 0 == caller may search the directory */
5874 + wh_lower_dentry =
5875 + lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
5876 + sizeof(UNIONFS_DIR_OPAQUE) - 1);
5877 + else {
5878 + args.is_opaque.dentry = lower_dentry; /* otherwise hand the lookup to run_sioq() */
5879 + run_sioq(__is_opaque_dir, &args);
5880 + wh_lower_dentry = args.ret;
5881 + }
5882 +
5883 + mutex_unlock(&lower_inode->i_mutex);
5884 +
5885 + if (IS_ERR(wh_lower_dentry)) {
5886 + err = PTR_ERR(wh_lower_dentry);
5887 + goto out;
5888 + }
5889 +
5890 + /* This is an opaque dir iff wh_lower_dentry is positive */
5891 + err = !!wh_lower_dentry->d_inode;
5892 +
5893 + dput(wh_lower_dentry);
5894 +out:
5895 + return err;
5896 +}
5897 +
5898 +/*
5899 + * Main (and complex) driver function for Unionfs's lookup
5900 + *
5901 + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
5902 + * PTR if d_splice returned a different dentry.
5903 + */
5904 +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
5905 + struct nameidata *nd, int lookupmode)
5906 +{
5907 + int err = 0;
5908 + struct dentry *lower_dentry = NULL;
5909 + struct dentry *wh_lower_dentry = NULL;
5910 + struct dentry *lower_dir_dentry = NULL;
5911 + struct dentry *parent_dentry = NULL;
5912 + struct dentry *d_interposed = NULL;
5913 + int bindex, bstart, bend, bopaque;
5914 + int dentry_count = 0; /* Number of positive dentries. */
5915 + int first_dentry_offset = -1; /* -1 is uninitialized */
5916 + struct dentry *first_dentry = NULL;
5917 + struct dentry *first_lower_dentry = NULL;
5918 + struct vfsmount *first_lower_mnt = NULL;
5919 + int locked_parent = 0;
5920 + int locked_child = 0;
5921 + int allocated_new_info = 0;
5922 + int opaque;
5923 + char *whname = NULL;
5924 + const char *name;
5925 + int namelen;
5926 +
5927 + /*
5928 + * We should already have a lock on this dentry in the case of a
5929 + * partial lookup, or a revalidation. Otherwise it is returned from
5930 + * new_dentry_private_data already locked.
5931 + */
5932 + if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
5933 + lookupmode == INTERPOSE_REVAL_NEG)
5934 + verify_locked(dentry);
5935 + else {
5936 + BUG_ON(UNIONFS_D(dentry) != NULL);
5937 + locked_child = 1;
5938 + }
5939 +
5940 + switch(lookupmode) {
5941 + case INTERPOSE_PARTIAL:
5942 + break;
5943 + case INTERPOSE_LOOKUP:
5944 + if ((err = new_dentry_private_data(dentry)))
5945 + goto out;
5946 + allocated_new_info = 1;
5947 + break;
5948 + default:
5949 + if ((err = realloc_dentry_private_data(dentry)))
5950 + goto out;
5951 + allocated_new_info = 1;
5952 + break;
5953 + }
5954 +
5955 + /* must initialize dentry operations */
5956 + dentry->d_op = &unionfs_dops;
5957 +
5958 + parent_dentry = dget_parent(dentry);
5959 + /* We never partial lookup the root directory. */
5960 + if (parent_dentry != dentry) {
5961 + unionfs_lock_dentry(parent_dentry);
5962 + locked_parent = 1;
5963 + } else {
5964 + dput(parent_dentry);
5965 + parent_dentry = NULL;
5966 + goto out;
5967 + }
5968 +
5969 + name = dentry->d_name.name;
5970 + namelen = dentry->d_name.len;
5971 +
5972 + /* No dentries should get created for possible whiteout names. */
5973 + if (!is_validname(name)) {
5974 + err = -EPERM;
5975 + goto out_free;
5976 + }
5977 +
5978 + /* Now start the actual lookup procedure. */
5979 + bstart = dbstart(parent_dentry);
5980 + bend = dbend(parent_dentry);
5981 + bopaque = dbopaque(parent_dentry);
5982 + BUG_ON(bstart < 0);
5983 +
5984 + /*
5985 + * It would be ideal if we could convert partial lookups to only have
5986 + * to do this work when they really need to. It could probably improve
5987 + * performance quite a bit, and maybe simplify the rest of the code.
5988 + */
5989 + if (lookupmode == INTERPOSE_PARTIAL) {
5990 + bstart++;
5991 + if ((bopaque != -1) && (bopaque < bend))
5992 + bend = bopaque;
5993 + }
5994 +
5995 + for (bindex = bstart; bindex <= bend; bindex++) {
5996 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5997 + if (lookupmode == INTERPOSE_PARTIAL && lower_dentry)
5998 + continue;
5999 + BUG_ON(lower_dentry != NULL);
6000 +
6001 + lower_dir_dentry =
6002 + unionfs_lower_dentry_idx(parent_dentry, bindex);
6003 +
6004 + /* if the parent lower dentry does not exist skip this */
6005 + if (!(lower_dir_dentry && lower_dir_dentry->d_inode))
6006 + continue;
6007 +
6008 + /* also skip it if the parent isn't a directory. */
6009 + if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6010 + continue;
6011 +
6012 + /* Reuse the whiteout name because its value doesn't change. */
6013 + if (!whname) {
6014 + whname = alloc_whname(name, namelen);
6015 + if (IS_ERR(whname)) {
6016 + err = PTR_ERR(whname);
6017 + goto out_free;
6018 + }
6019 + }
6020 +
6021 + /* check if whiteout exists in this branch: lookup .wh.foo */
6022 + wh_lower_dentry = lookup_one_len(whname, lower_dir_dentry,
6023 + namelen + UNIONFS_WHLEN);
6024 + if (IS_ERR(wh_lower_dentry)) {
6025 + dput(first_lower_dentry);
6026 + unionfs_mntput(first_dentry, first_dentry_offset);
6027 + err = PTR_ERR(wh_lower_dentry);
6028 + goto out_free;
6029 + }
6030 +
6031 + if (wh_lower_dentry->d_inode) {
6032 + /* We found a whiteout so lets give up. */
6033 + if (S_ISREG(wh_lower_dentry->d_inode->i_mode)) {
6034 + set_dbend(dentry, bindex);
6035 + set_dbopaque(dentry, bindex);
6036 + dput(wh_lower_dentry);
6037 + break;
6038 + }
6039 + err = -EIO;
6040 + printk(KERN_NOTICE "unionfs: EIO: invalid whiteout "
6041 + "entry type %d.\n",
6042 + wh_lower_dentry->d_inode->i_mode);
6043 + dput(wh_lower_dentry);
6044 + dput(first_lower_dentry);
6045 + unionfs_mntput(first_dentry, first_dentry_offset);
6046 + goto out_free;
6047 + }
6048 +
6049 + dput(wh_lower_dentry);
6050 + wh_lower_dentry = NULL;
6051 +
6052 + /* Now do regular lookup; lookup foo */
6053 + nd->dentry = unionfs_lower_dentry_idx(dentry, bindex);
6054 + /* FIXME: fix following line for mount point crossing */
6055 + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
6056 +
6057 + lower_dentry = lookup_one_len_nd(name, lower_dir_dentry,
6058 + namelen, nd);
6059 + if (IS_ERR(lower_dentry)) {
6060 + dput(first_lower_dentry);
6061 + unionfs_mntput(first_dentry, first_dentry_offset);
6062 + err = PTR_ERR(lower_dentry);
6063 + goto out_free;
6064 + }
6065 +
6066 + /*
6067 + * Store the first negative dentry specially, because if they
6068 + * are all negative we need this for future creates.
6069 + */
6070 + if (!lower_dentry->d_inode) {
6071 + if (!first_lower_dentry && (dbstart(dentry) == -1)) {
6072 + first_lower_dentry = lower_dentry;
6073 + /*
6074 + * FIXME: following line needs to be changed
6075 + * to allow mount-point crossing
6076 + */
6077 + first_dentry = parent_dentry;
6078 + first_lower_mnt =
6079 + unionfs_mntget(parent_dentry, bindex);
6080 + first_dentry_offset = bindex;
6081 + } else
6082 + dput(lower_dentry);
6083 +
6084 + continue;
6085 + }
6086 +
6087 + /* number of positive dentries */
6088 + dentry_count++;
6089 +
6090 + /* store underlying dentry */
6091 + if (dbstart(dentry) == -1)
6092 + set_dbstart(dentry, bindex);
6093 + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6094 + /*
6095 + * FIXME: the following line needs to get fixed to allow
6096 + * mount-point crossing
6097 + */
6098 + unionfs_set_lower_mnt_idx(dentry, bindex,
6099 + unionfs_mntget(parent_dentry,
6100 + bindex));
6101 + set_dbend(dentry, bindex);
6102 +
6103 + /* update parent directory's atime with the bindex */
6104 + fsstack_copy_attr_atime(parent_dentry->d_inode,
6105 + lower_dir_dentry->d_inode);
6106 +
6107 + /* We terminate file lookups here. */
6108 + if (!S_ISDIR(lower_dentry->d_inode->i_mode)) {
6109 + if (lookupmode == INTERPOSE_PARTIAL)
6110 + continue;
6111 + if (dentry_count == 1)
6112 + goto out_positive;
6113 + /* This can only happen with mixed D-*-F-* */
6114 + BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
6115 + d_inode->i_mode));
6116 + continue;
6117 + }
6118 +
6119 + opaque = is_opaque_dir(dentry, bindex);
6120 + if (opaque < 0) {
6121 + dput(first_lower_dentry);
6122 + unionfs_mntput(first_dentry, first_dentry_offset);
6123 + err = opaque;
6124 + goto out_free;
6125 + } else if (opaque) {
6126 + set_dbend(dentry, bindex);
6127 + set_dbopaque(dentry, bindex);
6128 + break;
6129 + }
6130 + }
6131 +
6132 + if (dentry_count)
6133 + goto out_positive;
6134 + else
6135 + goto out_negative;
6136 +
6137 +out_negative:
6138 + if (lookupmode == INTERPOSE_PARTIAL)
6139 + goto out;
6140 +
6141 + /* If we've only got negative dentries, then use the leftmost one. */
6142 + if (lookupmode == INTERPOSE_REVAL) {
6143 + if (dentry->d_inode)
6144 + UNIONFS_I(dentry->d_inode)->stale = 1;
6145 + goto out;
6146 + }
6147 + /* This should only happen if we found a whiteout. */
6148 + if (first_dentry_offset == -1) {
6149 + nd->dentry = dentry;
6150 + /* FIXME: fix following line for mount point crossing */
6151 + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
6152 +
6153 + first_lower_dentry =
6154 + lookup_one_len_nd(name, lower_dir_dentry,
6155 + namelen, nd);
6156 + first_dentry_offset = bindex;
6157 + if (IS_ERR(first_lower_dentry)) {
6158 + err = PTR_ERR(first_lower_dentry);
6159 + goto out;
6160 + }
6161 +
6162 + /*
6163 + * FIXME: the following line needs to be changed to allow
6164 + * mount-point crossing
6165 + */
6166 + first_dentry = dentry;
6167 + first_lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6168 + bindex);
6169 + }
6170 + unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
6171 + first_lower_dentry);
6172 + unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
6173 + first_lower_mnt);
6174 + set_dbstart(dentry, first_dentry_offset);
6175 + set_dbend(dentry, first_dentry_offset);
6176 +
6177 + if (lookupmode == INTERPOSE_REVAL_NEG)
6178 + BUG_ON(dentry->d_inode != NULL);
6179 + else
6180 + d_add(dentry, NULL);
6181 + goto out;
6182 +
6183 +/* This part of the code is for positive dentries. */
6184 +out_positive:
6185 + BUG_ON(dentry_count <= 0);
6186 +
6187 + /*
6188 + * If we're holding onto the first negative dentry & corresponding
6189 + * vfsmount - throw it out.
6190 + */
6191 + dput(first_lower_dentry);
6192 + unionfs_mntput(first_dentry, first_dentry_offset);
6193 +
6194 + /* Partial lookups need to re-interpose, or throw away older negs. */
6195 + if (lookupmode == INTERPOSE_PARTIAL) {
6196 + if (dentry->d_inode) {
6197 + unionfs_reinterpose(dentry);
6198 + goto out;
6199 + }
6200 +
6201 + /*
6202 + * This somehow turned positive, so it is as if we had a
6203 + * negative revalidation.
6204 + */
6205 + lookupmode = INTERPOSE_REVAL_NEG;
6206 +
6207 + update_bstart(dentry);
6208 + bstart = dbstart(dentry);
6209 + bend = dbend(dentry);
6210 + }
6211 +
6212 + /*
6213 + * Interpose can return a dentry if d_splice returned a different
6214 + * dentry.
6215 + */
6216 + d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6217 + if (IS_ERR(d_interposed))
6218 + err = PTR_ERR(d_interposed);
6219 + else if (d_interposed)
6220 + dentry = d_interposed;
6221 +
6222 + if (err)
6223 + goto out_drop;
6224 +
6225 + goto out;
6226 +
6227 +out_drop:
6228 + d_drop(dentry);
6229 +
6230 +out_free:
6231 + /* should dput all the underlying dentries on error condition */
6232 + bstart = dbstart(dentry);
6233 + if (bstart >= 0) {
6234 + bend = dbend(dentry);
6235 + for (bindex = bstart; bindex <= bend; bindex++) {
6236 + dput(unionfs_lower_dentry_idx(dentry, bindex));
6237 + unionfs_mntput(dentry, bindex);
6238 + }
6239 + }
6240 + kfree(UNIONFS_D(dentry)->lower_paths);
6241 + UNIONFS_D(dentry)->lower_paths = NULL;
6242 + set_dbstart(dentry, -1);
6243 + set_dbend(dentry, -1);
6244 +
6245 +out:
6246 + if (!err && UNIONFS_D(dentry)) {
6247 + BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
6248 + BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
6249 + BUG_ON(dbstart(dentry) < 0);
6250 + }
6251 + kfree(whname);
6252 + if (locked_parent)
6253 + unionfs_unlock_dentry(parent_dentry);
6254 + dput(parent_dentry);
6255 + if (locked_child || (err && allocated_new_info))
6256 + unionfs_unlock_dentry(dentry);
6257 + if (!err && d_interposed)
6258 + return d_interposed;
6259 + return ERR_PTR(err);
6260 +}
6261 +
6262 +/*
6263 + * This is a utility function that fills in a unionfs dentry.
6264 + *
6265 + * Returns: 0 (ok), or -ERRNO if an error occurred.
6266 + */
6267 +int unionfs_partial_lookup(struct dentry *dentry)
6268 +{
6269 + struct dentry *tmp;
6270 + struct nameidata nd = { .flags = 0 };
6271 + int err = -ENOSYS;
6272 +
6273 + tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
6274 + if (!tmp) {
6275 + err = 0;
6276 + goto out;
6277 + }
6278 + if (IS_ERR(tmp)) {
6279 + err = PTR_ERR(tmp);
6280 + goto out;
6281 + }
6282 + /* need to change the interface */
6283 + BUG_ON(tmp != dentry);
6284 +out:
6285 + return err;
6286 +}
6287 +
6288 +/* The dentry cache is just so we have properly sized dentries. */
6289 +static struct kmem_cache *unionfs_dentry_cachep;
6290 +int unionfs_init_dentry_cache(void)
6291 +{
6292 + unionfs_dentry_cachep =
6293 + kmem_cache_create("unionfs_dentry",
6294 + sizeof(struct unionfs_dentry_info),
6295 + 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
6296 +
6297 + return (unionfs_dentry_cachep ? 0 : -ENOMEM);
6298 +}
6299 +
6300 +void unionfs_destroy_dentry_cache(void)
6301 +{
6302 + if (unionfs_dentry_cachep)
6303 + kmem_cache_destroy(unionfs_dentry_cachep);
6304 +}
6305 +
6306 +void free_dentry_private_data(struct dentry *dentry)
6307 +{
6308 + if (!dentry || !dentry->d_fsdata)
6309 + return;
6310 + kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6311 + dentry->d_fsdata = NULL;
6312 +}
6313 +
6314 +static inline int __realloc_dentry_private_data(struct dentry *dentry)
6315 +{
6316 + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6317 + void *p;
6318 + int size;
6319 +
6320 + BUG_ON(!info);
6321 +
6322 + size = sizeof(struct path) * sbmax(dentry->d_sb);
6323 + p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6324 + if (!p)
6325 + return -ENOMEM;
6326 +
6327 + info->lower_paths = p;
6328 +
6329 + info->bstart = -1;
6330 + info->bend = -1;
6331 + info->bopaque = -1;
6332 + info->bcount = sbmax(dentry->d_sb);
6333 + atomic_set(&info->generation,
6334 + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6335 +
6336 + memset(info->lower_paths, 0, size);
6337 +
6338 + return 0;
6339 +}
6340 +
6341 +/* UNIONFS_D(dentry)->lock must be locked */
6342 +static int realloc_dentry_private_data(struct dentry *dentry)
6343 +{
6344 + if (!__realloc_dentry_private_data(dentry))
6345 + return 0;
6346 +
6347 + kfree(UNIONFS_D(dentry)->lower_paths);
6348 + free_dentry_private_data(dentry);
6349 + return -ENOMEM;
6350 +}
6351 +
6352 +/* allocate new dentry private data */
6353 +int new_dentry_private_data(struct dentry *dentry)
6354 +{
6355 + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6356 +
6357 + BUG_ON(info);
6358 +
6359 + info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6360 + if (!info)
6361 + return -ENOMEM;
6362 +
6363 + mutex_init(&info->lock);
6364 + mutex_lock(&info->lock);
6365 +
6366 + info->lower_paths = NULL;
6367 +
6368 + dentry->d_fsdata = info;
6369 +
6370 + if (!__realloc_dentry_private_data(dentry))
6371 + return 0;
6372 +
6373 + mutex_unlock(&info->lock);
6374 + free_dentry_private_data(dentry);
6375 + return -ENOMEM;
6376 +}
6377 +
6378 +/*
6379 + * scan through the lower dentry objects, and set bstart to reflect the
6380 + * starting branch
6381 + */
6382 +void update_bstart(struct dentry *dentry)
6383 +{
6384 + int bindex;
6385 + int bstart = dbstart(dentry);
6386 + int bend = dbend(dentry);
6387 + struct dentry *lower_dentry;
6388 +
6389 + for (bindex = bstart; bindex <= bend; bindex++) {
6390 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6391 + if (!lower_dentry)
6392 + continue;
6393 + if (lower_dentry->d_inode) {
6394 + set_dbstart(dentry, bindex);
6395 + break;
6396 + }
6397 + dput(lower_dentry);
6398 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6399 + }
6400 +}
6401 diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6402 new file mode 100644
6403 index 0000000..e437edb
6404 --- /dev/null
6405 +++ b/fs/unionfs/main.c
6406 @@ -0,0 +1,762 @@
6407 +/*
6408 + * Copyright (c) 2003-2007 Erez Zadok
6409 + * Copyright (c) 2003-2006 Charles P. Wright
6410 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6411 + * Copyright (c) 2005-2006 Junjiro Okajima
6412 + * Copyright (c) 2005 Arun M. Krishnakumar
6413 + * Copyright (c) 2004-2006 David P. Quigley
6414 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6415 + * Copyright (c) 2003 Puja Gupta
6416 + * Copyright (c) 2003 Harikesavan Krishnan
6417 + * Copyright (c) 2003-2007 Stony Brook University
6418 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
6419 + *
6420 + * This program is free software; you can redistribute it and/or modify
6421 + * it under the terms of the GNU General Public License version 2 as
6422 + * published by the Free Software Foundation.
6423 + */
6424 +
6425 +#include "union.h"
6426 +#include <linux/module.h>
6427 +#include <linux/moduleparam.h>
6428 +
6429 +/*
6430 + * Connect a unionfs inode dentry/inode with several lower ones. This is
6431 + * the classic stackable file system "vnode interposition" action.
6432 + *
6433 + * @sb: unionfs's super_block
6434 + */
6435 +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6436 + int flag)
6437 +{
6438 + struct inode *lower_inode;
6439 + struct dentry *lower_dentry;
6440 + int err = 0;
6441 + struct inode *inode;
6442 + int is_negative_dentry = 1;
6443 + int bindex, bstart, bend;
6444 + int skipped = 1;
6445 + struct dentry *spliced = NULL;
6446 +
6447 + verify_locked(dentry);
6448 +
6449 + bstart = dbstart(dentry);
6450 + bend = dbend(dentry);
6451 +
6452 + /* Make sure that we didn't get a negative dentry. */
6453 + for (bindex = bstart; bindex <= bend; bindex++) {
6454 + if (unionfs_lower_dentry_idx(dentry, bindex) &&
6455 + unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
6456 + is_negative_dentry = 0;
6457 + break;
6458 + }
6459 + }
6460 + BUG_ON(is_negative_dentry);
6461 +
6462 + /*
6463 + * We allocate our new inode below, by calling iget.
6464 + * iget will call our read_inode which will initialize some
6465 + * of the new inode's fields
6466 + */
6467 +
6468 + /*
6469 + * On revalidate we've already got our own inode and just need
6470 + * to fix it up.
6471 + */
6472 + if (flag == INTERPOSE_REVAL) {
6473 + inode = dentry->d_inode;
6474 + UNIONFS_I(inode)->bstart = -1;
6475 + UNIONFS_I(inode)->bend = -1;
6476 + atomic_set(&UNIONFS_I(inode)->generation,
6477 + atomic_read(&UNIONFS_SB(sb)->generation));
6478 +
6479 + UNIONFS_I(inode)->lower_inodes =
6480 + kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6481 + if (!UNIONFS_I(inode)->lower_inodes) {
6482 + err = -ENOMEM;
6483 + goto out;
6484 + }
6485 + } else {
6486 + /* get unique inode number for unionfs */
6487 + inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6488 + if (!inode) {
6489 + err = -EACCES;
6490 + goto out;
6491 + }
6492 + if (atomic_read(&inode->i_count) > 1)
6493 + goto skip;
6494 + }
6495 +
6496 +fill_i_info:
6497 + skipped = 0;
6498 + for (bindex = bstart; bindex <= bend; bindex++) {
6499 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6500 + if (!lower_dentry) {
6501 + unionfs_set_lower_inode_idx(inode, bindex, NULL);
6502 + continue;
6503 + }
6504 +
6505 + /* Initialize the lower inode to the new lower inode. */
6506 + if (!lower_dentry->d_inode)
6507 + continue;
6508 +
6509 + unionfs_set_lower_inode_idx(inode, bindex,
6510 + igrab(lower_dentry->d_inode));
6511 + }
6512 +
6513 + ibstart(inode) = dbstart(dentry);
6514 + ibend(inode) = dbend(dentry);
6515 +
6516 + /* Use attributes from the first branch. */
6517 + lower_inode = unionfs_lower_inode(inode);
6518 +
6519 + /* Use different set of inode ops for symlinks & directories */
6520 + if (S_ISLNK(lower_inode->i_mode))
6521 + inode->i_op = &unionfs_symlink_iops;
6522 + else if (S_ISDIR(lower_inode->i_mode))
6523 + inode->i_op = &unionfs_dir_iops;
6524 +
6525 + /* Use different set of file ops for directories */
6526 + if (S_ISDIR(lower_inode->i_mode))
6527 + inode->i_fop = &unionfs_dir_fops;
6528 +
6529 + /* properly initialize special inodes */
6530 + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6531 + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6532 + init_special_inode(inode, lower_inode->i_mode,
6533 + lower_inode->i_rdev);
6534 +
6535 + /* all well, copy inode attributes */
6536 + unionfs_copy_attr_all(inode, lower_inode);
6537 + fsstack_copy_inode_size(inode, lower_inode);
6538 +
6539 + if (spliced)
6540 + goto out_spliced;
6541 +skip:
6542 + /* only (our) lookup wants to do a d_add */
6543 + switch (flag) {
6544 + case INTERPOSE_DEFAULT:
6545 + case INTERPOSE_REVAL_NEG:
6546 + d_instantiate(dentry, inode);
6547 + break;
6548 + case INTERPOSE_LOOKUP:
6549 + spliced = d_splice_alias(inode, dentry);
6550 + if (IS_ERR(spliced))
6551 + err = PTR_ERR(spliced);
6552 +
6553 + /*
6554 + * d_splice can return a dentry if it was disconnected and
6555 + * had to be moved. We must ensure that the private data of
6556 + * the new dentry is correct and that the inode info was
6557 + * filled properly. Finally we must return this new dentry.
6558 + */
6559 + else if (spliced && spliced != dentry) {
6560 + spliced->d_op = &unionfs_dops;
6561 + spliced->d_fsdata = dentry->d_fsdata;
6562 + dentry->d_fsdata = NULL;
6563 + dentry = spliced;
6564 + if (skipped)
6565 + goto fill_i_info;
6566 + goto out_spliced;
6567 + }
6568 + break;
6569 + case INTERPOSE_REVAL:
6570 + /* Do nothing. */
6571 + break;
6572 + default:
6573 + printk(KERN_ERR "unionfs: invalid interpose flag passed!");
6574 + BUG();
6575 + }
6576 + goto out;
6577 +
6578 +out_spliced:
6579 + if (!err)
6580 + return spliced;
6581 +out:
6582 + return ERR_PTR(err);
6583 +}
6584 +
6585 +/* like interpose above, but for an already existing dentry */
6586 +void unionfs_reinterpose(struct dentry *dentry)
6587 +{
6588 + struct dentry *lower_dentry;
6589 + struct inode *inode;
6590 + int bindex, bstart, bend;
6591 +
6592 + verify_locked(dentry);
6593 +
6594 + /* This is pre-allocated inode */
6595 + inode = dentry->d_inode;
6596 +
6597 + bstart = dbstart(dentry);
6598 + bend = dbend(dentry);
6599 + for (bindex = bstart; bindex <= bend; bindex++) {
6600 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6601 + if (!lower_dentry)
6602 + continue;
6603 +
6604 + if (!lower_dentry->d_inode)
6605 + continue;
6606 + if (unionfs_lower_inode_idx(inode, bindex))
6607 + continue;
6608 + unionfs_set_lower_inode_idx(inode, bindex,
6609 + igrab(lower_dentry->d_inode));
6610 + }
6611 + ibstart(inode) = dbstart(dentry);
6612 + ibend(inode) = dbend(dentry);
6613 +}
6614 +
6615 +/*
6616 + * make sure the branch we just looked up (nd) makes sense:
6617 + *
6618 + * 1) we're not trying to stack unionfs on top of unionfs
6619 + * 2) it exists
6620 + * 3) is a directory
6621 + */
6622 +int check_branch(struct nameidata *nd)
6623 +{
6624 + /* XXX: remove in ODF code -- stacking unions allowed there */
6625 + if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs"))
6626 + return -EINVAL;
6627 + if (!nd->dentry->d_inode)
6628 + return -ENOENT;
6629 + if (!S_ISDIR(nd->dentry->d_inode->i_mode))
6630 + return -ENOTDIR;
6631 + return 0;
6632 +}
6633 +
6634 +/* checks if two lower_dentries have overlapping branches */
6635 +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6636 +{
6637 + struct dentry *dent = NULL;
6638 +
6639 + dent = dent1;
6640 + while ((dent != dent2) && (dent->d_parent != dent))
6641 + dent = dent->d_parent;
6642 +
6643 + if (dent == dent2)
6644 + return 1;
6645 +
6646 + dent = dent2;
6647 + while ((dent != dent1) && (dent->d_parent != dent))
6648 + dent = dent->d_parent;
6649 +
6650 + return (dent == dent1);
6651 +}
6652 +
6653 +/*
6654 + * Parse branch mode helper function
6655 + */
6656 +int __parse_branch_mode(const char *name)
6657 +{
6658 + if (!name)
6659 + return 0;
6660 + if (!strcmp(name, "ro"))
6661 + return MAY_READ;
6662 + if (!strcmp(name, "rw"))
6663 + return (MAY_READ | MAY_WRITE);
6664 + return 0;
6665 +}
6666 +
6667 +/*
6668 + * Parse "ro" or "rw" options, but default to "rw" of no mode options
6669 + * was specified.
6670 + */
6671 +int parse_branch_mode(const char *name)
6672 +{
6673 + int perms = __parse_branch_mode(name);
6674 +
6675 + if (perms == 0)
6676 + perms = MAY_READ | MAY_WRITE;
6677 + return perms;
6678 +}
6679 +
6680 +/*
6681 + * parse the dirs= mount argument
6682 + *
6683 + * We don't need to lock the superblock private data's rwsem, as we get
6684 + * called only by unionfs_read_super - it is still a long time before anyone
6685 + * can even get a reference to us.
6686 + */
6687 +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6688 + *lower_root_info, char *options)
6689 +{
6690 + struct nameidata nd;
6691 + char *name;
6692 + int err = 0;
6693 + int branches = 1;
6694 + int bindex = 0;
6695 + int i = 0;
6696 + int j = 0;
6697 + struct dentry *dent1;
6698 + struct dentry *dent2;
6699 +
6700 + if (options[0] == '\0') {
6701 + printk(KERN_WARNING "unionfs: no branches specified\n");
6702 + err = -EINVAL;
6703 + goto out;
6704 + }
6705 +
6706 + /*
6707 + * Each colon means we have a separator, this is really just a rough
6708 + * guess, since strsep will handle empty fields for us.
6709 + */
6710 + for (i = 0; options[i]; i++)
6711 + if (options[i] == ':')
6712 + branches++;
6713 +
6714 + /* allocate space for underlying pointers to lower dentry */
6715 + UNIONFS_SB(sb)->data =
6716 + kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6717 + if (!UNIONFS_SB(sb)->data) {
6718 + err = -ENOMEM;
6719 + goto out;
6720 + }
6721 +
6722 + lower_root_info->lower_paths =
6723 + kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6724 + if (!lower_root_info->lower_paths) {
6725 + err = -ENOMEM;
6726 + goto out;
6727 + }
6728 +
6729 + /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6730 + branches = 0;
6731 + while ((name = strsep(&options, ":")) != NULL) {
6732 + int perms;
6733 + char *mode = strchr(name, '=');
6734 +
6735 + if (!name)
6736 + continue;
6737 + if (!*name) { /* bad use of ':' (extra colons) */
6738 + err = -EINVAL;
6739 + goto out;
6740 + }
6741 +
6742 + branches++;
6743 +
6744 + /* strip off '=' if any */
6745 + if (mode)
6746 + *mode++ = '\0';
6747 +
6748 + perms = parse_branch_mode(mode);
6749 + if (!bindex && !(perms & MAY_WRITE)) {
6750 + err = -EINVAL;
6751 + goto out;
6752 + }
6753 +
6754 + err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6755 + if (err) {
6756 + printk(KERN_WARNING "unionfs: error accessing "
6757 + "lower directory '%s' (error %d)\n",
6758 + name, err);
6759 + goto out;
6760 + }
6761 +
6762 + if ((err = check_branch(&nd))) {
6763 + printk(KERN_WARNING "unionfs: lower directory "
6764 + "'%s' is not a valid branch\n", name);
6765 + path_release(&nd);
6766 + goto out;
6767 + }
6768 +
6769 + lower_root_info->lower_paths[bindex].dentry = nd.dentry;
6770 + lower_root_info->lower_paths[bindex].mnt = nd.mnt;
6771 +
6772 + set_branchperms(sb, bindex, perms);
6773 + set_branch_count(sb, bindex, 0);
6774 + new_branch_id(sb, bindex);
6775 +
6776 + if (lower_root_info->bstart < 0)
6777 + lower_root_info->bstart = bindex;
6778 + lower_root_info->bend = bindex;
6779 + bindex++;
6780 + }
6781 +
6782 + if (branches == 0) {
6783 + printk(KERN_WARNING "unionfs: no branches specified\n");
6784 + err = -EINVAL;
6785 + goto out;
6786 + }
6787 +
6788 + BUG_ON(branches != (lower_root_info->bend + 1));
6789 +
6790 + /*
6791 + * Ensure that no overlaps exist in the branches.
6792 + *
6793 + * This test is required because the Linux kernel has no support
6794 + * currently for ensuring coherency between stackable layers and
6795 + * branches. If we were to allow overlapping branches, it would be
6796 + * possible, for example, to delete a file via one branch, which
6797 + * would not be reflected in another branch. Such incoherency could
6798 + * lead to inconsistencies and even kernel oopses. Rather than
6799 + * implement hacks to work around some of these cache-coherency
6800 + * problems, we prevent branch overlapping, for now. A complete
6801 + * solution will involve proper kernel/VFS support for cache
6802 + * coherency, at which time we could safely remove this
6803 + * branch-overlapping test.
6804 + */
6805 + for (i = 0; i < branches; i++) {
6806 + dent1 = lower_root_info->lower_paths[i].dentry;
6807 + for (j = i + 1; j < branches; j++) {
6808 + dent2 = lower_root_info->lower_paths[j].dentry;
6809 + if (is_branch_overlap(dent1, dent2)) {
6810 + printk(KERN_WARNING "unionfs: branches %d and "
6811 + "%d overlap\n", i, j);
6812 + err = -EINVAL;
6813 + goto out;
6814 + }
6815 + }
6816 + }
6817 +
6818 +out:
6819 + if (err) {
6820 + for (i = 0; i < branches; i++)
6821 + if (lower_root_info->lower_paths[i].dentry) {
6822 + dput(lower_root_info->lower_paths[i].dentry);
6823 + /* initialize: can't use unionfs_mntput here */
6824 + mntput(lower_root_info->lower_paths[i].mnt);
6825 + }
6826 +
6827 + kfree(lower_root_info->lower_paths);
6828 + kfree(UNIONFS_SB(sb)->data);
6829 +
6830 + /*
6831 + * MUST clear the pointers to prevent potential double free if
6832 + * the caller dies later on
6833 + */
6834 + lower_root_info->lower_paths = NULL;
6835 + UNIONFS_SB(sb)->data = NULL;
6836 + }
6837 + return err;
6838 +}
6839 +
6840 +/*
6841 + * Parse mount options. See the manual page for usage instructions.
6842 + *
6843 + * Returns the dentry object of the lower-level (lower) directory;
6844 + * We want to mount our stackable file system on top of that lower directory.
6845 + */
6846 +static struct unionfs_dentry_info *unionfs_parse_options(
6847 +					struct super_block *sb,
6848 +					char *options)
6849 +{
6850 +	struct unionfs_dentry_info *lower_root_info;
6851 +	char *optname;
6852 +	int err = 0;
6853 +	int bindex;
6854 +	int dirsfound = 0;
6855 +
6856 +	/* allocate private data area */
6857 +	lower_root_info = kzalloc(sizeof(*lower_root_info), GFP_KERNEL);
6858 +	/* nothing to undo yet, and out_error would dereference this NULL */
6859 +	if (!lower_root_info)
6860 +		return ERR_PTR(-ENOMEM);
6861 +	lower_root_info->bstart = -1;
6862 +	lower_root_info->bend = -1;
6863 +	lower_root_info->bopaque = -1;
6864 +
6865 +	while ((optname = strsep(&options, ",")) != NULL) {
6866 +		char *optarg;
6867 +		char *endptr;
6868 +		int intval;
6869 +
6870 +		if (!optname || !*optname)
6871 +			continue;
6872 +
6873 +		optarg = strchr(optname, '=');
6874 +		if (optarg)
6875 +			*optarg++ = '\0';
6876 +
6877 +		/*
6878 +		 * All of our options take an argument now. Insert ones that
6879 +		 * don't, above this check.
6880 +		 */
6881 +		if (!optarg) {
6882 +			printk(KERN_WARNING
6883 +			       "unionfs: %s requires an argument.\n", optname);
6884 +			err = -EINVAL;
6885 +			goto out_error;
6886 +		}
6887 +
6888 +		if (!strcmp("dirs", optname)) {
6889 +			if (++dirsfound > 1) {
6890 +				printk(KERN_WARNING
6891 +				       "unionfs: multiple dirs specified\n");
6892 +				err = -EINVAL;
6893 +				goto out_error;
6894 +			}
6895 +			err = parse_dirs_option(sb, lower_root_info, optarg);
6896 +			if (err)
6897 +				goto out_error;
6898 +			continue;
6899 +		}
6900 +
6901 +		/* No integer option exists yet: check the number, then reject. */
6902 +		intval = simple_strtoul(optarg, &endptr, 0);
6903 +		if (*endptr) {
6904 +			printk(KERN_WARNING
6905 +			       "unionfs: invalid %s option '%s'\n",
6906 +			       optname, optarg);
6907 +			err = -EINVAL;
6908 +			goto out_error;
6909 +		}
6910 +
6911 +		err = -EINVAL;
6912 +		printk(KERN_WARNING
6913 +		       "unionfs: unrecognized option '%s'\n", optname);
6914 +		goto out_error;
6915 +	}
6916 +	if (dirsfound != 1) {
6917 +		printk(KERN_WARNING "unionfs: dirs option required\n");
6918 +		err = -EINVAL;
6919 +		goto out_error;
6920 +	}
6921 +	goto out;
6922 +
6923 +out_error:
6924 +	if (lower_root_info && lower_root_info->lower_paths) {
6925 +		for (bindex = lower_root_info->bstart;
6926 +		     bindex >= 0 && bindex <= lower_root_info->bend;
6927 +		     bindex++) {
6928 +			struct dentry *d;
6929 +			struct vfsmount *m;
6930 +
6931 +			d = lower_root_info->lower_paths[bindex].dentry;
6932 +			m = lower_root_info->lower_paths[bindex].mnt;
6933 +
6934 +			dput(d);
6935 +			/* initializing: can't use unionfs_mntput here */
6936 +			mntput(m);
6937 +		}
6938 +	}
6939 +
6940 +	kfree(lower_root_info->lower_paths);
6941 +	kfree(lower_root_info);
6942 +
6943 +	kfree(UNIONFS_SB(sb)->data);
6944 +	UNIONFS_SB(sb)->data = NULL;
6945 +
6946 +	lower_root_info = ERR_PTR(err);
6947 +out:
6948 +	return lower_root_info;
6949 +}
6950 +
6951 +/*
6952 + * Allocate the root dentry of a unionfs superblock by hand.
6953 + *
6954 + * d_alloc_root cannot be used if our own interpose function is to be used
6955 + * unchanged, so this is our own "fake" work-alike of it.
6956 + */
6957 +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
6958 +{
6959 +	static const struct qstr name = {.name = "/",.len = 1 };
6960 +	struct dentry *root;
6961 +
6962 +	if (!sb)
6963 +		return NULL;
6964 +
6965 +	root = d_alloc(NULL, &name);
6966 +	if (root) {
6967 +		root->d_op = &unionfs_dops;
6968 +		root->d_sb = sb;
6969 +		root->d_parent = root;	/* the root is its own parent */
6970 +	}
6971 +	return root;
6972 +}
6973 +
6974 +/*
6975 + * Fill in a new superblock from the mount data; returns 0 or -errno. The
6976 + * unionfs_sb_info rwsem need not be taken: nobody else can reach sb yet.
6977 + */
6978 +static int unionfs_read_super(struct super_block *sb, void *raw_data,
6979 +			      int silent)
6980 +{
6981 +	int err = 0;
6982 +	struct unionfs_dentry_info *lower_root_info = NULL;
6983 +	int bindex, bstart, bend;
6984 +
6985 +	if (!raw_data) {
6986 +		printk(KERN_WARNING
6987 +		       "unionfs: read_super: missing data argument\n");
6988 +		err = -EINVAL;
6989 +		goto out;
6990 +	}
6991 +
6992 +	/* Allocate superblock private data */
6993 +	sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
6994 +	if (!UNIONFS_SB(sb)) {
6995 +		printk(KERN_WARNING "unionfs: read_super: out of memory\n");
6996 +		err = -ENOMEM;
6997 +		goto out;
6998 +	}
6999 +
7000 +	UNIONFS_SB(sb)->bend = -1;
7001 +	atomic_set(&UNIONFS_SB(sb)->generation, 1);
7002 +	init_rwsem(&UNIONFS_SB(sb)->rwsem);
7003 +	UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7004 +
7005 +	lower_root_info = unionfs_parse_options(sb, raw_data);
7006 +	if (IS_ERR(lower_root_info)) {
7007 +		printk(KERN_WARNING
7008 +		       "unionfs: read_super: error while parsing options "
7009 +		       "(err = %ld)\n", PTR_ERR(lower_root_info));
7010 +		err = PTR_ERR(lower_root_info);
7011 +		lower_root_info = NULL;
7012 +		goto out_free;
7013 +	}
7014 +	if (lower_root_info->bstart == -1) {
7015 +		err = -ENOENT;
7016 +		goto out_free;
7017 +	}
7018 +
7019 +	/* set the lower superblock field of upper superblock */
7020 +	bstart = lower_root_info->bstart;
7021 +	BUG_ON(bstart != 0);
7022 +	sbend(sb) = bend = lower_root_info->bend;
7023 +	for (bindex = bstart; bindex <= bend; bindex++) {
7024 +		struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7025 +		unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7026 +	}
7027 +
7028 +	/* max Bytes is the maximum bytes from highest priority branch */
7029 +	sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7030 +
7031 +	sb->s_op = &unionfs_sops;
7032 +
7033 +	/* See comment next to the definition of unionfs_d_alloc_root */
7034 +	sb->s_root = unionfs_d_alloc_root(sb);
7035 +	if (!sb->s_root) {
7036 +		err = -ENOMEM;
7037 +		goto out_dput;
7038 +	}
7039 +
7040 +	/* link the upper and lower dentries */
7041 +	sb->s_root->d_fsdata = NULL;
7042 +	if ((err = new_dentry_private_data(sb->s_root)))
7043 +		goto out_freedpd;
7044 +
7045 +	/* Set the lower dentries for s_root */
7046 +	for (bindex = bstart; bindex <= bend; bindex++) {
7047 +		struct dentry *d;
7048 +		struct vfsmount *m;
7049 +
7050 +		d = lower_root_info->lower_paths[bindex].dentry;
7051 +		m = lower_root_info->lower_paths[bindex].mnt;
7052 +
7053 +		unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7054 +		unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7055 +	}
7056 +	set_dbstart(sb->s_root, bstart);
7057 +	set_dbend(sb->s_root, bend);
7058 +
7059 +	/* Set the generation number to one, since this is for the mount. */
7060 +	atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7061 +
7062 +	/*
7063 +	 * Call interpose to create the upper level inode. Only
7064 +	 * INTERPOSE_LOOKUP can return a value other than 0 on err.
7065 +	 */
7066 +	err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7067 +	unionfs_unlock_dentry(sb->s_root);
7068 +	if (!err)
7069 +		goto out;
7070 +	/* else fall through */
7071 +
7072 +out_freedpd:
7073 +	if (UNIONFS_D(sb->s_root)) {
7074 +		kfree(UNIONFS_D(sb->s_root)->lower_paths);
7075 +		free_dentry_private_data(sb->s_root);
7076 +	}
7077 +	dput(sb->s_root);
7078 +	sb->s_root = NULL;	/* or kill_sb would dput the stale root again */
7079 +out_dput:
7080 +	if (lower_root_info && !IS_ERR(lower_root_info)) {
7081 +		for (bindex = lower_root_info->bstart;
7082 +		     bindex <= lower_root_info->bend; bindex++) {
7083 +			struct dentry *d;
7084 +			struct vfsmount *m;
7085 +
7086 +			d = lower_root_info->lower_paths[bindex].dentry;
7087 +			m = lower_root_info->lower_paths[bindex].mnt;
7088 +
7089 +			dput(d);
7090 +			/* initializing: can't use unionfs_mntput here */
7091 +			mntput(m);
7092 +		}
7093 +		kfree(lower_root_info->lower_paths);
7094 +		kfree(lower_root_info);
7095 +		lower_root_info = NULL;
7096 +	}
7097 +
7098 +out_free:
7099 +	kfree(UNIONFS_SB(sb)->data);
7100 +	kfree(UNIONFS_SB(sb));
7101 +	sb->s_fs_info = NULL;
7102 +
7103 +out:
7104 +	if (lower_root_info && !IS_ERR(lower_root_info)) {
7105 +		kfree(lower_root_info->lower_paths);
7106 +		kfree(lower_root_info);
7107 +	}
7108 +	return err;
7109 +}
7110 +
7111 +/* get_sb hook: dev_name is ignored, unionfs_read_super fills in the sb. */
7112 +static int unionfs_get_sb(struct file_system_type *fs_type, int flags,
7113 +		const char *dev_name, void *raw_data, struct vfsmount *mnt)
7114 +{
7115 +	return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7116 +}
7117 +
7118 +static struct file_system_type unionfs_fs_type = { /* registered in init_unionfs_fs */
7119 + .owner = THIS_MODULE,
7120 + .name = "unionfs",
7121 + .get_sb = unionfs_get_sb,
7122 + .kill_sb = generic_shutdown_super,
7123 + .fs_flags = FS_REVAL_DOT, /* presumably: revalidate "." and ".." too -- confirm */
7124 +};
7125 +
7126 +/* Module init: set up the caches and sioq, then register the filesystem. */
7127 +static int __init init_unionfs_fs(void)
7128 +{
7129 +	int err;
7130 +
7131 +	printk(KERN_INFO "Registering unionfs " UNIONFS_VERSION "\n");
7132 +	if ((err = unionfs_init_filldir_cache()))
7133 +		goto out;
7134 +	if ((err = unionfs_init_inode_cache()))
7135 +		goto out;
7136 +	if ((err = unionfs_init_dentry_cache()))
7137 +		goto out;
7138 +	if ((err = init_sioq()))
7139 +		goto out;
7140 +	err = register_filesystem(&unionfs_fs_type);
7141 +out:
7142 +	if (err) {	/* teardown must tolerate never-initialized state */
7143 +		stop_sioq();
7144 +		unionfs_destroy_filldir_cache();
7145 +		unionfs_destroy_inode_cache();
7146 +		unionfs_destroy_dentry_cache();
7147 +	}
7148 +	return err;
7149 +}
7150 +
7151 +static void __exit exit_unionfs_fs(void)
7152 +{
7153 +	stop_sioq();	/* same teardown as init_unionfs_fs's error path */
7154 +	unionfs_destroy_filldir_cache();
7155 +	unionfs_destroy_inode_cache();
7156 +	unionfs_destroy_dentry_cache();
7157 +	unregister_filesystem(&unionfs_fs_type);
7158 +	printk(KERN_INFO "Completed unionfs module unload.\n");
7159 +}
7160 +
7161 +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7162 + " (http://www.fsl.cs.sunysb.edu)");
7163 +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7164 + " (http://unionfs.filesystems.org)");
7165 +MODULE_LICENSE("GPL");
7166 +
7167 +module_init(init_unionfs_fs);
7168 +module_exit(exit_unionfs_fs);
7169 diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7170 new file mode 100644
7171 index 0000000..5629dcc
7172 --- /dev/null
7173 +++ b/fs/unionfs/mmap.c
7174 @@ -0,0 +1,378 @@
7175 +/*
7176 + * Copyright (c) 2003-2007 Erez Zadok
7177 + * Copyright (c) 2003-2006 Charles P. Wright
7178 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7179 + * Copyright (c) 2005-2006 Junjiro Okajima
7180 + * Copyright (c) 2006 Shaya Potter
7181 + * Copyright (c) 2005 Arun M. Krishnakumar
7182 + * Copyright (c) 2004-2006 David P. Quigley
7183 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7184 + * Copyright (c) 2003 Puja Gupta
7185 + * Copyright (c) 2003 Harikesavan Krishnan
7186 + * Copyright (c) 2003-2007 Stony Brook University
7187 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7188 + *
7189 + * This program is free software; you can redistribute it and/or modify
7190 + * it under the terms of the GNU General Public License version 2 as
7191 + * published by the Free Software Foundation.
7192 + */
7193 +
7194 +#include "union.h"
7195 +
7196 +/*
7197 + * Unionfs doesn't implement ->writepages, which is OK with the VFS and
7198 + * keeps our code simpler and smaller. Nevertheless, somehow, our own
7199 + * ->writepage must be called so we can sync the upper pages with the lower
7200 + * pages: otherwise data changed at the upper layer won't get written to the
7201 + * lower layer.
7202 + *
7203 + * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
7204 + * only, which in turn will call generic_writepages and invoke each of the
7205 + * lower file system's ->writepage. NFS in particular uses the
7206 + * wbc->fs_private field in its nfs_writepage, which is set in its
7207 + * nfs_writepages. So if we don't call the lower nfs_writepages first, then
7208 + * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
7209 + * OOPS. If, however, we implement a unionfs_writepages and then we do call
7210 + * the lower nfs_writepages, then we "lose control" over the pages we're
7211 + * trying to write to the lower file system: we won't be writing our own
7212 + * new/modified data from the upper pages to the lower pages, and any
7213 + * mmap-based changes are lost.
7214 + *
7215 + * This is a fundamental cache-coherency problem in Linux. The kernel isn't
7216 + * able to support such stacking abstractions cleanly. One possible clean
7217 + * way would be that a lower file system's ->writepage method have some sort
7218 + * of a callback to validate if any upper pages for the same file+offset
7219 + * exist and have newer content in them.
7220 + *
7221 + * This whole NULL ptr dereference is triggered at the lower file system
7222 + * (NFS) because the wbc->for_writepages is set to 1. Therefore, to avoid
7223 + * this NULL pointer dereference, we set this flag to 0 and restore it upon
7224 + * exit. This probably means that we're slightly less efficient in writing
7225 + * pages out, doing them one at a time, but at least we avoid the oops until
7226 + * such day as Linux can better support address_space_ops in a stackable
7227 + * fashion.
7228 + */
7229 +static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
7230 +{
7231 +	int err = -EIO;
7232 +	struct inode *inode;
7233 +	struct inode *lower_inode;
7234 +	struct page *lower_page;
7235 +	char *kaddr, *lower_kaddr;
7236 +	int saved_for_writepages = wbc->for_writepages;
7237 +
7238 +	inode = page->mapping->host;
7239 +	lower_inode = unionfs_lower_inode(inode);
7240 +
7241 +	/*
7242 +	 * find lower page (returns a locked page)
7243 +	 *
7244 +	 * NOTE: we used to call grab_cache_page(), but that was unnecessary
7245 +	 * as it would have tried to create a new lower page if it didn't
7246 +	 * exist, leading to deadlocks (esp. under memory-pressure
7247 +	 * conditions, when it is really a bad idea to *consume* more
7248 +	 * memory). Instead, we assume the lower page exists, and if we can
7249 +	 * find it, then we ->writepage on it; if we can't find it, then it
7250 +	 * couldn't have disappeared unless the kernel already flushed it,
7251 +	 * in which case we're still OK. This is especially correct if
7252 +	 * wbc->sync_mode is WB_SYNC_NONE (as per
7253 +	 * Documentation/filesystems/vfs.txt). If we can't flush our page
7254 +	 * because we can't find a lower page, then at least we re-mark our
7255 +	 * page as dirty, and return AOP_WRITEPAGE_ACTIVATE as the VFS
7256 +	 * expects us to. (Note, if in the future it'd turn out that we
7257 +	 * have to find a lower page no matter what, then we'd have to
7258 +	 * resort to RAIF's page pointer flipping trick.)
7259 +	 */
7260 +	lower_page = find_lock_page(lower_inode->i_mapping, page->index);
7261 +	if (!lower_page) {
7262 +		err = AOP_WRITEPAGE_ACTIVATE;
7263 +		set_page_dirty(page);
7264 +		goto out;
7265 +	}
7266 +
7267 +	/* map both pages and copy our data down into the lower page */
7268 +	kaddr = kmap(page);
7269 +	lower_kaddr = kmap(lower_page);
7270 +
7271 +	memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE);
7272 +
7273 +	kunmap(page);
7274 +	kunmap(lower_page);
7275 +
7276 +	BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
7277 +
7278 +	/* workaround for some lower file systems: see big comment on top */
7279 +	if (wbc->for_writepages /* && !wbc->fs_private */)
7280 +		wbc->for_writepages = 0;
7281 +
7282 +	/* call lower writepage (expects locked page) */
7283 +	clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
7284 +	err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
7285 +	wbc->for_writepages = saved_for_writepages; /* restore value */
7286 +
7287 +	/*
7288 +	 * Only AOP_WRITEPAGE_ACTIVATE leaves the lower page locked: on
7289 +	 * success or error the lower ->writepage has already unlocked it.
7290 +	 */
7291 +	if (err == AOP_WRITEPAGE_ACTIVATE) {
7292 +		/*
7293 +		 * Lower file systems such as ramfs and tmpfs, may return
7294 +		 * AOP_WRITEPAGE_ACTIVATE so that the VM won't try to
7295 +		 * (pointlessly) write the page again for a while. But
7296 +		 * those lower file systems also set the page dirty bit back
7297 +		 * again. So we mimic that behaviour here.
7298 +		 */
7299 +		if (PageDirty(lower_page))
7300 +			set_page_dirty(page);
7301 +		unlock_page(lower_page);
7302 +		goto out_release;
7303 +	}
7304 +	if (err < 0) {
7305 +		ClearPageUptodate(page);
7306 +	} else {
7307 +		/* all is well */
7308 +		SetPageUptodate(page);
7309 +		/* lower mtimes have changed: update ours */
7310 +		unionfs_copy_attr_times(inode);
7311 +	}
7312 +	/* we must unlock our page unless we return AOP_WRITEPAGE_ACTIVATE */
7313 +	unlock_page(page);
7314 +out_release:
7315 +	/* b/c find_lock_page increased refcnt; lower_page is unused below */
7316 +	page_cache_release(lower_page);
7317 +out:
7318 +	return err;
7319 +}
7320 +
7321 +/*
7322 + * readpage is called from generic_page_read and the fault handler.
7323 + * If your file system uses generic_page_read for the read op, it
7324 + * must implement readpage.
7325 + *
7326 + * Readpage expects a locked page; our caller, unionfs_readpage, unlocks it.
7327 + */
7328 +static int unionfs_do_readpage(struct file *file, struct page *page)
7329 +{
7330 +	int err = -EIO;
7331 +	struct file *lower_file;
7332 +	struct inode *inode;
7333 +	mm_segment_t old_fs;
7334 +	char *page_data = NULL;
7335 +
7336 +	if (UNIONFS_F(file) == NULL) {
7337 +		err = -ENOENT;
7338 +		goto out;
7339 +	}
7340 +
7341 +	lower_file = unionfs_lower_file(file);
7342 +	/* FIXME: is this assertion right here? */
7343 +	BUG_ON(lower_file == NULL);
7344 +
7345 +	inode = file->f_path.dentry->d_inode;
7346 +
7347 +	page_data = (char *) kmap(page);
7348 +	/*
7349 +	 * Use vfs_read because some lower file systems don't have a
7350 +	 * readpage method, and some file systems (esp. distributed ones)
7351 +	 * don't like their pages to be accessed directly. Using vfs_read
7352 +	 * may be a little slower, but a lot safer, as the VFS does a lot of
7353 +	 * the necessary magic for us.
7354 +	 */
7355 +	lower_file->f_pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
7356 +	old_fs = get_fs();
7357 +	set_fs(KERNEL_DS);
7358 +	err = vfs_read(lower_file, page_data, PAGE_CACHE_SIZE,
7359 +		       &lower_file->f_pos);
7360 +	set_fs(old_fs);
7361 +	/* a short read (e.g. at EOF) must not expose stale page contents */
7362 +	if (err >= 0 && err < PAGE_CACHE_SIZE)
7363 +		memset(page_data + err, 0, PAGE_CACHE_SIZE - err);
7364 +	kunmap(page);
7365 +
7366 +	if (err < 0)
7367 +		goto out;
7368 +	err = 0;
7369 +	/* if vfs_read succeeded above, sync up our times */
7370 +	unionfs_copy_attr_times(inode);
7371 +
7372 +	flush_dcache_page(page);
7373 +
7374 +out:
7375 +	if (err == 0)
7376 +		SetPageUptodate(page);
7377 +	else
7378 +		ClearPageUptodate(page);
7379 +
7380 +	return err;
7381 +}
7382 +
7383 +static int unionfs_readpage(struct file *file, struct page *page)
7384 +{
7385 +	int err;
7386 +
7387 +	unionfs_read_lock(file->f_path.dentry->d_sb);
7388 +	err = unionfs_file_revalidate(file, 0);
7389 +	if (err)
7390 +		goto out;
7391 +	unionfs_check_file(file);
7392 +
7393 +	err = unionfs_do_readpage(file, page);
7394 +	if (err)
7395 +		goto out;
7396 +
7397 +	/* the read worked: touch the lower atime, then copy the times up */
7398 +	touch_atime(unionfs_lower_mnt(file->f_path.dentry),
7399 +		    unionfs_lower_dentry(file->f_path.dentry));
7400 +	unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7401 +out:
7402 +	/*
7403 +	 * Whatever happened above, the page we _might_ have been handed
7404 +	 * locked has to be unlocked; unlock_page also does the wakeup.
7405 +	 */
7406 +	unlock_page(page);
7407 +	unionfs_check_file(file);
7408 +	unionfs_read_unlock(file->f_path.dentry->d_sb);
7409 +
7410 +	return err;
7411 +}
7412 +
7413 +static int unionfs_prepare_write(struct file *file, struct page *page,
7414 + unsigned from, unsigned to)
7415 +{
7416 + int err;
7417 +
7418 + unionfs_read_lock(file->f_path.dentry->d_sb);
7419 + /*
7420 + * This is the only place where we unconditionally copy the lower
7421 + * attribute times before calling unionfs_file_revalidate. The
7422 + * reason is that our ->write calls do_sync_write which in turn will
7423 + * call our ->prepare_write and then ->commit_write. Before our
7424 + * ->write is called, the lower mtimes are in sync, but by the time
7425 + * the VFS calls our ->commit_write, the lower mtimes have changed.
7426 + * Therefore, the only reasonable time for us to sync up from the
7427 + * changed lower mtimes, and avoid an invariant violation warning,
7428 + * is here, in ->prepare_write.
7429 + */
7430 + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7431 + err = unionfs_file_revalidate(file, 1);
7432 + unionfs_check_file(file);
7433 + unionfs_read_unlock(file->f_path.dentry->d_sb);
7434 + /* page, from and to are unused: nothing is done to the page itself here */
7435 + return err;
7436 +}
7437 +
7438 +static int unionfs_commit_write(struct file *file, struct page *page,
7439 +				unsigned from, unsigned to)
7440 +{
7441 +	int err = -ENOMEM;
7442 +	struct inode *inode, *lower_inode;
7443 +	struct file *lower_file = NULL;
7444 +	loff_t pos;
7445 +	unsigned bytes = to - from;
7446 +	char *page_data = NULL;
7447 +	mm_segment_t old_fs;
7448 +
7449 +	BUG_ON(file == NULL);
7450 +
7451 +	unionfs_read_lock(file->f_path.dentry->d_sb);
7452 +	if ((err = unionfs_file_revalidate(file, 1)))
7453 +		goto out;
7454 +	unionfs_check_file(file);
7455 +
7456 +	inode = page->mapping->host;
7457 +	lower_inode = unionfs_lower_inode(inode);
7458 +
7459 +	if (UNIONFS_F(file) != NULL)
7460 +		lower_file = unionfs_lower_file(file);
7461 +
7462 +	/* FIXME: is this assertion right here? */
7463 +	BUG_ON(lower_file == NULL);
7464 +	/* cast first so the shift is done in loff_t (as for pos below) */
7465 +	page_data = (char *)kmap(page);
7466 +	lower_file->f_pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + from;
7467 +
7468 +	/*
7469 +	 * SP: I use vfs_write instead of copying page data and the
7470 +	 * prepare_write/commit_write combo because file system's like
7471 +	 * GFS/OCFS2 don't like things touching those directly,
7472 +	 * calling the underlying write op, while a little bit slower, will
7473 +	 * call all the FS specific code as well
7474 +	 */
7475 +	old_fs = get_fs();
7476 +	set_fs(KERNEL_DS);
7477 +	err = vfs_write(lower_file, page_data + from, bytes,
7478 +			&lower_file->f_pos);
7479 +	set_fs(old_fs);
7480 +
7481 +	kunmap(page);
7482 +
7483 +	if (err < 0)
7484 +		goto out;
7485 +
7486 +	inode->i_blocks = lower_inode->i_blocks;
7487 +	/* we may have to update i_size */
7488 +	pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
7489 +	if (pos > i_size_read(inode))
7490 +		i_size_write(inode, pos);
7491 +	/* if vfs_write succeeded above, sync up our times */
7492 +	unionfs_copy_attr_times(inode);
7493 +	mark_inode_dirty_sync(inode);
7494 +
7495 +out:
7496 +	if (err < 0)
7497 +		ClearPageUptodate(page);
7498 +
7499 +	unionfs_read_unlock(file->f_path.dentry->d_sb);
7500 +	unionfs_check_file(file);
7501 +	return err; /* vfs_write's byte count on success, else -errno */
7502 +}
7503 +
7504 +static void unionfs_sync_page(struct page *page)
7505 +{
7506 +	struct inode *lower_inode;
7507 +	struct address_space *lower_mapping;
7508 +	struct page *lower_page;
7509 +
7510 +	lower_inode = unionfs_lower_inode(page->mapping->host);
7511 +
7512 +	/*
7513 +	 * Look the lower page up without creating it (find_lock_page hands
7514 +	 * it back locked and with an extra reference).
7515 +	 *
7516 +	 * NOTE: we used to call grab_cache_page(), but that was unnecessary
7517 +	 * as it would have tried to create a new lower page if it didn't
7518 +	 * exist, leading to deadlocks. All our sync_page method needs to
7519 +	 * do is ensure that pending I/O gets done.
7520 +	 */
7521 +	lower_page = find_lock_page(lower_inode->i_mapping, page->index);
7522 +	if (!lower_page) {
7523 +		/* no lower page to sync */
7524 +		printk(KERN_DEBUG "unionfs: find_lock_page failed\n");
7525 +		return;
7526 +	}
7527 +
7528 +	/*
7529 +	 * Let the lower file system's own ->sync_page, if it has one, do
7530 +	 * the actual sync.
7531 +	 *
7532 +	 * XXX: can we optimize ala RAIF and set the lower page to be
7533 +	 * discarded after a successful sync_page?
7534 +	 */
7535 +	lower_mapping = lower_page->mapping;
7536 +	if (lower_mapping && lower_mapping->a_ops &&
7537 +	    lower_mapping->a_ops->sync_page)
7538 +		lower_mapping->a_ops->sync_page(lower_page);
7539 +
7540 +	/* undo find_lock_page: first the page lock ... */
7541 +	unlock_page(lower_page);
7542 +	/* ... then the reference it took */
7543 +	page_cache_release(lower_page);
7544 +}
7545 +
7546 +struct address_space_operations unionfs_aops = { /* no ->writepages: see the comment above unionfs_writepage */
7547 + .writepage = unionfs_writepage,
7548 + .readpage = unionfs_readpage,
7549 + .prepare_write = unionfs_prepare_write,
7550 + .commit_write = unionfs_commit_write,
7551 + .sync_page = unionfs_sync_page,
7552 +};
7553 diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7554 new file mode 100644
7555 index 0000000..5c9d14b
7556 --- /dev/null
7557 +++ b/fs/unionfs/rdstate.c
7558 @@ -0,0 +1,282 @@
7559 +/*
7560 + * Copyright (c) 2003-2007 Erez Zadok
7561 + * Copyright (c) 2003-2006 Charles P. Wright
7562 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7563 + * Copyright (c) 2005-2006 Junjiro Okajima
7564 + * Copyright (c) 2005 Arun M. Krishnakumar
7565 + * Copyright (c) 2004-2006 David P. Quigley
7566 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7567 + * Copyright (c) 2003 Puja Gupta
7568 + * Copyright (c) 2003 Harikesavan Krishnan
7569 + * Copyright (c) 2003-2007 Stony Brook University
7570 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7571 + *
7572 + * This program is free software; you can redistribute it and/or modify
7573 + * it under the terms of the GNU General Public License version 2 as
7574 + * published by the Free Software Foundation.
7575 + */
7576 +
7577 +#include "union.h"
7578 +
7579 +/* This file contains the routines for maintaining readdir state. */
7580 +
7581 +/*
7582 + * There are two structures here, rdstate which is a hash table
7583 + * of the second structure which is a filldir_node.
7584 + */
7585 +
7586 +/*
7587 + * Slab cache for filldir nodes: we allocate a lot of them and they should
7588 + * not waste memory. A node with a small name (as defined by the dentry
7589 + * structure) keeps it inline to preserve kmalloc space. The cache is
7590 + * created below; unionfs_init_filldir_cache returns 0 or -ENOMEM.
7591 + */
7592 +static struct kmem_cache *unionfs_filldir_cachep;
7593 +
7594 +int unionfs_init_filldir_cache(void)
7595 +{
7596 +	unionfs_filldir_cachep = kmem_cache_create("unionfs_filldir",
7597 +					sizeof(struct filldir_node), 0,
7598 +					SLAB_RECLAIM_ACCOUNT, NULL, NULL);
7599 +	if (!unionfs_filldir_cachep)
7600 +		return -ENOMEM;
7601 +	return 0;
7602 +}
7603 +
7604 +void unionfs_destroy_filldir_cache(void)
7605 +{
7606 + if (unionfs_filldir_cachep) /* still NULL if the cache was never created */
7607 + kmem_cache_destroy(unionfs_filldir_cachep);
7608 +}
7609 +
7610 +/*
7611 + * This is a tuning parameter that tells us roughly how big to make the
7612 + * hash table in directory entries per page. This isn't perfect, but
7613 + * at least we get a hash table size that shouldn't be too overloaded.
7614 + * The following averages are based on my home directory.
7615 + * 14.44693 Overall
7616 + * 12.29 Single Page Directories
7617 + * 117.93 Multi-page directories
7618 + */
7619 +#define DENTPAGE 4096
7620 +#define DENTPERONEPAGE 12
7621 +#define DENTPERPAGE 118
7622 +#define MINHASHSIZE 1
7623 +static int guesstimate_hash_size(struct inode *inode)
7624 +{
7625 +	int total = MINHASHSIZE;
7626 +	int branch;
7627 +
7628 +	/* a positive size already recorded in our inode info is reused */
7629 +	if (UNIONFS_I(inode)->hashsize > 0)
7630 +		return UNIONFS_I(inode)->hashsize;
7631 +
7632 +	/* otherwise add up an estimate for every lower inode we have */
7633 +	for (branch = ibstart(inode); branch <= ibend(inode); branch++) {
7634 +		struct inode *lower = unionfs_lower_inode_idx(inode, branch);
7635 +
7636 +		if (!lower)
7637 +			continue;
7638 +		if (lower->i_size == DENTPAGE)
7639 +			total += DENTPERONEPAGE;
7640 +		else
7641 +			total += (lower->i_size / DENTPAGE) * DENTPERPAGE;
7642 +	}
7643 +	return total;
7644 +}
7645 +
7646 +int init_rdstate(struct file *file)
7647 +{
7648 +	struct unionfs_dir_state *fresh;
7649 +
7650 +	BUG_ON(sizeof(loff_t) != 2 * sizeof(unsigned int));
7651 +	BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7652 +
7653 +	fresh = alloc_rdstate(file->f_path.dentry->d_inode, fbstart(file));
7654 +	UNIONFS_F(file)->rdstate = fresh;
7655 +	return fresh ? 0 : -ENOMEM;
7656 +}
7657 +
7658 +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7659 +{
7660 +	struct unionfs_dir_state *r, *match = NULL;
7661 +
7662 +	/* walk the inode's readdir cache under rdlock */
7663 +	spin_lock(&UNIONFS_I(inode)->rdlock);
7664 +	list_for_each_entry(r, &UNIONFS_I(inode)->readdircache, cache) {
7665 +		if (fpos != rdstate2offset(r))
7666 +			continue;
7667 +
7668 +		/* hit: take the state out of the cache and hand it over */
7669 +		list_del(&r->cache);
7670 +		UNIONFS_I(inode)->rdcount--;
7671 +		match = r;
7672 +		break;
7673 +	}
7674 +	spin_unlock(&UNIONFS_I(inode)->rdlock);
7675 +	return match;
7676 +}
7677 +
7678 +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7679 +{
7680 +	const unsigned long hdr = sizeof(struct unionfs_dir_state);
7681 +	struct unionfs_dir_state *state;
7682 +	unsigned long bytes;
7683 +	int buckets, n;
7684 +
7685 +	/* header plus one list head per estimated bucket, as a power of two */
7686 +	buckets = guesstimate_hash_size(inode);
7687 +	bytes = hdr + buckets * sizeof(struct list_head);
7688 +	bytes = __roundup_pow_of_two(bytes);
7689 +
7690 +	/* This should give us about 500 entries anyway. */
7691 +	if (bytes > PAGE_SIZE)
7692 +		bytes = PAGE_SIZE;
7693 +	/* the table gets however many list heads fit after the header */
7694 +	buckets = (bytes - hdr) / sizeof(struct list_head);
7695 +
7696 +	state = kmalloc(bytes, GFP_KERNEL);
7697 +	if (!state)
7698 +		return NULL;
7699 +
7700 +	/* take the inode's next cookie; it wraps to 1 at MAXRDCOOKIE - 1 */
7701 +	spin_lock(&UNIONFS_I(inode)->rdlock);
7702 +	if (UNIONFS_I(inode)->cookie < (MAXRDCOOKIE - 1))
7703 +		UNIONFS_I(inode)->cookie++;
7704 +	else
7705 +		UNIONFS_I(inode)->cookie = 1;
7706 +	state->cookie = UNIONFS_I(inode)->cookie;
7707 +	spin_unlock(&UNIONFS_I(inode)->rdlock);
7708 +
7709 +	state->size = buckets;
7710 +	state->hashentries = 0;
7711 +	state->dirpos = 0;
7712 +	state->bindex = bindex;
7713 +	state->access = jiffies;
7714 +	state->offset = 1;
7715 +	for (n = 0; n < state->size; n++)
7716 +		INIT_LIST_HEAD(&state->list[n]);
7717 +	return state;
7718 +}
7719 +
7720 +static void free_filldir_node(struct filldir_node *node)
7721 +{
7722 + if (node->namelen >= DNAME_INLINE_LEN_MIN) /* name was kmalloc'ed, not inline */
7723 + kfree(node->name);
7724 + kmem_cache_free(unionfs_filldir_cachep, node);
7725 +}
7726 +
7727 +/* Release a readdir state together with every filldir node hashed in it. */
7728 +void free_rdstate(struct unionfs_dir_state *state)
7729 +{
7730 +	struct filldir_node *node, *next;
7731 +	int bucket;
7732 +
7733 +	/* empty each hash bucket, freeing the nodes as they are unlinked */
7734 +	for (bucket = 0; bucket < state->size; bucket++) {
7735 +		struct list_head *head = &state->list[bucket];
7736 +
7737 +		list_for_each_entry_safe(node, next, head, file_list) {
7738 +			list_del(&node->file_list);
7739 +			free_filldir_node(node);
7740 +		}
7741 +	}
7742 +
7743 +	/* the bucket array is part of the state's own allocation */
7744 +	kfree(state);
7745 +}
7746 +
7747 +/*
7748 + * Look up a name in the hash table of a readdir state.
7749 + *
7750 + * Returns the matching filldir node, or NULL if the name is not there. A
7751 + * match that sits in rdstate's current branch (rdstate->bindex) is
7752 + * reported with a KERN_DEBUG message, but it is returned all the same.
7753 + */
7754 +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7755 +				       const char *name, int namelen)
7756 +{
7757 +	struct filldir_node *node;
7758 +	struct list_head *bucket;
7759 +	unsigned int hash;
7760 +	int index;
7761 +
7762 +	BUG_ON(namelen <= 0);
7763 +
7764 +	/* the hash picks the bucket and is compared before the name is */
7765 +	hash = full_name_hash(name, namelen);
7766 +	index = hash % rdstate->size;
7767 +	bucket = &rdstate->list[index];
7768 +
7769 +	list_for_each_entry(node, bucket, file_list) {
7770 +		if (node->namelen != namelen || node->hash != hash ||
7771 +		    strncmp(node->name, name, namelen))
7772 +			continue;
7773 +
7774 +		/*
7775 +		 * A duplicate exists, and hence there is no need to add a
7776 +		 * new entry to the list.
7777 +		 *
7778 +		 * If the duplicate is in this branch, then the file
7779 +		 * system is corrupted.
7780 +		 */
7781 +		if (node->bindex == rdstate->bindex) {
7782 +			printk(KERN_DEBUG "unionfs: filldir: possible "
7783 +			       "I/O error: a file is duplicated "
7784 +			       "in the same branch %d: %s\n",
7785 +			       rdstate->bindex, node->name);
7786 +		}
7787 +		return node;
7788 +	}
7789 +
7790 +	/* ran off the end of the bucket: the name is not in the table */
7791 +	return NULL;
7792 +}
7793 +
7794 +/* Add a name to the readdir state's hash table; returns 0 or -ENOMEM. */
7795 +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7796 +		     int namelen, int bindex, int whiteout)
7797 +{
7798 +	struct filldir_node *new;
7799 +	unsigned int hash;
7800 +	int index;
7801 +	int err = 0;
7802 +	struct list_head *head;
7803 +
7804 +	BUG_ON(namelen <= 0);
7805 +
7806 +	hash = full_name_hash(name, namelen);
7807 +	index = hash % rdstate->size;
7808 +	head = &(rdstate->list[index]);
7809 +
7810 +	new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7811 +	if (!new) {
7812 +		err = -ENOMEM;
7813 +		goto out;
7814 +	}
7815 +
7816 +	INIT_LIST_HEAD(&new->file_list);
7817 +	new->namelen = namelen;
7818 +	new->hash = hash;
7819 +	new->bindex = bindex;
7820 +	new->whiteout = whiteout;
7821 +
7822 +	/* short names are stored inline; longer ones are kmalloc'ed */
7823 +	if (namelen < DNAME_INLINE_LEN_MIN)
7824 +		new->name = new->iname;
7825 +	else {
7826 +		new->name = kmalloc(namelen + 1, GFP_KERNEL);
7827 +		if (!new->name) {
7828 +			kmem_cache_free(unionfs_filldir_cachep, new);
7829 +			err = -ENOMEM;
7830 +			goto out;
7831 +		}
7832 +	}
7833 +
7834 +	memcpy(new->name, name, namelen);
7835 +	new->name[namelen] = '\0';
7836 +	rdstate->hashentries++;
7837 +	list_add(&(new->file_list), head);
7838 +out:
7839 +	return err;
7840 +}
7841 diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7842 new file mode 100644
7843 index 0000000..1761f8b
7844 --- /dev/null
7845 +++ b/fs/unionfs/rename.c
7846 @@ -0,0 +1,521 @@
7847 +/*
7848 + * Copyright (c) 2003-2007 Erez Zadok
7849 + * Copyright (c) 2003-2006 Charles P. Wright
7850 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7851 + * Copyright (c) 2005-2006 Junjiro Okajima
7852 + * Copyright (c) 2005 Arun M. Krishnakumar
7853 + * Copyright (c) 2004-2006 David P. Quigley
7854 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7855 + * Copyright (c) 2003 Puja Gupta
7856 + * Copyright (c) 2003 Harikesavan Krishnan
7857 + * Copyright (c) 2003-2007 Stony Brook University
7858 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7859 + *
7860 + * This program is free software; you can redistribute it and/or modify
7861 + * it under the terms of the GNU General Public License version 2 as
7862 + * published by the Free Software Foundation.
7863 + */
7864 +
7865 +#include "union.h"
7866 +
7867 +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7868 + struct inode *new_dir, struct dentry *new_dentry,
7869 + int bindex, struct dentry **wh_old)
7870 +{
7871 + int err = 0;
7872 + struct dentry *lower_old_dentry;
7873 + struct dentry *lower_new_dentry;
7874 + struct dentry *lower_old_dir_dentry;
7875 + struct dentry *lower_new_dir_dentry;
7876 + struct dentry *lower_wh_dentry;
7877 + struct dentry *lower_wh_dir_dentry;
7878 + char *wh_name = NULL;
7879 + /* Rename old_dentry to new_dentry inside branch bindex; 0 or -errno. */
7880 + lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7881 + lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7882 +
7883 + if (!lower_new_dentry) { /* nothing for the new name in this branch */
7884 + lower_new_dentry =
7885 + create_parents(new_dentry->d_parent->d_inode,
7886 + new_dentry, new_dentry->d_name.name,
7887 + bindex);
7888 + if (IS_ERR(lower_new_dentry)) {
7889 + printk(KERN_DEBUG "unionfs: error creating directory "
7890 + "tree for rename, bindex = %d, err = %ld\n",
7891 + bindex, PTR_ERR(lower_new_dentry));
7892 + err = PTR_ERR(lower_new_dentry);
7893 + goto out;
7894 + }
7895 + }
7896 +
7897 + wh_name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
7898 + if (IS_ERR(wh_name)) {
7899 + err = PTR_ERR(wh_name);
7900 + wh_name = NULL; /* "out" kfree()s wh_name: never an ERR_PTR */
7901 + goto out;
7902 + }
7903 +
7904 + lower_wh_dentry = lookup_one_len(wh_name, lower_new_dentry->d_parent,
7905 + new_dentry->d_name.len +
7906 + UNIONFS_WHLEN);
7907 + if (IS_ERR(lower_wh_dentry)) {
7908 + err = PTR_ERR(lower_wh_dentry);
7909 + goto out;
7910 + }
7911 +
7912 + if (lower_wh_dentry->d_inode) {
7913 + /* a whiteout for the new name exists: it has to go first */
7914 + if (lower_new_dentry->d_inode) {
7915 + printk(KERN_WARNING "unionfs: both a whiteout and a "
7916 + "dentry exist when doing a rename!\n");
7917 + err = -EIO;
7918 +
7919 + dput(lower_wh_dentry);
7920 + goto out;
7921 + }
7922 +
7923 + lower_wh_dir_dentry = lock_parent(lower_wh_dentry);
7924 + if (!(err = is_robranch_super(old_dentry->d_sb, bindex)))
7925 + err = vfs_unlink(lower_wh_dir_dentry->d_inode,
7926 + lower_wh_dentry);
7927 +
7928 + dput(lower_wh_dentry);
7929 + unlock_dir(lower_wh_dir_dentry);
7930 + if (err)
7931 + goto out;
7932 + } else
7933 + dput(lower_wh_dentry);
7934 +
7935 + dget(lower_old_dentry); /* dropped again after out_unlock */
7936 + lower_old_dir_dentry = dget_parent(lower_old_dentry);
7937 + lower_new_dir_dentry = dget_parent(lower_new_dentry);
7938 +
7939 + lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7940 +
7941 + err = is_robranch_super(old_dentry->d_sb, bindex);
7942 + if (err)
7943 + goto out_unlock;
7944 +
7945 + /*
7946 + * ready to whiteout for old_dentry. caller will create the actual
7947 + * whiteout, and must dput(*wh_old)
7948 + */
7949 + if (wh_old) {
7950 + char *whname;
7951 + whname = alloc_whname(old_dentry->d_name.name,
7952 + old_dentry->d_name.len);
7953 + err = PTR_ERR(whname); /* only used when IS_ERR() holds */
7954 + if (IS_ERR(whname))
7955 + goto out_unlock;
7956 + *wh_old = lookup_one_len(whname, lower_old_dir_dentry,
7957 + old_dentry->d_name.len +
7958 + UNIONFS_WHLEN);
7959 + kfree(whname);
7960 + err = PTR_ERR(*wh_old); /* only used when IS_ERR() holds */
7961 + if (IS_ERR(*wh_old)) {
7962 + *wh_old = NULL; /* never hand an ERR_PTR to the caller */
7963 + goto out_unlock;
7964 + }
7965 + }
7966 +
7967 + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7968 + lower_new_dir_dentry->d_inode, lower_new_dentry);
7969 +
7970 +out_unlock:
7971 + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7972 +
7973 + dput(lower_old_dir_dentry);
7974 + dput(lower_new_dir_dentry);
7975 + dput(lower_old_dentry);
7976 +
7977 +out:
7978 + if (!err) {
7979 + /* Fixup the new_dentry. */
7980 + if (bindex < dbstart(new_dentry))
7981 + set_dbstart(new_dentry, bindex);
7982 + else if (bindex > dbend(new_dentry))
7983 + set_dbend(new_dentry, bindex);
7984 + }
7985 +
7986 + kfree(wh_name); /* NULL on every path that has no buffer */
7987 +
7988 + return err;
7989 +}
7990 +
7991 +/*
7992 + * Main rename code, documented in Documentation/filesystems/unionfs/
7993 + * rename.txt. Calls __unionfs_rename() per branch; returns 0 or -errno.
7994 + * Owns the wh_old reference: both "out" and "revert_out" drop it.
7995 + */
7996 +static int do_unionfs_rename(struct inode *old_dir,
7997 + struct dentry *old_dentry,
7998 + struct inode *new_dir,
7999 + struct dentry *new_dentry)
8000 +{
8001 + int err = 0;
8002 + int bindex, bwh_old;
8003 + int old_bstart, old_bend;
8004 + int new_bstart, new_bend;
8005 + int do_copyup = -1;
8006 + struct dentry *parent_dentry;
8007 + int local_err = 0;
8008 + int eio = 0;
8009 + int revert = 0;
8010 + struct dentry *wh_old = NULL;
8011 +
8012 + old_bstart = dbstart(old_dentry);
8013 + bwh_old = old_bstart;
8014 + old_bend = dbend(old_dentry);
8015 + parent_dentry = old_dentry->d_parent;
8016 +
8017 + new_bstart = dbstart(new_dentry);
8018 + new_bend = dbend(new_dentry);
8019 +
8020 + /* Rename source to destination. */
8021 + err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
8022 + old_bstart, &wh_old);
8023 + if (err) {
8024 + if (!IS_COPYUP_ERR(err))
8025 + goto out;
8026 + do_copyup = old_bstart - 1;
8027 + } else
8028 + revert = 1;
8029 +
8030 + /*
8031 + * Unlink all instances of destination that exist to the left of
8032 + * bstart of source. On error, revert back, goto out.
8033 + */
8034 + for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
8035 + struct dentry *unlink_dentry;
8036 + struct dentry *unlink_dir_dentry;
8037 +
8038 + unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
8039 + if (!unlink_dentry)
8040 + continue;
8041 +
8042 + unlink_dir_dentry = lock_parent(unlink_dentry);
8043 + if (!(err = is_robranch_super(old_dir->i_sb, bindex)))
8044 + err = vfs_unlink(unlink_dir_dentry->d_inode,
8045 + unlink_dentry);
8046 +
8047 + fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
8048 + unlink_dir_dentry->d_inode);
8049 + /* propagate number of hard-links */
8050 + new_dentry->d_parent->d_inode->i_nlink =
8051 + unionfs_get_nlinks(new_dentry->d_parent->d_inode);
8052 +
8053 + unlock_dir(unlink_dir_dentry);
8054 + if (!err) {
8055 + if (bindex != new_bstart) {
8056 + dput(unlink_dentry);
8057 + unionfs_set_lower_dentry_idx(new_dentry,
8058 + bindex, NULL);
8059 + }
8060 + } else if (IS_COPYUP_ERR(err)) {
8061 + do_copyup = bindex - 1;
8062 + } else if (revert) {
8063 + /* wh_old is dropped at revert_out */
8064 + goto revert;
8065 + }
8066 + }
8067 +
8068 + if (do_copyup != -1) {
8069 + for (bindex = do_copyup; bindex >= 0; bindex--) {
8070 + /*
8071 + * copyup the file into some left directory, so that
8072 + * you can rename it
8073 + */
8074 + err = copyup_dentry(old_dentry->d_parent->d_inode,
8075 + old_dentry, old_bstart, bindex,
8076 + old_dentry->d_name.name,
8077 + old_dentry->d_name.len,
8078 + NULL, old_dentry->d_inode->i_size);
8079 + /* if copyup failed, try next branch to the left */
8080 + if (err)
8081 + continue;
8082 + dput(wh_old);
8083 + wh_old = NULL; /* callee may fail before setting it */
8084 + bwh_old = bindex;
8085 + err = __unionfs_rename(old_dir, old_dentry, new_dir,
8086 + new_dentry, bindex, &wh_old);
8087 + break;
8088 + }
8089 + }
8090 +
8091 + /* make it opaque */
8092 + if (S_ISDIR(old_dentry->d_inode->i_mode)) {
8093 + err = make_dir_opaque(old_dentry, dbstart(old_dentry));
8094 + if (err)
8095 + goto revert;
8096 + }
8097 +
8098 + /*
8099 + * Create whiteout for source, only if:
8100 + * (1) There is more than one underlying instance of source.
8101 + * (2) We did a copy_up
8102 + */
8103 + if ((old_bstart != old_bend) || (do_copyup != -1)) {
8104 + struct dentry *lower_parent;
8105 + if (!wh_old || wh_old->d_inode || bwh_old < 0) {
8106 + printk(KERN_ERR "unionfs: rename error "
8107 + "(wh_old=%p/%p bwh_old=%d)\n", wh_old,
8108 + (wh_old ? wh_old->d_inode : NULL), bwh_old);
8109 + err = -EIO;
8110 + goto out;
8111 + }
8112 + lower_parent = lock_parent(wh_old);
8113 + local_err = vfs_create(lower_parent->d_inode, wh_old, S_IRUGO,
8114 + NULL);
8115 + unlock_dir(lower_parent);
8116 + if (!local_err)
8117 + set_dbopaque(old_dentry, bwh_old);
8118 + else {
8119 + /*
8120 + * we can't fix anything now, so we cop-out and use
8121 + * -EIO.
8122 + */
8123 + printk(KERN_ERR "unionfs: can't create a whiteout for "
8124 + "the source in rename!\n");
8125 + err = -EIO;
8126 + }
8127 + }
8128 +
8129 +out:
8130 + dput(wh_old);
8131 + return err;
8132 +
8133 +revert:
8134 + /* Do revert here. */
8135 + local_err = unionfs_refresh_lower_dentry(new_dentry, old_bstart);
8136 + if (local_err) {
8137 + printk(KERN_WARNING "unionfs: revert failed in rename: "
8138 + "the new refresh failed.\n");
8139 + eio = -EIO;
8140 + }
8141 +
8142 + local_err = unionfs_refresh_lower_dentry(old_dentry, old_bstart);
8143 + if (local_err) {
8144 + printk(KERN_WARNING "unionfs: revert failed in rename: "
8145 + "the old refresh failed.\n");
8146 + eio = -EIO;
8147 + goto revert_out;
8148 + }
8149 +
8150 + if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
8151 + !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
8152 + printk(KERN_WARNING "unionfs: revert failed in rename: "
8153 + "the object disappeared from under us!\n");
8154 + eio = -EIO;
8155 + goto revert_out;
8156 + }
8157 +
8158 + if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
8159 + unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
8160 + printk(KERN_WARNING "unionfs: revert failed in rename: "
8161 + "the object was created underneath us!\n");
8162 + eio = -EIO;
8163 + goto revert_out;
8164 + }
8165 +
8166 + local_err = __unionfs_rename(new_dir, new_dentry,
8167 + old_dir, old_dentry, old_bstart, NULL);
8168 +
8169 + /* If we can't fix it, then we cop-out with -EIO. */
8170 + if (local_err) {
8171 + printk(KERN_WARNING "unionfs: revert failed in rename!\n");
8172 + eio = -EIO;
8173 + }
8174 +
8175 + local_err = unionfs_refresh_lower_dentry(new_dentry, bindex);
8176 + if (local_err)
8177 + eio = -EIO;
8178 + local_err = unionfs_refresh_lower_dentry(old_dentry, bindex);
8179 + if (local_err)
8180 + eio = -EIO;
8181 +
8182 +revert_out:
8183 + /* no revert path has released wh_old yet (it may be NULL) */
8184 + dput(wh_old);
8185 + return eio ? eio : err;
8186 +}
8187 +
8188 +static struct dentry *lookup_whiteout(struct dentry *dentry)
8189 +{
8190 + struct dentry *dir, *lower_dir, *found = ERR_PTR(-ENOENT);
8191 + int idx, first, last;
8192 + char *wh_name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8193 +
8194 + /* alloc_whname() hands back an ERR_PTR on failure; pass it through */
8195 + if (IS_ERR(wh_name))
8196 + return (void *)wh_name;
8197 +
8198 + dir = dget_parent(dentry);
8199 + unionfs_lock_dentry(dir);
8200 + first = dbstart(dir);
8201 + last = dbend(dir);
8202 + /* scan the parent's branches in order for an existing whiteout */
8203 + for (idx = first; idx <= last; idx++) {
8204 + lower_dir = unionfs_lower_dentry_idx(dir, idx);
8205 + if (!lower_dir)
8206 + continue;
8207 + found = lookup_one_len(wh_name, lower_dir,
8208 + dentry->d_name.len + UNIONFS_WHLEN);
8209 + if (IS_ERR(found))
8210 + continue;
8211 + if (found->d_inode)
8212 + break;
8213 + dput(found);
8214 + found = ERR_PTR(-ENOENT);
8215 + }
8216 + unionfs_unlock_dentry(dir);
8217 + dput(dir);
8218 + kfree(wh_name);
8219 + return found;
8220 +}
8221 +
8222 +/*
8223 + * Directories are never copied up inside the kernel: they may hold huge
8224 + * numbers of children. When a rename would need that, answer -EXDEV so
8225 + * the user-space tool that asked recurses and has each file copied up
8226 + * on its own. Returns 0 when the directory may be renamed in place.
8227 + */
8228 +static int may_rename_dir(struct dentry *dentry)
8229 +{
8230 + int rc, first;
8231 +
8232 + rc = check_empty(dentry, NULL);
8233 + if (rc && rc != -ENOTEMPTY)
8234 + return rc;
8235 + /* non-empty and on a read-only branch: would require a copyup */
8236 + if (rc == -ENOTEMPTY && is_robranch(dentry))
8237 + return -EXDEV;
8238 +
8239 + first = dbstart(dentry);
8240 + if (dbend(dentry) == first || dbopaque(dentry) == first)
8241 + return 0;
8242 +
8243 + /* check emptiness again with the leftmost branch left out */
8244 + set_dbstart(dentry, first + 1);
8245 + rc = check_empty(dentry, NULL);
8246 + set_dbstart(dentry, first);
8247 +
8248 + return rc == -ENOTEMPTY ? -EXDEV : rc;
8249 +}
8250 +
8251 +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8252 + struct inode *new_dir, struct dentry *new_dentry)
8253 +{
8254 + int err = 0;
8255 + struct dentry *wh_dentry;
8256 + /* Validate both dentries, then let do_unionfs_rename() do the work. */
8257 + unionfs_read_lock(old_dentry->d_sb);
8258 + unionfs_double_lock_dentry(old_dentry, new_dentry);
8259 +
8260 + if (!__unionfs_d_revalidate_chain(old_dentry, NULL, 0)) {
8261 + err = -ESTALE;
8262 + goto out;
8263 + }
8264 + if (!d_deleted(new_dentry) && new_dentry->d_inode &&
8265 + !__unionfs_d_revalidate_chain(new_dentry, NULL, 0)) {
8266 + err = -ESTALE;
8267 + goto out;
8268 + }
8269 +
8270 + if (!S_ISDIR(old_dentry->d_inode->i_mode))
8271 + err = unionfs_partial_lookup(old_dentry);
8272 + else
8273 + err = may_rename_dir(old_dentry); /* may answer -EXDEV */
8274 +
8275 + if (err)
8276 + goto out;
8277 +
8278 + err = unionfs_partial_lookup(new_dentry);
8279 + if (err)
8280 + goto out;
8281 +
8282 + /*
8283 + * if new_dentry is already lower because of whiteout,
8284 + * simply override it even if the whited-out dir is not empty.
8285 + */
8286 + wh_dentry = lookup_whiteout(new_dentry);
8287 + if (!IS_ERR(wh_dentry))
8288 + dput(wh_dentry); /* only its existence matters here */
8289 + else if (new_dentry->d_inode) {
8290 + if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8291 + S_ISDIR(new_dentry->d_inode->i_mode)) {
8292 + err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8293 + -ENOTDIR : -EISDIR;
8294 + goto out;
8295 + }
8296 +
8297 + if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8298 + struct unionfs_dir_state *namelist;
8299 + /* check if this unionfs directory is empty or not */
8300 + err = check_empty(new_dentry, &namelist);
8301 + if (err)
8302 + goto out;
8303 +
8304 + if (!is_robranch(new_dentry))
8305 + err = delete_whiteouts(new_dentry,
8306 + dbstart(new_dentry),
8307 + namelist);
8308 +
8309 + free_rdstate(namelist);
8310 +
8311 + if (err)
8312 + goto out;
8313 + }
8314 + }
8315 + err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8316 +out:
8317 + if (err)
8318 + /* clear the new_dentry stuff created */
8319 + d_drop(new_dentry);
8320 + else {
8321 + /*
8322 + * force re-lookup since the dir on ro branch is not renamed,
8323 + * and lower dentries still indicate the un-renamed ones.
8324 + */
8325 + if (S_ISDIR(old_dentry->d_inode->i_mode))
8326 + atomic_dec(&UNIONFS_D(old_dentry)->generation);
8327 + else
8328 + unionfs_purge_extras(old_dentry);
8329 + if (new_dentry->d_inode &&
8330 + !S_ISDIR(new_dentry->d_inode->i_mode)) {
8331 + unionfs_purge_extras(new_dentry);
8332 + unionfs_inherit_mnt(new_dentry);
8333 + if (!unionfs_lower_inode(new_dentry->d_inode)) {
8334 + /*
8335 + * If we get here, it means that no copyup
8336 + * was needed, and that a file by the old
8337 + * name already existing on the destination
8338 + * branch; that file got renamed earlier in
8339 + * this function, so all we need to do here
8340 + * is set the lower inode.
8341 + */
8342 + struct inode *inode;
8343 + inode = unionfs_lower_inode(
8344 + old_dentry->d_inode);
8345 + atomic_inc(&inode->i_count);
8346 + unionfs_set_lower_inode_idx(
8347 + new_dentry->d_inode,
8348 + dbstart(new_dentry), inode);
8349 + }
8350 +
8351 + }
8352 + /* if all of this renaming succeeded, update our times */
8353 + unionfs_copy_attr_times(old_dir);
8354 + unionfs_copy_attr_times(new_dir);
8355 + unionfs_copy_attr_times(old_dentry->d_inode);
8356 + /* NOTE(review): d_inode is NULL-checked above; confirm NULL is ok */
8357 + unionfs_copy_attr_times(new_dentry->d_inode);
8358 + unionfs_check_inode(old_dir);
8359 + unionfs_check_inode(new_dir);
8360 + unionfs_check_dentry(old_dentry);
8361 + unionfs_check_dentry(new_dentry);
8362 + }
8363 +
8364 + unionfs_unlock_dentry(new_dentry);
8365 + unionfs_unlock_dentry(old_dentry);
8366 + unionfs_read_unlock(old_dentry->d_sb);
8367 + return err;
8368 +}
8368 diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8369 new file mode 100644
8370 index 0000000..478041d
8371 --- /dev/null
8372 +++ b/fs/unionfs/sioq.c
8373 @@ -0,0 +1,123 @@
8374 +/*
8375 + * Copyright (c) 2003-2007 Erez Zadok
8376 + * Copyright (c) 2003-2006 Charles P. Wright
8377 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8378 + * Copyright (c) 2005-2006 Junjiro Okajima
8379 + * Copyright (c) 2005 Arun M. Krishnakumar
8380 + * Copyright (c) 2004-2006 David P. Quigley
8381 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8382 + * Copyright (c) 2003 Puja Gupta
8383 + * Copyright (c) 2003 Harikesavan Krishnan
8384 + * Copyright (c) 2003-2007 Stony Brook University
8385 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8386 + *
8387 + * This program is free software; you can redistribute it and/or modify
8388 + * it under the terms of the GNU General Public License version 2 as
8389 + * published by the Free Software Foundation.
8390 + */
8391 +
8392 +#include "union.h"
8393 +
8394 +/*
8395 + * Super-user IO work Queue - sometimes we need to perform actions which
8396 + * would fail due to the unix permissions on the parent directory (e.g.,
8397 + * rmdir a directory which appears empty, but in reality contains
8398 + * whiteouts).
8399 + */
8400 +
8401 +static struct workqueue_struct *superio_workqueue;
8402 +
8403 +int __init init_sioq(void)
8404 +{
8405 + int err;
8406 + /* Create the sioq workqueue; returns 0 or -ENOMEM. */
8407 + superio_workqueue = create_workqueue("unionfs_siod");
8408 + if (superio_workqueue)
8409 + return 0;
8410 +
8411 + err = -ENOMEM; /* create_workqueue() signals failure with NULL */
8412 + printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8413 + /* superio_workqueue stays NULL, so stop_sioq() is a no-op */
8414 + return err;
8415 +}
8416 +
8417 +void stop_sioq(void)
8418 +{
8419 + if (superio_workqueue) /* NULL if init_sioq() did not set it up */
8420 + destroy_workqueue(superio_workqueue);
8421 +}
8422 +
8423 +void run_sioq(work_func_t func, struct sioq_args *args)
8424 +{
8425 + INIT_WORK(&args->work, func); /* func is run from superio_workqueue */
8426 +
8427 + init_completion(&args->comp);
8428 + while (!queue_work(superio_workqueue, &args->work)) {
8429 + /* TODO: do accounting if needed */
8430 + schedule(); /* yield, then try to queue again */
8431 + }
8432 + wait_for_completion(&args->comp); /* func must complete(&args->comp) */
8433 +}
8434 +
8435 +void __unionfs_create(struct work_struct *work)
8436 +{
8437 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8438 + struct create_args *req = &sa->create;
8439 +
8440 + sa->err = vfs_create(req->parent, req->dentry, req->mode, req->nd);
8441 + complete(&sa->comp); /* result is in sa->err for the waiter */
8442 +}
8443 +
8444 +void __unionfs_mkdir(struct work_struct *work)
8445 +{
8446 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8447 + struct mkdir_args *req = &sa->mkdir;
8448 +
8449 + sa->err = vfs_mkdir(req->parent, req->dentry, req->mode);
8450 + complete(&sa->comp); /* result is in sa->err for the waiter */
8451 +}
8452 +
8453 +void __unionfs_mknod(struct work_struct *work)
8454 +{
8455 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8456 + struct mknod_args *req = &sa->mknod;
8457 +
8458 + sa->err = vfs_mknod(req->parent, req->dentry, req->mode, req->dev);
8459 + complete(&sa->comp); /* result is in sa->err for the waiter */
8460 +}
8461 +
8462 +void __unionfs_symlink(struct work_struct *work)
8463 +{
8464 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8465 + struct symlink_args *req = &sa->symlink;
8466 +
8467 + sa->err = vfs_symlink(req->parent, req->dentry, req->symbuf, req->mode);
8468 + complete(&sa->comp); /* result is in sa->err for the waiter */
8469 +}
8470 +
8471 +void __unionfs_unlink(struct work_struct *work)
8472 +{
8473 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8474 + struct unlink_args *req = &sa->unlink;
8475 +
8476 + sa->err = vfs_unlink(req->parent, req->dentry);
8477 + complete(&sa->comp); /* result is in sa->err for the waiter */
8478 +}
8479 +
8480 +void __delete_whiteouts(struct work_struct *work)
8481 +{
8482 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8483 + struct deletewh_args *req = &sa->deletewh;
8484 +
8485 + sa->err = do_delete_whiteouts(req->dentry, req->bindex, req->namelist);
8486 + complete(&sa->comp); /* result is in sa->err for the waiter */
8487 +}
8488 +
8489 +void __is_opaque_dir(struct work_struct *work)
8490 +{
8491 + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8492 +
8493 + sa->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, sa->is_opaque.dentry,
8494 + sizeof(UNIONFS_DIR_OPAQUE) - 1);
8495 + complete(&sa->comp); /* looked-up dentry (or ERR_PTR) is in sa->ret */
8496 +}
8497 diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8498 new file mode 100644
8499 index 0000000..e180756
8500 --- /dev/null
8501 +++ b/fs/unionfs/sioq.h
8502 @@ -0,0 +1,96 @@
8503 +/*
8504 + * Copyright (c) 2003-2007 Erez Zadok
8505 + * Copyright (c) 2003-2006 Charles P. Wright
8506 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8507 + * Copyright (c) 2005-2006 Junjiro Okajima
8508 + * Copyright (c) 2005 Arun M. Krishnakumar
8509 + * Copyright (c) 2004-2006 David P. Quigley
8510 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8511 + * Copyright (c) 2003 Puja Gupta
8512 + * Copyright (c) 2003 Harikesavan Krishnan
8513 + * Copyright (c) 2003-2007 Stony Brook University
8514 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8515 + *
8516 + * This program is free software; you can redistribute it and/or modify
8517 + * it under the terms of the GNU General Public License version 2 as
8518 + * published by the Free Software Foundation.
8519 + */
8520 +
8521 +#ifndef _SIOQ_H
8522 +#define _SIOQ_H
8523 +
8524 +struct deletewh_args { /* arguments for __delete_whiteouts() */
8525 + struct unionfs_dir_state *namelist; /* passed to do_delete_whiteouts() */
8526 + struct dentry *dentry;
8527 + int bindex;
8528 +};
8529 +
8530 +struct is_opaque_args { /* arguments for __is_opaque_dir() */
8531 + struct dentry *dentry; /* directory searched for UNIONFS_DIR_OPAQUE */
8532 +};
8533 +
8534 +struct create_args { /* arguments __unionfs_create() gives vfs_create() */
8535 + struct inode *parent;
8536 + struct dentry *dentry;
8537 + umode_t mode;
8538 + struct nameidata *nd;
8539 +};
8540 +
8541 +struct mkdir_args { /* arguments __unionfs_mkdir() gives vfs_mkdir() */
8542 + struct inode *parent;
8543 + struct dentry *dentry;
8544 + umode_t mode;
8545 +};
8546 +
8547 +struct mknod_args { /* arguments __unionfs_mknod() gives vfs_mknod() */
8548 + struct inode *parent;
8549 + struct dentry *dentry;
8550 + umode_t mode;
8551 + dev_t dev;
8552 +};
8553 +
8554 +struct symlink_args { /* arguments __unionfs_symlink() gives vfs_symlink() */
8555 + struct inode *parent;
8556 + struct dentry *dentry;
8557 + char *symbuf; /* third argument of vfs_symlink() */
8558 + umode_t mode;
8559 +};
8560 +
8561 +struct unlink_args { /* arguments __unionfs_unlink() gives vfs_unlink() */
8562 + struct inode *parent;
8563 + struct dentry *dentry;
8564 +};
8565 +
8566 +
8567 +struct sioq_args { /* one request handed to run_sioq() */
8568 + struct completion comp; /* completed by the work function */
8569 + struct work_struct work; /* initialised and queued by run_sioq() */
8570 + int err; /* status stored by the work function */
8571 + void *ret; /* lookup result stored by __is_opaque_dir() */
8572 +
8573 + union { /* per-operation arguments, one member per work function */
8574 + struct deletewh_args deletewh;
8575 + struct is_opaque_args is_opaque;
8576 + struct create_args create;
8577 + struct mkdir_args mkdir;
8578 + struct mknod_args mknod;
8579 + struct symlink_args symlink;
8580 + struct unlink_args unlink;
8581 + };
8582 +};
8583 +
8584 +/* Extern declarations for the SIOQ setup, teardown and submit functions */
8585 +extern int __init init_sioq(void);
8586 +extern void stop_sioq(void);
8587 +extern void run_sioq(work_func_t func, struct sioq_args *args);
8588 +
8589 +/* Extern declarations for the work functions passed to run_sioq() */
8590 +extern void __unionfs_create(struct work_struct *work);
8591 +extern void __unionfs_mkdir(struct work_struct *work);
8592 +extern void __unionfs_mknod(struct work_struct *work);
8593 +extern void __unionfs_symlink(struct work_struct *work);
8594 +extern void __unionfs_unlink(struct work_struct *work);
8595 +extern void __delete_whiteouts(struct work_struct *work);
8596 +extern void __is_opaque_dir(struct work_struct *work);
8597 +
8598 +#endif /* not _SIOQ_H */
8599 diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8600 new file mode 100644
8601 index 0000000..5db9e62
8602 --- /dev/null
8603 +++ b/fs/unionfs/subr.c
8604 @@ -0,0 +1,240 @@
8605 +/*
8606 + * Copyright (c) 2003-2007 Erez Zadok
8607 + * Copyright (c) 2003-2006 Charles P. Wright
8608 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8609 + * Copyright (c) 2005-2006 Junjiro Okajima
8610 + * Copyright (c) 2005 Arun M. Krishnakumar
8611 + * Copyright (c) 2004-2006 David P. Quigley
8612 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8613 + * Copyright (c) 2003 Puja Gupta
8614 + * Copyright (c) 2003 Harikesavan Krishnan
8615 + * Copyright (c) 2003-2007 Stony Brook University
8616 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8617 + *
8618 + * This program is free software; you can redistribute it and/or modify
8619 + * it under the terms of the GNU General Public License version 2 as
8620 + * published by the Free Software Foundation.
8621 + */
8622 +
8623 +#include "union.h"
8624 +
8625 +/*
8626 + * Create a whiteout for dentry's name, trying branch 'start' first and
8627 + * moving one branch to the left while that fails with a copyup error.
8628 + * Returns 0 (also if the whiteout already exists) or a negative errno.
8629 + */
8630 +int create_whiteout(struct dentry *dentry, int start)
8631 +{
8632 + int bstart, bend, bindex;
8633 + struct dentry *lower_dir_dentry;
8634 + struct dentry *lower_dentry;
8635 + struct dentry *lower_wh_dentry;
8636 + char *name = NULL;
8637 + int err = -EINVAL;
8638 +
8639 + verify_locked(dentry);
8640 +
8641 + bstart = dbstart(dentry);
8642 + bend = dbend(dentry);
8643 +
8644 + /* create dentry's whiteout equivalent */
8645 + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8646 + if (IS_ERR(name)) {
8647 + /* nothing to free: name holds an ERR_PTR, not a buffer */
8648 + return PTR_ERR(name);
8649 + }
8650 +
8651 + for (bindex = start; bindex >= 0; bindex--) {
8652 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8653 +
8654 + if (!lower_dentry) {
8655 + /*
8656 + * if lower dentry is not present, create the
8657 + * entire lower dentry directory structure and go
8658 + * ahead. Since we want to just create whiteout, we
8659 + * only want the parent dentry, and hence get rid of
8660 + * this dentry.
8661 + */
8662 + lower_dentry = create_parents(dentry->d_inode,
8663 + dentry,
8664 + dentry->d_name.name,
8665 + bindex);
8666 + if (!lower_dentry || IS_ERR(lower_dentry)) {
8667 + printk(KERN_DEBUG "unionfs: create_parents "
8668 + "failed for bindex = %d\n", bindex);
8669 + continue;
8670 + }
8671 + }
8672 +
8673 + lower_wh_dentry =
8674 + lookup_one_len(name, lower_dentry->d_parent,
8675 + dentry->d_name.len + UNIONFS_WHLEN);
8676 + if (IS_ERR(lower_wh_dentry))
8677 + continue;
8678 +
8679 + /*
8680 + * The whiteout already exists. This used to be impossible,
8681 + * but now is possible because of opaqueness.
8682 + */
8683 + if (lower_wh_dentry->d_inode) {
8684 + dput(lower_wh_dentry);
8685 + err = 0;
8686 + goto out;
8687 + }
8688 +
8689 + lower_dir_dentry = lock_parent(lower_wh_dentry);
8690 + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
8691 + err = vfs_create(lower_dir_dentry->d_inode,
8692 + lower_wh_dentry,
8693 + ~current->fs->umask & S_IRWXUGO,
8694 + NULL);
8695 + unlock_dir(lower_dir_dentry);
8696 + dput(lower_wh_dentry);
8697 +
8698 + if (!err || !IS_COPYUP_ERR(err))
8699 + break;
8700 + }
8701 +
8702 + /* set dbopaque so that lookup will not proceed after this branch */
8703 + if (!err)
8704 + set_dbopaque(dentry, bindex);
8705 +
8706 +out:
8707 + kfree(name);
8708 + return err;
8709 +}
8710 +
8711 +/*
8712 + * Helper for rename's revert path: look the name up again in branch
8713 + * bindex and replace the cached lower dentry/inode. Returns 0 or -errno.
8714 + */
8715 +int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex)
8716 +{
8717 + struct dentry *lower_dentry;
8718 + struct dentry *lower_parent;
8719 + int err = 0;
8720 +
8721 + verify_locked(dentry);
8722 +
8723 + unionfs_lock_dentry(dentry->d_parent);
8724 + lower_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
8725 + unionfs_unlock_dentry(dentry->d_parent);
8726 +
8727 + BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
8728 +
8729 + lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
8730 + dentry->d_name.len);
8731 + if (IS_ERR(lower_dentry)) {
8732 + err = PTR_ERR(lower_dentry);
8733 + goto out; /* cached lower objects are left untouched */
8734 + }
8735 +
8736 + dput(unionfs_lower_dentry_idx(dentry, bindex)); /* drop the old refs */
8737 + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
8738 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
8739 +
8740 + if (!lower_dentry->d_inode) { /* negative: clear the slot */
8741 + dput(lower_dentry);
8742 + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
8743 + } else {
8744 + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
8745 + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
8746 + igrab(lower_dentry->d_inode));
8747 + }
8748 +
8749 +out:
8750 + return err;
8751 +}
8752 +
8753 +int make_dir_opaque(struct dentry *dentry, int bindex)
8754 +{
8755 + int err = 0;
8756 + struct dentry *lower_dentry, *diropq;
8757 + struct inode *lower_dir;
8758 + /* Create UNIONFS_DIR_OPAQUE in branch bindex if missing; 0 or -errno. */
8759 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8760 + lower_dir = lower_dentry->d_inode;
8761 + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
8762 + !S_ISDIR(lower_dir->i_mode));
8763 +
8764 + mutex_lock(&lower_dir->i_mutex); /* held across lookup and create */
8765 + diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
8766 + sizeof(UNIONFS_DIR_OPAQUE) - 1);
8767 + if (IS_ERR(diropq)) {
8768 + err = PTR_ERR(diropq);
8769 + goto out;
8770 + }
8771 +
8772 + if (!diropq->d_inode) /* marker file does not exist yet */
8773 + err = vfs_create(lower_dir, diropq, S_IRUGO, NULL);
8774 + if (!err)
8775 + set_dbopaque(dentry, bindex);
8776 +
8777 + dput(diropq);
8778 +
8779 +out:
8780 + mutex_unlock(&lower_dir->i_mutex);
8781 + return err;
8782 +}
8783 +
8784 +/*
8785 + * Link count for a unionfs inode: a non-directory mirrors its lower inode,
8786 + * a directory aggregates the link counts of its lower directories.
8787 + */
8788 +int unionfs_get_nlinks(const struct inode *inode)
8789 +{
8790 + struct inode *lower;
8791 + int idx, ndirs = 0, total = 0;
8792 +
8793 + /* already unlinked: nothing to add up */
8794 + if (!inode->i_nlink)
8795 + return 0;
8796 +
8797 + if (!S_ISDIR(inode->i_mode))
8798 + return unionfs_lower_inode(inode)->i_nlink;
8799 +
8800 + for (idx = ibstart(inode); idx <= ibend(inode); idx++) {
8801 + lower = unionfs_lower_inode_idx(inode, idx);
8802 +
8803 + /* empty slots and non-directories contribute nothing */
8804 + if (!lower || !S_ISDIR(lower->i_mode))
8805 + continue;
8806 +
8807 + BUG_ON(lower->i_nlink < 0);
8808 +
8809 + /* a deleted lower directory is skipped as well */
8810 + if (!lower->i_nlink)
8811 + continue;
8812 + ndirs++;
8813 +
8814 + /*
8815 + * A count of 1 marks a broken directory (some filesystems
8816 + * don't set the link count of empty directories properly)
8817 + * and is accounted as 2; any other count is added minus 2.
8818 + */
8819 + if (lower->i_nlink != 1)
8820 + total += (lower->i_nlink - 2);
8821 + else
8822 + total += 2;
8823 + }
8824 +
8825 + if (!ndirs)
8826 + return 0;
8827 +
8828 + return total + 2;
8829 +}
8830 +
8831 +/* build the whiteout name for @name: UNIONFS_WHPFX followed by name */
8832 +char *alloc_whname(const char *name, int len)
8833 +{
8834 + int size = len + UNIONFS_WHLEN + 1; /* prefix + name + NUL */
8835 + char *whname = kmalloc(size, GFP_KERNEL);
8836 +
8837 + if (!whname)
8838 + return ERR_PTR(-ENOMEM);
8839 +
8840 + strcpy(whname, UNIONFS_WHPFX);
8841 + strlcat(whname, name, size);
8842 +
8843 + return whname;
8844 +}
8845 diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8846 new file mode 100644
8847 index 0000000..f4118df
8848 --- /dev/null
8849 +++ b/fs/unionfs/super.c
8850 @@ -0,0 +1,1007 @@
8851 +/*
8852 + * Copyright (c) 2003-2007 Erez Zadok
8853 + * Copyright (c) 2003-2006 Charles P. Wright
8854 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8855 + * Copyright (c) 2005-2006 Junjiro Okajima
8856 + * Copyright (c) 2005 Arun M. Krishnakumar
8857 + * Copyright (c) 2004-2006 David P. Quigley
8858 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8859 + * Copyright (c) 2003 Puja Gupta
8860 + * Copyright (c) 2003 Harikesavan Krishnan
8861 + * Copyright (c) 2003-2007 Stony Brook University
8862 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8863 + *
8864 + * This program is free software; you can redistribute it and/or modify
8865 + * it under the terms of the GNU General Public License version 2 as
8866 + * published by the Free Software Foundation.
8867 + */
8868 +
8869 +#include "union.h"
8870 +
8871 +/*
8872 + * The inode cache is used with alloc_inode for both our inode info and the
8873 + * vfs inode.
8874 + */
8875 +static struct kmem_cache *unionfs_inode_cachep;
8876 +
8877 +static void unionfs_read_inode(struct inode *inode)
8878 +{
8879 + extern struct address_space_operations unionfs_aops;
8880 + int size;
8881 + struct unionfs_inode_info *info = UNIONFS_I(inode);
8882 + /* Initialise the unionfs-private part of a new inode and its ops. */
8883 + unionfs_read_lock(inode->i_sb);
8884 +
8885 + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
8886 + info->bstart = -1; /* no lower branches attached yet */
8887 + info->bend = -1;
8888 + atomic_set(&info->generation,
8889 + atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8890 + spin_lock_init(&info->rdlock);
8891 + info->rdcount = 1;
8892 + info->hashsize = -1;
8893 + INIT_LIST_HEAD(&info->readdircache);
8894 +
8895 + size = sbmax(inode->i_sb) * sizeof(struct inode *);
8896 + info->lower_inodes = kzalloc(size, GFP_KERNEL);
8897 + if (!info->lower_inodes) { /* NOTE(review): OOM here ends in BUG() */
8898 + printk(KERN_ERR "unionfs: no kernel memory when allocating "
8899 + "lower-pointer array!\n");
8900 + BUG();
8901 + }
8902 +
8903 + inode->i_version++;
8904 + inode->i_op = &unionfs_main_iops;
8905 + inode->i_fop = &unionfs_main_fops;
8906 +
8907 + inode->i_mapping->a_ops = &unionfs_aops;
8908 +
8909 + unionfs_read_unlock(inode->i_sb);
8910 +}
8911 +
8912 +/*
8913 + * we now define delete_inode, because there are two VFS paths that may
8914 + * destroy an inode: one of them calls clear inode before doing everything
8915 + * else that's needed, and the other is fine. This way we truncate the inode
8916 + * size (and its pages) and then clear our own inode, which will do an iput
8917 + * on our and the lower inode.
8918 + *
8919 + * No need to lock sb info's rwsem.
8920 + */
8921 +static void unionfs_delete_inode(struct inode *inode)
8922 +{
8923 + struct address_space *pages = &inode->i_data;
8924 +
8925 + inode->i_size = 0; /* every f/s seems to do that */
8926 + if (pages->nrpages != 0) /* drop whatever pages are still cached */
8927 + truncate_inode_pages(pages, 0);
8928 + clear_inode(inode);
8929 +}
8930 +
8931 +/*
8932 + * final actions when unmounting a file system
8933 + *
8934 + * No need to lock rwsem.
8935 + */
8936 +static void unionfs_put_super(struct super_block *sb)
8937 +{
8938 + int bindex, bstart, bend;
8939 + struct unionfs_sb_info *spd;
8940 + int leaks = 0; /* set once any branch still has references */
8941 +
8942 + spd = UNIONFS_SB(sb);
8943 + if (!spd) /* no private sb info attached: nothing to free */
8944 + return;
8945 +
8946 + bstart = sbstart(sb);
8947 + bend = sbend(sb);
8948 +
8949 + /* Make sure we have no leaks of branchget/branchput. */
8950 + for (bindex = bstart; bindex <= bend; bindex++)
8951 + if (branch_count(sb, bindex) != 0) {
8952 + printk("unionfs: branch %d has %d references left!\n",
8953 + bindex, branch_count(sb, bindex));
8954 + leaks = 1;
8955 + }
8956 + BUG_ON(leaks != 0); /* every leaking branch is logged before we stop */
8957 +
8958 + kfree(spd->data);
8959 + kfree(spd);
8960 + sb->s_fs_info = NULL; /* clear the sb's private pointer after the frees */
8961 +}
8962 +
8963 +/*
8964 + * Since people use this to answer the "How big of a file can I write?"
8965 + * question, we report the size of the highest priority branch as the size of
8966 + * the union.
8967 + */
8968 +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8969 +{
8970 + int err = 0;
8971 + struct super_block *sb;
8972 + struct dentry *lower_dentry;
8973 +
8974 + sb = dentry->d_sb;
8975 +
8976 + unionfs_read_lock(sb);
8977 + unionfs_lock_dentry(dentry);
8978 +
8979 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
8980 + err = -ESTALE; /* revalidation failed: report a stale dentry */
8981 + goto out;
8982 + }
8983 + unionfs_check_dentry(dentry);
8984 +
8985 + lower_dentry = unionfs_lower_dentry(sb->s_root); /* root's, not @dentry's */
8986 + err = vfs_statfs(lower_dentry, buf); /* err is returned as-is below */
8987 +
8988 + /* set return buf to our f/s to avoid confusing user-level utils */
8989 + buf->f_type = UNIONFS_SUPER_MAGIC;
8990 + /*
8991 + * Our maximum file name is shorter by a few bytes because every
8992 + * file name could potentially be whited-out.
8993 + *
8994 + * XXX: this restriction goes away with ODF.
8995 + */
8996 + buf->f_namelen -= UNIONFS_WHLEN;
8997 +
8998 + /*
8999 + * reset two fields to avoid confusing user-land.
9000 + * XXX: is this still necessary?
9001 + */
9002 + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
9003 + memset(&buf->f_spare, 0, sizeof(buf->f_spare));
9004 +
9005 +out: /* common exit: drop the dentry lock, then the sb read lock */
9006 + unionfs_unlock_dentry(dentry);
9007 + unionfs_check_dentry(dentry);
9008 + unionfs_read_unlock(sb);
9009 + return err;
9010 +}
9011 +
9012 +/* remount "mode=BRANCH=MODE": change the permissions of an existing branch */
9013 +static noinline int do_remount_mode_option(char *optarg, int cur_branches,
9014 + struct unionfs_data *new_data,
9015 + struct path *new_lower_paths)
9016 +{
9017 + int err = -EINVAL; /* returned: 0 on success, negative errno otherwise */
9018 + int perms, idx;
9019 + char *modename = strchr(optarg, '='); /* points at "=MODE", if present */
9020 + struct nameidata nd;
9021 +
9022 + /* by now, optarg contains the branch name */
9023 + if (!*optarg) {
9024 + printk("unionfs: no branch specified for mode change.\n");
9025 + goto out;
9026 + }
9027 + if (!modename) {
9028 + printk("unionfs: branch \"%s\" requires a mode.\n", optarg);
9029 + goto out;
9030 + }
9031 + *modename++ = '\0'; /* split in place: optarg is now just BRANCH */
9032 + perms = __parse_branch_mode(modename);
9033 + if (perms == 0) { /* 0 is treated as "could not parse the mode" */
9034 + printk("unionfs: invalid mode \"%s\" for \"%s\".\n",
9035 + modename, optarg);
9036 + goto out;
9037 + }
9038 +
9039 + /*
9040 + * Find matching branch index. For now, this assumes that nothing
9041 + * has been mounted on top of this Unionfs stack. Once we have /odf
9042 + * and cache-coherency resolved, we'll address the branch-path
9043 + * uniqueness.
9044 + */
9045 + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9046 + if (err) {
9047 + printk(KERN_WARNING "unionfs: error accessing "
9048 + "lower directory \"%s\" (error %d)\n",
9049 + optarg, err);
9050 + goto out;
9051 + }
9052 + for (idx=0; idx<cur_branches; idx++) /* match on both mnt and dentry */
9053 + if (nd.mnt == new_lower_paths[idx].mnt &&
9054 + nd.dentry == new_lower_paths[idx].dentry)
9055 + break;
9056 + path_release(&nd); /* no longer needed */
9057 + if (idx == cur_branches) {
9058 + err = -ENOENT; /* err may have been reset above */
9059 + printk(KERN_WARNING "unionfs: branch \"%s\" "
9060 + "not found\n", optarg);
9061 + goto out;
9062 + }
9063 + /* check/change mode for existing branch */
9064 + /* we don't warn if perms==branchperms */
9065 + new_data[idx].branchperms = perms;
9066 + err = 0;
9067 +out:
9068 + return err;
9069 +}
9070 +
9071 +/* remount "del=BRANCH": remove one branch from the temporary branch arrays */
9072 +static noinline int do_remount_del_option(char *optarg, int cur_branches,
9073 + struct unionfs_data *new_data,
9074 + struct path *new_lower_paths)
9075 +{
9076 + int err = -EINVAL; /* returned: 0 on success, negative errno otherwise */
9077 + int idx;
9078 + struct nameidata nd;
9079 +
9080 + /* optarg contains the branch name to delete */
9081 +
9082 + /*
9083 + * Find matching branch index. For now, this assumes that nothing
9084 + * has been mounted on top of this Unionfs stack. Once we have /odf
9085 + * and cache-coherency resolved, we'll address the branch-path
9086 + * uniqueness.
9087 + */
9088 + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9089 + if (err) {
9090 + printk(KERN_WARNING "unionfs: error accessing "
9091 + "lower directory \"%s\" (error %d)\n",
9092 + optarg, err);
9093 + goto out;
9094 + }
9095 + for (idx=0; idx < cur_branches; idx++) /* match on both mnt and dentry */
9096 + if (nd.mnt == new_lower_paths[idx].mnt &&
9097 + nd.dentry == new_lower_paths[idx].dentry)
9098 + break;
9099 + path_release(&nd); /* no longer needed */
9100 + if (idx == cur_branches) {
9101 + printk(KERN_WARNING "unionfs: branch \"%s\" "
9102 + "not found\n", optarg);
9103 + err = -ENOENT;
9104 + goto out;
9105 + }
9106 + /* check if there are any open files on the branch to be deleted */
9107 + if (atomic_read(&new_data[idx].open_files) > 0) {
9108 + err = -EBUSY; /* note: this refusal is not logged */
9109 + goto out;
9110 + }
9111 +
9112 + /*
9113 + * Now we have to delete the branch. First, release any handles it
9114 + * has. Then, move the remaining array indexes past "idx" in
9115 + * new_data and new_lower_paths one to the left. The branch count is
9116 + * adjusted by our caller (cur_branches is passed by value).
9117 + */
9118 + pathput(&new_lower_paths[idx]);
9119 +
9120 + if (idx < cur_branches - 1) {
9121 + /* if idx==cur_branches-1, we delete last branch: easy */
9122 + memmove(&new_data[idx], &new_data[idx+1],
9123 + (cur_branches - 1 - idx) *
9124 + sizeof(struct unionfs_data));
9125 + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
9126 + (cur_branches - 1 - idx) * sizeof(struct path));
9127 + }
9128 +
9129 + err = 0;
9130 +out:
9131 + return err;
9132 +}
9133 +
9134 +/*
9135 + * Handle "add=" during remount: insert one branch into @new_data and
9136 + * @new_lower_paths, which hold @cur_branches entries plus a free slot.
9137 + * Bumps *@high_branch_id. Returns 0 on success or a negative errno.
9138 + */
9139 +static noinline int do_remount_add_option(char *optarg, int cur_branches,
9140 + struct unionfs_data *new_data,
9141 + struct path *new_lower_paths,
9142 + int *high_branch_id)
9143 +{
9144 + int err = -EINVAL;
9145 + int perms;
9146 + int idx = 0; /* default: insert at beginning */
9147 + char *new_branch , *modename = NULL;
9148 + struct nameidata nd;
9149 +
9150 + /*
9151 + * optarg can be of several forms:
9152 + *
9153 + * /bar:/foo insert /foo before /bar
9154 + * /bar:/foo=ro insert /foo in ro mode before /bar
9155 + * /foo insert /foo in the beginning (prepend)
9156 + * :/foo insert /foo at the end (append)
9157 + */
9158 + if (*optarg == ':') { /* append? */
9159 + new_branch = optarg + 1; /* skip ':' */
9160 + idx = cur_branches;
9161 + goto found_insertion_point;
9162 + }
9163 + new_branch = strchr(optarg, ':');
9164 + if (!new_branch) { /* prepend? */
9165 + new_branch = optarg;
9166 + goto found_insertion_point;
9167 + }
9168 + *new_branch++ = '\0'; /* holds path+mode of new branch */
9169 +
9170 + /*
9171 + * Find matching branch index. For now, this assumes that nothing
9172 + * has been mounted on top of this Unionfs stack. Once we have /odf
9173 + * and cache-coherency resolved, we'll address the branch-path
9174 + * uniqueness.
9175 + */
9176 + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9177 + if (err) {
9178 + printk(KERN_WARNING "unionfs: error accessing "
9179 + "lower directory \"%s\" (error %d)\n",
9180 + optarg, err);
9181 + goto out;
9182 + }
9183 + for (idx=0; idx < cur_branches; idx++)
9184 + if (nd.mnt == new_lower_paths[idx].mnt &&
9185 + nd.dentry == new_lower_paths[idx].dentry)
9186 + break;
9187 + path_release(&nd); /* no longer needed */
9188 + if (idx == cur_branches) {
9189 + printk(KERN_WARNING "unionfs: branch \"%s\" "
9190 + "not found\n", optarg);
9191 + err = -ENOENT;
9192 + goto out;
9193 + }
9194 +
9195 + /* idx now holds the index the new branch should be inserted before */
9196 +found_insertion_point:
9197 + /* find the mode for the new branch */
9198 + if (new_branch)
9199 + modename = strchr(new_branch, '=');
9200 + if (modename)
9201 + *modename++ = '\0';
9202 + perms = parse_branch_mode(modename);
9203 +
9204 + if (!new_branch || !*new_branch) {
9205 + printk(KERN_WARNING "unionfs: null new branch\n");
9206 + err = -EINVAL;
9207 + goto out;
9208 + }
9209 + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
9210 + if (err) {
9211 + printk(KERN_WARNING "unionfs: error accessing "
9212 + "lower directory \"%s\" (error %d)\n",
9213 + new_branch, err);
9214 + goto out;
9215 + }
9216 + /*
9217 + * It's probably safe to check_mode the new branch to insert. Note:
9218 + * we don't allow inserting branches which are unionfs's by
9219 + * themselves (check_branch returns EINVAL in that case). This is
9220 + * because this code base doesn't support stacking unionfs: the ODF
9221 + * code base supports that correctly.
9222 + */
9223 + if ((err = check_branch(&nd))) {
9224 + /* name the path that was checked, not the insertion anchor */
9225 + printk(KERN_WARNING "unionfs: lower directory "
9226 + "\"%s\" is not a valid branch\n", new_branch);
9227 + path_release(&nd);
9228 + goto out;
9229 + }
9230 + /*
9231 + * Now we have to insert the new branch. But first, move the bits
9232 + * to make space for the new branch, if needed. The branch count is
9233 + * adjusted by our caller (cur_branches is passed by value).
9234 + * We don't release nd here; it's kept until umount/remount.
9235 + */
9236 + if (idx < cur_branches) {
9237 + /* if idx==cur_branches, we append: easy */
9238 + memmove(&new_data[idx+1], &new_data[idx],
9239 + (cur_branches - idx) * sizeof(struct unionfs_data));
9240 + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
9241 + (cur_branches - idx) * sizeof(struct path));
9242 + }
9243 + new_lower_paths[idx].dentry = nd.dentry;
9244 + new_lower_paths[idx].mnt = nd.mnt;
9245 +
9246 + new_data[idx].sb = nd.dentry->d_sb;
9247 + atomic_set(&new_data[idx].open_files, 0);
9248 + new_data[idx].branchperms = perms;
9249 + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
9250 +
9251 + err = 0;
9252 +out:
9253 + return err;
9254 +}
9255 +
9256 +/*
9257 + * Support branch management options on remount.
9258 + *
9259 + * See Documentation/filesystems/unionfs/ for details.
9260 + *
9261 + * @flags: numeric mount options
9262 + * @options: mount options string
9263 + * Returns 0 on success, or a negative errno with the old branches kept.
9264 + *
9265 + * This function can rearrange a mounted union dynamically, adding and
9266 + * removing branches, including changing branch modes. Clearly this has to
9267 + * be done safely and atomically. Luckily, the VFS already calls this
9268 + * function with lock_super(sb) and lock_kernel() held, preventing
9269 + * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
9270 + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
9271 + * to purge dentries/inodes from our superblock, and also called
9272 + * fsync_super(sb) to purge any dirty pages. So we're good.
9273 + *
9274 + * XXX: however, our remount code may also need to invalidate mapped pages
9275 + * so as to force them to be re-gotten from the (newly reconfigured) lower
9276 + * branches. This has to wait for proper mmap and cache coherency support
9277 + * in the VFS.
9278 + */
9279 +static int unionfs_remount_fs(struct super_block *sb, int *flags,
9280 + char *options)
9281 +{
9282 + int err = 0;
9283 + int i;
9284 + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
9285 + char *optname;
9286 + int cur_branches = 0; /* no. of current branches */
9287 + int new_branches = 0; /* no. of branches actually left in the end */
9288 + int add_branches; /* est. no. of branches to add */
9289 + int del_branches; /* est. no. of branches to del */
9290 + int max_branches; /* max possible no. of branches */
9291 + struct unionfs_data *new_data = NULL, *tmp_data = NULL;
9292 + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
9293 + struct inode **new_lower_inodes = NULL;
9294 + int new_high_branch_id; /* new high branch ID */
9295 + int old_ibstart, old_ibend;
9296 + int size; /* memory allocation size, temp var */
9297 +
9298 + unionfs_write_lock(sb);
9299 +
9300 + /*
9301 + * The VFS will take care of "ro" and "rw" flags, so anything else
9302 + * is an error. So we need to check if any other flags may have
9303 + * been passed (none are allowed/supported as of now).
9304 + */
9305 + if ((*flags & ~MS_RDONLY) != 0) {
9306 + printk(KERN_WARNING
9307 + "unionfs: remount flags 0x%x unsupported\n", *flags);
9308 + err = -EINVAL;
9309 + goto out_error;
9310 + }
9311 +
9312 + /*
9313 + * If 'options' is NULL, it's probably because the user just changed
9314 + * the union to a "ro" or "rw" and the VFS took care of it. So
9315 + * nothing to do and we're done.
9316 + */
9317 + if (!options || options[0] == '\0')
9318 + goto out_error;
9319 +
9320 + /*
9321 + * Find out how many branches we will have in the end, counting
9322 + * "add" and "del" commands. Copy the "options" string because
9323 + * strsep modifies the string and we need it later.
9324 + */
9325 + optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL);
9326 + if (!optionstmp) {
9327 + err = -ENOMEM;
9328 + goto out_free;
9329 + }
9330 + new_branches = cur_branches = sbmax(sb); /* current no. branches */
9331 + add_branches = del_branches = 0;
9332 + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9333 + while ((optname = strsep(&optionstmp, ",")) != NULL) {
9334 + char *optarg;
9335 +
9336 + if (!optname || !*optname)
9337 + continue;
9338 +
9339 + optarg = strchr(optname, '=');
9340 + if (optarg)
9341 + *optarg++ = '\0';
9342 +
9343 + if (!strcmp("add", optname))
9344 + add_branches++;
9345 + else if (!strcmp("del", optname))
9346 + del_branches++;
9347 + }
9348 + kfree(tmp_to_free);
9349 + /* after all changes, will we have at least one branch left? */
9350 + if ((new_branches + add_branches - del_branches) < 1) {
9351 + printk(KERN_WARNING
9352 + "unionfs: no branches left after remount\n");
9353 + err = -EINVAL;
9354 + goto out_free;
9355 + }
9356 +
9357 + /*
9358 + * Since we haven't actually parsed all the add/del options, nor
9359 + * have we checked them for errors, we don't know for sure how many
9360 + * branches we will have after all changes have taken place. In
9361 + * fact, the total number of branches left could be less than what
9362 + * we have now. So we need to allocate space for a temporary
9363 + * placeholder that is at least as large as the maximum number of
9364 + * branches we *could* have, which is the current number plus all
9365 + * the additions. Once we're done with these temp placeholders, we
9366 + * may have to re-allocate the final size, copy over from the temp,
9367 + * and then free the temps (done near the end of this function).
9368 + */
9369 + max_branches = cur_branches + add_branches;
9370 + /* allocate space for new pointers to lower dentry */
9371 + tmp_data = kcalloc(max_branches,
9372 + sizeof(struct unionfs_data), GFP_KERNEL);
9373 + if (!tmp_data) {
9374 + err = -ENOMEM;
9375 + goto out_free;
9376 + }
9377 + /* allocate space for new pointers to lower paths */
9378 + tmp_lower_paths = kcalloc(max_branches,
9379 + sizeof(struct path), GFP_KERNEL);
9380 + if (!tmp_lower_paths) {
9381 + err = -ENOMEM;
9382 + goto out_free;
9383 + }
9384 + /* copy current info into new placeholders, incrementing refcnts */
9385 + memcpy(tmp_data, UNIONFS_SB(sb)->data,
9386 + cur_branches * sizeof(struct unionfs_data));
9387 + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9388 + cur_branches * sizeof(struct path));
9389 + for (i=0; i<cur_branches; i++)
9390 + pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9391 +
9392 + /*******************************************************************
9393 + * For each branch command, do path_lookup on the requested branch,
9394 + * and apply the change to a temp branch list. To handle errors, we
9395 + * already dup'ed the old arrays (above), and increased the refcnts
9396 + * on various f/s objects. So now we can do all the path_lookups
9397 + * and branch-management commands on the new arrays. If this fails
9398 + * midway, we free the tmp arrays and *put all objects. If we succeed,
9399 + * then we free old arrays and *put its objects, and then replace
9400 + * the arrays with the new tmp list (we may have to re-allocate the
9401 + * memory because the temp lists could have been larger than what we
9402 + * actually needed).
9403 + *******************************************************************/
9404 +
9405 + while ((optname = strsep(&options, ",")) != NULL) {
9406 + char *optarg;
9407 +
9408 + if (!optname || !*optname)
9409 + continue;
9410 + /*
9411 + * At this stage optname holds a comma-delimited option, but
9412 + * without the commas. Next, we need to break the string on
9413 + * the '=' symbol to separate CMD=ARG, where ARG itself can
9414 + * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
9415 + * KEY is "/foo", and VAL is "rw".
9416 + */
9417 + optarg = strchr(optname, '=');
9418 + if (optarg)
9419 + *optarg++ = '\0';
9420 + /* incgen remount option (instead of old ioctl) */
9421 + if (!strcmp("incgen", optname)) {
9422 + err = 0;
9423 + goto out_no_change;
9424 + }
9425 +
9426 + /*
9427 + * All of our options take an argument now. (Insert ones
9428 + * that don't above this check.) So at this stage optname
9429 + * contains the CMD part and optarg contains the ARG part.
9430 + */
9431 + if (!optarg || !*optarg) {
9432 + printk("unionfs: all remount options require "
9433 + "an argument (%s).\n", optname);
9434 + err = -EINVAL;
9435 + goto out_release;
9436 + }
9437 +
9438 + if (!strcmp("add", optname)) {
9439 + err = do_remount_add_option(optarg, new_branches,
9440 + tmp_data,
9441 + tmp_lower_paths,
9442 + &new_high_branch_id);
9443 + if (err)
9444 + goto out_release;
9445 + new_branches++;
9446 + if (new_branches > UNIONFS_MAX_BRANCHES) {
9447 + printk("unionfs: command exceeds "
9448 + "%d branches\n", UNIONFS_MAX_BRANCHES);
9449 + err = -E2BIG;
9450 + goto out_release;
9451 + }
9452 + continue;
9453 + }
9454 + if (!strcmp("del", optname)) {
9455 + err = do_remount_del_option(optarg, new_branches,
9456 + tmp_data,
9457 + tmp_lower_paths);
9458 + if (err)
9459 + goto out_release;
9460 + new_branches--;
9461 + continue;
9462 + }
9463 + if (!strcmp("mode", optname)) {
9464 + err = do_remount_mode_option(optarg, new_branches,
9465 + tmp_data,
9466 + tmp_lower_paths);
9467 + if (err)
9468 + goto out_release;
9469 + continue;
9470 + }
9471 +
9472 + /*
9473 + * When you use "mount -o remount,ro", mount(8) will
9474 + * reportedly pass the original dirs= string from
9475 + * /proc/mounts. So for now, we have to ignore dirs= and
9476 + * not consider it an error, unless we want to allow users
9477 + * to pass dirs= in remount. Note that to allow the VFS to
9478 + * actually process the ro/rw remount options, we have to
9479 + * return 0 from this function.
9480 + */
9481 + if (!strcmp("dirs", optname)) {
9482 + printk(KERN_WARNING
9483 + "unionfs: remount ignoring option \"%s\".\n",
9484 + optname);
9485 + continue;
9486 + }
9487 +
9488 + err = -EINVAL;
9489 + printk(KERN_WARNING
9490 + "unionfs: unrecognized option \"%s\"\n", optname);
9491 + goto out_release;
9492 + }
9493 +
9494 +out_no_change:
9495 +
9496 + /******************************************************************
9497 + * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9498 + * see if we need to allocate a small-sized new vector, copy the
9499 + * vectors to their correct place, release the refcnt of the older
9500 + * ones, and return. Also handle invalidating any pages that will
9501 + * have to be re-read.
9502 + *******************************************************************/
9503 +
9504 + if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9505 + printk("unionfs: leftmost branch cannot be read-only "
9506 + "(use \"remount,ro\" to create a read-only union)\n");
9507 + err = -EINVAL;
9508 + goto out_release;
9509 + }
9510 +
9511 + /* allocate the lower inodes first, while tmp_* are still untouched */
9512 + new_lower_inodes = kcalloc(new_branches,
9513 + sizeof(struct inode *), GFP_KERNEL);
9514 + if (!new_lower_inodes) {
9515 + err = -ENOMEM;
9516 + goto out_release;
9517 + }
9518 + /* (re)allocate space for new pointers to lower dentry */
9519 + size = new_branches * sizeof(struct unionfs_data);
9520 + new_data = krealloc(tmp_data, size, GFP_KERNEL);
9521 + if (!new_data) {
9522 + err = -ENOMEM;
9523 + goto out_release;
9524 + }
9525 + tmp_data = NULL; /* krealloc took it: out_free must not kfree it again */
9526 + /* lower paths go last: a failure must still see tmp_lower_paths */
9527 + size = new_branches * sizeof(struct path);
9528 + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9529 + if (!new_lower_paths) {
9530 + err = -ENOMEM;
9531 + goto out_release;
9532 + }
9533 + tmp_lower_paths = NULL; /* likewise; re-pointed at the old array below */
9534 +
9535 + /*
9536 + * OK, just before we actually put the new set of branches in place,
9537 + * we need to ensure that our own f/s has no dirty objects left.
9538 + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9539 + * fsync_super(sb), taking care of dentries, inodes, and dirty
9540 + * pages. So all that's left is for us to invalidate any leftover
9541 + * (non-dirty) pages to ensure that they will be re-read from the
9542 + * new lower branches (and to support mmap).
9543 + *
9544 + * Now we call drop_pagecache_sb() to invalidate all pages in this
9545 + * super. This function calls invalidate_inode_pages(mapping),
9546 + * which calls invalidate_mapping_pages(): the latter, however, will
9547 + * not invalidate pages which are dirty, locked, under writeback, or
9548 + * mapped into page tables. We shouldn't have to worry about dirty
9549 + * or under-writeback pages, because do_remount_sb() called
9550 + * fsync_super() which would not have returned until all dirty pages
9551 + * were flushed.
9552 + *
9553 + * But do we have to worry about locked pages? Is there any chance
9554 + * that in here we'll get locked pages?
9555 + *
9556 + * XXX: what about pages mapped into pagetables? Are these pages
9557 + * which user processes may have mmap(2)'ed? If so, then we need to
9558 + * invalidate those too, no? Maybe we'll have to write our own
9559 + * version of invalidate_mapping_pages() which also handled mapped
9560 + * pages.
9561 + *
9562 + * XXX: Alternatively, maybe we should call truncate_inode_pages(),
9563 + * which use two passes over the pages list, and will truncate all
9564 + * pages.
9565 + */
9566 + drop_pagecache_sb(sb);
9567 +
9568 + /* copy new vectors into their correct place */
9569 + tmp_data = UNIONFS_SB(sb)->data;
9570 + UNIONFS_SB(sb)->data = new_data;
9571 + new_data = NULL; /* so don't free good pointers below */
9572 + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9573 + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9574 + new_lower_paths = NULL; /* so don't free good pointers below */
9575 +
9576 + /* update our unionfs_sb_info and root dentry index of last branch */
9577 + i = sbmax(sb); /* save no. of branches to release at end */
9578 + sbend(sb) = new_branches - 1;
9579 + set_dbend(sb->s_root, new_branches - 1);
9580 + old_ibstart = ibstart(sb->s_root->d_inode);
9581 + old_ibend = ibend(sb->s_root->d_inode);
9582 + ibend(sb->s_root->d_inode) = new_branches - 1;
9583 + UNIONFS_D(sb->s_root)->bcount = new_branches;
9584 + new_branches = i; /* no. of branches to release below */
9585 +
9586 + /*
9587 + * Update lower inodes: 3 steps
9588 + * 1. grab ref on all new lower inodes
9589 + */
9590 + for (i=dbstart(sb->s_root); i<=dbend(sb->s_root); i++) {
9591 + struct dentry *lower_dentry =
9592 + unionfs_lower_dentry_idx(sb->s_root, i);
9593 + igrab(lower_dentry->d_inode);
9594 + new_lower_inodes[i] = lower_dentry->d_inode;
9595 + }
9596 + /* 2. release reference on all older lower inodes */
9597 + for (i=old_ibstart; i<=old_ibend; i++) {
9598 + iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
9599 + unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
9600 + }
9601 + kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
9602 + /* 3. update root dentry's inode to new lower_inodes array */
9603 + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9604 + new_lower_inodes = NULL;
9605 +
9606 + /* maxbytes may have changed */
9607 + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9608 + /* update high branch ID */
9609 + sbhbid(sb) = new_high_branch_id;
9610 +
9611 + /* update our sb->generation for revalidating objects */
9612 + i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9613 + atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9614 + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9615 + if (!(*flags & MS_SILENT))
9616 + printk("unionfs: new generation number %d\n", i);
9617 + /* finally, update the root dentry's times */
9618 + unionfs_copy_attr_times(sb->s_root->d_inode);
9619 + err = 0; /* reset to success */
9620 +
9621 + /*
9622 + * The code above falls through to the next label, and releases the
9623 + * refcnts of the older ones (stored in tmp_*): if we fell through
9624 + * here, it means success. However, if we jump directly to this
9625 + * label from any error above, then an error occurred after we
9626 + * grabbed various refcnts, and so we have to release the
9627 + * temporarily constructed structures.
9628 + */
9629 +out_release:
9630 + /* no need to cleanup/release anything in tmp_data */
9631 + if (tmp_lower_paths)
9632 + for (i=0; i<new_branches; i++)
9633 + pathput(&tmp_lower_paths[i]);
9634 +out_free:
9635 + kfree(tmp_lower_paths);
9636 + kfree(tmp_data);
9637 + kfree(new_lower_paths);
9638 + kfree(new_data);
9639 + kfree(new_lower_inodes);
9640 +out_error:
9641 + unionfs_write_unlock(sb);
9642 + unionfs_check_dentry(sb->s_root);
9643 + return err;
9644 +}
9645 +
9646 +/*
9647 + * Called by iput() when the inode reference count reached zero
9648 + * and the inode is not hashed anywhere. Used to clear anything
9649 + * that needs to be, before the inode is completely destroyed and put
9650 + * on the inode free list.
9651 + *
9652 + * No need to lock sb info's rwsem.
9653 + */
9654 +static void unionfs_clear_inode(struct inode *inode)
9655 +{
9656 + struct unionfs_inode_info *info = UNIONFS_I(inode);
9657 + struct unionfs_dir_state *state, *next;
9658 + struct inode *lower;
9659 + int idx, first, last;
9660 +
9661 + /* throw away every readdir state still cached on this inode */
9662 + list_for_each_entry_safe(state, next, &info->readdircache, cache) {
9663 + list_del(&state->cache);
9664 + free_rdstate(state);
9665 + }
9666 +
9667 + /*
9668 + * Drop our reference on each lower inode that is still attached;
9669 + * a negative start index means there is no range to walk.
9670 + */
9671 + first = ibstart(inode);
9672 + last = ibend(inode);
9673 + if (first < 0)
9674 + goto free_array;
9675 + for (idx = first; idx <= last; idx++) {
9676 + lower = unionfs_lower_inode_idx(inode, idx);
9677 + if (lower)
9678 + iput(lower);
9679 + }
9680 +
9681 +free_array:
9682 + kfree(info->lower_inodes);
9683 + info->lower_inodes = NULL;
9684 +}
9685 +
9686 +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9687 +{
9688 + struct unionfs_inode_info *info =
9689 + kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9690 +
9691 + if (info == NULL)
9692 + return NULL;
9693 +
9694 + /* zero only our private fields, i.e. everything before vfs_inode */
9695 + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9696 + info->vfs_inode.i_version = 1;
9697 +
9698 + return &info->vfs_inode;
9699 +}
9700 +
9701 +static void unionfs_destroy_inode(struct inode *inode)
9702 +{
9703 + kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode)); /* UNIONFS_I() presumably yields the object alloc_inode got -- confirm */
9704 +}
9705 +
9706 +/* unionfs inode cache constructor: inode_init_once() on the embedded inode */
9707 +static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
9708 +{
9709 + struct unionfs_inode_info *i = v; /* @v is one object of our cache */
9710 +
9711 + inode_init_once(&i->vfs_inode); /* @cachep and @flags are unused */
9712 +}
9713 +
9714 +int unionfs_init_inode_cache(void)
9715 +{
9716 + /* create the slab cache that alloc_inode/destroy_inode draw from */
9717 + unionfs_inode_cachep =
9718 + kmem_cache_create("unionfs_inode_cache",
9719 + sizeof(struct unionfs_inode_info), 0,
9720 + SLAB_RECLAIM_ACCOUNT, init_once, NULL);
9721 + if (unionfs_inode_cachep)
9722 + return 0;
9723 +
9724 + return -ENOMEM;
9725 +}
9726 +
9727 +/* unionfs inode cache destructor: a no-op if the cache was never created */
9728 +void unionfs_destroy_inode_cache(void)
9729 +{
9730 + if (unionfs_inode_cachep) /* the pointer itself is not reset afterwards */
9731 + kmem_cache_destroy(unionfs_inode_cachep);
9732 +}
9733 +
9734 +/*
9735 + * Called when we have a dirty inode; all we do here is throw out the
9736 + * parts of our readdir cache that are too old (last accessed more than
9737 + * RDCACHE_JIFFIES ago). @sync is unused; always returns 0.
9738 + * No need to grab sb info's rwsem.
9739 + */
9740 +static int unionfs_write_inode(struct inode *inode, int sync)
9741 +{
9742 + struct list_head *pos, *n;
9743 + struct unionfs_dir_state *rdstate;
9744 +
9745 + spin_lock(&UNIONFS_I(inode)->rdlock);
9746 + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9747 + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9748 + /* LRU order: stop at the first fresh entry (wrap-safe compare) */
9749 + if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9750 + break;
9751 + UNIONFS_I(inode)->rdcount--;
9752 + list_del(&rdstate->cache);
9753 + free_rdstate(rdstate);
9754 + }
9755 + spin_unlock(&UNIONFS_I(inode)->rdlock);
9756 +
9757 + return 0;
9758 +}
9759 +
9760 +/*
9761 + * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9762 + * code can actually succeed and won't leave tasks that need handling.
9763 + */
9764 +static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
9765 +{
9766 + struct super_block *sb, *lower_sb;
9767 + struct vfsmount *lower_mnt;
9768 + int idx, first, last;
9769 +
9770 + /* without MNT_FORCE we emulate the old behavior: do nothing */
9771 + if ((flags & MNT_FORCE) == 0)
9772 + return;
9773 +
9774 + sb = mnt->mnt_sb;
9775 + unionfs_read_lock(sb);
9776 +
9777 + /* hand the forced umount to every branch that implements the hook */
9778 + first = sbstart(sb);
9779 + last = sbend(sb);
9780 + for (idx = first; idx <= last; idx++) {
9781 + lower_mnt = unionfs_lower_mnt_idx(sb->s_root, idx);
9782 + lower_sb = unionfs_lower_super_idx(sb, idx);
9783 +
9784 + if (!lower_mnt || !lower_sb || !lower_sb->s_op)
9785 + continue;
9786 + if (!lower_sb->s_op->umount_begin)
9787 + continue;
9788 +
9789 + lower_sb->s_op->umount_begin(lower_mnt, flags);
9790 + }
9791 +
9792 + unionfs_read_unlock(sb);
9793 +}
9794 +
9795 +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9796 +{
9797 + struct super_block *sb = mnt->mnt_sb;
9798 + int ret = 0; /* returned: 0, -ENOMEM, or the d_path() error */
9799 + char *tmp_page; /* one-page scratch buffer for d_path() */
9800 + char *path;
9801 + int bindex, bstart, bend;
9802 + int perms;
9803 +
9804 + unionfs_read_lock(sb);
9805 +
9806 + unionfs_lock_dentry(sb->s_root);
9807 +
9808 + tmp_page = (char*) __get_free_page(GFP_KERNEL);
9809 + if (!tmp_page) {
9810 + ret = -ENOMEM;
9811 + goto out;
9812 + }
9813 +
9814 + bstart = sbstart(sb);
9815 + bend = sbend(sb);
9816 +
9817 + seq_printf(m, ",dirs="); /* output: ",dirs=PATH=rw|ro[:PATH=rw|ro...]" */
9818 + for (bindex = bstart; bindex <= bend; bindex++) {
9819 + path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex),
9820 + unionfs_lower_mnt_idx(sb->s_root, bindex),
9821 + tmp_page, PAGE_SIZE);
9822 + if (IS_ERR(path)) {
9823 + ret = PTR_ERR(path); /* text already emitted stays in @m */
9824 + goto out;
9825 + }
9826 +
9827 + perms = branchperms(sb, bindex);
9828 +
9829 + seq_printf(m, "%s=%s", path,
9830 + perms & MAY_WRITE ? "rw" : "ro");
9831 + if (bindex != bend) /* ':' separates branches, none after the last */
9832 + seq_printf(m, ":");
9833 + }
9834 +
9835 +out:
9836 + free_page((unsigned long) tmp_page); /* also reached with tmp_page NULL */
9837 +
9838 + unionfs_unlock_dentry(sb->s_root);
9839 +
9840 + unionfs_read_unlock(sb);
9841 +
9842 + return ret;
9843 +}
9844 +
9845 +struct super_operations unionfs_sops = { /* declared extern in union.h */
9846 + .read_inode = unionfs_read_inode,
9847 + .delete_inode = unionfs_delete_inode,
9848 + .put_super = unionfs_put_super,
9849 + .statfs = unionfs_statfs,
9850 + .remount_fs = unionfs_remount_fs,
9851 + .clear_inode = unionfs_clear_inode,
9852 + .umount_begin = unionfs_umount_begin, /* forwards MNT_FORCE to lower branches */
9853 + .show_options = unionfs_show_options, /* prints the ",dirs=..." branch list */
9854 + .write_inode = unionfs_write_inode, /* prunes expired readdir cache entries */
9855 + .alloc_inode = unionfs_alloc_inode,
9856 + .destroy_inode = unionfs_destroy_inode,
9857 +};
9858 diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9859 new file mode 100644
9860 index 0000000..ba0ff50
9861 --- /dev/null
9862 +++ b/fs/unionfs/union.h
9863 @@ -0,0 +1,581 @@
9864 +/*
9865 + * Copyright (c) 2003-2007 Erez Zadok
9866 + * Copyright (c) 2003-2006 Charles P. Wright
9867 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9868 + * Copyright (c) 2005 Arun M. Krishnakumar
9869 + * Copyright (c) 2004-2006 David P. Quigley
9870 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9871 + * Copyright (c) 2003 Puja Gupta
9872 + * Copyright (c) 2003 Harikesavan Krishnan
9873 + * Copyright (c) 2003-2007 Stony Brook University
9874 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
9875 + *
9876 + * This program is free software; you can redistribute it and/or modify
9877 + * it under the terms of the GNU General Public License version 2 as
9878 + * published by the Free Software Foundation.
9879 + */
9880 +
9881 +#ifndef _UNION_H_
9882 +#define _UNION_H_
9883 +
9884 +#include <linux/dcache.h>
9885 +#include <linux/file.h>
9886 +#include <linux/list.h>
9887 +#include <linux/fs.h>
9888 +#include <linux/mm.h>
9889 +#include <linux/module.h>
9890 +#include <linux/mount.h>
9891 +#include <linux/namei.h>
9892 +#include <linux/page-flags.h>
9893 +#include <linux/pagemap.h>
9894 +#include <linux/poll.h>
9895 +#include <linux/security.h>
9896 +#include <linux/seq_file.h>
9897 +#include <linux/slab.h>
9898 +#include <linux/spinlock.h>
9899 +#include <linux/smp_lock.h>
9900 +#include <linux/statfs.h>
9901 +#include <linux/string.h>
9902 +#include <linux/vmalloc.h>
9903 +#include <linux/writeback.h>
9904 +#include <linux/buffer_head.h>
9905 +#include <linux/xattr.h>
9906 +#include <linux/fs_stack.h>
9907 +#include <linux/magic.h>
9908 +#include <linux/log2.h>
9909 +
9910 +#include <asm/mman.h>
9911 +#include <asm/system.h>
9912 +
9913 +#include <linux/union_fs.h>
9914 +
9915 +/* the file system name */
9916 +#define UNIONFS_NAME "unionfs"
9917 +
9918 +/* unionfs root inode number */
9919 +#define UNIONFS_ROOT_INO 1
9920 +
9921 +/* number of times we try to get a unique temporary file name */
9922 +#define GET_TMPNAM_MAX_RETRY 5
9923 +
9924 +/* maximum number of branches we support, to avoid memory blowup */
9925 +#define UNIONFS_MAX_BRANCHES 128
9926 +
9927 +/* Operations vectors defined in specific files. */
9928 +extern struct file_operations unionfs_main_fops;
9929 +extern struct file_operations unionfs_dir_fops;
9930 +extern struct inode_operations unionfs_main_iops;
9931 +extern struct inode_operations unionfs_dir_iops;
9932 +extern struct inode_operations unionfs_symlink_iops;
9933 +extern struct super_operations unionfs_sops;
9934 +extern struct dentry_operations unionfs_dops;
9935 +
9936 +/* How long (in jiffies) a readdir cache entry is allowed to persist */
9937 +#define RDCACHE_JIFFIES (5*HZ)
9938 +
9939 +/* unionfs file private data, kept in memory per open file. */
9940 +struct unionfs_file_info {
9941 + int bstart; /* presumably first branch index in use -- TODO confirm */
9942 + int bend; /* presumably last branch index in use -- TODO confirm */
9943 + atomic_t generation;
9944 +
9945 + struct unionfs_dir_state *rdstate; /* readdir state (directory hash table) */
9946 + struct file **lower_files; /* presumably one slot per branch -- TODO confirm */
9947 + int *saved_branch_ids; /* IDs of branches when file was opened */
9948 +};
9949 +
9950 +/* unionfs inode data in memory */
9951 +struct unionfs_inode_info {
9952 + int bstart;
9953 + int bend;
9954 + atomic_t generation;
9955 + int stale;
9956 + /* Stuff for readdir over NFS. */
9957 + spinlock_t rdlock; /* held by unionfs_write_inode while pruning the cache */
9958 + struct list_head readdircache; /* unionfs_dir_state entries, in LRU order */
9959 + int rdcount; /* decremented whenever an entry leaves readdircache */
9960 + int hashsize;
9961 + int cookie;
9962 +
9963 + /* The lower inodes */
9964 + struct inode **lower_inodes;
9965 + /* to keep track of reads/writes for unlinks before closes */
9966 + atomic_t totalopens;
9967 +
9968 + struct inode vfs_inode; /* the VFS inode, embedded by value */
9969 +};
9970 +
9971 +/* unionfs dentry data in memory */
9972 +struct unionfs_dentry_info {
9973 + /*
9974 + * This mutex is used to lock the dentry as soon as we get into a
9975 + * unionfs function from the VFS. Our lock ordering is that children
9976 + * go before their parents.
9977 + */
9978 + struct mutex lock;
9979 + int bstart;
9980 + int bend;
9981 + int bopaque; /* callers treat dbopaque() == -1 as "not set" */
9982 + int bcount;
9983 + atomic_t generation;
9984 + struct path *lower_paths; /* presumably one per branch -- TODO confirm */
9985 +};
9986 +
9987 +/* Per-branch data; reached as UNIONFS_SB(sb)->data[branch index]. */
9988 +struct unionfs_data {
9989 + struct super_block *sb; /* presumably the branch's lower sb -- TODO confirm */
9990 + atomic_t open_files; /* number of open files on branch */
9991 + int branchperms; /* tested against MAY_WRITE to tell rw from ro */
9992 + int branch_id; /* unique branch ID at re/mount time */
9993 +};
9994 +
9995 +/* unionfs super-block data in memory */
9996 +struct unionfs_sb_info {
9997 + int bend;
9998 +
9999 + atomic_t generation;
10000 +
10001 + /*
10002 + * This rwsem is used to make sure that a branch management
10003 + * operation...
10004 + * 1) will not begin before all currently in-flight operations
10005 + * complete
10006 + * 2) any new operations do not execute until the currently
10007 + * running branch management operation completes
10008 + */
10009 + struct rw_semaphore rwsem; /* see unionfs_{read,write}_{lock,unlock}() */
10010 + int high_branch_id; /* last unique branch ID given */
10011 + struct unionfs_data *data; /* array indexed by branch index */
10012 +};
10013 +
10014 +/*
10015 + * structure for making the linked list of entries by readdir on left branch
10016 + * to compare with entries on right branch
10017 + */
10018 +struct filldir_node {
10019 + struct list_head file_list; /* list for directory entries */
10020 + char *name; /* name entry */
10021 + int hash; /* name hash */
10022 + int namelen; /* name len since name is not 0 terminated */
10023 +
10024 + /*
10025 + * Branch index (presumably where this entry was found), kept so we can
10026 + * detect duplicate whiteouts and files in the same branch: -EIO.
10027 + */
10028 + int bindex;
10029 +
10030 + /* is this a whiteout entry? */
10031 + int whiteout;
10032 +
10033 + /* Inline name, so we don't need to separately kmalloc small ones */
10034 + char iname[DNAME_INLINE_LEN_MIN];
10035 +};
10036 +
10037 +/* Directory hash table. */
10038 +struct unionfs_dir_state {
10039 + unsigned int cookie; /* the cookie, based off of rdversion */
10040 + unsigned int offset; /* The entry we have returned. */
10041 + int bindex;
10042 + loff_t dirpos; /* offset within the lower level directory */
10043 + int size; /* How big is the hash table? */
10044 + int hashentries; /* How many entries have been inserted? */
10045 + unsigned long access; /* jiffies stamp; expires RDCACHE_JIFFIES later */
10046 +
10047 + /* This cache list is used when the inode keeps us around. */
10048 + struct list_head cache; /* link in unionfs_inode_info.readdircache */
10049 + struct list_head list[0]; /* trailing array; presumably 'size' buckets */
10050 +};
10051 +
10052 +/* externs needed for fanout.h or sioq.h */
10053 +extern int unionfs_get_nlinks(const struct inode *inode);
10054 +
10055 +/* include miscellaneous macros */
10056 +#include "fanout.h"
10057 +#include "sioq.h"
10058 +
10059 +/* externs for cache creation/deletion routines */
10060 +extern void unionfs_destroy_filldir_cache(void);
10061 +extern int unionfs_init_filldir_cache(void);
10062 +extern int unionfs_init_inode_cache(void);
10063 +extern void unionfs_destroy_inode_cache(void);
10064 +extern int unionfs_init_dentry_cache(void);
10065 +extern void unionfs_destroy_dentry_cache(void);
10066 +
10067 +/* Initialize and free readdir-specific state. */
10068 +extern int init_rdstate(struct file *file);
10069 +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
10070 + int bindex);
10071 +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
10072 + loff_t fpos);
10073 +extern void free_rdstate(struct unionfs_dir_state *state);
10074 +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
10075 + const char *name, int namelen, int bindex,
10076 + int whiteout);
10077 +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
10078 + const char *name, int namelen);
10079 +
10080 +extern struct dentry **alloc_new_dentries(int objs);
10081 +extern struct unionfs_data *alloc_new_data(int objs);
10082 +
10083 +/* We can only use 32-bits of offset for rdstate --- blech! */
10084 +#define DIREOF (0xfffff)
10085 +#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
10086 +#define MAXRDCOOKIE (0xfff) /* 12-bit cookie mask: 12 + RDOFFBITS == 32 */
10087 +/* Pack an rdstate into an offset: cookie in the high bits, entry offset low. */
10088 +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
10089 +{
10090 + off_t tmp;
10091 +
10092 + tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
10093 + | (buf->offset & DIREOF);
10094 + return tmp;
10095 +}
10096 +
10097 +#define unionfs_read_lock(sb) down_read(&UNIONFS_SB(sb)->rwsem)
10098 +#define unionfs_read_unlock(sb) up_read(&UNIONFS_SB(sb)->rwsem)
10099 +#define unionfs_write_lock(sb) down_write(&UNIONFS_SB(sb)->rwsem)
10100 +#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem)
10101 +
10102 +static inline void unionfs_double_lock_dentry(struct dentry *d1,
10103 + struct dentry *d2)
10104 +{
10105 + if (d2 < d1) { /* always lock the lower-addressed dentry first */
10106 + struct dentry *tmp = d1;
10107 + d1 = d2;
10108 + d2 = tmp;
10109 + }
10110 + unionfs_lock_dentry(d1); /* NOTE(review): d1 == d2 would lock it twice */
10111 + unionfs_lock_dentry(d2);
10112 +}
10113 +
10114 +extern int new_dentry_private_data(struct dentry *dentry);
10115 +extern void free_dentry_private_data(struct dentry *dentry);
10116 +extern void update_bstart(struct dentry *dentry);
10117 +
10118 +/*
10119 + * EXTERNALS:
10120 + */
10121 +
10122 +/* replicates the directory structure up to given dentry in given branch */
10123 +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
10124 + const char *name, int bindex);
10125 +extern int make_dir_opaque(struct dentry *dir, int bindex);
10126 +
10127 +/* partial lookup */
10128 +extern int unionfs_partial_lookup(struct dentry *dentry);
10129 +
10130 +/*
10131 + * Pass a unionfs dentry and an index and it will try to create a whiteout
10132 + * in branch 'index'.
10133 + *
10134 + * On error, it will proceed to a branch to the left
10135 + */
10136 +extern int create_whiteout(struct dentry *dentry, int start);
10137 +/* copies a file from dbstart to newbindex branch */
10138 +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
10139 + int newbindex, loff_t size);
10140 +extern int copyup_named_file(struct inode *dir, struct file *file,
10141 + char *name, int bstart, int new_bindex,
10142 + loff_t len);
10143 +/* copies a dentry from dbstart to newbindex branch */
10144 +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
10145 + int bstart, int new_bindex, const char *name,
10146 + int namelen, struct file **copyup_file, loff_t len);
10147 +/* helper functions for post-copyup cleanup */
10148 +extern void unionfs_inherit_mnt(struct dentry *dentry);
10149 +extern void unionfs_purge_extras(struct dentry *dentry);
10150 +
10151 +extern int remove_whiteouts(struct dentry *dentry,
10152 + struct dentry *lower_dentry, int bindex);
10153 +
10154 +extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
10155 + struct unionfs_dir_state *namelist);
10156 +
10157 +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
10158 +extern int check_empty(struct dentry *dentry,
10159 + struct unionfs_dir_state **namelist);
10160 +/* Delete whiteouts from this directory in branch bindex. */
10161 +extern int delete_whiteouts(struct dentry *dentry, int bindex,
10162 + struct unionfs_dir_state *namelist);
10163 +
10164 +/* Re-lookup a lower dentry. */
10165 +extern int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex);
10166 +
10167 +extern void unionfs_reinterpose(struct dentry *this_dentry);
10168 +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
10169 +
10170 +/* Locking functions. */
10171 +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
10172 +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
10173 +
10174 +/* Common file operations. */
10175 +extern int unionfs_file_revalidate(struct file *file, int willwrite);
10176 +extern int unionfs_open(struct inode *inode, struct file *file);
10177 +extern int unionfs_file_release(struct inode *inode, struct file *file);
10178 +extern int unionfs_flush(struct file *file, fl_owner_t id);
10179 +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
10180 + unsigned long arg);
10181 +extern int unionfs_fsync(struct file *file, struct dentry *dentry,
10182 + int datasync);
10183 +extern int unionfs_fasync(int fd, struct file *file, int flag);
10184 +
10185 +/* Inode operations */
10186 +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
10187 + struct inode *new_dir, struct dentry *new_dentry);
10188 +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
10189 +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
10190 +
10191 +extern int __unionfs_d_revalidate_chain(struct dentry *dentry,
10192 + struct nameidata *nd, int willwrite);
10193 +extern int is_newer_lower(const struct dentry *dentry);
10194 +
10195 +/* The values for unionfs_interpose's flag. */
10196 +#define INTERPOSE_DEFAULT 0
10197 +#define INTERPOSE_LOOKUP 1
10198 +#define INTERPOSE_REVAL 2
10199 +#define INTERPOSE_REVAL_NEG 3
10200 +#define INTERPOSE_PARTIAL 4
10201 +
10202 +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
10203 + struct super_block *sb, int flag);
10204 +
10205 +#ifdef CONFIG_UNION_FS_XATTR
10206 +/* Extended attribute functions. */
10207 +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
10208 +
10209 +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
10210 + void *value, size_t size);
10211 +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
10212 +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
10213 + size_t size);
10214 +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10215 + const void *value, size_t size, int flags);
10216 +#endif /* CONFIG_UNION_FS_XATTR */
10217 +
10218 +/* Unhashed and not the sb root: the root is unhashed, but isn't deleted. */
10219 +static inline int d_deleted(struct dentry *d)
10220 +{
10221 + return d_unhashed(d) && (d != d->d_sb->s_root);
10222 +}
10223 +
10224 +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
10225 + struct nameidata *nd, int lookupmode);
10226 +
10227 +/* unionfs_permission, check if we should bypass error to facilitate copyup */
10228 +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10229 +
10230 +/* unionfs_open, check if we need to copyup the file */
10231 +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10232 +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10233 +
10234 +static inline int branchperms(const struct super_block *sb, int index)
10235 +{
10236 + BUG_ON(index < 0); /* a negative branch index is a caller bug */
10237 + return UNIONFS_SB(sb)->data[index].branchperms; /* branch 'index' perms */
10238 +}
10239 +
10240 +static inline int set_branchperms(struct super_block *sb, int index, int perms)
10241 +{
10242 + BUG_ON(index < 0); /* a negative branch index is a caller bug */
10243 + UNIONFS_SB(sb)->data[index].branchperms = perms;
10244 + return perms; /* hand back the value just stored */
10245 +}
10246 +
10247 +/* Is branch 'index' of this superblock read-only?  -EROFS if so, else 0. */
10248 +static inline int is_robranch_super(const struct super_block *sb, int index)
10249 +{
10250 + int ret;
10251 +
10252 + ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
10253 + return ret;
10254 +}
10255 +
10256 +/* -EROFS if branch 'index' lacks MAY_WRITE or its lower inode is IS_RDONLY. */
10257 +static inline int is_robranch_idx(const struct dentry *dentry, int index)
10258 +{
10259 + int err = 0;
10260 +
10261 + BUG_ON(index < 0);
10262 +
10263 + if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ||
10264 + IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode))
10265 + err = -EROFS;
10266 + return err; /* 0 when the branch is writable */
10267 +}
10268 +
10269 +static inline int is_robranch(const struct dentry *dentry)
10270 +{
10271 + int index;
10272 +
10273 + index = UNIONFS_D(dentry)->bstart; /* test the dentry's bstart branch */
10274 + BUG_ON(index < 0);
10275 +
10276 + return is_robranch_idx(dentry, index); /* -EROFS or 0 */
10277 +}
10278 +
10279 +/* What do we use for whiteouts. */
10280 +#define UNIONFS_WHPFX ".wh."
10281 +#define UNIONFS_WHLEN 4
10282 +/*
10283 + * If a directory contains this file, then it is opaque. We start with the
10284 + * .wh. flag so that it is blocked by lookup.
10285 + */
10286 +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10287 +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10288 +
10289 +#ifndef DEFAULT_POLLMASK
10290 +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
10291 +#endif /* not DEFAULT_POLLMASK */
10292 +
10293 +/*
10294 + * EXTERNALS:
10295 + */
10296 +extern char *alloc_whname(const char *name, int len);
10297 +extern int check_branch(struct nameidata *nd);
10298 +extern int __parse_branch_mode(const char *name);
10299 +extern int parse_branch_mode(const char *name);
10300 +
10301 +/*
10302 + * These two functions are here because it is kind of daft to copy and paste
10303 + * the contents of the two functions to 32+ places in unionfs
10304 + */
10305 +static inline struct dentry *lock_parent(struct dentry *dentry)
10306 +{
10307 + struct dentry *dir = dget(dentry->d_parent); /* take a ref on the parent */
10308 +
10309 + mutex_lock(&dir->d_inode->i_mutex); /* and lock the parent's inode */
10310 + return dir; /* caller releases both via unlock_dir() */
10311 +}
10312 +
10313 +static inline void unlock_dir(struct dentry *dir)
10314 +{
10315 + mutex_unlock(&dir->d_inode->i_mutex); /* undo lock_parent()'s mutex_lock */
10316 + dput(dir); /* and drop the reference it took with dget() */
10317 +}
10318 +
10319 +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10320 + int bindex)
10321 +{
10322 + struct vfsmount *mnt;
10323 + /* mntget() the lower mount of branch bindex; NULL if dentry/mnt is NULL. */
10324 + if (!dentry) {
10325 + if (bindex < 0)
10326 + return NULL;
10327 + if (!dentry && bindex >= 0) { /* always true at this point */
10328 +#ifdef UNIONFS_DEBUG
10329 + printk(KERN_DEBUG
10330 + "unionfs_mntget: dentry=%p bindex=%d\n",
10331 + dentry, bindex);
10332 +#endif /* UNIONFS_DEBUG */
10333 + return NULL;
10334 + }
10335 + }
10336 + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10337 + if (!mnt) {
10338 + if (bindex < 0)
10339 + return NULL;
10340 + if (!mnt && bindex >= 0) { /* always true at this point */
10341 +#ifdef UNIONFS_DEBUG
10342 + printk(KERN_DEBUG
10343 + "unionfs_mntget: mnt=%p bindex=%d\n",
10344 + mnt, bindex);
10345 +#endif /* UNIONFS_DEBUG */
10346 + return NULL;
10347 + }
10348 + }
10349 + mnt = mntget(mnt);
10350 + return mnt;
10351 +}
10352 +
10353 +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10354 +{
10355 + struct vfsmount *mnt;
10356 + /* mntput() the lower mount of branch bindex; no-op if dentry/mnt is NULL. */
10357 + if (!dentry) {
10358 + if (bindex < 0)
10359 + return;
10360 + if (!dentry && bindex >= 0) { /* always true at this point */
10361 +#ifdef UNIONFS_DEBUG
10362 + printk(KERN_DEBUG
10363 + "unionfs_mntput: dentry=%p bindex=%d\n",
10364 + dentry, bindex);
10365 +#endif /* UNIONFS_DEBUG */
10366 + return;
10367 + }
10368 + }
10369 + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10370 + if (!mnt) {
10371 + if (bindex < 0)
10372 + return;
10373 + if (!mnt && bindex >= 0) { /* always true at this point */
10374 +#ifdef UNIONFS_DEBUG
10375 + /*
10376 + * Directories can have NULL lower objects in
10377 + * between start/end, but NOT if at the start/end
10378 + * range. We cannot verify that this dentry is a
10379 + * type=DIR, because it may already be a negative
10380 + * dentry. But if dbstart is greater than dbend, we
10381 + * know that this couldn't have been a regular file:
10382 + * it had to have been a directory.
10383 + */
10384 + if (!(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10385 + printk(KERN_WARNING
10386 + "unionfs_mntput: mnt=%p bindex=%d\n",
10387 + mnt, bindex);
10388 +#endif /* UNIONFS_DEBUG */
10389 + return;
10390 + }
10391 + }
10392 + mntput(mnt);
10393 +}
10394 +
10395 +#ifdef UNIONFS_DEBUG
10396 +
10397 +/* useful for tracking code reachability */
10398 +#define UDBG printk("DBG:%s:%s:%d\n",__FILE__,__FUNCTION__,__LINE__)
10399 +
10400 +#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10401 + __FILE__,__FUNCTION__,__LINE__)
10402 +#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10403 + __FILE__,__FUNCTION__,__LINE__)
10404 +#define unionfs_check_file(f) __unionfs_check_file((f), \
10405 + __FILE__,__FUNCTION__,__LINE__)
10406 +#define show_branch_counts(sb) __show_branch_counts((sb), \
10407 + __FILE__,__FUNCTION__,__LINE__)
10408 +#define show_inode_times(i) __show_inode_times((i), \
10409 + __FILE__,__FUNCTION__,__LINE__)
10410 +#define show_dinode_times(d) __show_dinode_times((d), \
10411 + __FILE__,__FUNCTION__,__LINE__)
10412 +#define show_inode_counts(i) __show_inode_counts((i), \
10413 + __FILE__,__FUNCTION__,__LINE__)
10414 +
10415 +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10416 + const char *fxn, int line);
10417 +extern void __unionfs_check_dentry(const struct dentry *dentry,
10418 + const char *fname, const char *fxn,
10419 + int line);
10420 +extern void __unionfs_check_file(const struct file *file,
10421 + const char *fname, const char *fxn, int line);
10422 +extern void __show_branch_counts(const struct super_block *sb,
10423 + const char *file, const char *fxn, int line);
10424 +extern void __show_inode_times(const struct inode *inode,
10425 + const char *file, const char *fxn, int line);
10426 +extern void __show_dinode_times(const struct dentry *dentry,
10427 + const char *file, const char *fxn, int line);
10428 +extern void __show_inode_counts(const struct inode *inode,
10429 + const char *file, const char *fxn, int line);
10430 +
10431 +#else /* not UNIONFS_DEBUG */
10432 +
10433 +/* we leave useful hooks for these check functions throughout the code */
10434 +#define unionfs_check_inode(i)
10435 +#define unionfs_check_dentry(d)
10436 +#define unionfs_check_file(f)
10437 +#define show_branch_counts(sb)
10438 +#define show_inode_times(i)
10439 +#define show_dinode_times(d)
10440 +#define show_inode_counts(i)
10441 +
10442 +#endif /* not UNIONFS_DEBUG */
10443 +
10444 +#endif /* not _UNION_H_ */
10445 diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10446 new file mode 100644
10447 index 0000000..47bebab
10448 --- /dev/null
10449 +++ b/fs/unionfs/unlink.c
10450 @@ -0,0 +1,192 @@
10451 +/*
10452 + * Copyright (c) 2003-2007 Erez Zadok
10453 + * Copyright (c) 2003-2006 Charles P. Wright
10454 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10455 + * Copyright (c) 2005-2006 Junjiro Okajima
10456 + * Copyright (c) 2005 Arun M. Krishnakumar
10457 + * Copyright (c) 2004-2006 David P. Quigley
10458 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10459 + * Copyright (c) 2003 Puja Gupta
10460 + * Copyright (c) 2003 Harikesavan Krishnan
10461 + * Copyright (c) 2003-2007 Stony Brook University
10462 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10463 + *
10464 + * This program is free software; you can redistribute it and/or modify
10465 + * it under the terms of the GNU General Public License version 2 as
10466 + * published by the Free Software Foundation.
10467 + */
10468 +
10469 +#include "union.h"
10470 +
10471 +/* unlink the lower file at dbstart, then create a whiteout; 0 or -errno */
10472 +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
10473 +{
10474 + struct dentry *lower_dentry;
10475 + struct dentry *lower_dir_dentry;
10476 + int bindex;
10477 + int err = 0;
10478 +
10479 + if ((err = unionfs_partial_lookup(dentry)))
10480 + goto out;
10481 +
10482 + bindex = dbstart(dentry);
10483 +
10484 + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10485 + if (!lower_dentry)
10486 + goto out; /* NOTE(review): err is 0 here, so "out" drops i_nlink */
10487 +
10488 + lower_dir_dentry = lock_parent(lower_dentry);
10489 +
10490 + /* avoid destroying the lower inode if the file is in use */
10491 + dget(lower_dentry);
10492 + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
10493 + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
10494 + /* if vfs_unlink succeeded, update our inode's times */
10495 + if (!err)
10496 + unionfs_copy_attr_times(dentry->d_inode);
10497 + dput(lower_dentry);
10498 + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10499 + unlock_dir(lower_dir_dentry);
10500 +
10501 + if (err && !IS_COPYUP_ERR(err)) /* only -EROFS continues past here */
10502 + goto out;
10503 +
10504 + if (err) { /* -EROFS: put the whiteout one branch to the left */
10505 + if (dbstart(dentry) == 0) /* no branch to the left: return -EROFS */
10506 + goto out;
10507 + err = create_whiteout(dentry, dbstart(dentry) - 1);
10508 + } else if (dbopaque(dentry) != -1)
10509 + /* There is a lower lower-priority file with the same name. */
10510 + err = create_whiteout(dentry, dbopaque(dentry));
10511 + else
10512 + err = create_whiteout(dentry, dbstart(dentry));
10513 +
10514 +out:
10515 + if (!err)
10516 + dentry->d_inode->i_nlink--;
10517 +
10518 + /* We don't want to leave negative leftover dentries for revalidate. */
10519 + if (!err && (dbopaque(dentry) != -1))
10520 + update_bstart(dentry);
10521 +
10522 + return err;
10523 +}
10524 +
10525 +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10526 +{
10527 + int err = 0;
10528 + /* Entry point for unlink: revalidate, unlink + whiteout, drop the dentry. */
10529 + unionfs_read_lock(dentry->d_sb);
10530 + unionfs_lock_dentry(dentry);
10531 +
10532 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10533 + err = -ESTALE; /* a zero return from revalidation is reported as stale */
10534 + goto out;
10535 + }
10536 + unionfs_check_dentry(dentry);
10537 +
10538 + err = unionfs_unlink_whiteout(dir, dentry);
10539 + /* call d_drop so the system "forgets" about us */
10540 + if (!err) {
10541 + if (!S_ISDIR(dentry->d_inode->i_mode))
10542 + unionfs_purge_extras(dentry);
10543 + d_drop(dentry);
10544 + /*
10545 + * if unlink/whiteout succeeded, parent dir mtime has
10546 + * changed
10547 + */
10548 + unionfs_copy_attr_times(dir);
10549 + }
10550 +
10551 +out:
10552 + if (!err) { /* the debug checks run on success only, still under the lock */
10553 + unionfs_check_dentry(dentry);
10554 + unionfs_check_inode(dir);
10555 + }
10556 + unionfs_unlock_dentry(dentry);
10557 + unionfs_read_unlock(dentry->d_sb);
10558 + return err;
10559 +}
10560 +
10561 +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10562 + struct unionfs_dir_state *namelist)
10563 +{
10564 + int err;
10565 + struct dentry *lower_dentry;
10566 + struct dentry *lower_dir_dentry = NULL; /* NULL until lock_parent() ran */
10567 +
10568 + /* Here we need to remove whiteout entries. */
10569 + err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10570 + if (err)
10571 + goto out;
10572 +
10573 + lower_dentry = unionfs_lower_dentry(dentry);
10574 +
10575 + lower_dir_dentry = lock_parent(lower_dentry);
10576 +
10577 + /* avoid destroying the lower inode if the file is in use */
10578 + dget(lower_dentry);
10579 + if (!(err = is_robranch(dentry))) /* -EROFS skips the lower rmdir */
10580 + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10581 + dput(lower_dentry);
10582 +
10583 + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10584 + /* propagate number of hard-links */
10585 + dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10586 +
10587 +out:
10588 + if (lower_dir_dentry) /* unlock only if lock_parent() was reached */
10589 + unlock_dir(lower_dir_dentry);
10590 + return err;
10591 +}
10592 +
10593 +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10594 +{
10595 + int err = 0;
10596 + struct unionfs_dir_state *namelist = NULL; /* filled in by check_empty() */
10597 +
10598 + unionfs_read_lock(dentry->d_sb);
10599 + unionfs_lock_dentry(dentry);
10600 +
10601 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10602 + err = -ESTALE; /* a zero return from revalidation is reported as stale */
10603 + goto out;
10604 + }
10605 + unionfs_check_dentry(dentry);
10606 +
10607 + /* check if this unionfs directory is empty or not */
10608 + err = check_empty(dentry, &namelist);
10609 + if (err)
10610 + goto out;
10611 +
10612 + err = unionfs_rmdir_first(dir, dentry, namelist);
10613 + /* create whiteout */
10614 + if (!err)
10615 + err = create_whiteout(dentry, dbstart(dentry));
10616 + else {
10617 + int new_err;
10618 +
10619 + if (dbstart(dentry) == 0) /* no branch to the left of branch 0 */
10620 + goto out;
10621 +
10622 + /* exit if the error returned was NOT -EROFS */
10623 + if (!IS_COPYUP_ERR(err))
10624 + goto out;
10625 +
10626 + new_err = create_whiteout(dentry, dbstart(dentry) - 1);
10627 + if (new_err != -EEXIST) /* on -EEXIST the earlier -EROFS is kept */
10628 + err = new_err;
10629 + }
10630 +
10631 +out:
10632 + /* call d_drop so the system "forgets" about us */
10633 + if (!err)
10634 + d_drop(dentry);
10635 +
10636 + if (namelist)
10637 + free_rdstate(namelist);
10638 +
10639 + unionfs_unlock_dentry(dentry);
10640 + unionfs_read_unlock(dentry->d_sb);
10641 + return err;
10642 +}
10643 diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
10644 new file mode 100644
10645 index 0000000..ee7da13
10646 --- /dev/null
10647 +++ b/fs/unionfs/xattr.c
10648 @@ -0,0 +1,153 @@
10649 +/*
10650 + * Copyright (c) 2003-2007 Erez Zadok
10651 + * Copyright (c) 2003-2006 Charles P. Wright
10652 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10653 + * Copyright (c) 2005-2006 Junjiro Okajima
10654 + * Copyright (c) 2005 Arun M. Krishnakumar
10655 + * Copyright (c) 2004-2006 David P. Quigley
10656 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10657 + * Copyright (c) 2003 Puja Gupta
10658 + * Copyright (c) 2003 Harikesavan Krishnan
10659 + * Copyright (c) 2003-2007 Stony Brook University
10660 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10661 + *
10662 + * This program is free software; you can redistribute it and/or modify
10663 + * it under the terms of the GNU General Public License version 2 as
10664 + * published by the Free Software Foundation.
10665 + */
10666 +
10667 +#include "union.h"
10668 +
10669 +/* This is lifted from fs/xattr.c.  Returns a buffer, NULL or an ERR_PTR. */
10670 +void *unionfs_xattr_alloc(size_t size, size_t limit)
10671 +{
10672 + void *ptr;
10673 +
10674 + if (size > limit) /* request exceeds the caller-supplied limit */
10675 + return ERR_PTR(-E2BIG);
10676 +
10677 + if (!size) /* size request, no buffer is needed */
10678 + return NULL;
10679 +
10680 + ptr = kmalloc(size, GFP_KERNEL);
10681 + if (!ptr)
10682 + return ERR_PTR(-ENOMEM);
10683 + return ptr; /* the kmalloc'ed buffer of 'size' bytes */
10684 +}
10685 +
10686 +/*
10687 + * Get xattr 'name' via the lower dentry; -ESTALE if revalidation fails.
10688 + * BKL held by caller; dentry->d_inode->i_mutex locked.
10689 + */
10690 +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
10691 + size_t size)
10692 +{
10693 + struct dentry *lower_dentry = NULL;
10694 + int err = -EOPNOTSUPP;
10695 +
10696 + unionfs_read_lock(dentry->d_sb);
10697 + unionfs_lock_dentry(dentry);
10698 +
10699 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10700 + err = -ESTALE;
10701 + goto out;
10702 + }
10703 +
10704 + lower_dentry = unionfs_lower_dentry(dentry);
10705 +
10706 + err = vfs_getxattr(lower_dentry, (char*) name, value, size);
10707 +
10708 +out:
10709 + unionfs_check_dentry(dentry); /* check while the dentry is still locked */
10710 + unionfs_unlock_dentry(dentry);
10711 + unionfs_read_unlock(dentry->d_sb);
10712 + return err;
10713 +}
10714 +
10715 +/*
10716 + * Set xattr 'name' via the lower dentry; -ESTALE if revalidation fails.
10717 + * BKL held by caller; dentry->d_inode->i_mutex locked.
10718 + */
10719 +int unionfs_setxattr(struct dentry *dentry, const char *name,
10720 + const void *value, size_t size, int flags)
10721 +{
10722 + struct dentry *lower_dentry = NULL;
10723 + int err = -EOPNOTSUPP;
10724 +
10725 + unionfs_read_lock(dentry->d_sb);
10726 + unionfs_lock_dentry(dentry);
10727 +
10728 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10729 + err = -ESTALE;
10730 + goto out;
10731 + }
10732 +
10733 + lower_dentry = unionfs_lower_dentry(dentry);
10734 +
10735 + err = vfs_setxattr(lower_dentry, (char*) name, (void*) value,
10736 + size, flags);
10737 +
10738 +out:
10739 + unionfs_check_dentry(dentry); /* check while the dentry is still locked */
10740 + unionfs_unlock_dentry(dentry);
10741 + unionfs_read_unlock(dentry->d_sb);
10742 + return err;
10743 +}
10744 +
10745 +/*
10746 + * BKL held by caller.
10747 + * dentry->d_inode->i_mutex locked
10748 + */
10749 +int unionfs_removexattr(struct dentry *dentry, const char *name)
10750 +{
10751 + struct dentry *lower_dentry = NULL;
10752 + int err = -EOPNOTSUPP;
10753 +
10754 + unionfs_read_lock(dentry->d_sb);
10755 + unionfs_lock_dentry(dentry);
10756 +
10757 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10758 + err = -ESTALE;
10759 + goto out;
10760 + }
10761 +
10762 + lower_dentry = unionfs_lower_dentry(dentry);
10763 +
10764 + err = vfs_removexattr(lower_dentry, (char*) name);
10765 +
10766 +out:
10767 + unionfs_unlock_dentry(dentry);
10768 + unionfs_check_dentry(dentry);
10769 + unionfs_read_unlock(dentry->d_sb);
10770 + return err;
10771 +}
10772 +
10773 +/*
10774 + * BKL held by caller.
10775 + * dentry->d_inode->i_mutex locked
10776 + */
10777 +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
10778 +{
10779 + struct dentry *lower_dentry = NULL;
10780 + int err = -EOPNOTSUPP;
10781 + char *encoded_list = NULL;
10782 +
10783 + unionfs_read_lock(dentry->d_sb);
10784 + unionfs_lock_dentry(dentry);
10785 +
10786 + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10787 + err = -ESTALE;
10788 + goto out;
10789 + }
10790 +
10791 + lower_dentry = unionfs_lower_dentry(dentry);
10792 +
10793 + encoded_list = list;
10794 + err = vfs_listxattr(lower_dentry, encoded_list, size);
10795 +
10796 +out:
10797 + unionfs_unlock_dentry(dentry);
10798 + unionfs_check_dentry(dentry);
10799 + unionfs_read_unlock(dentry->d_sb);
10800 + return err;
10801 +}
10802 diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
10803 index bb516ce..2599c5b 100644
10804 --- a/include/linux/fs_stack.h
10805 +++ b/include/linux/fs_stack.h
10806 @@ -1,17 +1,28 @@
10807 +/*
10808 + * Copyright (c) 2003-2007 Erez Zadok
10809 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10810 + * Copyright (c) 2003-2007 Stony Brook University
10811 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10812 + *
10813 + * This program is free software; you can redistribute it and/or modify
10814 + * it under the terms of the GNU General Public License version 2 as
10815 + * published by the Free Software Foundation.
10816 + */
10817 +
10818 #ifndef _LINUX_FS_STACK_H
10819 #define _LINUX_FS_STACK_H
10820
10821 -/* This file defines generic functions used primarily by stackable
10822 +/*
10823 + * This file defines generic functions used primarily by stackable
10824 * filesystems; none of these functions require i_mutex to be held.
10825 */
10826
10827 #include <linux/fs.h>
10828
10829 /* externs for fs/stack.c */
10830 -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
10831 - int (*get_nlinks)(struct inode *));
10832 -
10833 -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
10834 +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
10835 +extern void fsstack_copy_inode_size(struct inode *dst,
10836 + const struct inode *src);
10837
10838 /* inlines */
10839 static inline void fsstack_copy_attr_atime(struct inode *dest,
10840 @@ -28,4 +39,4 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
10841 dest->i_ctime = src->i_ctime;
10842 }
10843
10844 -#endif /* _LINUX_FS_STACK_H */
10845 +#endif /* not _LINUX_FS_STACK_H */
10846 diff --git a/include/linux/magic.h b/include/linux/magic.h
10847 index a9c6567..a6751f6 100644
10848 --- a/include/linux/magic.h
10849 +++ b/include/linux/magic.h
10850 @@ -35,6 +35,8 @@
10851 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
10852 #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
10853
10854 +#define UNIONFS_SUPER_MAGIC 0xf15f083d
10855 +
10856 #define SMB_SUPER_MAGIC 0x517B
10857 #define USBDEVICE_SUPER_MAGIC 0x9fa2
10858
10859 diff --git a/include/linux/mm.h b/include/linux/mm.h
10860 index 60e0e4a..c680669 100644
10861 --- a/include/linux/mm.h
10862 +++ b/include/linux/mm.h
10863 @@ -1157,6 +1157,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
10864 void __user *, size_t *, loff_t *);
10865 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
10866 unsigned long lru_pages);
10867 +extern void drop_pagecache_sb(struct super_block *);
10868 void drop_pagecache(void);
10869 void drop_slab(void);
10870
10871 diff --git a/include/linux/namei.h b/include/linux/namei.h
10872 index d39a5a6..5e4a22d 100644
10873 --- a/include/linux/namei.h
10874 +++ b/include/linux/namei.h
10875 @@ -3,6 +3,7 @@
10876
10877 #include <linux/dcache.h>
10878 #include <linux/linkage.h>
10879 +#include <linux/mount.h>
10880
10881 struct vfsmount;
10882
10883 @@ -47,6 +48,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
10884 * - internal "there are more path compnents" flag
10885 * - locked when lookup done with dcache_lock held
10886 * - dentry cache is untrusted; force a real lookup
10887 + * - lookup path from given dentry/vfsmount pair
10888 */
10889 #define LOOKUP_FOLLOW 1
10890 #define LOOKUP_DIRECTORY 2
10891 @@ -54,6 +56,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
10892 #define LOOKUP_PARENT 16
10893 #define LOOKUP_NOALT 32
10894 #define LOOKUP_REVAL 64
10895 +#define LOOKUP_ONE 128
10896 /*
10897 * Intent data
10898 */
10899 @@ -81,7 +84,14 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry
10900 extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
10901 extern void release_open_intent(struct nameidata *);
10902
10903 -extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
10904 +extern struct dentry * lookup_one_len_nd(const char *, struct dentry *,
10905 + int, struct nameidata *);
10906 +
10907 +static inline struct dentry *lookup_one_len(const char *name,
10908 + struct dentry *dir, int len)
10909 +{
10910 + return lookup_one_len_nd(name, dir, len, NULL);
10911 +}
10912
10913 extern int follow_down(struct vfsmount **, struct dentry **);
10914 extern int follow_up(struct vfsmount **, struct dentry **);
10915 @@ -99,4 +109,16 @@ static inline char *nd_get_link(struct nameidata *nd)
10916 return nd->saved_names[nd->depth];
10917 }
10918
10919 +static inline void pathget(struct path *path)
10920 +{
10921 + mntget(path->mnt);
10922 + dget(path->dentry);
10923 +}
10924 +
10925 +static inline void pathput(struct path *path)
10926 +{
10927 + dput(path->dentry);
10928 + mntput(path->mnt);
10929 +}
10930 +
10931 #endif /* _LINUX_NAMEI_H */
10932 diff --git a/include/linux/slab.h b/include/linux/slab.h
10933 index 1ef822e..5f54979 100644
10934 --- a/include/linux/slab.h
10935 +++ b/include/linux/slab.h
10936 @@ -72,6 +72,7 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
10937 */
10938 void *__kmalloc(size_t, gfp_t);
10939 void *__kzalloc(size_t, gfp_t);
10940 +void * __must_check krealloc(const void *, size_t, gfp_t);
10941 void kfree(const void *);
10942 unsigned int ksize(const void *);
10943
10944 diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
10945 new file mode 100644
10946 index 0000000..d13eb48
10947 --- /dev/null
10948 +++ b/include/linux/union_fs.h
10949 @@ -0,0 +1,25 @@
10950 +/*
10951 + * Copyright (c) 2003-2007 Erez Zadok
10952 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10953 + * Copyright (c) 2003-2007 Stony Brook University
10954 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10955 + *
10956 + * This program is free software; you can redistribute it and/or modify
10957 + * it under the terms of the GNU General Public License version 2 as
10958 + * published by the Free Software Foundation.
10959 + */
10960 +
10961 +#ifndef _LINUX_UNION_FS_H
10962 +#define _LINUX_UNION_FS_H
10963 +
10964 +/*
10965 + * DEFINITIONS FOR USER AND KERNEL CODE:
10966 + */
10967 +# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
10968 +# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
10969 +
10970 +/* We don't support normal remount, but unionctl uses it. */
10971 +# define UNIONFS_REMOUNT_MAGIC 0x4a5a4380
10972 +
10973 +#endif /* _LINUX_UNION_FS_H */
10974 +
10975 diff --git a/mm/slab.c b/mm/slab.c
10976 index 4cbac24..d288d8c 100644
10977 --- a/mm/slab.c
10978 +++ b/mm/slab.c
10979 @@ -3736,6 +3736,53 @@ void *__kmalloc(size_t size, gfp_t flags)
10980 EXPORT_SYMBOL(__kmalloc);
10981 #endif
10982
10983 +
10984 +/**
10985 + * krealloc - reallocate memory. The contents will remain unchanged.
10986 + * @p: object to reallocate memory for.
10987 + * @new_size: how many bytes of memory are required.
10988 + * @flags: the type of memory to allocate.
10989 + *
10990 + * The contents of the object pointed to are preserved up to the
10991 + * lesser of the new and old sizes. If @p is %NULL, krealloc()
10992 + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
10993 + * %NULL pointer, the object pointed to is freed.
10994 + */
10995 +void *krealloc(const void *p, size_t new_size, gfp_t flags)
10996 +{
10997 + struct kmem_cache *cache, *new_cache;
10998 + void *ret;
10999 +
11000 + if (unlikely(!p))
11001 + return kmalloc_track_caller(new_size, flags);
11002 +
11003 + if (unlikely(!new_size)) {
11004 + kfree(p);
11005 + return NULL;
11006 + }
11007 +
11008 + cache = virt_to_cache(p);
11009 + new_cache = __find_general_cachep(new_size, flags);
11010 +
11011 + /*
11012 + * If new size fits in the current cache, bail out.
11013 + */
11014 + if (likely(cache == new_cache))
11015 + return (void *)p;
11016 +
11017 + /*
11018 + * We are on the slow-path here so do not use __cache_alloc
11019 + * because it bloats kernel text.
11020 + */
11021 + ret = kmalloc_track_caller(new_size, flags);
11022 + if (ret) {
11023 + memcpy(ret, p, min(new_size, ksize(p)));
11024 + kfree(p);
11025 + }
11026 + return ret;
11027 +}
11028 +EXPORT_SYMBOL(krealloc);
11029 +
11030 /**
11031 * kmem_cache_free - Deallocate an object
11032 * @cachep: The cache the allocation was from.
11033 diff --git a/mm/slob.c b/mm/slob.c
11034 index c683d35..fec651b 100644
11035 --- a/mm/slob.c
11036 +++ b/mm/slob.c
11037 @@ -181,6 +181,39 @@ void *__kmalloc(size_t size, gfp_t gfp)
11038 }
11039 EXPORT_SYMBOL(__kmalloc);
11040
11041 +/**
11042 + * krealloc - reallocate memory. The contents will remain unchanged.
11043 + *
11044 + * @p: object to reallocate memory for.
11045 + * @new_size: how many bytes of memory are required.
11046 + * @flags: the type of memory to allocate.
11047 + *
11048 + * The contents of the object pointed to are preserved up to the
11049 + * lesser of the new and old sizes. If @p is %NULL, krealloc()
11050 + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
11051 + * %NULL pointer, the object pointed to is freed.
11052 + */
11053 +void *krealloc(const void *p, size_t new_size, gfp_t flags)
11054 +{
11055 + void *ret;
11056 +
11057 + if (unlikely(!p))
11058 + return kmalloc_track_caller(new_size, flags);
11059 +
11060 + if (unlikely(!new_size)) {
11061 + kfree(p);
11062 + return NULL;
11063 + }
11064 +
11065 + ret = kmalloc_track_caller(new_size, flags);
11066 + if (ret) {
11067 + memcpy(ret, p, min(new_size, ksize(p)));
11068 + kfree(p);
11069 + }
11070 + return ret;
11071 +}
11072 +EXPORT_SYMBOL(krealloc);
11073 +
11074 void kfree(const void *block)
11075 {
11076 bigblock_t *bb, **last = &bigblocks;