Magellan Linux

Annotation of /trunk/kernel26-alx/patches-2.6.21-r13/0153-2.6.21-unionfs-2.1.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 660 - (hide annotations) (download)
Tue Jun 24 08:30:06 2008 UTC (15 years, 11 months ago) by niro
File size: 318435 byte(s)
-falling back to 2.6.21-magellan-r13 base to fix via epia boot quirks

1 niro 660 diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
2     index 5717858..2ef035e 100644
3     --- a/Documentation/filesystems/00-INDEX
4     +++ b/Documentation/filesystems/00-INDEX
5     @@ -84,6 +84,8 @@ udf.txt
6     - info and mount options for the UDF filesystem.
7     ufs.txt
8     - info on the ufs filesystem.
9     +unionfs/
10     + - info on the unionfs filesystem
11     vfat.txt
12     - info on using the VFAT filesystem used in Windows NT and Windows 95
13     vfs.txt
14     diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
15     new file mode 100644
16     index 0000000..96fdf67
17     --- /dev/null
18     +++ b/Documentation/filesystems/unionfs/00-INDEX
19     @@ -0,0 +1,10 @@
20     +00-INDEX
21     + - this file.
22     +concepts.txt
23     + - A brief introduction of concepts.
24     +issues.txt
25     + - A summary of known issues with unionfs.
26     +rename.txt
27     + - Information regarding rename operations.
28     +usage.txt
29     + - Usage information and examples.
30     diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
31     new file mode 100644
32     index 0000000..eb74aac
33     --- /dev/null
34     +++ b/Documentation/filesystems/unionfs/concepts.txt
35     @@ -0,0 +1,181 @@
36     +Unionfs 2.0 CONCEPTS:
37     +=====================
38     +
39     +This file describes the concepts needed by a namespace unification file
40     +system.
41     +
42     +
43     +Branch Priority:
44     +================
45     +
46     +Each branch is assigned a unique priority - starting from 0 (highest
47     +priority). No two branches can have the same priority.
48     +
49     +
50     +Branch Mode:
51     +============
52     +
53     +Each branch is assigned a mode - read-write or read-only. This allows
54     +directories on media mounted read-write to be used in a read-only manner.
55     +
56     +
57     +Whiteouts:
58     +==========
59     +
60     +A whiteout removes a file name from the namespace. Whiteouts are needed when
61     +one attempts to remove a file on a read-only branch.
62     +
63     +Suppose we have a two-branch union, where branch 0 is read-write and branch
64     +1 is read-only. And a file 'foo' on branch 1:
65     +
66     +./b0/
67     +./b1/
68     +./b1/foo
69     +
70     +The unified view would simply be:
71     +
72     +./union/
73     +./union/foo
74     +
75     +Since 'foo' is stored on a read-only branch, it cannot be removed. A
76     +whiteout is used to remove the name 'foo' from the unified namespace. Again,
77     +since branch 1 is read-only, the whiteout cannot be created there. So, we
78     +try on a higher priority (lower numerically) branch and create the whiteout
79     +there.
80     +
81     +./b0/
82     +./b0/.wh.foo
83     +./b1/
84     +./b1/foo
85     +
86     +Later, when Unionfs traverses branches (due to lookup or readdir), it
87     +eliminates 'foo' from the namespace (as well as the whiteout itself.)
88     +
89     +
90     +Duplicate Elimination:
91     +======================
92     +
93     +It is possible for files on different branches to have the same name.
94     +Unionfs then has to select which instance of the file to show to the user.
95     +Given the fact that each branch has a priority associated with it, the
96     +simplest solution is to take the instance from the highest priority
97     +(numerically lowest value) and "hide" the others.
98     +
99     +
100     +Copyup:
101     +=======
102     +
103     +When a change is made to the contents of a file's data or meta-data, they
104     +have to be stored somewhere. The best way is to create a copy of the
105     +original file on a branch that is writable, and then redirect the write
106     +through to this copy. The copy must be made on a higher priority branch so
107     +that lookup and readdir return this newer "version" of the file rather than
108     +the original (see duplicate elimination).
109     +
110     +
111     +Cache Coherency:
112     +================
113     +
114     +Unionfs users often want to be able to modify files and directories directly
115     +on the lower branches, and have those changes be visible at the Unionfs
116     +level. This means that data (e.g., pages) and meta-data (dentries, inodes,
117     +open files, etc.) have to be synchronized between the upper and lower
118     +layers. In other words, the newest changes from a layer below have to be
119     +propagated to the Unionfs layer above. If the two layers are not in sync, a
120     +cache incoherency ensues, which could lead to application failures and even
121     +oopses. The Linux kernel, however, has a rather limited set of mechanisms
122     +to ensure this inter-layer cache coherency---so Unionfs has to do most of
123     +the hard work on its own.
124     +
125     +Maintaining Invariants:
126     +
127     +The way Unionfs ensures cache coherency is as follows. At each entry point
128     +to a Unionfs file system method, we call a utility function to validate the
129     +primary objects of this method. Generally, we call unionfs_file_revalidate
130     +on open files, and __unionfs_d_revalidate_chain on dentries (which also
131     +validates inodes). These utility functions check to see whether the upper
132     +Unionfs object is in sync with any of the lower objects that it represents.
133     +The checks we perform include whether the Unionfs superblock has a newer
134     +generation number, or if any of the lower objects mtime's or ctime's are
135     +newer. (Note: generation numbers change when branch-management commands are
136     +issued, so in a way, maintaining cache coherency is also very important for
137     +branch-management.) If indeed we determine that any Unionfs object is no
138     +longer in sync with its lower counterparts, then we rebuild that object
139     +similarly to how we do so for branch-management.
140     +
141     +While rebuilding Unionfs's objects, we also purge any page mappings and
142     +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
143     +ensure that Unionfs will re-get the newer data from the lower branches. We
144     +perform this purging only if the Unionfs operation in question is a reading
145     +operation; if Unionfs is performing a data writing operation (e.g., ->write,
146     +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
147     +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
148     +considered more authoritative anyway, as they are newer and will overwrite
149     +any lower pages.
150     +
151     +Unionfs maintains the following important invariant regarding mtime's,
152     +ctime's, and atime's: the upper inode object's times are the max() of all of
153     +the lower ones. For non-directory objects, there's only one object below,
154     +so the mapping is simple; for directory objects, there could be multiple
155     +lower objects and we have to sync up with the newest one of all the lower
156     +ones. This invariant is important to maintain, especially for directories
157     +(besides, we need this to be POSIX compliant). A union could comprise
158     +multiple writable branches, each of which could change. If we don't reflect
159     +the newest possible mtime/ctime, some applications could fail. For example,
160     +NFSv2/v3 exports check for newer directory mtimes on the server to determine
161     +if the client-side attribute cache should be purged.
162     +
163     +To maintain these important invariants, of course, Unionfs carefully
164     +synchronizes upper and lower times in various places. For example, if we
165     +copy-up a file to a top-level branch, the parent directory where the file
166     +was copied up to will now have a new mtime: so after a successful copy-up,
167     +we sync up with the new top-level branch's parent directory mtime.
168     +
169     +Implementation:
170     +
171     +This cache-coherency implementation is efficient because it defers any
172     +synchronizing between the upper and lower layers until absolutely needed.
173     +Consider, for example, a common situation where users perform a lot of lower
174     +changes, such as untarring a whole package. While these take place,
175     +typically the user doesn't access the files via Unionfs; only after the
176     +lower changes are done, does the user try to access the lower files. With
177     +our cache-coherency implementation, the entirety of the changes to the lower
178     +branches will not result in a single CPU cycle spent at the Unionfs level
179     +until the user invokes a system call that goes through Unionfs.
180     +
181     +We have considered two alternate cache-coherency designs. (1) Using the
182     +dentry/inode notify functionality to register interest in finding out about
183     +any lower changes. This is a somewhat limited and also a heavy-handed
184     +approach which could result in many notifications to the Unionfs layer upon
185     +each small change at the lower layer (imagine a file being modified multiple
186     +times in rapid succession). (2) Rewriting the VFS to support explicit
187     +callbacks from lower objects to upper objects. We began exploring such an
188     +implementation, but found it to be very complicated--it would have resulted
189     +in massive VFS/MM changes which are unlikely to be accepted by the LKML
190     +community. We therefore believe that our current cache-coherency design and
191     +implementation represent the best approach at this time.
192     +
193     +Limitations:
194     +
195     +Our implementation works as long as a user process causes Unionfs to be
196     +called, directly or indirectly, even just to do ->d_revalidate: at that
197     +point we purge the current Unionfs data, and the process will see the
198     +new data. For example, a process that continually
199     +re-reads the same file's data will see the NEW data as soon as the lower
200     +file had changed, upon the next read(2) syscall (even if the file is still
201     +open!). However, this doesn't work when the process re-reads the open file's
202     +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
203     +it). Once we respond to ->readpage(s), then the kernel maps the page into
204     +the process's address space and there doesn't appear to be a way to force
205     +the kernel to invalidate those pages/mappings, and force the process to
206     +re-issue ->readpage. If there's a way to invalidate active mappings and
207     +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
208     +the trick).
209     +
210     +Our current Unionfs code has to perform many file-revalidation calls. It
211     +would be really nice if the VFS would export an optional file system hook
212     +->file_revalidate (similarly to dentry->d_revalidate) that will be called
213     +before each VFS op that has a "struct file" in it.
214     +
215     +
216     +For more information, see <http://unionfs.filesystems.org/>.
217     diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
218     new file mode 100644
219     index 0000000..3644fea
220     --- /dev/null
221     +++ b/Documentation/filesystems/unionfs/issues.txt
222     @@ -0,0 +1,15 @@
223     +KNOWN Unionfs 2.0 ISSUES:
224     +=========================
225     +
226     +1. The NFS server returns -EACCES for read-only exports, instead of -EROFS.
227     + This means we can't reliably detect a read-only NFS export.
228     +
229     +2. Unionfs should not use lookup_one_len() on the underlying f/s as it
230     + confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
231     + lower file-system, which eliminates part of the problem. The remaining
232     + calls to lookup_one_len may need to be changed to pass an intent. We are
233     + currently introducing VFS changes to fs/namei.c's do_path_lookup() to
234     + allow proper file lookup and opening in stackable file systems.
235     +
236     +
237     +For more information, see <http://unionfs.filesystems.org/>.
238     diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
239     new file mode 100644
240     index 0000000..e20bb82
241     --- /dev/null
242     +++ b/Documentation/filesystems/unionfs/rename.txt
243     @@ -0,0 +1,31 @@
244     +Rename is a complex beast. The following table shows which rename(2) operations
245     +should succeed and which should fail.
246     +
247     +o: success
248     +E: error (either unionfs or vfs)
249     +X: EXDEV
250     +
251     +none = file does not exist
252     +file = file is a file
253     +dir = file is an empty directory
254     +child= file is a non-empty directory
255     +wh = file is a directory containing only whiteouts; this makes it logically
256     + empty
257     +
258     + none file dir child wh
259     +file o o E E E
260     +dir o E o E o
261     +child X E X E X
262     +wh o E o E o
263     +
264     +
265     +Renaming directories:
266     +=====================
267     +
268     +Whenever an empty (either physically or logically) directory is being renamed,
269     +the following sequence of events should take place:
270     +
271     +1) Remove whiteouts from both source and destination directory
272     +2) Rename source to destination
273     +3) Make destination opaque to prevent anything under it from showing up
274     +
275     diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
276     new file mode 100644
277     index 0000000..c035243
278     --- /dev/null
279     +++ b/Documentation/filesystems/unionfs/usage.txt
280     @@ -0,0 +1,97 @@
281     +Unionfs is a stackable unification file system, which can appear to merge
282     +the contents of several directories (branches), while keeping their physical
283     +content separate. Unionfs is useful for unified source tree management,
284     +merged contents of split CD-ROM, merged separate software package
285     +directories, data grids, and more. Unionfs allows any mix of read-only and
286     +read-write branches, as well as insertion and deletion of branches anywhere
287     +in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
288     +duplicates, partial-error conditions, and more.
289     +
290     +# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
291     +
292     +The available branch-option for the mount command is:
293     +
294     + dirs=branch[=ro|=rw][:...]
295     +
296     +specifies a colon-separated list of the directories that compose the union.
297     +Directories that come earlier in the list have a higher precedence than
298     +those which come later. Additionally, read-only or read-write permissions of
299     +the branch can be specified by appending =ro or =rw (default) to each
300     +directory.
301     +
302     +Syntax:
303     +
304     + dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
305     +
306     +Example:
307     +
308     + dirs=/writable_branch=rw:/read-only_branch=ro
309     +
310     +
311     +DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
312     +======================================
313     +
314     +You can remount a union and change its overall mode, or reconfigure the
315     +branches, as follows.
316     +
317     +To downgrade a union from read-write to read-only:
318     +
319     +# mount -t unionfs -o remount,ro none MOUNTPOINT
320     +
321     +To upgrade a union from read-only to read-write:
322     +
323     +# mount -t unionfs -o remount,rw none MOUNTPOINT
324     +
325     +To delete a branch /foo, regardless where it is in the current union:
326     +
327     +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
328     +
329     +To insert (add) a branch /foo before /bar:
330     +
331     +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
332     +
333     +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
334     +
335     +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
336     +
337     +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
338     +new highest-priority branch), you can use the above syntax, or use a short
339     +hand version as follows:
340     +
341     +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
342     +
343     +To append a branch to the very end (new lowest-priority branch):
344     +
345     +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
346     +
347     +To append a branch to the very end (new lowest-priority branch), in
348     +read-only mode:
349     +
350     +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
351     +
352     +Finally, to change the mode of one existing branch, say /foo, from read-only
353     +to read-write, and change /bar from read-write to read-only:
354     +
355     +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
356     +
357     +
358     +CACHE CONSISTENCY
359     +=================
360     +
361     +If you modify any file on any of the lower branches directly, while there is
362     +a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
363     +to purge its caches and re-get the objects. To do that, you have to
364     +increment the generation number of the superblock using the following
365     +command:
366     +
367     +# mount -t unionfs -o remount,incgen none MOUNTPOINT
368     +
369     +Note that the older way of incrementing the generation number using an
370     +ioctl, is no longer supported in Unionfs 2.0. Ioctls in general are not
371     +encouraged. Plus, an ioctl is a per-file concept, whereas the generation
372     +number is a per-file-system concept. Worse, such an ioctl requires an open
373     +file, which then has to be invalidated by the very nature of the generation
374     +number increase (read: the old generation increase ioctl was pretty racy).
375     +
376     +
377     +For more information, see <http://unionfs.filesystems.org/>.
378     diff --git a/MAINTAINERS b/MAINTAINERS
379     index 277877a..d694ced 100644
380     --- a/MAINTAINERS
381     +++ b/MAINTAINERS
382     @@ -3364,6 +3364,15 @@ L: linux-kernel@vger.kernel.org
383     W: http://www.kernel.dk
384     S: Maintained
385    
386     +UNIONFS
387     +P: Erez Zadok
388     +M: ezk@cs.sunysb.edu
389     +P: Josef "Jeff" Sipek
390     +M: jsipek@cs.sunysb.edu
391     +L: unionfs@filesystems.org
392     +W: http://unionfs.filesystems.org
393     +S: Maintained
394     +
395     USB ACM DRIVER
396     P: Oliver Neukum
397     M: oliver@neukum.name
398     diff --git a/fs/Kconfig b/fs/Kconfig
399     index 3c4886b..ac4dc6d 100644
400     --- a/fs/Kconfig
401     +++ b/fs/Kconfig
402     @@ -1034,6 +1034,41 @@ config CONFIGFS_FS
403    
404     endmenu
405    
406     +menu "Layered filesystems"
407     +
408     +config ECRYPT_FS
409     + tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
410     + depends on EXPERIMENTAL && KEYS && CRYPTO && NET
411     + help
412     + Encrypted filesystem that operates on the VFS layer. See
413     + <file:Documentation/ecryptfs.txt> to learn more about
414     + eCryptfs. Userspace components are required and can be
415     + obtained from <http://ecryptfs.sf.net>.
416     +
417     + To compile this file system support as a module, choose M here: the
418     + module will be called ecryptfs.
419     +
420     +config UNION_FS
421     + tristate "Union file system (EXPERIMENTAL)"
422     + depends on EXPERIMENTAL
423     + help
424     + Unionfs is a stackable unification file system, which appears to
425     + merge the contents of several directories (branches), while keeping
426     + their physical content separate.
427     +
428     + See <http://unionfs.filesystems.org> for details
429     +
430     +config UNION_FS_XATTR
431     + bool "Unionfs extended attributes"
432     + depends on UNION_FS
433     + help
434     + Extended attributes are name:value pairs associated with inodes by
435     + the kernel or by users (see the attr(5) manual page).
436     +
437     + If unsure, say N.
438     +
439     +endmenu
440     +
441     menu "Miscellaneous filesystems"
442    
443     config ADFS_FS
444     @@ -1086,18 +1121,6 @@ config AFFS_FS
445     To compile this file system support as a module, choose M here: the
446     module will be called affs. If unsure, say N.
447    
448     -config ECRYPT_FS
449     - tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
450     - depends on EXPERIMENTAL && KEYS && CRYPTO && NET
451     - help
452     - Encrypted filesystem that operates on the VFS layer. See
453     - <file:Documentation/ecryptfs.txt> to learn more about
454     - eCryptfs. Userspace components are required and can be
455     - obtained from <http://ecryptfs.sf.net>.
456     -
457     - To compile this file system support as a module, choose M here: the
458     - module will be called ecryptfs.
459     -
460     config HFS_FS
461     tristate "Apple Macintosh file system support (EXPERIMENTAL)"
462     depends on BLOCK && EXPERIMENTAL
463     diff --git a/fs/Makefile b/fs/Makefile
464     index 9edf411..b490b1a 100644
465     --- a/fs/Makefile
466     +++ b/fs/Makefile
467     @@ -114,3 +114,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
468     obj-$(CONFIG_DEBUG_FS) += debugfs/
469     obj-$(CONFIG_OCFS2_FS) += ocfs2/
470     obj-$(CONFIG_GFS2_FS) += gfs2/
471     +obj-$(CONFIG_UNION_FS) += unionfs/
472     diff --git a/fs/drop_caches.c b/fs/drop_caches.c
473     index 03ea769..6a7aa05 100644
474     --- a/fs/drop_caches.c
475     +++ b/fs/drop_caches.c
476     @@ -3,6 +3,7 @@
477     */
478    
479     #include <linux/kernel.h>
480     +#include <linux/module.h>
481     #include <linux/mm.h>
482     #include <linux/fs.h>
483     #include <linux/writeback.h>
484     @@ -12,7 +13,7 @@
485     /* A global variable is a bit ugly, but it keeps the code simple */
486     int sysctl_drop_caches;
487    
488     -static void drop_pagecache_sb(struct super_block *sb)
489     +void drop_pagecache_sb(struct super_block *sb)
490     {
491     struct inode *inode;
492    
493     @@ -24,6 +25,7 @@ static void drop_pagecache_sb(struct super_block *sb)
494     }
495     spin_unlock(&inode_lock);
496     }
497     +EXPORT_SYMBOL(drop_pagecache_sb);
498    
499     void drop_pagecache(void)
500     {
501     diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
502     index cb20b96..a8c1686 100644
503     --- a/fs/ecryptfs/dentry.c
504     +++ b/fs/ecryptfs/dentry.c
505     @@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
506     struct inode *lower_inode =
507     ecryptfs_inode_to_lower(dentry->d_inode);
508    
509     - fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
510     + fsstack_copy_attr_all(dentry->d_inode, lower_inode);
511     }
512     out:
513     return rc;
514     diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
515     index 1548be2..d37cc12 100644
516     --- a/fs/ecryptfs/inode.c
517     +++ b/fs/ecryptfs/inode.c
518     @@ -280,7 +280,9 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
519     int rc = 0;
520     struct dentry *lower_dir_dentry;
521     struct dentry *lower_dentry;
522     + struct dentry *dentry_save;
523     struct vfsmount *lower_mnt;
524     + struct vfsmount *mnt_save;
525     char *encoded_name;
526     unsigned int encoded_namelen;
527     struct ecryptfs_crypt_stat *crypt_stat = NULL;
528     @@ -308,9 +310,13 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
529     }
530     ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
531     "= [%d]\n", encoded_name, encoded_namelen);
532     - lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
533     - encoded_namelen - 1);
534     + dentry_save = nd->dentry;
535     + mnt_save = nd->mnt;
536     + lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry,
537     + (encoded_namelen - 1), nd);
538     kfree(encoded_name);
539     + nd->mnt = mnt_save;
540     + nd->dentry = dentry_save;
541     if (IS_ERR(lower_dentry)) {
542     ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
543     rc = PTR_ERR(lower_dentry);
544     @@ -597,9 +603,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
545     lower_new_dir_dentry->d_inode, lower_new_dentry);
546     if (rc)
547     goto out_lock;
548     - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
549     + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
550     if (new_dir != old_dir)
551     - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
552     + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
553     out_lock:
554     unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
555     dput(lower_new_dentry->d_parent);
556     @@ -892,7 +898,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
557     }
558     rc = notify_change(lower_dentry, ia);
559     out:
560     - fsstack_copy_attr_all(inode, lower_inode, NULL);
561     + fsstack_copy_attr_all(inode, lower_inode);
562     return rc;
563     }
564    
565     diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
566     index fc4a3a2..07c3a58 100644
567     --- a/fs/ecryptfs/main.c
568     +++ b/fs/ecryptfs/main.c
569     @@ -151,7 +151,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
570     d_add(dentry, inode);
571     else
572     d_instantiate(dentry, inode);
573     - fsstack_copy_attr_all(inode, lower_inode, NULL);
574     + fsstack_copy_attr_all(inode, lower_inode);
575     /* This size will be overwritten for real files w/ headers and
576     * other metadata */
577     fsstack_copy_inode_size(inode, lower_inode);
578     diff --git a/fs/namei.c b/fs/namei.c
579     index ee60cc4..436e9fa 100644
580     --- a/fs/namei.c
581     +++ b/fs/namei.c
582     @@ -1125,6 +1125,10 @@ static int fastcall do_path_lookup(int dfd, const char *name,
583     nd->mnt = mntget(fs->rootmnt);
584     nd->dentry = dget(fs->root);
585     read_unlock(&fs->lock);
586     + } else if (flags & LOOKUP_ONE) {
587     + /* nd->mnt and nd->dentry already set, just grab references */
588     + mntget(nd->mnt);
589     + dget(nd->dentry);
590     } else if (dfd == AT_FDCWD) {
591     read_lock(&fs->lock);
592     nd->mnt = mntget(fs->pwdmnt);
593     @@ -1293,29 +1297,37 @@ static struct dentry *lookup_hash(struct nameidata *nd)
594     }
595    
596     /* SMP-safe */
597     -struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
598     +static inline int __lookup_one_len(const char *name, struct qstr *this, struct dentry *base, int len)
599     {
600     unsigned long hash;
601     - struct qstr this;
602     unsigned int c;
603    
604     - this.name = name;
605     - this.len = len;
606     + this->name = name;
607     + this->len = len;
608     if (!len)
609     - goto access;
610     + return -EACCES;
611    
612     hash = init_name_hash();
613     while (len--) {
614     c = *(const unsigned char *)name++;
615     if (c == '/' || c == '\0')
616     - goto access;
617     + return -EACCES;
618     hash = partial_name_hash(c, hash);
619     }
620     - this.hash = end_name_hash(hash);
621     + this->hash = end_name_hash(hash);
622     + return 0;
623     +}
624     +
625     +struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
626     + int len, struct nameidata *nd)
627     +{
628     + int err;
629     + struct qstr this;
630    
631     - return __lookup_hash(&this, base, NULL);
632     -access:
633     - return ERR_PTR(-EACCES);
634     + err = __lookup_one_len(name, &this, base, len);
635     + if (err)
636     + return ERR_PTR(err);
637     + return __lookup_hash(&this, base, nd);
638     }
639    
640     /*
641     @@ -2758,7 +2770,7 @@ EXPORT_SYMBOL(follow_up);
642     EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
643     EXPORT_SYMBOL(getname);
644     EXPORT_SYMBOL(lock_rename);
645     -EXPORT_SYMBOL(lookup_one_len);
646     +EXPORT_SYMBOL(lookup_one_len_nd);
647     EXPORT_SYMBOL(page_follow_link_light);
648     EXPORT_SYMBOL(page_put_link);
649     EXPORT_SYMBOL(page_readlink);
650     diff --git a/fs/stack.c b/fs/stack.c
651     index 67716f6..56fd0df 100644
652     --- a/fs/stack.c
653     +++ b/fs/stack.c
654     @@ -1,8 +1,20 @@
655     +/*
656     + * Copyright (c) 2003-2007 Erez Zadok
657     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
658     + * Copyright (c) 2003-2007 Stony Brook University
659     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
660     + *
661     + * This program is free software; you can redistribute it and/or modify
662     + * it under the terms of the GNU General Public License version 2 as
663     + * published by the Free Software Foundation.
664     + */
665     +
666     #include <linux/module.h>
667     #include <linux/fs.h>
668     #include <linux/fs_stack.h>
669    
670     -/* does _NOT_ require i_mutex to be held.
671     +/*
672     + * does _NOT_ require i_mutex to be held.
673     *
674     * This function cannot be inlined since i_size_{read,write} is rather
675     * heavy-weight on 32-bit systems
676     @@ -14,11 +26,11 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
677     }
678     EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
679    
680     -/* copy all attributes; get_nlinks is optional way to override the i_nlink
681     +/*
682     + * copy all attributes; i_nlink is always taken from src by straight
683     * copying
684     */
685     -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
686     - int (*get_nlinks)(struct inode *))
687     +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
688     {
689     dest->i_mode = src->i_mode;
690     dest->i_uid = src->i_uid;
691     @@ -29,14 +41,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
692     dest->i_ctime = src->i_ctime;
693     dest->i_blkbits = src->i_blkbits;
694     dest->i_flags = src->i_flags;
695     -
696     - /*
697     - * Update the nlinks AFTER updating the above fields, because the
698     - * get_links callback may depend on them.
699     - */
700     - if (!get_nlinks)
701     - dest->i_nlink = src->i_nlink;
702     - else
703     - dest->i_nlink = (*get_nlinks)(dest);
704     + dest->i_nlink = src->i_nlink;
705     }
706     EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
707     diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
708     new file mode 100644
709     index 0000000..020b505
710     --- /dev/null
711     +++ b/fs/unionfs/Makefile
712     @@ -0,0 +1,24 @@
713     +UNIONFS_VERSION="2.1 (for 2.6.21.6)"
714     +
715     +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
716     +
717     +obj-$(CONFIG_UNION_FS) += unionfs.o
718     +
719     +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
720     + rdstate.o copyup.o dirhelper.o rename.o \
721     + unlink.o lookup.o commonfops.o dirfops.o sioq.o \
722     + mmap.o
723     +
724     +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
725     +
726     +# To get debugging output (debug.o built in, -DUNIONFS_DEBUG=1), uncomment the
727     +# line below or set the option in linux-x.y.z/fs/unionfs/local.mk
728     +#CONFIG_UNIONFS_DEBUG=y
729     +
730     +# Allow users to override debug options in a separate file
731     +-include fs/unionfs/local.mk
732     +
733     +ifeq ($(CONFIG_UNIONFS_DEBUG),y)
734     +unionfs-y += debug.o
735     +EXTRA_CFLAGS += -DUNIONFS_DEBUG=1
736     +endif
737     diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
738     new file mode 100644
739     index 0000000..d77608e
740     --- /dev/null
741     +++ b/fs/unionfs/commonfops.c
742     @@ -0,0 +1,837 @@
743     +/*
744     + * Copyright (c) 2003-2007 Erez Zadok
745     + * Copyright (c) 2003-2006 Charles P. Wright
746     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
747     + * Copyright (c) 2005-2006 Junjiro Okajima
748     + * Copyright (c) 2005 Arun M. Krishnakumar
749     + * Copyright (c) 2004-2006 David P. Quigley
750     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
751     + * Copyright (c) 2003 Puja Gupta
752     + * Copyright (c) 2003 Harikesavan Krishnan
753     + * Copyright (c) 2003-2007 Stony Brook University
754     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
755     + *
756     + * This program is free software; you can redistribute it and/or modify
757     + * it under the terms of the GNU General Public License version 2 as
758     + * published by the Free Software Foundation.
759     + */
760     +
761     +#include "union.h"
762     +
763     +/*
764     + * 1) Copyup the file from branch bstart to bindex under the temp name
765     + * '.unionfs<lower inode#><counter>' (hex digits) - obviously stolen from
766     + * NFS's silly rename; 2) unlink the lower dentry at dbstart(dentry)
767     + */
768     +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
769     + int bstart, int bindex)
770     +{
771     + static unsigned int counter;
772     + const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
773     + const int countersize = sizeof(counter) * 2;
774     + const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
775     + char name[nlen + 1];
776     + int err;
777     + struct dentry *tmp_dentry = NULL;
778     + struct dentry *lower_dentry;
779     + struct dentry *lower_dir_dentry = NULL;
780     +
781     + lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
782     +
783     + sprintf(name, ".unionfs%*.*lx",
784     + i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
785     +
786     + /*
787     + * Loop, looking for an unused temp name to copyup to.
788     + *
789     + * It's somewhat silly that we look for a free temp name in the
790     + * source branch (bstart) instead of the dest branch (bindex), where
791     + * the final name will be created. We _will_ catch it if somehow
792     + * the name exists in the dest branch, but it'd be nice to catch it
793     + * sooner than later.
794     + */
795     +retry:
796     + tmp_dentry = NULL;
797     + do {
798     + char *suffix = name + nlen - countersize;
799     +
800     + dput(tmp_dentry);
801     + counter++;
802     + sprintf(suffix, "%*.*x", countersize, countersize, counter);
803     +
804     + printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n",
805     + dentry->d_name.name, name);
806     +
807     + tmp_dentry = lookup_one_len(name, lower_dentry->d_parent,
808     + nlen);
809     + if (IS_ERR(tmp_dentry)) {
810     + err = PTR_ERR(tmp_dentry);
811     + goto out;
812     + }
813     + /* don't dput here because of do-while condition eval order */
814     + } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
815     + dput(tmp_dentry);
816     +
817     + err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
818     + bindex, file->f_path.dentry->d_inode->i_size);
819     + if (err) {
820     + if (err == -EEXIST)
821     + goto retry;
822     + goto out;
823     + }
824     +
825     + /* bring it to the same state as an unlinked file */
826     + lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
827     + if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
828     + atomic_inc(&lower_dentry->d_inode->i_count);
829     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
830     + lower_dentry->d_inode);
831     + }
832     + lower_dir_dentry = lock_parent(lower_dentry);
833     + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
834     + unlock_dir(lower_dir_dentry);
835     +
836     +out:
837     + if (!err)
838     + unionfs_check_dentry(dentry);
839     + return err;
840     +}
841     +
842     +/*
843     + * fput every open lower file (dropping its branch's open-file count) and
844     + * free both the lower_files and saved_branch_ids arrays of the upper file
845     + */
846     +static void cleanup_file(struct file *file)
847     +{
848     + int bindex, bstart, bend;
849     + struct file **lower_files;
850     + struct file *lower_file;
851     + struct super_block *sb = file->f_path.dentry->d_sb;
852     +
853     + lower_files = UNIONFS_F(file)->lower_files;
854     + bstart = fbstart(file);
855     + bend = fbend(file);
856     +
857     + for (bindex = bstart; bindex <= bend; bindex++) {
858     + int i; /* holds (possibly) updated branch index */
859     + int old_bid;
860     +
861     + lower_file = unionfs_lower_file_idx(file, bindex);
862     + if (!lower_file)
863     + continue;
864     +
865     + /*
866     + * Find new index of matching branch with an open
867     + * file, since branches could have been added or
868     + * deleted causing the one with open files to shift.
869     + */
870     + old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
871     + i = branch_id_to_idx(sb, old_bid);
872     + if (i < 0) {
873     + printk(KERN_ERR "unionfs: no superblock for "
874     + "file %p\n", file);
875     + continue;
876     + }
877     +
878     + /* decrement count of open files */
879     + branchput(sb, i);
880     + /*
881     + * fput will perform an mntput for us on the correct branch.
882     + * Although we're using the file's old branch configuration,
883     + * bindex, which is the old index, correctly points to the
884     + * right branch in the file's branch list. In other words,
885     + * we're going to mntput the correct branch even if branches
886     + * have been added/removed.
887     + */
888     + fput(lower_file);
889     + UNIONFS_F(file)->lower_files[bindex] = NULL;
890     + UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
891     + }
892     +
893     + UNIONFS_F(file)->lower_files = NULL;
894     + kfree(lower_files);
895     + kfree(UNIONFS_F(file)->saved_branch_ids);
896     + /* set to NULL because caller needs to know if to kfree on error */
897     + UNIONFS_F(file)->saved_branch_ids = NULL;
898     +}
899     +
900     +/* open all lower files for a given file */
901     +static int open_all_files(struct file *file)
902     +{
903     + int bindex, bstart, bend, err = 0;
904     + struct file *lower_file;
905     + struct dentry *lower_dentry;
906     + struct dentry *dentry = file->f_path.dentry;
907     + struct super_block *sb = dentry->d_sb;
908     +
909     + bstart = dbstart(dentry);
910     + bend = dbend(dentry);
911     +
912     + for (bindex = bstart; bindex <= bend; bindex++) {
913     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
914     + if (!lower_dentry)
915     + continue;
916     +
917     + dget(lower_dentry);
918     + unionfs_mntget(dentry, bindex);
919     + branchget(sb, bindex);
920     +
921     + lower_file =
922     + dentry_open(lower_dentry,
923     + unionfs_lower_mnt_idx(dentry, bindex),
924     + file->f_flags);
925     + if (IS_ERR(lower_file)) {
926     + err = PTR_ERR(lower_file);
927     + goto out;
928     + } else
929     + unionfs_set_lower_file_idx(file, bindex, lower_file);
930     + }
931     +out:
932     + return err;
933     +}
934     +
935     +/* open the highest priority file for a given upper file (or copy it up) */
936     +static int open_highest_file(struct file *file, int willwrite)
937     +{
938     + int bindex, bstart, bend, err = 0;
939     + struct file *lower_file;
940     + struct dentry *lower_dentry;
941     + struct dentry *dentry = file->f_path.dentry;
942     + struct inode *parent_inode = dentry->d_parent->d_inode;
943     + struct super_block *sb = dentry->d_sb;
944     + loff_t inode_size = dentry->d_inode->i_size; /* not size_t: 32-bit safe */
945     +
946     + bstart = dbstart(dentry);
947     + bend = dbend(dentry);
948     +
949     + lower_dentry = unionfs_lower_dentry(dentry);
950     + if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
951     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
952     + err = copyup_file(parent_inode, file, bstart, bindex,
953     + inode_size);
954     + if (!err)
955     + break;
956     + }
957     + atomic_set(&UNIONFS_F(file)->generation,
958     + atomic_read(&UNIONFS_I(dentry->d_inode)->
959     + generation));
960     + goto out;
961     + }
962     +
963     + dget(lower_dentry);
964     + unionfs_mntget(dentry, bstart);
965     + lower_file = dentry_open(lower_dentry,
966     + unionfs_lower_mnt_idx(dentry, bstart),
967     + file->f_flags);
968     + if (IS_ERR(lower_file)) {
969     + err = PTR_ERR(lower_file);
970     + goto out;
971     + }
972     + branchget(sb, bstart);
973     + unionfs_set_lower_file(file, lower_file);
974     + /* Fix up the position. */
975     + lower_file->f_pos = file->f_pos;
976     +
977     + memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
978     +out:
979     + return err;
980     +}
981     +
982     +/* perform a delayed copyup of a read-write file on a read-only branch */
983     +static int do_delayed_copyup(struct file *file)
984     +{
985     + int bindex, bstart, bend, err = 0;
986     + struct dentry *dentry = file->f_path.dentry;
987     + struct inode *parent_inode = dentry->d_parent->d_inode;
988     + loff_t inode_size = dentry->d_inode->i_size;
989     +
990     + bstart = fbstart(file);
991     + bend = fbend(file);
992     +
993     + BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
994     +
995     + unionfs_check_file(file);
996     + unionfs_check_dentry(dentry);
997     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
998     + if (!d_deleted(dentry))
999     + err = copyup_file(parent_inode, file, bstart,
1000     + bindex, inode_size);
1001     + else
1002     + err = copyup_deleted_file(file, dentry, bstart,
1003     + bindex);
1004     +
1005     + if (!err)
1006     + break;
1007     + }
1008     + if (err || (bstart <= fbstart(file)))
1009     + goto out;
1010     + bend = fbend(file);
1011     + for (bindex = bstart; bindex <= bend; bindex++) {
1012     + if (unionfs_lower_file_idx(file, bindex)) {
1013     + branchput(dentry->d_sb, bindex);
1014     + fput(unionfs_lower_file_idx(file, bindex));
1015     + unionfs_set_lower_file_idx(file, bindex, NULL);
1016     + }
1017     + if (unionfs_lower_mnt_idx(dentry, bindex)) {
1018     + unionfs_mntput(dentry, bindex);
1019     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1020     + }
1021     + if (unionfs_lower_dentry_idx(dentry, bindex)) {
1022     + BUG_ON(!dentry->d_inode);
1023     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1024     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1025     + NULL);
1026     + dput(unionfs_lower_dentry_idx(dentry, bindex));
1027     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1028     + }
1029     + }
1030     + /* for reg file, we only open it "once": end index = start index */
1031     + fbend(file) = fbstart(file);
1032     + set_dbend(dentry, dbstart(dentry));
1033     + ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1034     +
1035     +out:
1036     + unionfs_check_file(file);
1037     + unionfs_check_dentry(dentry);
1038     + return err;
1039     +}
1040     +
1041     +/*
1042     + * Revalidate the struct file (@file); 0 on success, negative errno if not.
1043     + * @willwrite: 1 if caller may cause changes to the file; 0 otherwise.
1044     + * NOTE(review): the error path kfree()s the arrays without NULLing - confirm
1045     + */
1046     +int unionfs_file_revalidate(struct file *file, int willwrite)
1047     +{
1048     + struct super_block *sb;
1049     + struct dentry *dentry;
1050     + int sbgen, fgen, dgen;
1051     + int bstart, bend;
1052     + int size;
1053     + int err = 0;
1054     +
1055     + dentry = file->f_path.dentry;
1056     + unionfs_lock_dentry(dentry);
1057     + sb = dentry->d_sb;
1058     +
1059     + /*
1060     + * First revalidate the dentry inside struct file,
1061     + * but not unhashed dentries.
1062     + */
1063     + if (!d_deleted(dentry) &&
1064     + !__unionfs_d_revalidate_chain(dentry, NULL, willwrite)) {
1065     + err = -ESTALE;
1066     + goto out_nofree;
1067     + }
1068     +
1069     + sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1070     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1071     + fgen = atomic_read(&UNIONFS_F(file)->generation);
1072     +
1073     + BUG_ON(sbgen > dgen);
1074     +
1075     + /*
1076     + * There are two cases we are interested in. The first is if the
1077     + * generation is lower than the super-block. The second is if
1078     + * someone has copied up this file from underneath us, we also need
1079     + * to refresh things.
1080     + */
1081     + if (!d_deleted(dentry) &&
1082     + (sbgen > fgen || dbstart(dentry) != fbstart(file))) {
1083     + int orig_brid = /* save orig branch ID */
1084     + UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1085     +
1086     + /* First we throw out the existing files. */
1087     + cleanup_file(file);
1088     +
1089     + /* Now we reopen the file(s) as in unionfs_open. */
1090     + bstart = fbstart(file) = dbstart(dentry);
1091     + bend = fbend(file) = dbend(dentry);
1092     +
1093     + size = sizeof(struct file *) * sbmax(sb);
1094     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1095     + if (!UNIONFS_F(file)->lower_files) {
1096     + err = -ENOMEM;
1097     + goto out;
1098     + }
1099     + size = sizeof(int) * sbmax(sb);
1100     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1101     + if (!UNIONFS_F(file)->saved_branch_ids) {
1102     + err = -ENOMEM;
1103     + goto out;
1104     + }
1105     +
1106     + if (S_ISDIR(dentry->d_inode->i_mode)) {
1107     + /* We need to open all the files. */
1108     + err = open_all_files(file);
1109     + if (err)
1110     + goto out;
1111     + } else {
1112     + int new_brid;
1113     + /* We only open the highest priority branch. */
1114     + err = open_highest_file(file, willwrite);
1115     + if (err)
1116     + goto out;
1117     + new_brid = UNIONFS_F(file)->
1118     + saved_branch_ids[fbstart(file)];
1119     + if (new_brid != orig_brid && sbgen > fgen) {
1120     + /*
1121     + * If we re-opened the file on a different
1122     + * branch than the original one, and this
1123     + * was due to a new branch inserted, then
1124     + * update the mnt counts of the old and new
1125     + * branches accordingly.
1126     + */
1127     + unionfs_mntget(dentry, bstart); /* new branch */
1128     + unionfs_mntput(sb->s_root, /* orig branch */
1129     + branch_id_to_idx(sb, orig_brid));
1130     + }
1131     + }
1132     + atomic_set(&UNIONFS_F(file)->generation,
1133     + atomic_read(&UNIONFS_I(dentry->d_inode)->
1134     + generation));
1135     + }
1136     +
1137     + /* Copyup on the first write to a file on a readonly branch. */
1138     + if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1139     + !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1140     + is_robranch(dentry)) {
1141     + printk(KERN_DEBUG "unionfs: do delay copyup of \"%s\"\n",
1142     + dentry->d_name.name);
1143     + err = do_delayed_copyup(file);
1144     + }
1145     +
1146     +out:
1147     + if (err) {
1148     + kfree(UNIONFS_F(file)->lower_files);
1149     + kfree(UNIONFS_F(file)->saved_branch_ids);
1150     + }
1151     +out_nofree:
1152     + if (!err)
1153     + unionfs_check_file(file);
1154     + unionfs_unlock_dentry(dentry);
1155     + return err;
1156     +}
1157     +
1158     +/* unionfs_open helper: open the directory on each branch that has a dentry */
1159     +static int __open_dir(struct inode *inode, struct file *file)
1160     +{
1161     + struct dentry *lower_dentry;
1162     + struct file *lower_file;
1163     + int bindex, bstart, bend;
1164     +
1165     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1166     + bend = fbend(file) = dbend(file->f_path.dentry);
1167     +
1168     + for (bindex = bstart; bindex <= bend; bindex++) {
1169     + lower_dentry =
1170     + unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1171     + if (!lower_dentry)
1172     + continue;
1173     +
1174     + dget(lower_dentry);
1175     + unionfs_mntget(file->f_path.dentry, bindex);
1176     + lower_file = dentry_open(lower_dentry,
1177     + unionfs_lower_mnt_idx(file->f_path.dentry,
1178     + bindex),
1179     + file->f_flags);
1180     + if (IS_ERR(lower_file))
1181     + return PTR_ERR(lower_file);
1182     +
1183     + unionfs_set_lower_file_idx(file, bindex, lower_file);
1184     +
1185     + /*
1186     + * The branchget goes after the open, because otherwise
1187     + * we would miss the reference on release.
1188     + */
1189     + branchget(inode->i_sb, bindex);
1190     + }
1191     +
1192     + return 0;
1193     +}
1194     +
1195     +/* unionfs_open helper function: open a file */
1196     +static int __open_file(struct inode *inode, struct file *file)
1197     +{
1198     + struct dentry *lower_dentry;
1199     + struct file *lower_file;
1200     + int lower_flags;
1201     + int bindex, bstart, bend;
1202     +
1203     + lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1204     + lower_flags = file->f_flags;
1205     +
1206     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1207     + bend = fbend(file) = dbend(file->f_path.dentry);
1208     +
1209     + /*
1210     + * If the lower file exists on a read-only branch, either copy it
1211     + * up now (O_TRUNC) or open it without the write flags and defer.
1212     + */
1213     + if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1214     + /*
1215     + * if the open will change the file, copy it up otherwise
1216     + * defer it.
1217     + */
1218     + if (lower_flags & O_TRUNC) {
1219     + int size = 0;
1220     + int err = -EROFS;
1221     +
1222     + /* copyup the file */
1223     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
1224     + err = copyup_file(
1225     + file->f_path.dentry->d_parent->d_inode,
1226     + file, bstart, bindex, size);
1227     + if (!err)
1228     + break;
1229     + }
1230     + return err;
1231     + } else
1232     + lower_flags &= ~(OPEN_WRITE_FLAGS);
1233     + }
1234     +
1235     + dget(lower_dentry);
1236     +
1237     + /*
1238     + * dentry_open will decrement mnt refcnt if err.
1239     + * otherwise fput() will do an mntput() for us upon file close.
1240     + */
1241     + unionfs_mntget(file->f_path.dentry, bstart);
1242     + lower_file =
1243     + dentry_open(lower_dentry,
1244     + unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1245     + lower_flags);
1246     + if (IS_ERR(lower_file))
1247     + return PTR_ERR(lower_file);
1248     +
1249     + unionfs_set_lower_file(file, lower_file);
1250     + branchget(inode->i_sb, bstart);
1251     +
1252     + return 0;
1253     +}
1254     +
1255     +int unionfs_open(struct inode *inode, struct file *file)
1256     +{
1257     + int err = 0;
1258     + struct file *lower_file = NULL;
1259     + struct dentry *dentry = NULL;
1260     + int bindex = 0, bstart = 0, bend = 0;
1261     + int size;
1262     +
1263     + unionfs_read_lock(inode->i_sb);
1264     +
1265     + file->private_data =
1266     + kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1267     + if (!UNIONFS_F(file)) {
1268     + err = -ENOMEM;
1269     + goto out_nofree;
1270     + }
1271     + fbstart(file) = -1;
1272     + fbend(file) = -1;
1273     + atomic_set(&UNIONFS_F(file)->generation,
1274     + atomic_read(&UNIONFS_I(inode)->generation));
1275     +
1276     + size = sizeof(struct file *) * sbmax(inode->i_sb);
1277     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1278     + if (!UNIONFS_F(file)->lower_files) {
1279     + err = -ENOMEM;
1280     + goto out;
1281     + }
1282     + size = sizeof(int) * sbmax(inode->i_sb);
1283     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1284     + if (!UNIONFS_F(file)->saved_branch_ids) {
1285     + err = -ENOMEM;
1286     + goto out;
1287     + }
1288     +
1289     + dentry = file->f_path.dentry;
1290     + unionfs_lock_dentry(dentry);
1291     +
1292     + bstart = fbstart(file) = dbstart(dentry);
1293     + bend = fbend(file) = dbend(dentry);
1294     +
1295     + /* increment, so that we can flush appropriately */
1296     + atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
1297     +
1298     + /*
1299     + * open all directories and make the unionfs file struct point to
1300     + * these lower file structs
1301     + */
1302     + if (S_ISDIR(inode->i_mode))
1303     + err = __open_dir(inode, file); /* open a dir */
1304     + else
1305     + err = __open_file(inode, file); /* open a file */
1306     +
1307     + /* on error: undo totalopens, drop branch counts, fput opened lower files */
1308     + if (err) {
1309     + atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
1310     + for (bindex = bstart; bindex <= bend; bindex++) {
1311     + lower_file = unionfs_lower_file_idx(file, bindex);
1312     + if (!lower_file)
1313     + continue;
1314     +
1315     + branchput(file->f_path.dentry->d_sb, bindex);
1316     + /* fput calls dput for lower_dentry */
1317     + fput(lower_file);
1318     + }
1319     + }
1320     +
1321     + unionfs_unlock_dentry(dentry);
1322     +
1323     +out:
1324     + if (err) {
1325     + kfree(UNIONFS_F(file)->lower_files);
1326     + kfree(UNIONFS_F(file)->saved_branch_ids);
1327     + kfree(UNIONFS_F(file));
1328     + }
1329     +out_nofree:
1330     + unionfs_read_unlock(inode->i_sb);
1331     + unionfs_check_inode(inode);
1332     + if (!err) {
1333     + unionfs_check_file(file);
1334     + unionfs_check_dentry(file->f_path.dentry->d_parent);
1335     + }
1336     + return err;
1337     +}
1338     +
1339     +/*
1340     + * release all lower object references & free the file info structure;
1341     + * nothing is released if revalidation fails (the error is returned).
1342     + * NOTE(review): "no need to grab sb info's rwsem"? read lock is taken below
1343     + */
1344     +int unionfs_file_release(struct inode *inode, struct file *file)
1345     +{
1346     + struct file *lower_file = NULL;
1347     + struct unionfs_file_info *fileinfo;
1348     + struct unionfs_inode_info *inodeinfo;
1349     + struct super_block *sb = inode->i_sb;
1350     + int bindex, bstart, bend;
1351     + int fgen, err = 0;
1352     +
1353     + unionfs_read_lock(sb);
1354     + /*
1355     + * Yes, we have to revalidate this file even if it's being released.
1356     + * This is important for open-but-unlinked files, as well as mmap
1357     + * support.
1358     + */
1359     + if ((err = unionfs_file_revalidate(file, 1)))
1360     + goto out;
1361     + unionfs_check_file(file);
1362     + fileinfo = UNIONFS_F(file);
1363     + BUG_ON(file->f_path.dentry->d_inode != inode);
1364     + inodeinfo = UNIONFS_I(inode);
1365     +
1366     + /* fput all the lower files */
1367     + fgen = atomic_read(&fileinfo->generation);
1368     + bstart = fbstart(file);
1369     + bend = fbend(file);
1370     +
1371     + for (bindex = bstart; bindex <= bend; bindex++) {
1372     + lower_file = unionfs_lower_file_idx(file, bindex);
1373     +
1374     + if (lower_file) {
1375     + fput(lower_file);
1376     + branchput(sb, bindex);
1377     + }
1378     + }
1379     + kfree(fileinfo->lower_files);
1380     + kfree(fileinfo->saved_branch_ids);
1381     +
1382     + if (fileinfo->rdstate) {
1383     + fileinfo->rdstate->access = jiffies;
1384     + printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
1385     + "%u [%d.%lld]\n",
1386     + fileinfo->rdstate->cookie,
1387     + fileinfo->rdstate->bindex,
1388     + (long long)fileinfo->rdstate->dirpos);
1389     + spin_lock(&inodeinfo->rdlock);
1390     + inodeinfo->rdcount++;
1391     + list_add_tail(&fileinfo->rdstate->cache,
1392     + &inodeinfo->readdircache);
1393     + mark_inode_dirty(inode);
1394     + spin_unlock(&inodeinfo->rdlock);
1395     + fileinfo->rdstate = NULL;
1396     + }
1397     + kfree(fileinfo);
1398     +
1399     +out:
1400     + unionfs_read_unlock(sb);
1401     + return err;
1402     +}
1403     +
1404     +/* pass the ioctl to the lower fs; -ENOTTY if it has no lower ioctl method */
1405     +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1406     +{
1407     + struct file *lower_file;
1408     + int err;
1409     +
1410     + lower_file = unionfs_lower_file(file);
1411     + err = -ENOTTY;
1412     + if (!lower_file || !lower_file->f_op) /* check before the LSM hook */
1413     + goto out;
1414     +
1415     + err = security_file_ioctl(lower_file, cmd, arg);
1416     + if (err)
1417     + goto out;
1418     + err = -ENOTTY; /* stays if lower f_op has neither ioctl method */
1419     + if (lower_file->f_op->unlocked_ioctl) {
1420     + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1421     + } else if (lower_file->f_op->ioctl) {
1422     + lock_kernel();
1423     + err = lower_file->f_op->ioctl(lower_file->f_path.dentry->d_inode,
1424     + lower_file, cmd, arg);
1425     + unlock_kernel();
1426     + }
1427     +
1428     +out:
1429     + return err;
1430     +}
1431     +
1432     +/*
1433     + * return to user-space the branch indices containing the file in question
1434     + * (an fd_set copied to arg); returns dbend after the lookup, or -errno
1435     + * We use fd_set, so the number of branches is limited to FD_SETSIZE,
1436     + * which is currently 1024 - plenty for most people
1437     + */
1438     +static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
1439     + unsigned long arg)
1440     +{
1441     + int err = 0;
1442     + fd_set branchlist;
1443     + int bstart = 0, bend = 0, bindex = 0;
1444     + int orig_bstart, orig_bend;
1445     + struct dentry *dentry, *lower_dentry;
1446     + struct vfsmount *mnt;
1447     +
1448     + dentry = file->f_path.dentry;
1449     + unionfs_lock_dentry(dentry);
1450     + orig_bstart = dbstart(dentry);
1451     + orig_bend = dbend(dentry);
1452     + if ((err = unionfs_partial_lookup(dentry)))
1453     + goto out;
1454     + bstart = dbstart(dentry);
1455     + bend = dbend(dentry);
1456     +
1457     + FD_ZERO(&branchlist);
1458     +
1459     + for (bindex = bstart; bindex <= bend; bindex++) {
1460     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1461     + if (!lower_dentry)
1462     + continue;
1463     + if (lower_dentry->d_inode)
1464     + FD_SET(bindex, &branchlist);
1465     + /* purge any lower objects after partial_lookup */
1466     + if (bindex < orig_bstart || bindex > orig_bend) {
1467     + dput(lower_dentry);
1468     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1469     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1470     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1471     + NULL);
1472     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
1473     + if (!mnt)
1474     + continue;
1475     + unionfs_mntput(dentry, bindex);
1476     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1477     + }
1478     + }
1479     + /* restore original dentry's offsets */
1480     + set_dbstart(dentry, orig_bstart);
1481     + set_dbend(dentry, orig_bend);
1482     + ibstart(dentry->d_inode) = orig_bstart;
1483     + ibend(dentry->d_inode) = orig_bend;
1484     +
1485     + err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1486     + if (err)
1487     + err = -EFAULT;
1488     +
1489     +out:
1490     + unionfs_unlock_dentry(dentry);
1491     + return err < 0 ? err : bend;
1492     +}
1493     +
1494     +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1495     +{
1496     + long err;
1497     +
1498     + unionfs_read_lock(file->f_path.dentry->d_sb);
1499     +
1500     + if ((err = unionfs_file_revalidate(file, 1)))
1501     + goto out;
1502     +
1503     + /* handle unionfs-local commands here; pass everything else down */
1504     + switch (cmd) {
1505     + case UNIONFS_IOCTL_INCGEN:
1506     + /* Deprecated: no longer bumps the generation, just warns */
1507     + printk(KERN_WARNING "unionfs: incgen ioctl deprecated; "
1508     + "use \"-o remount,incgen\"\n");
1509     + err = -ENOSYS;
1510     + break;
1511     +
1512     + case UNIONFS_IOCTL_QUERYFILE:
1513     + /* Return list of branches containing the given file */
1514     + err = unionfs_ioctl_queryfile(file, cmd, arg);
1515     + break;
1516     +
1517     + default:
1518     + /* pass the ioctl down */
1519     + err = do_ioctl(file, cmd, arg);
1520     + break;
1521     + }
1522     +
1523     +out:
1524     + unionfs_read_unlock(file->f_path.dentry->d_sb);
1525     + unionfs_check_file(file);
1526     + return err;
1527     +}
1528     +
1529     +int unionfs_flush(struct file *file, fl_owner_t id)
1530     +{
1531     + int err = 0;
1532     + struct file *lower_file = NULL;
1533     + struct dentry *dentry = file->f_path.dentry;
1534     + int bindex, bstart, bend;
1535     +
1536     + unionfs_read_lock(dentry->d_sb);
1537     +
1538     + if ((err = unionfs_file_revalidate(file, 1)))
1539     + goto out;
1540     + unionfs_check_file(file);
1541     +
1542     + if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens))
1543     + goto out;
1544     +
1545     + unionfs_lock_dentry(dentry);
1546     +
1547     + bstart = fbstart(file);
1548     + bend = fbend(file);
1549     + for (bindex = bstart; bindex <= bend; bindex++) {
1550     + lower_file = unionfs_lower_file_idx(file, bindex);
1551     +
1552     + if (lower_file && lower_file->f_op &&
1553     + lower_file->f_op->flush) {
1554     + err = lower_file->f_op->flush(lower_file, id);
1555     + if (err)
1556     + goto out_lock;
1557     +
1558     + /* if the dentry was deleted, drop our lower dentry ref */
1559     + if (d_deleted(dentry)) {
1560     + dput(unionfs_lower_dentry_idx(dentry, bindex));
1561     + unionfs_set_lower_dentry_idx(dentry, bindex,
1562     + NULL);
1563     + }
1564     + }
1565     +
1566     + }
1567     +
1568     + /* on success, update our times */
1569     + unionfs_copy_attr_times(dentry->d_inode);
1570     + /* parent time could have changed too (async) */
1571     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
1572     +
1573     +out_lock:
1574     + unionfs_unlock_dentry(dentry);
1575     +out:
1576     + unionfs_read_unlock(dentry->d_sb);
1577     + unionfs_check_file(file);
1578     + return err;
1579     +}
1580     diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1581     new file mode 100644
1582     index 0000000..fb7a2de
1583     --- /dev/null
1584     +++ b/fs/unionfs/copyup.c
1585     @@ -0,0 +1,880 @@
1586     +/*
1587     + * Copyright (c) 2003-2007 Erez Zadok
1588     + * Copyright (c) 2003-2006 Charles P. Wright
1589     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1590     + * Copyright (c) 2005-2006 Junjiro Okajima
1591     + * Copyright (c) 2005 Arun M. Krishnakumar
1592     + * Copyright (c) 2004-2006 David P. Quigley
1593     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1594     + * Copyright (c) 2003 Puja Gupta
1595     + * Copyright (c) 2003 Harikesavan Krishnan
1596     + * Copyright (c) 2003-2007 Stony Brook University
1597     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
1598     + *
1599     + * This program is free software; you can redistribute it and/or modify
1600     + * it under the terms of the GNU General Public License version 2 as
1601     + * published by the Free Software Foundation.
1602     + */
1603     +
1604     +#include "union.h"
1605     +
1606     +/*
1607     + * For detailed explanation of copyup see:
1608     + * Documentation/filesystems/unionfs/concepts.txt
1609     + */
1610     +
1611     +#ifdef CONFIG_UNION_FS_XATTR
1612     +/* copy every xattr of old_lower_dentry onto new_lower_dentry; returns 0 or -errno */
1613     +static int copyup_xattrs(struct dentry *old_lower_dentry,
1614     + struct dentry *new_lower_dentry)
1615     +{
1616     + int err = 0;
1617     + ssize_t list_size = -1;
1618     + char *name_list = NULL;
1619     + char *attr_value = NULL;
1620     + char *name_list_orig = NULL;
1621     +
1622     + /* query the actual size of the xattr list (<= 0: nothing to copy, or error) */
1623     + list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1624     + if (list_size <= 0) {
1625     + err = list_size;
1626     + goto out;
1627     + }
1628     +
1629     + /* allocate space for the actual list */
1630     + name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1631     + if (!name_list || IS_ERR(name_list)) {
1632     + err = name_list ? PTR_ERR(name_list) : -ENOMEM; /* NULL must not read as success */
1633     + goto out;
1634     + }
1635     +
1636     + name_list_orig = name_list; /* save for kfree at end */
1637     +
1638     + /* now get the actual xattr list of the source file */
1639     + list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1640     + if (list_size <= 0) {
1641     + err = list_size;
1642     + goto out;
1643     + }
1644     +
1645     + /* allocate space to hold each xattr's value */
1646     + attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1647     + if (!attr_value || IS_ERR(attr_value)) {
1648     + err = attr_value ? PTR_ERR(attr_value) : -ENOMEM; /* report attr_value's error, not name_list's */
1649     + goto out;
1650     + }
1651     +
1652     + /* in a loop, get and set each xattr from src to dst file */
1653     + while (*name_list) {
1654     + ssize_t size;
1655     +
1656     + /* Lock here since vfs_getxattr doesn't lock for us */
1657     + mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1658     + size = vfs_getxattr(old_lower_dentry, name_list,
1659     + attr_value, XATTR_SIZE_MAX);
1660     + mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1661     + if (size < 0) {
1662     + err = size;
1663     + goto out;
1664     + }
1665     + if (size > XATTR_SIZE_MAX) {
1666     + err = -E2BIG;
1667     + goto out;
1668     + }
1669     + /* Don't lock here since vfs_setxattr does it for us. */
1670     + err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1671     + size, 0);
1672     + if (err < 0)
1673     + goto out;
1674     + name_list += strlen(name_list) + 1;
1675     + }
1676     +out:
1677     + if (name_list_orig)
1678     + kfree(name_list_orig);
1679     + if (attr_value && !IS_ERR(attr_value)) /* may still hold an ERR_PTR after a failed alloc */
1680     + kfree(attr_value);
1681     + /*
1682     + * Ignore if xattr isn't supported. Also ignore EPERM because that
1683     + * requires CAP_SYS_ADMIN for security.* xattrs, but copyup happens
1684     + * as normal users.
1685     + */
1686     + if (err == -ENOTSUPP || err == -EOPNOTSUPP || err == -EPERM)
1687     + err = 0;
1688     + return err;
1689     +}
1690     +#endif /* CONFIG_UNION_FS_XATTR */
1691     +
1692     +/*
1693     + * Copy atime/mtime/ctime and uid/gid, and then the mode, of
1694     + * old_lower_dentry's inode onto new_lower_dentry using notify_change().
1695     + * Handle file systems which may not support certain options. For example,
1696     + * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1697     + * errors rather than propagating them up, which would result in copyup
1698     + * errors being returned back to users.
1699     + */
1700     +static int copyup_permissions(struct super_block *sb,
1701     + struct dentry *old_lower_dentry,
1702     + struct dentry *new_lower_dentry)
1703     +{
1704     + struct inode *i = old_lower_dentry->d_inode;
1705     + struct iattr newattrs;
1706     + int err;
1707     +
1708     + newattrs.ia_atime = i->i_atime;
1709     + newattrs.ia_mtime = i->i_mtime;
1710     + newattrs.ia_ctime = i->i_ctime;
1711     + newattrs.ia_gid = i->i_gid;
1712     + newattrs.ia_uid = i->i_uid;
1713     + newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1714     + ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1715     + ATTR_GID | ATTR_UID;
1716     + err = notify_change(new_lower_dentry, &newattrs);
1717     + if (err)
1718     + goto out;
1719     +
1720     + /* the mode is set in a second call so that EOPNOTSUPP on symlinks can be ignored */
1721     + newattrs.ia_mode = i->i_mode;
1722     + newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1723     + err = notify_change(new_lower_dentry, &newattrs);
1724     + if (err == -EOPNOTSUPP &&
1725     + S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1726     + printk(KERN_WARNING
1727     + "unionfs: changing \"%s\" symlink mode unsupported\n",
1728     + new_lower_dentry->d_name.name);
1729     + err = 0;
1730     + }
1731     +
1732     +out:
1733     + return err;
1734     +}
1735     +
1736     +/*
1737     + * Create the new lower object (directory, symlink, device/fifo/socket node
1738     + * or regular file) with old_lower_dentry's mode, by handing the matching
1739     + * __unionfs_* helper to run_sioq(). Returns the helper's args.err.
1740     + * if the object being copied up is a regular file, the file is only created,
1741     + * the contents have to be copied up separately
1742     + */
1743     +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1744     + struct dentry *new_lower_dentry,
1745     + struct dentry *new_lower_parent_dentry,
1746     + char *symbuf)
1747     +{
1748     + int err = 0;
1749     + umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1750     + struct sioq_args args;
1751     +
1752     + if (S_ISDIR(old_mode)) {
1753     + args.mkdir.parent = new_lower_parent_dentry->d_inode;
1754     + args.mkdir.dentry = new_lower_dentry;
1755     + args.mkdir.mode = old_mode;
1756     +
1757     + run_sioq(__unionfs_mkdir, &args);
1758     + err = args.err;
1759     + } else if (S_ISLNK(old_mode)) {
1760     + args.symlink.parent = new_lower_parent_dentry->d_inode;
1761     + args.symlink.dentry = new_lower_dentry;
1762     + args.symlink.symbuf = symbuf;
1763     + args.symlink.mode = old_mode;
1764     +
1765     + run_sioq(__unionfs_symlink, &args);
1766     + err = args.err;
1767     + } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1768     + S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1769     + args.mknod.parent = new_lower_parent_dentry->d_inode;
1770     + args.mknod.dentry = new_lower_dentry;
1771     + args.mknod.mode = old_mode;
1772     + args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1773     +
1774     + run_sioq(__unionfs_mknod, &args);
1775     + err = args.err;
1776     + } else if (S_ISREG(old_mode)) {
1777     + args.create.parent = new_lower_parent_dentry->d_inode;
1778     + args.create.dentry = new_lower_dentry;
1779     + args.create.mode = old_mode;
1780     + args.create.nd = NULL;
1781     +
1782     + run_sioq(__unionfs_create, &args);
1783     + err = args.err;
1784     + } else {
1785     + printk(KERN_ERR "unionfs: unknown inode type %d\n",
1786     + old_mode);
1787     + BUG();
1788     + }
1789     +
1790     + return err;
1791     +}
1792     +
1793     +static int __copyup_reg_data(struct dentry *dentry,
1794     + struct dentry *new_lower_dentry, int new_bindex,
1795     + struct dentry *old_lower_dentry, int old_bindex,
1796     + struct file **copyup_file, loff_t len)
1797     +{
1798     + struct super_block *sb = dentry->d_sb;
1799     + struct file *input_file;
1800     + struct file *output_file;
1801     + struct vfsmount *output_mnt;
1802     + mm_segment_t old_fs;
1803     + char *buf = NULL;
1804     + ssize_t read_bytes, write_bytes;
1805     + loff_t size;
1806     + int err = 0;
1807     +
1808     + /* copy up to len bytes from the old lower file to the new one; open old file first */
1809     + unionfs_mntget(dentry, old_bindex);
1810     + branchget(sb, old_bindex);
1811     + /* dentry_open calls dput and mntput if it returns an error */
1812     + input_file = dentry_open(old_lower_dentry,
1813     + unionfs_lower_mnt_idx(dentry, old_bindex),
1814     + O_RDONLY | O_LARGEFILE);
1815     + if (IS_ERR(input_file)) {
1816     + dput(old_lower_dentry);
1817     + err = PTR_ERR(input_file);
1818     + goto out;
1819     + }
1820     + if (!input_file->f_op || !input_file->f_op->read) {
1821     + err = -EINVAL;
1822     + goto out_close_in;
1823     + }
1824     +
1825     + /* open new file */
1826     + dget(new_lower_dentry);
1827     + output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1828     + branchget(sb, new_bindex);
1829     + output_file = dentry_open(new_lower_dentry, output_mnt,
1830     + O_RDWR | O_LARGEFILE);
1831     + if (IS_ERR(output_file)) {
1832     + err = PTR_ERR(output_file);
1833     + goto out_close_in2;
1834     + }
1835     + if (!output_file->f_op || !output_file->f_op->write) {
1836     + err = -EINVAL;
1837     + goto out_close_out;
1838     + }
1839     +
1840     + /* allocate a one-page bounce buffer for the copy loop */
1841     + buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1842     + if (!buf) {
1843     + err = -ENOMEM;
1844     + goto out_close_out;
1845     + }
1846     +
1847     + input_file->f_pos = 0;
1848     + output_file->f_pos = 0;
1849     +
1850     + old_fs = get_fs();
1851     + set_fs(KERNEL_DS);
1852     +
1853     + size = len;
1854     + err = 0;
1855     + do {
1856     + if (len >= PAGE_SIZE)
1857     + size = PAGE_SIZE;
1858     + else if ((len < PAGE_SIZE) && (len > 0))
1859     + size = len;
1860     +
1861     + len -= PAGE_SIZE;
1862     +
1863     + read_bytes =
1864     + input_file->f_op->read(input_file,
1865     + (char __user *)buf, size,
1866     + &input_file->f_pos);
1867     + if (read_bytes <= 0) {
1868     + err = read_bytes;
1869     + break;
1870     + }
1871     +
1872     + write_bytes =
1873     + output_file->f_op->write(output_file,
1874     + (char __user *)buf,
1875     + read_bytes,
1876     + &output_file->f_pos);
1877     + if ((write_bytes < 0) || (write_bytes < read_bytes)) {
1878     + err = (write_bytes < 0) ? write_bytes : -EIO; /* short write: never return a positive byte count */
1879     + break;
1880     + }
1881     + } while ((read_bytes > 0) && (len > 0));
1882     +
1883     + set_fs(old_fs);
1884     +
1885     + kfree(buf);
1886     +
1887     + if (!err && output_file->f_op->fsync) /* fsync is optional in file_operations */
1888     + err = output_file->f_op->fsync(output_file,
1889     + new_lower_dentry, 0);
1890     +
1891     + if (err)
1892     + goto out_close_out;
1893     +
1894     + if (copyup_file) {
1895     + *copyup_file = output_file;
1896     + goto out_close_in; /* caller keeps the open output file and its branch reference */
1897     + }
1898     +
1899     +out_close_out:
1900     + fput(output_file);
1901     +
1902     +out_close_in2:
1903     + branchput(sb, new_bindex);
1904     +
1905     +out_close_in:
1906     + fput(input_file);
1907     +
1908     +out:
1909     + branchput(sb, old_bindex);
1910     +
1911     + return err;
1912     +}
1913     +
1914     +/*
1915     + * Undo a partial copyup: clear dentry's lower dentry pointer at new_bindex,
1916     + * restore dbstart/dbend, then dput the new and old lower dentries.
1917     + */
1918     +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
1919     + int old_bstart, int old_bend,
1920     + struct dentry *new_lower_dentry, int new_bindex)
1921     +{
1922     + /* get rid of the lower dentry and all its traces */
1923     + unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
1924     + set_dbstart(dentry, old_bstart);
1925     + set_dbend(dentry, old_bend);
1926     +
1927     + dput(new_lower_dentry);
1928     + dput(old_lower_dentry);
1929     +}
1930     +
1931     +/*
1932     + * Copy up a dentry to a file of specified name.
1933     + *
1934     + * @dir: used to pull the ->i_sb to access other branches
1935     + * @dentry: the non-negative dentry whose lower_inode we should copy
1936     + * @bstart: the branch of the lower_inode to copy from
1937     + * @new_bindex: the branch to create the new file in
1938     + * @name: the name of the file to create
1939     + * @namelen: length of @name
1940     + * @copyup_file: the "struct file" to return (optional)
1941     + * @len: how many bytes to copy-up?
1942     + */
1943     +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
1944     + int new_bindex, const char *name, int namelen,
1945     + struct file **copyup_file, loff_t len)
1946     +{
1947     + struct dentry *new_lower_dentry;
1948     + struct dentry *old_lower_dentry = NULL;
1949     + struct super_block *sb;
1950     + int err = 0;
1951     + int old_bindex;
1952     + int old_bstart;
1953     + int old_bend;
1954     + struct dentry *new_lower_parent_dentry = NULL;
1955     + mm_segment_t oldfs;
1956     + char *symbuf = NULL;
1957     +
1958     + verify_locked(dentry);
1959     +
1960     + old_bindex = bstart;
1961     + old_bstart = dbstart(dentry);
1962     + old_bend = dbend(dentry);
1963     +
1964     + BUG_ON(new_bindex < 0);
1965     + BUG_ON(new_bindex >= old_bindex);
1966     +
1967     + sb = dir->i_sb;
1968     +
1969     + if ((err = is_robranch_super(sb, new_bindex)))
1970     + goto out;
1971     +
1972     + /* Create the directory structure above this dentry. */
1973     + new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
1974     + if (IS_ERR(new_lower_dentry)) {
1975     + err = PTR_ERR(new_lower_dentry);
1976     + goto out;
1977     + }
1978     +
1979     + old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
1980     + /* we conditionally dput this old_lower_dentry at end of function */
1981     + dget(old_lower_dentry);
1982     +
1983     + /* For symlinks, we must read the link before we lock the directory. */
1984     + if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
1985     +
1986     + symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
1987     + if (!symbuf) {
1988     + __clear(dentry, old_lower_dentry,
1989     + old_bstart, old_bend,
1990     + new_lower_dentry, new_bindex);
1991     + err = -ENOMEM;
1992     + goto out_free;
1993     + }
1994     +
1995     + oldfs = get_fs();
1996     + set_fs(KERNEL_DS);
1997     + err = old_lower_dentry->d_inode->i_op->readlink(
1998     + old_lower_dentry,
1999     + (char __user *)symbuf,
2000     + PATH_MAX);
2001     + set_fs(oldfs);
2002     + if (err < 0) {
2003     + __clear(dentry, old_lower_dentry,
2004     + old_bstart, old_bend,
2005     + new_lower_dentry, new_bindex);
2006     + goto out_free;
2007     + }
2008     + symbuf[err] = '\0';
2009     + }
2010     +
2011     + /* Now we lock the parent, and create the object in the new branch. */
2012     + new_lower_parent_dentry = lock_parent(new_lower_dentry);
2013     +
2014     + /* create the new inode */
2015     + err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2016     + new_lower_parent_dentry, symbuf);
2017     +
2018     + if (err) {
2019     + __clear(dentry, old_lower_dentry,
2020     + old_bstart, old_bend,
2021     + new_lower_dentry, new_bindex);
2022     + goto out_unlock;
2023     + }
2024     +
2025     + /* We actually copyup the file here. */
2026     + if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2027     + err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2028     + old_lower_dentry, old_bindex,
2029     + copyup_file, len);
2030     + if (err)
2031     + goto out_unlink;
2032     +
2033     + /* Set permissions. */
2034     + if ((err = copyup_permissions(sb, old_lower_dentry,
2035     + new_lower_dentry)))
2036     + goto out_unlink;
2037     +
2038     +#ifdef CONFIG_UNION_FS_XATTR
2039     + /* Selinux uses extended attributes for permissions. */
2040     + if ((err = copyup_xattrs(old_lower_dentry, new_lower_dentry)))
2041     + goto out_unlink;
2042     +#endif /* CONFIG_UNION_FS_XATTR */
2043     +
2044     + /* do not allow files getting deleted to be re-interposed */
2045     + if (!d_deleted(dentry))
2046     + unionfs_reinterpose(dentry);
2047     +
2048     + goto out_unlock;
2049     +
2050     +out_unlink:
2051     + /*
2052     + * copyup failed, because we possibly ran out of space or
2053     + * quota, or something else happened so let's unlink; we don't
2054     + * really care about the return value of vfs_unlink
2055     + */
2056     + vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2057     +
2058     + if (copyup_file) {
2059     + /* need to close the file */
2060     +
2061     + fput(*copyup_file);
2062     + branchput(sb, new_bindex);
2063     + }
2064     +
2065     + /*
2066     + * TODO: should we reset the error to something like -EIO?
2067     + *
2068     + * If we don't reset, the user may get some nonsensical errors, but
2069     + * on the other hand, if we reset to EIO, we guarantee that the user
2070     + * will get a "confusing" error message.
2071     + */
2072     +
2073     +out_unlock:
2074     + unlock_dir(new_lower_parent_dentry);
2075     +
2076     +out_free:
2077     + /*
2078     + * If old_lower_dentry was a directory, we need to dput it. If it
2079     + * was a file, then it was already dput indirectly by other
2080     + * functions we call above which operate on regular files.
2081     + */
2082     + if (old_lower_dentry && old_lower_dentry->d_inode &&
2083     + (S_ISDIR(old_lower_dentry->d_inode->i_mode) ||
2084     + S_ISLNK(old_lower_dentry->d_inode->i_mode)))
2085     + dput(old_lower_dentry);
2086     + kfree(symbuf);
2087     +
2088     + if (err)
2089     + goto out;
2090     + if (!S_ISDIR(dentry->d_inode->i_mode)) {
2091     + unionfs_purge_extras(dentry);
2092     + if (!unionfs_lower_inode(dentry->d_inode)) {
2093     + /*
2094     + * If we got here, then we copied up to an
2095     + * unlinked-open file, whose name is .unionfsXXXXX.
2096     + */
2097     + struct inode *inode = new_lower_dentry->d_inode;
2098     + atomic_inc(&inode->i_count);
2099     + unionfs_set_lower_inode_idx(dentry->d_inode,
2100     + ibstart(dentry->d_inode),
2101     + inode);
2102     + }
2103     + }
2104     + unionfs_inherit_mnt(dentry);
2105     + /* sync inode times from copied-up inode to our inode */
2106     + unionfs_copy_attr_times(dentry->d_inode);
2107     + unionfs_check_inode(dir);
2108     + unionfs_check_dentry(dentry);
2109     +out:
2110     + return err;
2111     +}
2112     +
2113     +/*
2114     + * Copy up the file represented by 'file' from branch 'bstart' to branch
2115     + * 'new_bindex', naming the copy "name". On success, fbstart(file) becomes
2116     + * new_bindex and the new lower file is stored there; returns copyup_dentry's result.
2117     + */
2118     +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2119     + int bstart, int new_bindex, loff_t len)
2120     +{
2121     + int err = 0;
2122     + struct file *output_file = NULL;
2123     +
2124     + err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2125     + name, strlen(name), &output_file, len);
2126     + if (!err) {
2127     + fbstart(file) = new_bindex;
2128     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2129     + }
2130     +
2131     + return err;
2132     +}
2133     +
2134     +/*
2135     + * Copy up 'file' from branch 'bstart' to branch 'new_bindex' under its own
2136     + * dentry name; on success, fbstart(file) and its lower file point there.
2137     + */
2138     +int copyup_file(struct inode *dir, struct file *file, int bstart,
2139     + int new_bindex, loff_t len)
2140     +{
2141     + int err = 0;
2142     + struct file *output_file = NULL;
2143     + struct dentry *dentry = file->f_path.dentry;
2144     +
2145     + err = copyup_dentry(dir, dentry, bstart, new_bindex,
2146     + dentry->d_name.name, dentry->d_name.len,
2147     + &output_file, len);
2148     + if (!err) {
2149     + fbstart(file) = new_bindex;
2150     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2151     + }
2152     +
2153     + return err;
2154     +}
2155     +
2156     +/* release a dentry's negative lower dentries/mnts (except bindex) and recompute dbstart/dbend */
2157     +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2158     + int old_bstart, int old_bend)
2159     +{
2160     + int loop_start;
2161     + int loop_end;
2162     + int new_bstart = -1;
2163     + int new_bend = -1;
2164     + int i;
2165     +
2166     + loop_start = min(old_bstart, bindex);
2167     + loop_end = max(old_bend, bindex);
2168     +
2169     + /*
2170     + * Walk the branches from left to right, computing the new bstart
2171     + * and bend. Negative lower dentries (and their mnts) are released,
2172     + * except the one at bindex, which is always kept.
2173     + */
2174     + for (i = loop_start; i <= loop_end; i++) {
2175     + if (!unionfs_lower_dentry_idx(dentry, i))
2176     + continue;
2177     +
2178     + if (i == bindex) {
2179     + new_bend = i;
2180     + if (new_bstart < 0)
2181     + new_bstart = i;
2182     + continue;
2183     + }
2184     +
2185     + if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2186     + dput(unionfs_lower_dentry_idx(dentry, i));
2187     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
2188     +
2189     + unionfs_mntput(dentry, i);
2190     + unionfs_set_lower_mnt_idx(dentry, i, NULL);
2191     + } else {
2192     + if (new_bstart < 0)
2193     + new_bstart = i;
2194     + new_bend = i;
2195     + }
2196     + }
2197     +
2198     + if (new_bstart < 0)
2199     + new_bstart = bindex;
2200     + if (new_bend < 0)
2201     + new_bend = bindex;
2202     + set_dbstart(dentry, new_bstart);
2203     + set_dbend(dentry, new_bend);
2204     +
2205     +}
2206     +
2207     +/* store igrab(lower->d_inode) as upper's lower inode at bindex; widen ibstart/ibend to cover bindex */
2208     +static void __set_inode(struct dentry *upper, struct dentry *lower,
2209     + int bindex)
2210     +{
2211     + unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2212     + igrab(lower->d_inode));
2213     + if (likely(ibstart(upper->d_inode) > bindex))
2214     + ibstart(upper->d_inode) = bindex;
2215     + if (likely(ibend(upper->d_inode) < bindex))
2216     + ibend(upper->d_inode) = bindex;
2217     +
2218     +}
2219     +
2220     +/* store lower as upper's lower dentry at bindex; widen dbstart/dbend to cover bindex */
2221     +static void __set_dentry(struct dentry *upper, struct dentry *lower,
2222     + int bindex)
2223     +{
2224     + unionfs_set_lower_dentry_idx(upper, bindex, lower);
2225     + if (likely(dbstart(upper) > bindex))
2226     + set_dbstart(upper, bindex);
2227     + if (likely(dbend(upper) < bindex))
2228     + set_dbend(upper, bindex);
2229     +}
2230     +
2231     +/*
2232     + * Replicate the directory structure up to the given dentry in the bindex
2233     + * branch. Returns the lower dentry looked up for @name, or an ERR_PTR.
2234     + */
2235     +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2236     + const char *name, int bindex)
2237     +{
2238     + int err;
2239     + struct dentry *child_dentry;
2240     + struct dentry *parent_dentry;
2241     + struct dentry *lower_parent_dentry = NULL;
2242     + struct dentry *lower_dentry = NULL;
2243     + const char *childname;
2244     + unsigned int childnamelen;
2245     + int nr_dentry;
2246     + int count = 0;
2247     + int old_bstart;
2248     + int old_bend;
2249     + struct dentry **path = NULL;
2250     + struct super_block *sb;
2251     +
2252     + verify_locked(dentry);
2253     +
2254     + if ((err = is_robranch_super(dir->i_sb, bindex))) {
2255     + lower_dentry = ERR_PTR(err);
2256     + goto out;
2257     + }
2258     +
2259     + old_bstart = dbstart(dentry);
2260     + old_bend = dbend(dentry);
2261     +
2262     + lower_dentry = ERR_PTR(-ENOMEM);
2263     +
2264     + /* There is no sense allocating any less than the minimum. */
2265     + nr_dentry = 1;
2266     + path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2267     + if (!path)
2268     + goto out;
2269     +
2270     + /* assume the negative dentry of unionfs as the parent dentry */
2271     + parent_dentry = dentry;
2272     +
2273     + /*
2274     + * This loop finds the first parent that exists in the given branch.
2275     + * We start building the directory structure from there. At the end
2276     + * of the loop, the following should hold:
2277     + * - child_dentry is the first nonexistent child
2278     + * - parent_dentry is the first existent parent
2279     + * - path[0] is the deepest child
2280     + * - path[count] is the first child to create
2281     + */
2282     + do {
2283     + child_dentry = parent_dentry;
2284     +
2285     + /* find the parent directory dentry in unionfs */
2286     + parent_dentry = child_dentry->d_parent;
2287     + unionfs_lock_dentry(parent_dentry);
2288     +
2289     + /* find out the lower_parent_dentry in the given branch */
2290     + lower_parent_dentry =
2291     + unionfs_lower_dentry_idx(parent_dentry, bindex);
2292     +
2293     + /* grow path table; NOTE(review): on krealloc failure, out: reads path[count], which was never stored -- confirm */
2294     + if (count == nr_dentry) {
2295     + void *p;
2296     +
2297     + nr_dentry *= 2;
2298     + p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2299     + GFP_KERNEL);
2300     + if (!p) {
2301     + lower_dentry = ERR_PTR(-ENOMEM);
2302     + goto out;
2303     + }
2304     + path = p;
2305     + }
2306     +
2307     + /* store the child dentry */
2308     + path[count++] = child_dentry;
2309     + } while (!lower_parent_dentry);
2310     + count--;
2311     +
2312     + sb = dentry->d_sb;
2313     +
2314     + /*
2315     + * This code goes between the begin/end labels and basically
2316     + * emulates a while(child_dentry != dentry), only cleaner and
2317     + * shorter than what would be a much longer while loop.
2318     + */
2319     +begin:
2320     + /* get lower parent dir in the current branch */
2321     + lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2322     + unionfs_unlock_dentry(parent_dentry);
2323     +
2324     + /* init the values to lookup */
2325     + childname = child_dentry->d_name.name;
2326     + childnamelen = child_dentry->d_name.len;
2327     +
2328     + if (child_dentry != dentry) {
2329     + /* lookup child in the underlying file system */
2330     + lower_dentry = lookup_one_len(childname, lower_parent_dentry,
2331     + childnamelen);
2332     + if (IS_ERR(lower_dentry))
2333     + goto out;
2334     + } else {
2335     + /*
2336     + * Is the name a whiteout of the child name ? lookup the
2337     + * whiteout child in the underlying file system
2338     + */
2339     + lower_dentry = lookup_one_len(name, lower_parent_dentry,
2340     + strlen(name));
2341     + if (IS_ERR(lower_dentry))
2342     + goto out;
2343     +
2344     + /* Replace the current dentry (if any) with the new one */
2345     + dput(unionfs_lower_dentry_idx(dentry, bindex));
2346     + unionfs_set_lower_dentry_idx(dentry, bindex,
2347     + lower_dentry);
2348     +
2349     + __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2350     + goto out;
2351     + }
2352     +
2353     + if (lower_dentry->d_inode) {
2354     + /*
2355     + * since this already exists we dput to avoid
2356     + * multiple references on the same dentry
2357     + */
2358     + dput(lower_dentry);
2359     + } else {
2360     + struct sioq_args args;
2361     +
2362     + /* it's a negative dentry, create a new dir */
2363     + lower_parent_dentry = lock_parent(lower_dentry);
2364     +
2365     + args.mkdir.parent = lower_parent_dentry->d_inode;
2366     + args.mkdir.dentry = lower_dentry;
2367     + args.mkdir.mode = child_dentry->d_inode->i_mode;
2368     +
2369     + run_sioq(__unionfs_mkdir, &args);
2370     + err = args.err;
2371     +
2372     + if (!err)
2373     + err = copyup_permissions(dir->i_sb, child_dentry,
2374     + lower_dentry);
2375     + unlock_dir(lower_parent_dentry);
2376     + if (err) {
2377     + struct inode *inode = lower_dentry->d_inode;
2378     + /*
2379     + * If we get here, it means that we created a new
2380     + * dentry+inode, but copying permissions failed.
2381     + * Therefore, we should delete this inode and dput
2382     + * the dentry so as not to leave cruft behind.
2383     + */
2384     + if (lower_dentry->d_op && lower_dentry->d_op->d_iput)
2385     + lower_dentry->d_op->d_iput(lower_dentry,
2386     + inode);
2387     + else
2388     + iput(inode);
2389     + lower_dentry->d_inode = NULL;
2390     + dput(lower_dentry);
2391     + lower_dentry = ERR_PTR(err);
2392     + goto out;
2393     + }
2394     +
2395     + }
2396     +
2397     + __set_inode(child_dentry, lower_dentry, bindex);
2398     + __set_dentry(child_dentry, lower_dentry, bindex);
2399     + /*
2400     + * update times of this dentry, but also the parent, because if
2401     + * we changed, the parent may have changed too.
2402     + */
2403     + unionfs_copy_attr_times(parent_dentry->d_inode);
2404     + unionfs_copy_attr_times(child_dentry->d_inode);
2405     +
2406     + parent_dentry = child_dentry;
2407     + child_dentry = path[--count];
2408     + goto begin;
2409     +out:
2410     + /* cleanup any leftover locks from the do/while loop above */
2411     + if (IS_ERR(lower_dentry))
2412     + while (count)
2413     + unionfs_unlock_dentry(path[count--]);
2414     + kfree(path);
2415     + return lower_dentry;
2416     +}
2417     +
2418     +/* at branch dbstart(dentry): give dentry and its mnt-less parents the lower mnt of the first parent that has one */
2419     +void unionfs_inherit_mnt(struct dentry *dentry)
2420     +{
2421     + struct dentry *parent, *hasone;
2422     + int bindex = dbstart(dentry);
2423     +
2424     + if (unionfs_lower_mnt_idx(dentry, bindex))
2425     + return;
2426     + hasone = dentry->d_parent;
2427     + /* this loop should stop at root dentry */
2428     + while (!unionfs_lower_mnt_idx(hasone, bindex)) {
2429     + hasone = hasone->d_parent;
2430     + }
2431     + parent = dentry;
2432     + while (!unionfs_lower_mnt_idx(parent, bindex)) {
2433     + unionfs_set_lower_mnt_idx(parent, bindex,
2434     + unionfs_mntget(hasone, bindex));
2435     + parent = parent->d_parent;
2436     + }
2437     +}
2438     +
2439     +/*
2440     + * Regular files should have only one lower object. On copyup, we may
2441     + * have leftover objects from previous branches. So purge all such extra
2442     + * objects and keep only the most recent, leftmost, copied-up one.
2443     + */
2444     +void unionfs_purge_extras(struct dentry *dentry)
2445     +{
2446     + int bindex;
2447     +
2448     + BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2449     + for (bindex=dbstart(dentry)+1; bindex<=dbend(dentry); bindex++) {
2450     + if (unionfs_lower_mnt_idx(dentry, bindex)) {
2451     + unionfs_mntput(dentry, bindex);
2452     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
2453     + }
2454     + if (unionfs_lower_dentry_idx(dentry, bindex)) {
2455     + dput(unionfs_lower_dentry_idx(dentry, bindex));
2456     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
2457     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
2458     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
2459     + NULL);
2460     + }
2461     + }
2462     + bindex = dbstart(dentry);
2463     + set_dbend(dentry, bindex);
2464     + ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bindex;
2465     +}
2466     diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2467     new file mode 100644
2468     index 0000000..94f0e84
2469     --- /dev/null
2470     +++ b/fs/unionfs/debug.c
2471     @@ -0,0 +1,494 @@
2472     +/*
2473     + * Copyright (c) 2003-2007 Erez Zadok
2474     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2475     + * Copyright (c) 2003-2007 Stony Brook University
2476     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2477     + *
2478     + * This program is free software; you can redistribute it and/or modify
2479     + * it under the terms of the GNU General Public License version 2 as
2480     + * published by the Free Software Foundation.
2481     + */
2482     +
2483     +#include "union.h"
2484     +
2485     +/*
2486     + * Helper debugging functions for maintainers (and for users to report
2487     + * useful information back to maintainers)
2488     + */
2489     +
2490     +/* print the caller's fname/fxn/line once per check; uses those names and printed_caller from the enclosing scope */
2491     +#define PRINT_CALLER() \
2492     + do { \
2493     + if (!printed_caller) { \
2494     + printk("PC:%s:%s:%d\n",fname,fxn,line); \
2495     + printed_caller = 1; \
2496     + } \
2497     + } while (0)
2498     +
2499     +/*
2500     + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2501     + * the fan-out of various Unionfs objects. We check that no lower objects
2502     + * exist outside the start/end branch range; that all objects within are
2503     + * non-NULL (with some allowed exceptions); that for every lower file
2504     + * there's a lower dentry+inode; that the start/end ranges match for all
2505     + * corresponding lower objects; that open files/symlinks have only one lower
2506     + * object, but directories can have several; and more.
2507     + */
2508     +void __unionfs_check_inode(const struct inode *inode,
2509     + const char *fname, const char *fxn, int line)
2510     +{
2511     + int bindex;
2512     + int istart, iend;
2513     + struct inode *lower_inode;
2514     + struct super_block *sb;
2515     + int printed_caller = 0;
2516     +
2517     + /* for inodes now */
2518     + BUG_ON(!inode);
2519     + sb = inode->i_sb;
2520     + istart = ibstart(inode);
2521     + iend = ibend(inode);
2522     + if (istart > iend) {
2523     + PRINT_CALLER();
2524     + printk(" Ci0: inode=%p istart/end=%d:%d\n",
2525     + inode, istart, iend);
2526     + }
2527     + if ((istart == -1 && iend != -1) ||
2528     + (istart != -1 && iend == -1)) {
2529     + PRINT_CALLER();
2530     + printk(" Ci1: inode=%p istart/end=%d:%d\n",
2531     + inode, istart, iend);
2532     + }
2533     + if (!S_ISDIR(inode->i_mode)) {
2534     + if (iend != istart) {
2535     + PRINT_CALLER();
2536     + printk(" Ci2: inode=%p istart=%d iend=%d\n",
2537     + inode, istart, iend);
2538     + }
2539     + }
2540     +
2541     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2542     + if (!UNIONFS_I(inode)) {
2543     + PRINT_CALLER();
2544     + printk(" Ci3: no inode_info %p\n", inode);
2545     + return;
2546     + }
2547     + if (!UNIONFS_I(inode)->lower_inodes) {
2548     + PRINT_CALLER();
2549     + printk(" Ci4: no lower_inodes %p\n", inode);
2550     + return;
2551     + }
2552     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2553     + if (lower_inode) {
2554     + if (bindex < istart || bindex > iend) {
2555     + PRINT_CALLER();
2556     + printk(" Ci5: inode/linode=%p:%p bindex=%d "
2557     + "istart/end=%d:%d\n", inode,
2558     + lower_inode, bindex, istart, iend);
2559     + } else if ((int)lower_inode == 0x5a5a5a5a) {
2560     + /* freed inode! */
2561     + PRINT_CALLER();
2562     + printk(" Ci6: inode/linode=%p:%p bindex=%d "
2563     + "istart/end=%d:%d\n", inode,
2564     + lower_inode, bindex, istart, iend);
2565     + }
2566     + } else { /* lower_inode == NULL */
2567     + if (bindex >= istart && bindex <= iend) {
2568     + /*
2569     + * directories can have NULL lower inodes in
2570     + * b/t start/end, but NOT if at the
2571     + * start/end range.
2572     + */
2573     + if (!(S_ISDIR(inode->i_mode) &&
2574     + bindex > istart && bindex < iend)) {
2575     + PRINT_CALLER();
2576     + printk(" Ci7: inode/linode=%p:%p "
2577     + "bindex=%d istart/end=%d:%d\n",
2578     + inode, lower_inode, bindex,
2579     + istart, iend);
2580     + }
2581     + }
2582     + }
2583     + }
2584     +}
2585     +
2586     +void __unionfs_check_dentry(const struct dentry *dentry,
2587     + const char *fname, const char *fxn, int line)
2588     +{
2589     + int bindex;
2590     + int dstart, dend, istart, iend;
2591     + struct dentry *lower_dentry;
2592     + struct inode *inode, *lower_inode;
2593     + struct super_block *sb;
2594     + struct vfsmount *lower_mnt;
2595     + int printed_caller = 0;
2596     +
2597     + BUG_ON(!dentry);
2598     + sb = dentry->d_sb;
2599     + inode = dentry->d_inode;
2600     + dstart = dbstart(dentry);
2601     + dend = dbend(dentry);
2602     + BUG_ON(dstart > dend);
2603     +
2604     + if ((dstart == -1 && dend != -1) ||
2605     + (dstart != -1 && dend == -1)) {
2606     + PRINT_CALLER();
2607     + printk(" CD0: dentry=%p dstart/end=%d:%d\n",
2608     + dentry, dstart, dend);
2609     + }
2610     + /*
2611     + * check for NULL dentries inside the start/end range, or
2612     + * non-NULL dentries outside the start/end range.
2613     + */
2614     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2615     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2616     + if (lower_dentry) {
2617     + if (bindex < dstart || bindex > dend) {
2618     + PRINT_CALLER();
2619     + printk(" CD1: dentry/lower=%p:%p(%p) "
2620     + "bindex=%d dstart/end=%d:%d\n",
2621     + dentry, lower_dentry,
2622     + (lower_dentry ? lower_dentry->d_inode :
2623     + (void *) 0xffffffff),
2624     + bindex, dstart, dend);
2625     + }
2626     + } else { /* lower_dentry == NULL */
2627     + if (bindex >= dstart && bindex <= dend) {
2628     + /*
2629     + * Directories can have NULL lower dentries in
2630     + * b/t start/end, but NOT if at the
2631     + * start/end range. Ignore this rule,
2632     + * however, if this is a NULL dentry or a
2633     + * deleted dentry.
2634     + */
2635     + if (!d_deleted((struct dentry *) dentry) &&
2636     + inode &&
2637     + !(inode && S_ISDIR(inode->i_mode) &&
2638     + bindex > dstart && bindex < dend)) {
2639     + PRINT_CALLER();
2640     + printk(" CD2: dentry/lower=%p:%p(%p) "
2641     + "bindex=%d dstart/end=%d:%d\n",
2642     + dentry, lower_dentry,
2643     + (lower_dentry ?
2644     + lower_dentry->d_inode :
2645     + (void *) 0xffffffff),
2646     + bindex, dstart, dend);
2647     + }
2648     + }
2649     + }
2650     + }
2651     +
2652     + /* check for vfsmounts same as for dentries */
2653     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2654     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2655     + if (lower_mnt) {
2656     + if (bindex < dstart || bindex > dend) {
2657     + PRINT_CALLER();
2658     + printk(" CM0: dentry/lmnt=%p:%p bindex=%d "
2659     + "dstart/end=%d:%d\n", dentry,
2660     + lower_mnt, bindex, dstart, dend);
2661     + }
2662     + } else { /* lower_mnt == NULL */
2663     + if (bindex >= dstart && bindex <= dend) {
2664     + /*
2665     + * Directories can have NULL lower vfsmounts in
2666     + * b/t start/end, but NOT if at the
2667     + * start/end range. Ignore this rule,
2668     + * however, if this is a NULL dentry.
2669     + */
2670     + if (inode &&
2671     + !(inode && S_ISDIR(inode->i_mode) &&
2672     + bindex > dstart && bindex < dend)) {
2673     + PRINT_CALLER();
2674     + printk(" CM1: dentry/lmnt=%p:%p "
2675     + "bindex=%d dstart/end=%d:%d\n",
2676     + dentry, lower_mnt, bindex,
2677     + dstart, dend);
2678     + }
2679     + }
2680     + }
2681     + }
2682     +
2683     + /* for inodes now */
2684     + if (!inode)
2685     + return;
2686     + istart = ibstart(inode);
2687     + iend = ibend(inode);
2688     + BUG_ON(istart > iend);
2689     + if ((istart == -1 && iend != -1) ||
2690     + (istart != -1 && iend == -1)) {
2691     + PRINT_CALLER();
2692     + printk(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2693     + dentry, inode, istart, iend);
2694     + }
2695     + if (istart != dstart) {
2696     + PRINT_CALLER();
2697     + printk(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2698     + dentry, inode, istart, dstart);
2699     + }
2700     + if (iend != dend) {
2701     + PRINT_CALLER();
2702     + printk(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2703     + dentry, inode, iend, dend);
2704     + }
2705     +
2706     + if (!S_ISDIR(inode->i_mode)) {
2707     + if (dend != dstart) {
2708     + PRINT_CALLER();
2709     + printk(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2710     + dentry, inode, dstart, dend);
2711     + }
2712     + if (iend != istart) {
2713     + PRINT_CALLER();
2714     + printk(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2715     + dentry, inode, istart, iend);
2716     + }
2717     + }
2718     +
2719     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2720     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2721     + if (lower_inode) {
2722     + if (bindex < istart || bindex > iend) {
2723     + PRINT_CALLER();
2724     + printk(" CI5: dentry/linode=%p:%p bindex=%d "
2725     + "istart/end=%d:%d\n", dentry,
2726     + lower_inode, bindex, istart, iend);
2727     + } else if ((int)lower_inode == 0x5a5a5a5a) {
2728     + /* freed inode! */
2729     + PRINT_CALLER();
2730     + printk(" CI6: dentry/linode=%p:%p bindex=%d "
2731     + "istart/end=%d:%d\n", dentry,
2732     + lower_inode, bindex, istart, iend);
2733     + }
2734     + } else { /* lower_inode == NULL */
2735     + if (bindex >= istart && bindex <= iend) {
2736     + /*
2737     + * directories can have NULL lower inodes in
2738     + * b/t start/end, but NOT if at the
2739     + * start/end range.
2740     + */
2741     + if (!(S_ISDIR(inode->i_mode) &&
2742     + bindex > istart && bindex < iend)) {
2743     + PRINT_CALLER();
2744     + printk(" CI7: dentry/linode=%p:%p "
2745     + "bindex=%d istart/end=%d:%d\n",
2746     + dentry, lower_inode, bindex,
2747     + istart, iend);
2748     + }
2749     + }
2750     + }
2751     + }
2752     +
2753     + /*
2754     + * If it's a directory, then intermediate objects b/t start/end can
2755     + * be NULL. But, check that all three are NULL: lower dentry, mnt,
2756     + * and inode.
2757     + */
2758     + if (S_ISDIR(inode->i_mode))
2759     + for (bindex = dstart+1; bindex < dend; bindex++) {
2760     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2761     + lower_dentry = unionfs_lower_dentry_idx(dentry,
2762     + bindex);
2763     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2764     + if (!((lower_inode && lower_dentry && lower_mnt) ||
2765     + (!lower_inode && !lower_dentry && !lower_mnt))) {
2766     + PRINT_CALLER();
2767     + printk(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2768     + "bindex=%d dstart/end=%d:%d\n",
2769     + lower_mnt, lower_dentry, lower_inode,
2770     + bindex, dstart, dend);
2771     + }
2772     + }
2773     + /* check if lower inode is newer than upper one (it shouldn't) */
2774     + if (is_newer_lower(dentry)) {
2775     + PRINT_CALLER();
2776     + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2777     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2778     + if (!lower_inode)
2779     + continue;
2780     + printk(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2781     + "ctime/lctime=%lu.%lu/%lu.%lu\n",
2782     + bindex,
2783     + inode->i_mtime.tv_sec,
2784     + inode->i_mtime.tv_nsec,
2785     + lower_inode->i_mtime.tv_sec,
2786     + lower_inode->i_mtime.tv_nsec,
2787     + inode->i_ctime.tv_sec,
2788     + inode->i_ctime.tv_nsec,
2789     + lower_inode->i_ctime.tv_sec,
2790     + lower_inode->i_ctime.tv_nsec);
2791     + }
2792     + }
2793     +}
2794     +
2795     +void __unionfs_check_file(const struct file *file,
2796     + const char *fname, const char *fxn, int line)
2797     +{
2798     + int bindex;
2799     + int dstart, dend, fstart, fend;
2800     + struct dentry *dentry;
2801     + struct file *lower_file;
2802     + struct inode *inode;
2803     + struct super_block *sb;
2804     + int printed_caller = 0;
2805     +
2806     + BUG_ON(!file);
2807     + dentry = file->f_path.dentry;
2808     + sb = dentry->d_sb;
2809     + dstart = dbstart(dentry);
2810     + dend = dbend(dentry);
2811     + BUG_ON(dstart > dend);
2812     + fstart = fbstart(file);
2813     + fend = fbend(file);
2814     + BUG_ON(fstart > fend);
2815     +
2816     + if ((fstart == -1 && fend != -1) ||
2817     + (fstart != -1 && fend == -1)) {
2818     + PRINT_CALLER();
2819     + printk(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2820     + file, dentry, fstart, fend);
2821     + }
2822     + if (fstart != dstart) {
2823     + PRINT_CALLER();
2824     + printk(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2825     + file, dentry, fstart, dstart);
2826     + }
2827     + if (fend != dend) {
2828     + PRINT_CALLER();
2829     + printk(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2830     + file, dentry, fend, dend);
2831     + }
2832     + inode = dentry->d_inode;
2833     + if (!S_ISDIR(inode->i_mode)) {
2834     + if (fend != fstart) {
2835     + PRINT_CALLER();
2836     + printk(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2837     + file, inode, fstart, fend);
2838     + }
2839     + if (dend != dstart) {
2840     + PRINT_CALLER();
2841     + printk(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
2842     + file, dentry, dstart, dend);
2843     + }
2844     + }
2845     +
2846     + /*
2847     + * check for NULL lower files inside the start/end range, or
2848     + * non-NULL lower files outside the start/end range.
2849     + */
2850     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2851     + lower_file = unionfs_lower_file_idx(file, bindex);
2852     + if (lower_file) {
2853     + if (bindex < fstart || bindex > fend) {
2854     + PRINT_CALLER();
2855     + printk(" CF5: file/lower=%p:%p bindex=%d "
2856     + "fstart/end=%d:%d\n",
2857     + file, lower_file, bindex, fstart, fend);
2858     + }
2859     + } else { /* lower_file == NULL */
2860     + if (bindex >= fstart && bindex <= fend) {
2861     + /*
2862     + * directories can have NULL lower files
2863     + * strictly b/t fstart and fend, but NOT at
2864     + * fstart or fend themselves.
2865     + */
2866     + if (!(S_ISDIR(inode->i_mode) &&
2867     + bindex > fstart && bindex < fend)) {
2868     + PRINT_CALLER();
2869     + printk(" CF6: file/lower=%p:%p "
2870     + "bindex=%d fstart/end=%d:%d\n",
2871     + file, lower_file, bindex,
2872     + fstart, fend);
2873     + }
2874     + }
2875     + }
2876     + }
2877     +
2878     + __unionfs_check_dentry(dentry,fname,fxn,line);
2879     +}
2880     +
2881     +/* useful to track vfsmount leaks that could cause EBUSY on unmount */
2882     +void __show_branch_counts(const struct super_block *sb,
2883     + const char *file, const char *fxn, int line)
2884     +{
2885     + int i;
2886     + struct vfsmount *mnt;
2887     +
2888     + printk("BC:");
2889     + for (i=0; i<sbmax(sb); i++) {
2890     + if (sb->s_root)
2891     + mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
2892     + else
2893     + mnt = NULL;
2894     + printk("%d:", (mnt ? atomic_read(&mnt->mnt_count) : -99));
2895     + }
2896     + printk("%s:%s:%d\n",file,fxn,line);
2897     +}
2898     +
2899     +void __show_inode_times(const struct inode *inode,
2900     + const char *file, const char *fxn, int line)
2901     +{
2902     + struct inode *lower_inode;
2903     + int bindex;
2904     +
2905     + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2906     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2907     + if (!lower_inode)
2908     + continue;
2909     + printk("IT(%lu:%d): ", inode->i_ino, bindex);
2910     + printk("%s:%s:%d ",file,fxn,line);
2911     + printk("um=%lu/%lu lm=%lu/%lu ",
2912     + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2913     + lower_inode->i_mtime.tv_sec,
2914     + lower_inode->i_mtime.tv_nsec);
2915     + printk("uc=%lu/%lu lc=%lu/%lu\n",
2916     + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2917     + lower_inode->i_ctime.tv_sec,
2918     + lower_inode->i_ctime.tv_nsec);
2919     + }
2920     +}
2921     +
2922     +void __show_dinode_times(const struct dentry *dentry,
2923     + const char *file, const char *fxn, int line)
2924     +{
2925     + struct inode *inode = dentry->d_inode;
2926     + struct inode *lower_inode;
2927     + int bindex;
2928     +
2929     + for (bindex=ibstart(inode); bindex <= ibend(inode); bindex++) {
2930     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2931     + if (!lower_inode)
2932     + continue;
2933     + printk("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino, bindex);
2934     + printk("%s:%s:%d ",file,fxn,line);
2935     + printk("um=%lu/%lu lm=%lu/%lu ",
2936     + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2937     + lower_inode->i_mtime.tv_sec,
2938     + lower_inode->i_mtime.tv_nsec);
2939     + printk("uc=%lu/%lu lc=%lu/%lu\n",
2940     + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2941     + lower_inode->i_ctime.tv_sec,
2942     + lower_inode->i_ctime.tv_nsec);
2943     + }
2944     +}
2945     +
2946     +void __show_inode_counts(const struct inode *inode,
2947     + const char *file, const char *fxn, int line)
2948     +{
2949     + struct inode *lower_inode;
2950     + int bindex;
2951     +
2952     + if (!inode) {
2953     + printk("SiC: Null inode\n");
2954     + return;
2955     + }
2956     + for (bindex=sbstart(inode->i_sb); bindex <= sbend(inode->i_sb); bindex++) {
2957     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2958     + if (!lower_inode)
2959     + continue;
2960     + printk("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
2961     + atomic_read(&(inode)->i_count));
2962     + printk("lc=%d ", atomic_read(&(lower_inode)->i_count));
2963     + printk("%s:%s:%d\n",file,fxn,line);
2964     + }
2965     +}
2966     diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
2967     new file mode 100644
2968     index 0000000..f3c1258
2969     --- /dev/null
2970     +++ b/fs/unionfs/dentry.c
2971     @@ -0,0 +1,480 @@
2972     +/*
2973     + * Copyright (c) 2003-2007 Erez Zadok
2974     + * Copyright (c) 2003-2006 Charles P. Wright
2975     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2976     + * Copyright (c) 2005-2006 Junjiro Okajima
2977     + * Copyright (c) 2005 Arun M. Krishnakumar
2978     + * Copyright (c) 2004-2006 David P. Quigley
2979     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
2980     + * Copyright (c) 2003 Puja Gupta
2981     + * Copyright (c) 2003 Harikesavan Krishnan
2982     + * Copyright (c) 2003-2007 Stony Brook University
2983     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2984     + *
2985     + * This program is free software; you can redistribute it and/or modify
2986     + * it under the terms of the GNU General Public License version 2 as
2987     + * published by the Free Software Foundation.
2988     + */
2989     +
2990     +#include "union.h"
2991     +
2992     +/*
2993     + * Revalidate a single dentry.
2994     + * Assume that dentry's info node is locked.
2995     + * Assume that parent(s) are all valid already, but
2996     + * the child may not yet be valid.
2997     + * Returns 1 if valid, 0 otherwise.
2998     + */
2999     +static int __unionfs_d_revalidate_one(struct dentry *dentry,
3000     + struct nameidata *nd)
3001     +{
3002     + int valid = 1; /* default is valid (1); invalid is 0. */
3003     + struct dentry *lower_dentry;
3004     + int bindex, bstart, bend;
3005     + int sbgen, dgen;
3006     + int positive = 0;
3007     + int locked = 0;
3008     + int interpose_flag;
3009     + struct nameidata lowernd; /* TODO: be gentler to the stack */
3010     +
3011     + if (nd)
3012     + memcpy(&lowernd, nd, sizeof(struct nameidata));
3013     + else
3014     + memset(&lowernd, 0, sizeof(struct nameidata));
3015     +
3016     + verify_locked(dentry);
3017     +
3018     + /* if the dentry is unhashed, do NOT revalidate */
3019     + if (d_deleted(dentry)) {
3020     + printk(KERN_DEBUG "unionfs: unhashed dentry being "
3021     + "revalidated: %*s\n",
3022     + dentry->d_name.len, dentry->d_name.name);
3023     + goto out;
3024     + }
3025     +
3026     + BUG_ON(dbstart(dentry) == -1);
3027     + if (dentry->d_inode)
3028     + positive = 1;
3029     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3030     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3031     + /*
3032     + * If we are working on an unconnected dentry, then there is no
3033     + * revalidation to be done, because this file does not exist within
3034     + * the namespace, and Unionfs operates on the namespace, not data.
3035     + */
3036     + if (sbgen != dgen) {
3037     + struct dentry *result;
3038     + int pdgen;
3039     +
3040     + /* The root entry should always be valid */
3041     + BUG_ON(IS_ROOT(dentry));
3042     +
3043     + /* We can't work correctly if our parent isn't valid. */
3044     + pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
3045     + BUG_ON(pdgen != sbgen); /* should never happen here */
3046     +
3047     + /* Free the pointers for our inodes and this dentry. */
3048     + bstart = dbstart(dentry);
3049     + bend = dbend(dentry);
3050     + if (bstart >= 0) {
3051     + struct dentry *lower_dentry;
3052     + for (bindex = bstart; bindex <= bend; bindex++) {
3053     + lower_dentry =
3054     + unionfs_lower_dentry_idx(dentry,
3055     + bindex);
3056     + dput(lower_dentry);
3057     + }
3058     + }
3059     + set_dbstart(dentry, -1);
3060     + set_dbend(dentry, -1);
3061     +
3062     + interpose_flag = INTERPOSE_REVAL_NEG;
3063     + if (positive) {
3064     + interpose_flag = INTERPOSE_REVAL;
3065     + /*
3066     + * During BRM, the VFS could already hold a lock on
3067     + * a file being read, so don't lock it again
3068     + * (deadlock), but if you lock it in this function,
3069     + * then release it here too.
3070     + */
3071     + if (!mutex_is_locked(&dentry->d_inode->i_mutex)) {
3072     + mutex_lock(&dentry->d_inode->i_mutex);
3073     + locked = 1;
3074     + }
3075     +
3076     + bstart = ibstart(dentry->d_inode);
3077     + bend = ibend(dentry->d_inode);
3078     + if (bstart >= 0) {
3079     + struct inode *lower_inode;
3080     + for (bindex = bstart; bindex <= bend;
3081     + bindex++) {
3082     + lower_inode =
3083     + unionfs_lower_inode_idx(
3084     + dentry->d_inode,
3085     + bindex);
3086     + iput(lower_inode);
3087     + }
3088     + }
3089     + kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
3090     + UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
3091     + ibstart(dentry->d_inode) = -1;
3092     + ibend(dentry->d_inode) = -1;
3093     + if (locked)
3094     + mutex_unlock(&dentry->d_inode->i_mutex);
3095     + }
3096     +
3097     + result = unionfs_lookup_backend(dentry, &lowernd,
3098     + interpose_flag);
3099     + if (result) {
3100     + if (IS_ERR(result)) {
3101     + valid = 0;
3102     + goto out;
3103     + }
3104     + /*
3105     + * current unionfs_lookup_backend() doesn't return
3106     + * a valid dentry
3107     + */
3108     + dput(dentry);
3109     + dentry = result;
3110     + }
3111     +
3112     + if (positive && UNIONFS_I(dentry->d_inode)->stale) {
3113     + make_bad_inode(dentry->d_inode);
3114     + d_drop(dentry);
3115     + valid = 0;
3116     + goto out;
3117     + }
3118     + goto out;
3119     + }
3120     +
3121     + /* The revalidation must occur across all branches */
3122     + bstart = dbstart(dentry);
3123     + bend = dbend(dentry);
3124     + BUG_ON(bstart == -1);
3125     + for (bindex = bstart; bindex <= bend; bindex++) {
3126     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3127     + if (!lower_dentry || !lower_dentry->d_op
3128     + || !lower_dentry->d_op->d_revalidate)
3129     + continue;
3130     + if (!lower_dentry->d_op->d_revalidate(lower_dentry,
3131     + &lowernd))
3132     + valid = 0;
3133     + }
3134     +
3135     + if (!dentry->d_inode)
3136     + valid = 0;
3137     +
3138     + if (valid) {
3139     + /*
3140     + * If we get here, and we copy the meta-data from the lower
3141     + * inode to our inode, then it is vital that we have already
3142     + * purged all unionfs-level file data. We do that in the
3143     + * caller (__unionfs_d_revalidate_chain) by calling
3144     + * purge_inode_data.
3145     + */
3146     + unionfs_copy_attr_all(dentry->d_inode,
3147     + unionfs_lower_inode(dentry->d_inode));
3148     + fsstack_copy_inode_size(dentry->d_inode,
3149     + unionfs_lower_inode(dentry->d_inode));
3150     + }
3151     +
3152     +out:
3153     + return valid;
3154     +}
3155     +
3156     +/*
3157     + * Determine if the lower inode objects have changed from below the unionfs
3158     + * inode. Return 1 if changed, 0 otherwise.
3159     + */
3160     +int is_newer_lower(const struct dentry *dentry)
3161     +{
3162     + int bindex;
3163     + struct inode *inode;
3164     + struct inode *lower_inode;
3165     +
3166     + /* ignore if we're called on semi-initialized dentries/inodes */
3167     + if (!dentry || !UNIONFS_D(dentry))
3168     + return 0;
3169     + inode = dentry->d_inode;
3170     + if (!inode || !UNIONFS_I(inode) ||
3171     + ibstart(inode) < 0 || ibend(inode) < 0)
3172     + return 0;
3173     +
3174     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3175     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
3176     + if (!lower_inode)
3177     + continue;
3178     + /*
3179     + * We may want to apply other tests to determine if the
3180     + * lower inode's data has changed, but checking for changed
3181     + * ctime and mtime on the lower inode should be enough.
3182     + */
3183     + if (timespec_compare(&inode->i_mtime,
3184     + &lower_inode->i_mtime) < 0) {
3185     + printk("unionfs: new lower inode mtime "
3186     + "(bindex=%d, name=%s)\n", bindex,
3187     + dentry->d_name.name);
3188     + show_dinode_times(dentry);
3189     + return 1; /* mtime changed! */
3190     + }
3191     + if (timespec_compare(&inode->i_ctime,
3192     + &lower_inode->i_ctime) < 0) {
3193     + printk("unionfs: new lower inode ctime "
3194     + "(bindex=%d, name=%s)\n", bindex,
3195     + dentry->d_name.name);
3196     + show_dinode_times(dentry);
3197     + return 1; /* ctime changed! */
3198     + }
3199     + }
3200     + return 0; /* default: lower is not newer */
3201     +}
3202     +
3203     +/*
3204     + * Purge/remove/unmap all data pages of a unionfs inode. This is called
3205     + * when the lower inode has changed, and we have to force processes to get
3206     + * the new data.
3207     + *
3208     + * XXX: Our implementation works in that as long as a user process will have
3209     + * caused Unionfs to be called, directly or indirectly, even to just do
3210     + * ->d_revalidate; then we will have purged the current Unionfs data and the
3211     + * process will see the new data. For example, a process that continually
3212     + * re-reads the same file's data will see the NEW data as soon as the lower
3213     + * file had changed, upon the next read(2) syscall (even if the file is
3214     + * still open!) However, this doesn't work when the process re-reads the
3215     + * open file's data via mmap(2) (unless the user unmaps/closes the file and
3216     + * remaps/reopens it). Once we respond to ->readpage(s), then the kernel
3217     + * maps the page into the process's address space and there doesn't appear
3218     + * to be a way to force the kernel to invalidate those pages/mappings, and
3219     + * force the process to re-issue ->readpage. If there's a way to invalidate
3220     + * active mappings and force a ->readpage, let us know please
3221     + * (invalidate_inode_pages2 doesn't do the trick).
3222     + */
3223     +static inline void purge_inode_data(struct dentry *dentry)
3224     +{
3225     + /* remove all non-private mappings */
3226     + unmap_mapping_range(dentry->d_inode->i_mapping, 0, 0, 0);
3227     +
3228     + if (dentry->d_inode->i_data.nrpages)
3229     + truncate_inode_pages(&dentry->d_inode->i_data, 0);
3230     +}
3231     +
3232     +/*
3233     + * Revalidate a parent chain of dentries, then the actual node.
3234     + * Assumes that dentry is locked, but will lock all parents if/when needed.
3235     + *
3236     + * If 'willwrite' is 1, and the lower inode times are not in sync, then
3237     + * *don't* purge_inode_data, as it could deadlock if ->write calls us and we
3238     + * try to truncate a locked page. Besides, if unionfs is about to write
3239     + * data to a file, then the data unionfs is about to write is more
3240     + * authoritative than what's below, therefore we can safely overwrite the
3241     + * lower inode times and data.
3242     + */
3243     +int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
3244     + int willwrite)
3245     +{
3246     + int valid = 0; /* default is invalid (0); valid is 1. */
3247     + struct dentry **chain = NULL; /* chain of dentries to reval */
3248     + int chain_len = 0;
3249     + struct dentry *dtmp;
3250     + int sbgen, dgen, i;
3251     + int saved_bstart, saved_bend, bindex;
3252     +
3253     + /* find length of chain needed to revalidate */
3254     + /* XXX: should I grab some global (dcache?) lock? */
3255     + chain_len = 0;
3256     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3257     + dtmp = dentry->d_parent;
3258     + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3259     + /* XXX: should we check if is_newer_lower all the way up? */
3260     + if (is_newer_lower(dtmp)) {
3261     + /*
3262     + * Special case: the root dentry's generation number must
3263     + * always be valid, but its lower inode times don't have to
3264     + * be, so sync up the times only.
3265     + */
3266     + if (IS_ROOT(dtmp))
3267     + unionfs_copy_attr_times(dtmp->d_inode);
3268     + else {
3269     + /*
3270     + * reset generation number to zero, guaranteed to be
3271     + * "old"
3272     + */
3273     + dgen = 0;
3274     + atomic_set(&UNIONFS_D(dtmp)->generation, dgen);
3275     + }
3276     + purge_inode_data(dtmp);
3277     + }
3278     + while (sbgen != dgen) {
3279     + /* The root entry should always be valid */
3280     + BUG_ON(IS_ROOT(dtmp));
3281     + chain_len++;
3282     + dtmp = dtmp->d_parent;
3283     + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3284     + }
3285     + if (chain_len == 0)
3286     + goto out_this; /* shortcut if parents are OK */
3287     +
3288     + /*
3289     + * Allocate array of dentries to reval. We could use linked lists,
3290     + * but the number of entries we need to alloc here is often small,
3291     + * and short lived, so locality will be better.
3292     + */
3293     + chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
3294     + if (!chain) {
3295     + printk("unionfs: no more memory in %s\n", __FUNCTION__);
3296     + goto out;
3297     + }
3298     +
3299     + /*
3300     + * take a reference (dget) on each dentry in the chain, walking from
3301     + * child to parent; each one is locked individually in the loop below.
3302     + */
3303     + dtmp = dentry->d_parent;
3304     + for (i=chain_len-1; i>=0; i--) {
3305     + chain[i] = dget(dtmp);
3306     + dtmp = dtmp->d_parent;
3307     + }
3308     +
3309     + /*
3310     + * call __unionfs_d_revalidate_one() on each dentry, but in parent
3311     + * to child order.
3312     + */
3313     + for (i=0; i<chain_len; i++) {
3314     + unionfs_lock_dentry(chain[i]);
3315     + saved_bstart = dbstart(chain[i]);
3316     + saved_bend = dbend(chain[i]);
3317     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3318     + dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
3319     +
3320     + valid = __unionfs_d_revalidate_one(chain[i], nd);
3321     + /* XXX: is this the correct mntput condition?! */
3322     + if (valid && chain_len > 0 &&
3323     + sbgen != dgen && chain[i]->d_inode &&
3324     + S_ISDIR(chain[i]->d_inode->i_mode)) {
3325     + for (bindex = saved_bstart; bindex <= saved_bend;
3326     + bindex++)
3327     + unionfs_mntput(chain[i], bindex);
3328     + }
3329     + unionfs_unlock_dentry(chain[i]);
3330     +
3331     + if (!valid)
3332     + goto out_free;
3333     + }
3334     +
3335     +
3336     +out_this:
3337     + /* finally, revalidate this dentry, which the caller has locked */
3338     + verify_locked(dentry);
3339     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3340     + if (is_newer_lower(dentry)) {
3341     + /* root dentry special case as aforementioned */
3342     + if (IS_ROOT(dentry))
3343     + unionfs_copy_attr_times(dentry->d_inode);
3344     + else {
3345     + /*
3346     + * reset generation number to zero, guaranteed to be
3347     + * "old"
3348     + */
3349     + dgen = 0;
3350     + atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3351     + }
3352     + if (!willwrite)
3353     + purge_inode_data(dentry);
3354     + }
3355     + valid = __unionfs_d_revalidate_one(dentry, nd);
3356     +
3357     + /*
3358     + * If __unionfs_d_revalidate_one() succeeded above, then it will
3359     + * have incremented the refcnt of the mnt's, but also the branch
3360     + * indices of the dentry will have been updated (to take into
3361     + * account any branch insertions/deletions). So the current
3362     + * dbstart/dbend match the current, and new, indices of the mnts
3363     + * which __unionfs_d_revalidate_one has incremented. Note: the "if"
3364     + * test below does not depend on whether chain_len was 0 or greater.
3365     + */
3366     + if (valid && sbgen != dgen)
3367     + for (bindex = dbstart(dentry);
3368     + bindex <= dbend(dentry);
3369     + bindex++)
3370     + unionfs_mntput(dentry, bindex);
3371     +
3372     +out_free:
3373     + /* dput all dentries in chain, free the chain, and return status */
3374     + if (chain_len > 0) {
3375     + for (i=0; i<chain_len; i++)
3376     + dput(chain[i]);
3377     + kfree(chain);
3378     + }
3379     +out:
3380     + return valid;
3381     +}
3382     +
3383     +static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
3384     +{
3385     + int err;
3386     +
3387     + unionfs_read_lock(dentry->d_sb);
3388     +
3389     + unionfs_lock_dentry(dentry);
3390     + err = __unionfs_d_revalidate_chain(dentry, nd, 0);
3391     + unionfs_unlock_dentry(dentry);
3392     + unionfs_check_dentry(dentry);
3393     +
3394     + unionfs_read_unlock(dentry->d_sb);
3395     +
3396     + return err;
3397     +}
3398     +
3399     +/*
3400     + * At this point no one can reference this dentry, so we don't have to be
3401     + * careful about concurrent access.
3402     + */
3403     +static void unionfs_d_release(struct dentry *dentry)
3404     +{
3405     + int bindex, bstart, bend;
3406     +
3407     + unionfs_read_lock(dentry->d_sb);
3408     +
3409     + unionfs_check_dentry(dentry);
3410     + /* this could be a negative dentry, so check first */
3411     + if (!UNIONFS_D(dentry)) {
3412     + printk(KERN_DEBUG "unionfs: dentry without private data: %.*s\n",
3413     + dentry->d_name.len, dentry->d_name.name);
3414     + goto out;
3415     + } else if (dbstart(dentry) < 0) {
3416     + /* this is due to a failed lookup */
3417     + printk(KERN_DEBUG "unionfs: dentry without lower "
3418     + "dentries: %.*s\n",
3419     + dentry->d_name.len, dentry->d_name.name);
3420     + goto out_free;
3421     + }
3422     +
3423     + /* Release all the lower dentries */
3424     + bstart = dbstart(dentry);
3425     + bend = dbend(dentry);
3426     + for (bindex = bstart; bindex <= bend; bindex++) {
3427     + dput(unionfs_lower_dentry_idx(dentry, bindex));
3428     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3429     + /* NULL lower mnt is ok if this is a negative dentry */
3430     + if (!dentry->d_inode && !unionfs_lower_mnt_idx(dentry,bindex))
3431     + continue;
3432     + unionfs_mntput(dentry, bindex);
3433     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3434     + }
3435     + /* free the lower_paths array; the rest of the private data goes below */
3436     + kfree(UNIONFS_D(dentry)->lower_paths);
3437     + UNIONFS_D(dentry)->lower_paths = NULL;
3438     +
3439     +out_free:
3440     + /* No need to unlock the dentry, because it is going away. */
3441     + free_dentry_private_data(dentry);
3442     +
3443     +out:
3444     + unionfs_read_unlock(dentry->d_sb);
3445     + return;
3446     +}
3447     +
3448     +struct dentry_operations unionfs_dops = {
3449     + .d_revalidate = unionfs_d_revalidate,
3450     + .d_release = unionfs_d_release,
3451     +};
3452     diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3453     new file mode 100644
3454     index 0000000..980f125
3455     --- /dev/null
3456     +++ b/fs/unionfs/dirfops.c
3457     @@ -0,0 +1,278 @@
3458     +/*
3459     + * Copyright (c) 2003-2007 Erez Zadok
3460     + * Copyright (c) 2003-2006 Charles P. Wright
3461     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3462     + * Copyright (c) 2005-2006 Junjiro Okajima
3463     + * Copyright (c) 2005 Arun M. Krishnakumar
3464     + * Copyright (c) 2004-2006 David P. Quigley
3465     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3466     + * Copyright (c) 2003 Puja Gupta
3467     + * Copyright (c) 2003 Harikesavan Krishnan
3468     + * Copyright (c) 2003-2007 Stony Brook University
3469     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3470     + *
3471     + * This program is free software; you can redistribute it and/or modify
3472     + * it under the terms of the GNU General Public License version 2 as
3473     + * published by the Free Software Foundation.
3474     + */
3475     +
3476     +#include "union.h"
3477     +
3478     +/* Make sure our rdstate is playing by the rules. */
3479     +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3480     +{
3481     + BUG_ON(rdstate->offset >= DIREOF);
3482     + BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3483     +}
3484     +
3485     +struct unionfs_getdents_callback {
3486     + struct unionfs_dir_state *rdstate;
3487     + void *dirent;
3488     + int entries_written;
3489     + int filldir_called;
3490     + int filldir_error;
3491     + filldir_t filldir;
3492     + struct super_block *sb;
3493     +};
3494     +
3495     +/* based on generic filldir in fs/readdir.c */
3496     +static int unionfs_filldir(void *dirent, const char *name, int namelen,
3497     + loff_t offset, u64 ino, unsigned int d_type)
3498     +{
3499     + struct unionfs_getdents_callback *buf = dirent;
3500     + struct filldir_node *found = NULL;
3501     + int err = 0;
3502     + int is_wh_entry = 0;
3503     +
3504     + buf->filldir_called++;
3505     +
3506     + if ((namelen > UNIONFS_WHLEN) &&
3507     + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3508     + name += UNIONFS_WHLEN;
3509     + namelen -= UNIONFS_WHLEN;
3510     + is_wh_entry = 1;
3511     + }
3512     +
3513     + found = find_filldir_node(buf->rdstate, name, namelen);
3514     +
3515     + if (found)
3516     + goto out;
3517     +
3518     + /* if 'name' isn't a whiteout, filldir it. */
3519     + if (!is_wh_entry) {
3520     + off_t pos = rdstate2offset(buf->rdstate);
3521     + u64 unionfs_ino = ino;
3522     +
3523     + if (!err) {
3524     + err = buf->filldir(buf->dirent, name, namelen, pos,
3525     + unionfs_ino, d_type);
3526     + buf->rdstate->offset++;
3527     + verify_rdstate_offset(buf->rdstate);
3528     + }
3529     + }
3530     + /*
3531     + * If we did fill it, stuff it in our hash, otherwise return an
3532     + * error.
3533     + */
3534     + if (err) {
3535     + buf->filldir_error = err;
3536     + goto out;
3537     + }
3538     + buf->entries_written++;
3539     + if ((err = add_filldir_node(buf->rdstate, name, namelen,
3540     + buf->rdstate->bindex, is_wh_entry)))
3541     + buf->filldir_error = err;
3542     +
3543     +out:
3544     + return err;
3545     +}
3546     +
3547     +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3548     +{
3549     + int err = 0;
3550     + struct file *lower_file = NULL;
3551     + struct inode *inode = NULL;
3552     + struct unionfs_getdents_callback buf = { .filldir_error = 0 };
3553     + struct unionfs_dir_state *uds;
3554     + int bend;
3555     + loff_t offset;
3556     +
3557     + unionfs_read_lock(file->f_path.dentry->d_sb);
3558     +
3559     + if ((err = unionfs_file_revalidate(file, 0)))
3560     + goto out;
3561     +
3562     + inode = file->f_path.dentry->d_inode;
3563     +
3564     + uds = UNIONFS_F(file)->rdstate;
3565     + if (!uds) {
3566     + if (file->f_pos == DIREOF) {
3567     + goto out;
3568     + } else if (file->f_pos > 0) {
3569     + uds = find_rdstate(inode, file->f_pos);
3570     + if (!uds) {
3571     + err = -ESTALE;
3572     + goto out;
3573     + }
3574     + UNIONFS_F(file)->rdstate = uds;
3575     + } else {
3576     + init_rdstate(file);
3577     + uds = UNIONFS_F(file)->rdstate;
3578     + }
3579     + }
3580     + bend = fbend(file);
3581     +
3582     + while (uds->bindex <= bend) {
3583     + lower_file = unionfs_lower_file_idx(file, uds->bindex);
3584     + if (!lower_file) {
3585     + uds->bindex++;
3586     + uds->dirpos = 0;
3587     + continue;
3588     + }
3589     +
3590     + /* prepare callback buffer */
3591     + buf.filldir_called = 0;
3592     + buf.filldir_error = 0;
3593     + buf.entries_written = 0;
3594     + buf.dirent = dirent;
3595     + buf.filldir = filldir;
3596     + buf.rdstate = uds;
3597     + buf.sb = inode->i_sb;
3598     +
3599     + /* Read starting from where we last left off. */
3600     + offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3601     + if (offset < 0) {
3602     + err = offset;
3603     + goto out;
3604     + }
3605     + err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3606     +
3607     + /* Save the position for when we continue. */
3608     + offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3609     + if (offset < 0) {
3610     + err = offset;
3611     + goto out;
3612     + }
3613     + uds->dirpos = offset;
3614     +
3615     + /* Copy the atime. */
3616     + fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode);
3617     +
3618     + if (err < 0)
3619     + goto out;
3620     +
3621     + if (buf.filldir_error)
3622     + break;
3623     +
3624     + if (!buf.entries_written) {
3625     + uds->bindex++;
3626     + uds->dirpos = 0;
3627     + }
3628     + }
3629     +
3630     + if (!buf.filldir_error && uds->bindex >= bend) {
3631     + /* Save the number of hash entries for next time. */
3632     + UNIONFS_I(inode)->hashsize = uds->hashentries;
3633     + free_rdstate(uds);
3634     + UNIONFS_F(file)->rdstate = NULL;
3635     + file->f_pos = DIREOF;
3636     + } else
3637     + file->f_pos = rdstate2offset(uds);
3638     +
3639     +out:
3640     + unionfs_read_unlock(file->f_path.dentry->d_sb);
3641     + return err;
3642     +}
3643     +
3644     +/*
3645     + * This is not meant to be a generic repositioning function. If you do
3646     + * things that aren't supported, then we return EINVAL.
3647     + *
3648     + * What is allowed:
3649     + * (1) seeking to the same position that you are currently at
3650     + * This really has no effect, but returns where you are.
3651     + * (2) seeking to the beginning of the file
3652     + * This throws out all state, and lets you begin again.
3653     + */
3654     +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3655     +{
3656     + struct unionfs_dir_state *rdstate;
3657     + loff_t err;
3658     +
3659     + unionfs_read_lock(file->f_path.dentry->d_sb);
3660     +
3661     + if ((err = unionfs_file_revalidate(file, 0)))
3662     + goto out;
3663     +
3664     + rdstate = UNIONFS_F(file)->rdstate;
3665     +
3666     + /*
3667     + * we let users seek to their current position, but not anywhere
3668     + * else.
3669     + */
3670     + if (!offset) {
3671     + switch (origin) {
3672     + case SEEK_SET:
3673     + if (rdstate) {
3674     + free_rdstate(rdstate);
3675     + UNIONFS_F(file)->rdstate = NULL;
3676     + }
3677     + init_rdstate(file);
3678     + err = 0;
3679     + break;
3680     + case SEEK_CUR:
3681     + err = file->f_pos;
3682     + break;
3683     + case SEEK_END:
3684     + /* Unsupported, because we would break everything. */
3685     + err = -EINVAL;
3686     + break;
3687     + }
3688     + } else {
3689     + switch (origin) {
3690     + case SEEK_SET:
3691     + if (rdstate) {
3692     + if (offset == rdstate2offset(rdstate))
3693     + err = offset;
3694     + else if (file->f_pos == DIREOF)
3695     + err = DIREOF;
3696     + else
3697     + err = -EINVAL;
3698     + } else {
3699     + rdstate = find_rdstate(file->f_path.dentry->d_inode,
3700     + offset);
3701     + if (rdstate) {
3702     + UNIONFS_F(file)->rdstate = rdstate;
3703     + err = rdstate->offset;
3704     + } else
3705     + err = -EINVAL;
3706     + }
3707     + break;
3708     + case SEEK_CUR:
3709     + case SEEK_END:
3710     + /* Unsupported, because we would break everything. */
3711     + err = -EINVAL;
3712     + break;
3713     + }
3714     + }
3715     +
3716     +out:
3717     + unionfs_read_unlock(file->f_path.dentry->d_sb);
3718     + return err;
3719     +}
3720     +
3721     +/*
3722     + * Trimmed directory options, we shouldn't pass everything down since
3723     + * we don't want to operate on partial directories.
3724     + */
3725     +struct file_operations unionfs_dir_fops = {
3726     + .llseek = unionfs_dir_llseek,
3727     + .read = generic_read_dir,
3728     + .readdir = unionfs_readdir,
3729     + .unlocked_ioctl = unionfs_ioctl,
3730     + .open = unionfs_open,
3731     + .release = unionfs_file_release,
3732     + .flush = unionfs_flush,
3733     + .fsync = unionfs_fsync,
3734     + .fasync = unionfs_fasync,
3735     +};
3736     diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3737     new file mode 100644
3738     index 0000000..a72f711
3739     --- /dev/null
3740     +++ b/fs/unionfs/dirhelper.c
3741     @@ -0,0 +1,271 @@
3742     +/*
3743     + * Copyright (c) 2003-2007 Erez Zadok
3744     + * Copyright (c) 2003-2006 Charles P. Wright
3745     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3746     + * Copyright (c) 2005-2006 Junjiro Okajima
3747     + * Copyright (c) 2005 Arun M. Krishnakumar
3748     + * Copyright (c) 2004-2006 David P. Quigley
3749     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3750     + * Copyright (c) 2003 Puja Gupta
3751     + * Copyright (c) 2003 Harikesavan Krishnan
3752     + * Copyright (c) 2003-2007 Stony Brook University
3753     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3754     + *
3755     + * This program is free software; you can redistribute it and/or modify
3756     + * it under the terms of the GNU General Public License version 2 as
3757     + * published by the Free Software Foundation.
3758     + */
3759     +
3760     +#include "union.h"
3761     +
3762     +/*
3763     + * Delete all of the whiteouts in a given directory for rmdir.
3764     + *
3765     + * lower directory inode should be locked
3766     + */
3767     +int do_delete_whiteouts(struct dentry *dentry, int bindex,
3768     + struct unionfs_dir_state *namelist)
3769     +{
3770     + int err = 0;
3771     + struct dentry *lower_dir_dentry = NULL;
3772     + struct dentry *lower_dentry;
3773     + char *name = NULL, *p;
3774     + struct inode *lower_dir;
3775     + int i;
3776     + struct list_head *pos;
3777     + struct filldir_node *cursor;
3778     +
3779     + /* Find out lower parent dentry */
3780     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3781     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3782     + lower_dir = lower_dir_dentry->d_inode;
3783     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3784     +
3785     + err = -ENOMEM;
3786     + name = __getname();
3787     + if (!name)
3788     + goto out;
3789     + strcpy(name, UNIONFS_WHPFX);
3790     + p = name + UNIONFS_WHLEN;
3791     +
3792     + err = 0;
3793     + for (i = 0; !err && i < namelist->size; i++) {
3794     + list_for_each(pos, &namelist->list[i]) {
3795     + cursor =
3796     + list_entry(pos, struct filldir_node,
3797     + file_list);
3798     + /* Only operate on whiteouts in this branch. */
3799     + if (cursor->bindex != bindex)
3800     + continue;
3801     + if (!cursor->whiteout)
3802     + continue;
3803     +
3804     + strcpy(p, cursor->name);
3805     + lower_dentry =
3806     + lookup_one_len(name, lower_dir_dentry,
3807     + cursor->namelen +
3808     + UNIONFS_WHLEN);
3809     + if (IS_ERR(lower_dentry)) {
3810     + err = PTR_ERR(lower_dentry);
3811     + break;
3812     + }
3813     + if (lower_dentry->d_inode)
3814     + err = vfs_unlink(lower_dir, lower_dentry);
3815     + dput(lower_dentry);
3816     + if (err)
3817     + break;
3818     + }
3819     + }
3820     +
3821     + __putname(name);
3822     +
3823     + /* After all of the removals, we should copy the attributes once. */
3824     + fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
3825     +
3826     +out:
3827     + return err;
3828     +}
3829     +
3830     +/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
3831     +int delete_whiteouts(struct dentry *dentry, int bindex,
3832     + struct unionfs_dir_state *namelist)
3833     +{
3834     + int err;
3835     + struct super_block *sb;
3836     + struct dentry *lower_dir_dentry;
3837     + struct inode *lower_dir;
3838     + struct sioq_args args;
3839     +
3840     + sb = dentry->d_sb;
3841     +
3842     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3843     + BUG_ON(bindex < dbstart(dentry));
3844     + BUG_ON(bindex > dbend(dentry));
3845     + err = is_robranch_super(sb, bindex);
3846     + if (err)
3847     + goto out;
3848     +
3849     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3850     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3851     + lower_dir = lower_dir_dentry->d_inode;
3852     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3853     +
3854     + mutex_lock(&lower_dir->i_mutex);
3855     + if (!permission(lower_dir, MAY_WRITE | MAY_EXEC, NULL))
3856     + err = do_delete_whiteouts(dentry, bindex, namelist);
3857     + else {
3858     + args.deletewh.namelist = namelist;
3859     + args.deletewh.dentry = dentry;
3860     + args.deletewh.bindex = bindex;
3861     + run_sioq(__delete_whiteouts, &args);
3862     + err = args.err;
3863     + }
3864     + mutex_unlock(&lower_dir->i_mutex);
3865     +
3866     +out:
3867     + return err;
3868     +}
3869     +
3870     +#define RD_NONE 0
3871     +#define RD_CHECK_EMPTY 1
3872     +/* The callback structure for check_empty. */
3873     +struct unionfs_rdutil_callback {
3874     + int err;
3875     + int filldir_called;
3876     + struct unionfs_dir_state *rdstate;
3877     + int mode;
3878     +};
3879     +
3880     +/* This filldir function makes sure only whiteouts exist within a directory. */
3881     +static int readdir_util_callback(void *dirent, const char *name, int namelen,
3882     + loff_t offset, u64 ino, unsigned int d_type)
3883     +{
3884     + int err = 0;
3885     + struct unionfs_rdutil_callback *buf = dirent;
3886     + int whiteout = 0;
3887     + struct filldir_node *found;
3888     +
3889     + buf->filldir_called = 1;
3890     +
3891     + if (name[0] == '.' && (namelen == 1 ||
3892     + (name[1] == '.' && namelen == 2)))
3893     + goto out;
3894     +
3895     + if (namelen > UNIONFS_WHLEN &&
3896     + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3897     + namelen -= UNIONFS_WHLEN;
3898     + name += UNIONFS_WHLEN;
3899     + whiteout = 1;
3900     + }
3901     +
3902     + found = find_filldir_node(buf->rdstate, name, namelen);
3903     + /* If it was found in the table there was a previous whiteout. */
3904     + if (found)
3905     + goto out;
3906     +
3907     + /*
3908     + * if it wasn't found and isn't a whiteout, the directory isn't
3909     + * empty.
3910     + */
3911     + err = -ENOTEMPTY;
3912     + if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
3913     + goto out;
3914     +
3915     + err = add_filldir_node(buf->rdstate, name, namelen,
3916     + buf->rdstate->bindex, whiteout);
3917     +
3918     +out:
3919     + buf->err = err;
3920     + return err;
3921     +}
3922     +
3923     +/* Is a directory logically empty? If so, *namelist (if set) gets rdstate. */
3924     +int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
3925     +{
3926     + int err = 0;
3927     + struct dentry *lower_dentry = NULL;
3928     + struct super_block *sb;
3929     + struct file *lower_file;
3930     + struct unionfs_rdutil_callback *buf = NULL;
3931     + int bindex, bstart, bend, bopaque;
3932     +
3933     + sb = dentry->d_sb;
3934     +
3935     +
3936     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3937     +
3938     + if ((err = unionfs_partial_lookup(dentry)))
3939     + goto out;
3940     +
3941     + bstart = dbstart(dentry);
3942     + bend = dbend(dentry);
3943     + bopaque = dbopaque(dentry);
3944     + if (0 <= bopaque && bopaque < bend)
3945     + bend = bopaque;
3946     +
3947     + buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
3948     + if (!buf) {
3949     + err = -ENOMEM;
3950     + goto out;
3951     + }
3952     + buf->err = 0;
3953     + buf->mode = RD_CHECK_EMPTY;
3954     + buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
3955     + if (!buf->rdstate) {
3956     + err = -ENOMEM;
3957     + goto out;
3958     + }
3959     +
3960     + /* Process the lower directories with rdutil_callback as a filldir. */
3961     + for (bindex = bstart; bindex <= bend; bindex++) {
3962     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3963     + if (!lower_dentry)
3964     + continue;
3965     + if (!lower_dentry->d_inode)
3966     + continue;
3967     + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
3968     + continue;
3969     +
3970     + dget(lower_dentry);
3971     + unionfs_mntget(dentry, bindex);
3972     + branchget(sb, bindex);
3973     + lower_file =
3974     + dentry_open(lower_dentry,
3975     + unionfs_lower_mnt_idx(dentry, bindex),
3976     + O_RDONLY);
3977     + if (IS_ERR(lower_file)) {
3978     + err = PTR_ERR(lower_file);
3979     + /* dentry_open() already did dput/mntput on failure */
3980     + branchput(sb, bindex);
3981     + goto out;
3982     + }
3983     +
3984     + do {
3985     + buf->filldir_called = 0;
3986     + buf->rdstate->bindex = bindex;
3987     + err = vfs_readdir(lower_file,
3988     + readdir_util_callback, buf);
3989     + if (buf->err)
3990     + err = buf->err;
3991     + } while ((err >= 0) && buf->filldir_called);
3992     +
3993     + /* fput calls dput for lower_dentry */
3994     + fput(lower_file);
3995     + branchput(sb, bindex);
3996     +
3997     + if (err < 0)
3998     + goto out;
3999     + }
4000     +
4001     +out:
4002     + if (buf) {
4003     + if (namelist && !err)
4004     + *namelist = buf->rdstate;
4005     + else if (buf->rdstate)
4006     + free_rdstate(buf->rdstate);
4007     + kfree(buf);
4008     + }
4009     +
4010     +
4011     + return err;
4012     +}
4013     diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4014     new file mode 100644
4015     index 0000000..e7407f0
4016     --- /dev/null
4017     +++ b/fs/unionfs/fanout.h
4018     @@ -0,0 +1,318 @@
4019     +/*
4020     + * Copyright (c) 2003-2007 Erez Zadok
4021     + * Copyright (c) 2003-2006 Charles P. Wright
4022     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4023     + * Copyright (c) 2005 Arun M. Krishnakumar
4024     + * Copyright (c) 2004-2006 David P. Quigley
4025     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4026     + * Copyright (c) 2003 Puja Gupta
4027     + * Copyright (c) 2003 Harikesavan Krishnan
4028     + * Copyright (c) 2003-2007 Stony Brook University
4029     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4030     + *
4031     + * This program is free software; you can redistribute it and/or modify
4032     + * it under the terms of the GNU General Public License version 2 as
4033     + * published by the Free Software Foundation.
4034     + */
4035     +
4036     +#ifndef _FANOUT_H_
4037     +#define _FANOUT_H_
4038     +
4039     +/*
4040     + * Inode to private data
4041     + *
4042     + * Since we use containers and the struct inode is _inside_ the
4043     + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4044     + * inode pointer), return a valid non-NULL pointer.
4045     + */
4046     +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4047     +{
4048     + return container_of(inode, struct unionfs_inode_info, vfs_inode);
4049     +}
4050     +
4051     +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4052     +#define ibend(ino) (UNIONFS_I(ino)->bend)
4053     +
4054     +/* Superblock to private data */
4055     +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4056     +#define sbstart(sb) 0
4057     +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4058     +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4059     +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4060     +
4061     +/* File to private Data */
4062     +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4063     +#define fbstart(file) (UNIONFS_F(file)->bstart)
4064     +#define fbend(file) (UNIONFS_F(file)->bend)
4065     +
4066     +/* macros to manipulate branch IDs stored in our superblock */
4067     +static inline int branch_id(struct super_block *sb, int index)
4068     +{
4069     + return UNIONFS_SB(sb)->data[index].branch_id;
4070     +}
4071     +
4072     +static inline void set_branch_id(struct super_block *sb, int index, int val)
4073     +{
4074     + UNIONFS_SB(sb)->data[index].branch_id = val;
4075     +}
4076     +
4077     +static inline void new_branch_id(struct super_block *sb, int index)
4078     +{
4079     + set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4080     +}
4081     +
4082     +/*
4083     + * Find new index of matching branch with an existing superblock of a known
4084     + * (possibly old) id. This is needed because branches could have been
4085     + * added/deleted causing the branches of any open files to shift.
4086     + *
4087     + * @sb: the new superblock which may have new/different branch IDs
4088     + * @id: the old/existing id we're looking for
4089     + * Returns index of newly found branch (0 or greater), -1 otherwise.
4090     + */
4091     +static inline int branch_id_to_idx(struct super_block *sb, int id)
4092     +{
4093     + int i;
4094     + for (i = 0; i < sbmax(sb); i++) {
4095     + if (branch_id(sb, i) == id)
4096     + return i;
4097     + }
4098     + /* in the non-ODF code, this should really never happen */
4099     + printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4100     + return -1;
4101     +}
4102     +
4103     +/* File to lower file. */
4104     +static inline struct file *unionfs_lower_file(const struct file *f)
4105     +{
4106     + return UNIONFS_F(f)->lower_files[fbstart(f)];
4107     +}
4108     +
4109     +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4110     + int index)
4111     +{
4112     + return UNIONFS_F(f)->lower_files[index];
4113     +}
4114     +
4115     +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4116     + struct file *val)
4117     +{
4118     + UNIONFS_F(f)->lower_files[index] = val;
4119     + /* save branch ID (may be redundant?) */
4120     + UNIONFS_F(f)->saved_branch_ids[index] =
4121     + branch_id((f)->f_dentry->d_sb, index);
4122     +}
4123     +
4124     +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4125     +{
4126     + unionfs_set_lower_file_idx((f), fbstart(f), (val));
4127     +}
4128     +
4129     +/* Inode to lower inode. */
4130     +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4131     +{
4132     + return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4133     +}
4134     +
4135     +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4136     + int index)
4137     +{
4138     + return UNIONFS_I(i)->lower_inodes[index];
4139     +}
4140     +
4141     +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4142     + struct inode *val)
4143     +{
4144     + UNIONFS_I(i)->lower_inodes[index] = val;
4145     +}
4146     +
4147     +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4148     +{
4149     + UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4150     +}
4151     +
4152     +/* Superblock to lower superblock. */
4153     +static inline struct super_block *unionfs_lower_super(
4154     + const struct super_block *sb)
4155     +{
4156     + return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4157     +}
4158     +
4159     +static inline struct super_block *unionfs_lower_super_idx(
4160     + const struct super_block *sb,
4161     + int index)
4162     +{
4163     + return UNIONFS_SB(sb)->data[index].sb;
4164     +}
4165     +
4166     +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4167     + int index,
4168     + struct super_block *val)
4169     +{
4170     + UNIONFS_SB(sb)->data[index].sb = val;
4171     +}
4172     +
4173     +static inline void unionfs_set_lower_super(struct super_block *sb,
4174     + struct super_block *val)
4175     +{
4176     + UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4177     +}
4178     +
4179     +/* Branch count macros. */
4180     +static inline int branch_count(const struct super_block *sb, int index)
4181     +{
4182     + return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4183     +}
4184     +
4185     +static inline void set_branch_count(struct super_block *sb, int index, int val)
4186     +{
4187     + atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4188     +}
4189     +
4190     +static inline void branchget(struct super_block *sb, int index)
4191     +{
4192     + atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4193     +}
4194     +
4195     +static inline void branchput(struct super_block *sb, int index)
4196     +{
4197     + atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4198     +}
4199     +
4200     +/* Dentry macros */
4201     +static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
4202     +{
4203     + return dent->d_fsdata;
4204     +}
4205     +
4206     +static inline int dbstart(const struct dentry *dent)
4207     +{
4208     + return UNIONFS_D(dent)->bstart;
4209     +}
4210     +
4211     +static inline void set_dbstart(struct dentry *dent, int val)
4212     +{
4213     + UNIONFS_D(dent)->bstart = val;
4214     +}
4215     +
4216     +static inline int dbend(const struct dentry *dent)
4217     +{
4218     + return UNIONFS_D(dent)->bend;
4219     +}
4220     +
4221     +static inline void set_dbend(struct dentry *dent, int val)
4222     +{
4223     + UNIONFS_D(dent)->bend = val;
4224     +}
4225     +
4226     +static inline int dbopaque(const struct dentry *dent)
4227     +{
4228     + return UNIONFS_D(dent)->bopaque;
4229     +}
4230     +
4231     +static inline void set_dbopaque(struct dentry *dent, int val)
4232     +{
4233     + UNIONFS_D(dent)->bopaque = val;
4234     +}
4235     +
4236     +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4237     + struct dentry *val)
4238     +{
4239     + UNIONFS_D(dent)->lower_paths[index].dentry = val;
4240     +}
4241     +
4242     +static inline struct dentry *unionfs_lower_dentry_idx(
4243     + const struct dentry *dent,
4244     + int index)
4245     +{
4246     + return UNIONFS_D(dent)->lower_paths[index].dentry;
4247     +}
4248     +
4249     +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4250     +{
4251     + return unionfs_lower_dentry_idx(dent, dbstart(dent));
4252     +}
4253     +
4254     +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4255     + struct vfsmount *mnt)
4256     +{
4257     + UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4258     +}
4259     +
4260     +static inline struct vfsmount *unionfs_lower_mnt_idx(
4261     + const struct dentry *dent,
4262     + int index)
4263     +{
4264     + return UNIONFS_D(dent)->lower_paths[index].mnt;
4265     +}
4266     +
4267     +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4268     +{
4269     + return unionfs_lower_mnt_idx(dent, dbstart(dent));
4270     +}
4271     +
4272     +/* Macros for locking a dentry. */
4273     +static inline void unionfs_lock_dentry(struct dentry *d)
4274     +{
4275     + mutex_lock(&UNIONFS_D(d)->lock);
4276     +}
4277     +
4278     +static inline void unionfs_unlock_dentry(struct dentry *d)
4279     +{
4280     + mutex_unlock(&UNIONFS_D(d)->lock);
4281     +}
4282     +
4283     +static inline void verify_locked(struct dentry *d)
4284     +{
4285     + BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4286     +}
4287     +
4288     +/* copy a/m/ctime from the lower branch with the newest times */
4289     +static inline void unionfs_copy_attr_times(struct inode *upper)
4290     +{
4291     + int bindex;
4292     + struct inode *lower;
4293     +
4294     + if (!upper)
4295     + return;
4296     + for (bindex=ibstart(upper); bindex <= ibend(upper); bindex++) {
4297     + lower = unionfs_lower_inode_idx(upper, bindex);
4298     + if (!lower)
4299     + continue; /* not all lower dir objects may exist */
4300     + if (timespec_compare(&upper->i_mtime, &lower->i_mtime) < 0)
4301     + upper->i_mtime = lower->i_mtime;
4302     + if (timespec_compare(&upper->i_ctime, &lower->i_ctime) < 0)
4303     + upper->i_ctime = lower->i_ctime;
4304     + if (timespec_compare(&upper->i_atime, &lower->i_atime) < 0)
4305     + upper->i_atime = lower->i_atime;
4306     + /* XXX: should we notify_change on our upper inode? */
4307     + }
4308     +}
4309     +
4310     +/*
4311     + * A unionfs/fanout version of fsstack_copy_attr_all. Uses
4312     + * unionfs_get_nlinks to properly calculate the number of links to a file.
4313     + * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
4314     + * important if the lower inode is a directory type)
4315     + */
4316     +static inline void unionfs_copy_attr_all(struct inode *dest,
4317     + const struct inode *src)
4318     +{
4319     + dest->i_mode = src->i_mode;
4320     + dest->i_uid = src->i_uid;
4321     + dest->i_gid = src->i_gid;
4322     + dest->i_rdev = src->i_rdev;
4323     +
4324     + unionfs_copy_attr_times(dest);
4325     +
4326     + dest->i_blkbits = src->i_blkbits;
4327     + dest->i_flags = src->i_flags;
4328     +
4329     + /*
4330     + * Update the nlinks AFTER updating the above fields, because the
4331     + * get_links callback may depend on them.
4332     + */
4333     + dest->i_nlink = unionfs_get_nlinks(dest);
4334     +}
4335     +
4336     +#endif /* not _FANOUT_H */
4337     diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4338     new file mode 100644
4339     index 0000000..3f6b2d0
4340     --- /dev/null
4341     +++ b/fs/unionfs/file.c
4342     @@ -0,0 +1,250 @@
4343     +/*
4344     + * Copyright (c) 2003-2007 Erez Zadok
4345     + * Copyright (c) 2003-2006 Charles P. Wright
4346     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4347     + * Copyright (c) 2005-2006 Junjiro Okajima
4348     + * Copyright (c) 2005 Arun M. Krishnakumar
4349     + * Copyright (c) 2004-2006 David P. Quigley
4350     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4351     + * Copyright (c) 2003 Puja Gupta
4352     + * Copyright (c) 2003 Harikesavan Krishnan
4353     + * Copyright (c) 2003-2007 Stony Brook University
4354     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4355     + *
4356     + * This program is free software; you can redistribute it and/or modify
4357     + * it under the terms of the GNU General Public License version 2 as
4358     + * published by the Free Software Foundation.
4359     + */
4360     +
4361     +#include "union.h"
4362     +
4363     +static ssize_t unionfs_read(struct file *file, char __user *buf,
4364     + size_t count, loff_t *ppos)
4365     +{
4366     + int err;
4367     +
4368     + unionfs_read_lock(file->f_path.dentry->d_sb);
4369     + if ((err = unionfs_file_revalidate(file, 0)))
4370     + goto out;
4371     + unionfs_check_file(file);
4372     +
4373     + err = do_sync_read(file, buf, count, ppos);
4374     +
4375     + if (err >= 0)
4376     + touch_atime(unionfs_lower_mnt(file->f_path.dentry),
4377     + unionfs_lower_dentry(file->f_path.dentry));
4378     +
4379     +out:
4380     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4381     + unionfs_check_file(file);
4382     + return err;
4383     +}
4384     +
4385     +static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
4386     + unsigned long nr_segs, loff_t pos)
4387     +{
4388     + int err = 0;
4389     + struct file *file = iocb->ki_filp;
4390     +
4391     + unionfs_read_lock(file->f_path.dentry->d_sb);
4392     + if ((err = unionfs_file_revalidate(file, 0)))
4393     + goto out;
4394     + unionfs_check_file(file);
4395     +
4396     + err = generic_file_aio_read(iocb, iov, nr_segs, pos);
4397     +
4398     + if (err == -EIOCBQUEUED)
4399     + err = wait_on_sync_kiocb(iocb);
4400     +
4401     + if (err >= 0)
4402     + touch_atime(unionfs_lower_mnt(file->f_path.dentry),
4403     + unionfs_lower_dentry(file->f_path.dentry));
4404     +
4405     +out:
4406     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4407     + unionfs_check_file(file);
4408     + return err;
4409     +}
4410     +
4411     +static ssize_t unionfs_write(struct file *file, const char __user *buf,
4412     + size_t count, loff_t *ppos)
4413     +{
4414     + int err = 0;
4415     +
4416     + unionfs_read_lock(file->f_path.dentry->d_sb);
4417     + if ((err = unionfs_file_revalidate(file, 1)))
4418     + goto out;
4419     + unionfs_check_file(file);
4420     +
4421     + err = do_sync_write(file, buf, count, ppos);
4422     + /* update our inode times upon a successful lower write */
4423     + if (err >= 0) {
4424     + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
4425     + unionfs_check_file(file);
4426     + }
4427     +
4428     +out:
4429     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4430     + return err;
4431     +}
4432     +
4433     +static int unionfs_file_readdir(struct file *file, void *dirent,
4434     + filldir_t filldir)
4435     +{
4436     + return -ENOTDIR;
4437     +}
4438     +
4439     +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4440     +{
4441     + int err = 0;
4442     + int willwrite;
4443     + struct file *lower_file;
4444     +
4445     + unionfs_read_lock(file->f_path.dentry->d_sb);
4446     +
4447     + /* This might be deferred to mmap's writepage */
4448     + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4449     + if ((err = unionfs_file_revalidate(file, willwrite)))
4450     + goto out;
4451     + unionfs_check_file(file);
4452     +
4453     + /*
4454     + * File systems which do not implement ->writepage may use
4455     + * generic_file_readonly_mmap as their ->mmap op. If you call
4456     + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4457     + * But we cannot call the lower ->mmap op, so we can't tell that
4458     + * writeable mappings won't work. Therefore, our only choice is to
4459     + * check if the lower file system supports the ->writepage, and if
4460     + * not, return EINVAL (the same error that
4461     + * generic_file_readonly_mmap returns in that case).
4462     + */
4463     + lower_file = unionfs_lower_file(file);
4464     + if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4465     + err = -EINVAL;
4466     + printk(KERN_ERR "unionfs: branch %d file system does "
4467     + "not support writeable mmap\n", fbstart(file));
4468     + } else {
4469     + err = generic_file_mmap(file, vma);
4470     + if (err)
4471     + printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err);
4472     + }
4473     +
4474     +out:
4475     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4476     + if (!err) {
4477     + /* copyup could cause parent dir times to change */
4478     + unionfs_copy_attr_times(file->f_path.dentry->d_parent->d_inode);
4479     + unionfs_check_file(file);
4480     + unionfs_check_dentry(file->f_path.dentry->d_parent);
4481     + }
4482     + return err;
4483     +}
4484     +
4485     +int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync)
4486     +{
4487     + int bindex, bstart, bend;
4488     + struct file *lower_file;
4489     + struct dentry *lower_dentry;
4490     + struct inode *lower_inode, *inode;
4491     + int err = -EINVAL;
4492     +
4493     + unionfs_read_lock(file->f_path.dentry->d_sb);
4494     + if ((err = unionfs_file_revalidate(file, 1)))
4495     + goto out;
4496     + unionfs_check_file(file);
4497     +
4498     + bstart = fbstart(file);
4499     + bend = fbend(file);
4500     + if (bstart < 0 || bend < 0)
4501     + goto out;
4502     +
4503     + inode = dentry->d_inode;
4504     + if (!inode) {
4505     + printk(KERN_ERR
4506     + "unionfs: null lower inode in unionfs_fsync\n");
4507     + goto out;
4508     + }
4509     + for (bindex = bstart; bindex <= bend; bindex++) {
4510     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4511     + if (!lower_inode || !lower_inode->i_fop->fsync)
4512     + continue;
4513     + lower_file = unionfs_lower_file_idx(file, bindex);
4514     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4515     + mutex_lock(&lower_inode->i_mutex);
4516     + err = lower_inode->i_fop->fsync(lower_file,
4517     + lower_dentry,
4518     + datasync);
4519     + mutex_unlock(&lower_inode->i_mutex);
4520     + if (err)
4521     + goto out;
4522     + }
4523     +
4524     + unionfs_copy_attr_times(inode);
4525     +
4526     +out:
4527     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4528     + unionfs_check_file(file);
4529     + return err;
4530     +}
4531     +
4532     +int unionfs_fasync(int fd, struct file *file, int flag)
4533     +{
4534     + int bindex, bstart, bend;
4535     + struct file *lower_file;
4536     + struct dentry *dentry;
4537     + struct inode *lower_inode, *inode;
4538     + int err = 0;
4539     +
4540     + unionfs_read_lock(file->f_path.dentry->d_sb);
4541     + if ((err = unionfs_file_revalidate(file, 1)))
4542     + goto out;
4543     + unionfs_check_file(file);
4544     +
4545     + bstart = fbstart(file);
4546     + bend = fbend(file);
4547     + if (bstart < 0 || bend < 0)
4548     + goto out;
4549     +
4550     + dentry = file->f_path.dentry;
4551     + inode = dentry->d_inode;
4552     + if (!inode) {
4553     + printk(KERN_ERR
4554     + "unionfs: null lower inode in unionfs_fasync\n");
4555     + goto out;
4556     + }
4557     + for (bindex = bstart; bindex <= bend; bindex++) {
4558     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4559     + if (!lower_inode || !lower_inode->i_fop->fasync)
4560     + continue;
4561     + lower_file = unionfs_lower_file_idx(file, bindex);
4562     + mutex_lock(&lower_inode->i_mutex);
4563     + err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4564     + mutex_unlock(&lower_inode->i_mutex);
4565     + if (err)
4566     + goto out;
4567     + }
4568     +
4569     + unionfs_copy_attr_times(inode);
4570     +
4571     +out:
4572     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4573     + unionfs_check_file(file);
4574     + return err;
4575     +}
4576     +
4577     +struct file_operations unionfs_main_fops = {
4578     + .llseek = generic_file_llseek,
4579     + .read = unionfs_read,
4580     + .aio_read = unionfs_aio_read,
4581     + .write = unionfs_write,
4582     + .aio_write = generic_file_aio_write,
4583     + .readdir = unionfs_file_readdir,
4584     + .unlocked_ioctl = unionfs_ioctl,
4585     + .mmap = unionfs_mmap,
4586     + .open = unionfs_open,
4587     + .flush = unionfs_flush,
4588     + .release = unionfs_file_release,
4589     + .fsync = unionfs_fsync,
4590     + .fasync = unionfs_fasync,
4591     + .sendfile = generic_file_sendfile,
4592     +};
4593     diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4594     new file mode 100644
4595     index 0000000..c772fbd
4596     --- /dev/null
4597     +++ b/fs/unionfs/inode.c
4598     @@ -0,0 +1,1219 @@
4599     +/*
4600     + * Copyright (c) 2003-2007 Erez Zadok
4601     + * Copyright (c) 2003-2006 Charles P. Wright
4602     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4603     + * Copyright (c) 2005-2006 Junjiro Okajima
4604     + * Copyright (c) 2005 Arun M. Krishnakumar
4605     + * Copyright (c) 2004-2006 David P. Quigley
4606     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4607     + * Copyright (c) 2003 Puja Gupta
4608     + * Copyright (c) 2003 Harikesavan Krishnan
4609     + * Copyright (c) 2003-2007 Stony Brook University
4610     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4611     + *
4612     + * This program is free software; you can redistribute it and/or modify
4613     + * it under the terms of the GNU General Public License version 2 as
4614     + * published by the Free Software Foundation.
4615     + */
4616     +
4617     +#include "union.h"
4618     +
4619     +static int unionfs_create(struct inode *parent, struct dentry *dentry,
4620     + int mode, struct nameidata *nd)
4621     +{
4622     + int err = 0;
4623     + struct dentry *lower_dentry = NULL;
4624     + struct dentry *wh_dentry = NULL;
4625     + struct dentry *new_lower_dentry;
4626     + struct dentry *lower_parent_dentry = NULL;
4627     + int bindex = 0, bstart;
4628     + char *name = NULL;
4629     + int valid = 0;
4630     +
4631     + unionfs_read_lock(dentry->d_sb);
4632     + unionfs_lock_dentry(dentry);
4633     +
4634     + unionfs_lock_dentry(dentry->d_parent);
4635     + valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, 0);
4636     + unionfs_unlock_dentry(dentry->d_parent);
4637     + if (!valid) {
4638     + err = -ESTALE; /* same as what real_lookup does */
4639     + goto out;
4640     + }
4641     + valid = __unionfs_d_revalidate_chain(dentry, nd, 0);
4642     + /*
4643     + * It's only a bug if this dentry was not negative and couldn't be
4644     + * revalidated (shouldn't happen).
4645     + */
4646     + BUG_ON(!valid && dentry->d_inode);
4647     +
4648     + /* We start out in the leftmost branch. */
4649     + bstart = dbstart(dentry);
4650     + lower_dentry = unionfs_lower_dentry(dentry);
4651     +
4652     + /*
4653     + * check if whiteout exists in this branch, i.e. lookup .wh.foo
4654     + * first.
4655     + */
4656     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
4657     + if (IS_ERR(name)) {
4658     + err = PTR_ERR(name);
4659     + goto out;
4660     + }
4661     +
4662     + wh_dentry = lookup_one_len(name, lower_dentry->d_parent,
4663     + dentry->d_name.len + UNIONFS_WHLEN);
4664     + if (IS_ERR(wh_dentry)) {
4665     + err = PTR_ERR(wh_dentry);
4666     + wh_dentry = NULL;
4667     + goto out;
4668     + }
4669     +
4670     + if (wh_dentry->d_inode) {
4671     + /*
4672     + * .wh.foo has been found.
4673     + * First truncate it and then rename it to foo (hence having
4674     + * the same overall effect as a normal create).
4675     + */
4676     + struct dentry *lower_dir_dentry;
4677     + struct iattr newattrs;
4678     +
4679     + mutex_lock(&wh_dentry->d_inode->i_mutex);
4680     + newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME
4681     + | ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE
4682     + | ATTR_KILL_SUID | ATTR_KILL_SGID;
4683     +
4684     + newattrs.ia_mode = mode & ~current->fs->umask;
4685     + newattrs.ia_uid = current->fsuid;
4686     + newattrs.ia_gid = current->fsgid;
4687     +
4688     + if (wh_dentry->d_inode->i_size != 0) {
4689     + newattrs.ia_valid |= ATTR_SIZE;
4690     + newattrs.ia_size = 0;
4691     + }
4692     +
4693     + err = notify_change(wh_dentry, &newattrs);
4694     +
4695     + mutex_unlock(&wh_dentry->d_inode->i_mutex);
4696     +
4697     + if (err)
4698     + printk(KERN_WARNING "unionfs: %s:%d: notify_change "
4699     + "failed: %d, ignoring..\n",
4700     + __FILE__, __LINE__, err);
4701     +
4702     + new_lower_dentry = unionfs_lower_dentry(dentry);
4703     + dget(new_lower_dentry);
4704     +
4705     + lower_dir_dentry = dget_parent(wh_dentry);
4706     + lock_rename(lower_dir_dentry, lower_dir_dentry);
4707     +
4708     + if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
4709     + err = vfs_rename(lower_dir_dentry->d_inode,
4710     + wh_dentry,
4711     + lower_dir_dentry->d_inode,
4712     + new_lower_dentry);
4713     + }
4714     + if (!err) {
4715     + fsstack_copy_attr_times(parent,
4716     + new_lower_dentry->d_parent->
4717     + d_inode);
4718     + fsstack_copy_inode_size(parent,
4719     + new_lower_dentry->d_parent->
4720     + d_inode);
4721     + parent->i_nlink = unionfs_get_nlinks(parent);
4722     + }
4723     +
4724     + unlock_rename(lower_dir_dentry, lower_dir_dentry);
4725     + dput(lower_dir_dentry);
4726     +
4727     + dput(new_lower_dentry);
4728     +
4729     + if (err) {
4730     + /* exit if the error returned was NOT -EROFS */
4731     + if (!IS_COPYUP_ERR(err))
4732     + goto out;
4733     + /*
4734     + * We were not able to create the file in this
4735     + * branch, so we try to create it in the branch to its
4736     + * left
4737     + */
4738     + bstart--;
4739     + } else {
4740     + /*
4741     + * reset the unionfs dentry to point to the .wh.foo
4742     + * entry.
4743     + */
4744     +
4745     + /* Discard any old reference. */
4746     + dput(unionfs_lower_dentry(dentry));
4747     +
4748     + /* Trade one reference to another. */
4749     + unionfs_set_lower_dentry_idx(dentry, bstart,
4750     + wh_dentry);
4751     + wh_dentry = NULL;
4752     +
4753     + /*
4754     + * Only INTERPOSE_LOOKUP can return a value other
4755     + * than 0 on err.
4756     + */
4757     + err = PTR_ERR(unionfs_interpose(dentry,
4758     + parent->i_sb, 0));
4759     + goto out;
4760     + }
4761     + }
4762     +
4763     + for (bindex = bstart; bindex >= 0; bindex--) {
4764     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4765     + if (!lower_dentry) {
4766     + /*
4767     + * if lower_dentry is NULL, create the entire
4768     + * dentry directory structure in branch 'bindex'.
4769     + * lower_dentry will NOT be null when bindex == bstart
4770     + * because lookup passed us a negative unionfs dentry
4771     + * pointing to a lone negative underlying dentry.
4772     + */
4773     + lower_dentry = create_parents(parent, dentry,
4774     + dentry->d_name.name,
4775     + bindex);
4776     + if (!lower_dentry || IS_ERR(lower_dentry)) {
4777     + if (IS_ERR(lower_dentry))
4778     + err = PTR_ERR(lower_dentry);
4779     + continue;
4780     + }
4781     + }
4782     +
4783     + lower_parent_dentry = lock_parent(lower_dentry);
4784     + if (IS_ERR(lower_parent_dentry)) {
4785     + err = PTR_ERR(lower_parent_dentry);
4786     + goto out;
4787     + }
4788     + /* We shouldn't create things in a read-only branch. */
4789     + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
4790     + err = vfs_create(lower_parent_dentry->d_inode,
4791     + lower_dentry, mode, nd);
4792     +
4793     + if (err || !lower_dentry->d_inode) {
4794     + unlock_dir(lower_parent_dentry);
4795     +
4796     + /* break out of for loop if the error wasn't -EROFS */
4797     + if (!IS_COPYUP_ERR(err))
4798     + break;
4799     + } else {
4800     + /*
4801     + * Only INTERPOSE_LOOKUP can return a value other
4802     + * than 0 on err.
4803     + */
4804     + err = PTR_ERR(unionfs_interpose(dentry,
4805     + parent->i_sb, 0));
4806     + if (!err) {
4807     + unionfs_copy_attr_times(parent);
4808     + fsstack_copy_inode_size(parent,
4809     + lower_parent_dentry->
4810     + d_inode);
4811     + /* update no. of links on parent directory */
4812     + parent->i_nlink = unionfs_get_nlinks(parent);
4813     + }
4814     + unlock_dir(lower_parent_dentry);
4815     + break;
4816     + }
4817     + }
4818     +
4819     +out:
4820     + dput(wh_dentry);
4821     + kfree(name);
4822     +
4823     + if (!err)
4824     + unionfs_inherit_mnt(dentry);
4825     + unionfs_unlock_dentry(dentry);
4826     + unionfs_read_unlock(dentry->d_sb);
4827     +
4828     + unionfs_check_inode(parent);
4829     + if (!err)
4830     + unionfs_check_dentry(dentry->d_parent);
4831     + unionfs_check_dentry(dentry);
4832     + return err;
4833     +}
4834     +
4835     +/*
4836     + * unionfs_lookup is the only special function which takes a dentry, yet we
4837     + * do NOT want to call __unionfs_d_revalidate_chain because by definition,
4838     + * we don't have a valid dentry here yet.
4839     + */
4840     +static struct dentry *unionfs_lookup(struct inode *parent,
4841     + struct dentry *dentry,
4842     + struct nameidata *nd)
4843     +{
4844     + struct path path_save;
4845     + struct dentry *ret;
4846     +
4847     + unionfs_read_lock(dentry->d_sb);
4848     +
4849     + /* save the dentry & vfsmnt from namei */
4850     + if (nd) {
4851     + path_save.dentry = nd->dentry;
4852     + path_save.mnt = nd->mnt;
4853     + }
4854     +
4855     + /* The locking is done by unionfs_lookup_backend. */
4856     + ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
4857     +
4858     + /* restore the dentry & vfsmnt in namei */
4859     + if (nd) {
4860     + nd->dentry = path_save.dentry;
4861     + nd->mnt = path_save.mnt;
4862     + }
4863     + if (!IS_ERR(ret)) {
4864     + if (ret)
4865     + dentry = ret;
4866     + /* parent times may have changed */
4867     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
4868     + }
4869     +
4870     + unionfs_check_inode(parent);
4871     + unionfs_check_dentry(dentry);
4872     + unionfs_check_dentry(dentry->d_parent);
4873     + unionfs_read_unlock(dentry->d_sb);
4874     +
4875     + return ret;
4876     +}
4877     +
4878     +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
4879     + struct dentry *new_dentry)
4880     +{
4881     + int err = 0;
4882     + struct dentry *lower_old_dentry = NULL;
4883     + struct dentry *lower_new_dentry = NULL;
4884     + struct dentry *lower_dir_dentry = NULL;
4885     + struct dentry *whiteout_dentry;
4886     + char *name = NULL;
4887     +
4888     + unionfs_read_lock(old_dentry->d_sb);
4889     + unionfs_double_lock_dentry(new_dentry, old_dentry);
4890     +
4891     + if (!__unionfs_d_revalidate_chain(old_dentry, NULL, 0)) {
4892     + err = -ESTALE;
4893     + goto out;
4894     + }
4895     + if (new_dentry->d_inode &&
4896     + !__unionfs_d_revalidate_chain(new_dentry, NULL, 0)) {
4897     + err = -ESTALE;
4898     + goto out;
4899     + }
4900     +
4901     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
4902     +
4903     + /*
4904     + * check if whiteout exists in the branch of new dentry, i.e. lookup
4905     + * .wh.foo first. If present, delete it
4906     + */
4907     + name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
4908     + if (IS_ERR(name)) {
4909     + err = PTR_ERR(name);
4910     + goto out;
4911     + }
4912     +
4913     + whiteout_dentry = lookup_one_len(name, lower_new_dentry->d_parent,
4914     + new_dentry->d_name.len +
4915     + UNIONFS_WHLEN);
4916     + if (IS_ERR(whiteout_dentry)) {
4917     + err = PTR_ERR(whiteout_dentry);
4918     + goto out;
4919     + }
4920     +
4921     + if (!whiteout_dentry->d_inode) {
4922     + dput(whiteout_dentry);
4923     + whiteout_dentry = NULL;
4924     + } else {
4925     + /* found a .wh.foo entry, unlink it and then call vfs_link() */
4926     + lower_dir_dentry = lock_parent(whiteout_dentry);
4927     + err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
4928     + if (!err)
4929     + err = vfs_unlink(lower_dir_dentry->d_inode,
4930     + whiteout_dentry);
4931     +
4932     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
4933     + dir->i_nlink = unionfs_get_nlinks(dir);
4934     + unlock_dir(lower_dir_dentry);
4935     + lower_dir_dentry = NULL;
4936     + dput(whiteout_dentry);
4937     + if (err)
4938     + goto out;
4939     + }
4940     +
4941     + if (dbstart(old_dentry) != dbstart(new_dentry)) {
4942     + lower_new_dentry = create_parents(dir, new_dentry,
4943     + new_dentry->d_name.name,
4944     + dbstart(old_dentry));
4945     + err = PTR_ERR(lower_new_dentry);
4946     + if (IS_COPYUP_ERR(err))
4947     + goto docopyup;
4948     + if (!lower_new_dentry || IS_ERR(lower_new_dentry))
4949     + goto out;
4950     + }
4951     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
4952     + lower_old_dentry = unionfs_lower_dentry(old_dentry);
4953     +
4954     + BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
4955     + lower_dir_dentry = lock_parent(lower_new_dentry);
4956     + if (!(err = is_robranch(old_dentry)))
4957     + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
4958     + lower_new_dentry);
4959     + unlock_dir(lower_dir_dentry);
4960     +
4961     +docopyup:
4962     + if (IS_COPYUP_ERR(err)) {
4963     + int old_bstart = dbstart(old_dentry);
4964     + int bindex;
4965     +
4966     + for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
4967     + err = copyup_dentry(old_dentry->d_parent->d_inode,
4968     + old_dentry, old_bstart,
4969     + bindex, old_dentry->d_name.name,
4970     + old_dentry->d_name.len, NULL,
4971     + old_dentry->d_inode->i_size);
4972     + if (!err) {
4973     + lower_new_dentry =
4974     + create_parents(dir, new_dentry,
4975     + new_dentry->d_name.name,
4976     + bindex);
4977     + lower_old_dentry =
4978     + unionfs_lower_dentry(old_dentry);
4979     + lower_dir_dentry =
4980     + lock_parent(lower_new_dentry);
4981     + /* do vfs_link */
4982     + err = vfs_link(lower_old_dentry,
4983     + lower_dir_dentry->d_inode,
4984     + lower_new_dentry);
4985     + unlock_dir(lower_dir_dentry);
4986     + goto check_link;
4987     + }
4988     + }
4989     + goto out;
4990     + }
4991     +
4992     +check_link:
4993     + if (err || !lower_new_dentry->d_inode)
4994     + goto out;
4995     +
4996     + /* It's a hard link, so use the same inode */
4997     + new_dentry->d_inode = igrab(old_dentry->d_inode);
4998     + d_instantiate(new_dentry, new_dentry->d_inode);
4999     + unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5000     + fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5001     +
5002     + /* propagate number of hard-links */
5003     + old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5004     + /* new dentry's ctime may have changed due to hard-link counts */
5005     + unionfs_copy_attr_times(new_dentry->d_inode);
5006     +
5007     +out:
5008     + if (!new_dentry->d_inode)
5009     + d_drop(new_dentry);
5010     +
5011     + kfree(name);
5012     + if (!err)
5013     + unionfs_inherit_mnt(new_dentry);
5014     +
5015     + unionfs_unlock_dentry(new_dentry);
5016     + unionfs_unlock_dentry(old_dentry);
5017     +
5018     + unionfs_check_inode(dir);
5019     + unionfs_check_dentry(new_dentry);
5020     + unionfs_check_dentry(old_dentry);
5021     + unionfs_read_unlock(old_dentry->d_sb);
5022     +
5023     + return err;
5024     +}
5025     +
5026     +static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5027     + const char *symname)
5028     +{
5029     + int err = 0;
5030     + struct dentry *lower_dentry = NULL;
5031     + struct dentry *whiteout_dentry = NULL;
5032     + struct dentry *lower_dir_dentry = NULL;
5033     + umode_t mode;
5034     + int bindex = 0, bstart;
5035     + char *name = NULL;
5036     +
5037     + unionfs_read_lock(dentry->d_sb);
5038     + unionfs_lock_dentry(dentry);
5039     +
5040     + if (dentry->d_inode &&
5041     + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5042     + err = -ESTALE;
5043     + goto out;
5044     + }
5045     +
5046     + /* We start out in the leftmost branch. */
5047     + bstart = dbstart(dentry);
5048     +
5049     + lower_dentry = unionfs_lower_dentry(dentry);
5050     +
5051     + /*
5052     + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5053     + * first. If present, delete it
5054     + */
5055     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5056     + if (IS_ERR(name)) {
5057     + err = PTR_ERR(name);
5058     + goto out;
5059     + }
5060     +
5061     + whiteout_dentry =
5062     + lookup_one_len(name, lower_dentry->d_parent,
5063     + dentry->d_name.len + UNIONFS_WHLEN);
5064     + if (IS_ERR(whiteout_dentry)) {
5065     + err = PTR_ERR(whiteout_dentry);
5066     + goto out;
5067     + }
5068     +
5069     + if (!whiteout_dentry->d_inode) {
5070     + dput(whiteout_dentry);
5071     + whiteout_dentry = NULL;
5072     + } else {
5073     + /*
5074     + * found a .wh.foo entry, unlink it and then call
5075     + * vfs_symlink().
5076     + */
5077     + lower_dir_dentry = lock_parent(whiteout_dentry);
5078     +
5079     + if (!(err = is_robranch_super(dentry->d_sb, bstart)))
5080     + err = vfs_unlink(lower_dir_dentry->d_inode,
5081     + whiteout_dentry);
5082     + dput(whiteout_dentry);
5083     +
5084     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5085     + /* propagate number of hard-links */
5086     + dir->i_nlink = unionfs_get_nlinks(dir);
5087     +
5088     + unlock_dir(lower_dir_dentry);
5089     +
5090     + if (err) {
5091     + /* exit if the error returned was NOT -EROFS */
5092     + if (!IS_COPYUP_ERR(err))
5093     + goto out;
5094     + /*
5095     + * should now try to create symlink in another
5096     + * branch.
5097     + */
5098     + bstart--;
5099     + }
5100     + }
5101     +
5102     + /*
5103     + * deleted whiteout if it was present, now do a normal vfs_symlink()
5104     + * with possible recursive directory creation
5105     + */
5106     + for (bindex = bstart; bindex >= 0; bindex--) {
5107     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5108     + if (!lower_dentry) {
5109     + /*
5110     + * if lower_dentry is NULL, create the entire
5111     + * dentry directory structure in branch 'bindex'.
5112     + * lower_dentry will NOT be null when bindex ==
5113     + * bstart because lookup passed us a negative
5114     + * unionfs dentry pointing to a lone negative
5115     + * underlying dentry
5116     + */
5117     + lower_dentry = create_parents(dir, dentry,
5118     + dentry->d_name.name,
5119     + bindex);
5120     + if (!lower_dentry || IS_ERR(lower_dentry)) {
5121     + if (IS_ERR(lower_dentry))
5122     + err = PTR_ERR(lower_dentry);
5123     +
5124     + printk(KERN_DEBUG "unionfs: lower dentry "
5125     + "NULL (or error) for bindex = %d\n",
5126     + bindex);
5127     + continue;
5128     + }
5129     + }
5130     +
5131     + lower_dir_dentry = lock_parent(lower_dentry);
5132     +
5133     + if (!(err = is_robranch_super(dentry->d_sb, bindex))) {
5134     + mode = S_IALLUGO;
5135     + err =
5136     + vfs_symlink(lower_dir_dentry->d_inode,
5137     + lower_dentry, symname, mode);
5138     + }
5139     + unlock_dir(lower_dir_dentry);
5140     +
5141     + if (err || !lower_dentry->d_inode) {
5142     + /*
5143     + * break out of for loop if error returned was NOT
5144     + * -EROFS.
5145     + */
5146     + if (!IS_COPYUP_ERR(err))
5147     + break;
5148     + } else {
5149     + /*
5150     + * Only INTERPOSE_LOOKUP can return a value other
5151     + * than 0 on err.
5152     + */
5153     + err = PTR_ERR(unionfs_interpose(dentry,
5154     + dir->i_sb, 0));
5155     + if (!err) {
5156     + fsstack_copy_attr_times(dir,
5157     + lower_dir_dentry->
5158     + d_inode);
5159     + fsstack_copy_inode_size(dir,
5160     + lower_dir_dentry->
5161     + d_inode);
5162     + /*
5163     + * update number of links on parent
5164     + * directory.
5165     + */
5166     + dir->i_nlink = unionfs_get_nlinks(dir);
5167     + }
5168     + break;
5169     + }
5170     + }
5171     +
5172     +out:
5173     + if (!dentry->d_inode)
5174     + d_drop(dentry);
5175     +
5176     + kfree(name);
5177     + if (!err)
5178     + unionfs_inherit_mnt(dentry);
5179     + unionfs_unlock_dentry(dentry);
5180     +
5181     + unionfs_check_inode(dir);
5182     + unionfs_check_dentry(dentry);
5183     + unionfs_read_unlock(dentry->d_sb);
5184     +
5185     + return err;
5186     +}
5187     +
5188     +static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
5189     +{ /* Create directory 'dentry' under 'parent': remove a .wh.<name> whiteout if present, vfs_mkdir in the first branch from bstart down to 0 that accepts it, then make it opaque. Returns 0 or -errno. */
5190     + int err = 0;
5191     + struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5192     + struct dentry *lower_parent_dentry = NULL;
5193     + int bindex = 0, bstart;
5194     + char *name = NULL; /* whiteout name from alloc_whname(); released at out */
5195     + int whiteout_unlinked = 0; /* set once the whiteout is removed; not read again in this function */
5196     + struct sioq_args args;
5197     +
5198     + unionfs_read_lock(dentry->d_sb);
5199     + unionfs_lock_dentry(dentry);
5200     +
5201     + if (dentry->d_inode &&
5202     + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5203     + err = -ESTALE; /* positive dentry that failed revalidation */
5204     + goto out;
5205     + }
5206     +
5207     + bstart = dbstart(dentry);
5208     +
5209     + lower_dentry = unionfs_lower_dentry(dentry);
5210     +
5211     + /*
5212     + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5213     + * first.
5214     + */
5215     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5216     + if (IS_ERR(name)) {
5217     + err = PTR_ERR(name);
5218     + goto out; /* name is an ERR_PTR from here on, so out must not kfree() it */
5219     + }
5220     +
5221     + whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5222     + dentry->d_name.len + UNIONFS_WHLEN);
5223     + if (IS_ERR(whiteout_dentry)) {
5224     + err = PTR_ERR(whiteout_dentry);
5225     + goto out;
5226     + }
5227     +
5228     + if (!whiteout_dentry->d_inode) {
5229     + dput(whiteout_dentry); /* negative dentry: there is no whiteout to remove */
5230     + whiteout_dentry = NULL;
5231     + } else {
5232     + lower_parent_dentry = lock_parent(whiteout_dentry);
5233     +
5234     + /* found a .wh.foo entry, remove it then do vfs_mkdir */
5235     + if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
5236     + args.unlink.parent = lower_parent_dentry->d_inode;
5237     + args.unlink.dentry = whiteout_dentry;
5238     + run_sioq(__unionfs_unlink, &args); /* the unlink result comes back in args.err */
5239     + err = args.err;
5240     + }
5241     + dput(whiteout_dentry);
5242     +
5243     + unlock_dir(lower_parent_dentry);
5244     +
5245     + if (err) {
5246     + /* exit if the error returned was NOT -EROFS */
5247     + if (!IS_COPYUP_ERR(err))
5248     + goto out;
5249     + bstart--; /* begin the mkdir search one branch to the left */
5250     + } else
5251     + whiteout_unlinked = 1;
5252     + }
5253     +
5254     + for (bindex = bstart; bindex >= 0; bindex--) { /* try branches from bstart down to 0 */
5255     + int i;
5256     + int bend = dbend(dentry);
5257     +
5258     + if (is_robranch_super(dentry->d_sb, bindex))
5259     + continue; /* nonzero result: this branch is skipped */
5260     +
5261     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5262     + if (!lower_dentry) {
5263     + lower_dentry = create_parents(parent, dentry,
5264     + dentry->d_name.name,
5265     + bindex);
5266     + if (!lower_dentry || IS_ERR(lower_dentry)) {
5267     + printk(KERN_DEBUG "unionfs: lower dentry "
5268     + " NULL for bindex = %d\n", bindex);
5269     + continue;
5270     + }
5271     + }
5272     +
5273     + lower_parent_dentry = lock_parent(lower_dentry);
5274     +
5275     + if (IS_ERR(lower_parent_dentry)) {
5276     + err = PTR_ERR(lower_parent_dentry);
5277     + goto out;
5278     + }
5279     +
5280     + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5281     + mode);
5282     +
5283     + unlock_dir(lower_parent_dentry);
5284     +
5285     + /* did the mkdir succeed? */
5286     + if (err)
5287     + break;
5288     +
5289     + for (i = bindex + 1; i <= bend; i++) { /* bend is inclusive: also release the lower dentry stored at bend */
5290     + if (unionfs_lower_dentry_idx(dentry, i)) {
5291     + dput(unionfs_lower_dentry_idx(dentry, i));
5292     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
5293     + }
5294     + }
5295     + set_dbend(dentry, bindex); /* the new directory now lives only in branch bindex */
5296     +
5297     + /*
5298     + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5299     + * err.
5300     + */
5301     + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5302     + if (!err) {
5303     + unionfs_copy_attr_times(parent);
5304     + fsstack_copy_inode_size(parent,
5305     + lower_parent_dentry->d_inode);
5306     +
5307     + /* update number of links on parent directory */
5308     + parent->i_nlink = unionfs_get_nlinks(parent);
5309     + }
5310     +
5311     + err = make_dir_opaque(dentry, dbstart(dentry)); /* NOTE(review): overwrites any error from unionfs_interpose() above */
5312     + if (err) {
5313     + printk(KERN_ERR "unionfs: mkdir: error creating "
5314     + ".wh.__dir_opaque: %d\n", err);
5315     + goto out;
5316     + }
5317     +
5318     + /* we are done! */
5319     + break;
5320     + }
5321     +
5322     +out:
5323     + if (!dentry->d_inode)
5324     + d_drop(dentry); /* still negative: unhash so a later lookup starts fresh */
5325     +
5326     + if (!IS_ERR(name)) /* a failed alloc_whname() leaves an ERR_PTR here, which must not be freed */
5327     + kfree(name);
5328     + if (!err)
5329     + unionfs_copy_attr_times(dentry->d_inode);
5330     + unionfs_unlock_dentry(dentry);
5331     + unionfs_check_inode(parent);
5332     + unionfs_check_dentry(dentry);
5333     + unionfs_read_unlock(dentry->d_sb);
5334     +
5335     + return err;
5336     +}
5337     +
5338     +static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5339     + dev_t dev)
5340     +{ /* Create a node for 'dentry' under 'dir': remove a .wh.<name> whiteout if present, then vfs_mknod in the first branch from bstart down to 0 that accepts it. Returns 0 or -errno. */
5341     + int err = 0;
5342     + struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5343     + struct dentry *lower_parent_dentry = NULL;
5344     + int bindex = 0, bstart;
5345     + char *name = NULL; /* whiteout name from alloc_whname(); released at out */
5346     + int whiteout_unlinked = 0; /* set once the whiteout is removed; not read again in this function */
5347     +
5348     + unionfs_read_lock(dentry->d_sb);
5349     + unionfs_lock_dentry(dentry);
5350     +
5351     + if (dentry->d_inode &&
5352     + !__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5353     + err = -ESTALE; /* positive dentry that failed revalidation */
5354     + goto out;
5355     + }
5356     +
5357     + bstart = dbstart(dentry);
5358     +
5359     + lower_dentry = unionfs_lower_dentry(dentry);
5360     +
5361     + /*
5362     + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5363     + * first.
5364     + */
5365     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5366     + if (IS_ERR(name)) {
5367     + err = PTR_ERR(name);
5368     + goto out; /* name is an ERR_PTR from here on, so out must not kfree() it */
5369     + }
5370     +
5371     + whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5372     + dentry->d_name.len + UNIONFS_WHLEN);
5373     + if (IS_ERR(whiteout_dentry)) {
5374     + err = PTR_ERR(whiteout_dentry);
5375     + goto out;
5376     + }
5377     +
5378     + if (!whiteout_dentry->d_inode) {
5379     + dput(whiteout_dentry); /* negative dentry: there is no whiteout to remove */
5380     + whiteout_dentry = NULL;
5381     + } else {
5382     + /* found .wh.foo, unlink it */
5383     + lower_parent_dentry = lock_parent(whiteout_dentry);
5384     +
5385     + /* found a .wh.foo entry, remove it then do vfs_mknod */
5386     + if (!(err = is_robranch_super(dentry->d_sb, bstart)))
5387     + err = vfs_unlink(lower_parent_dentry->d_inode,
5388     + whiteout_dentry);
5389     + dput(whiteout_dentry);
5390     +
5391     + unlock_dir(lower_parent_dentry);
5392     +
5393     + if (err) {
5394     + if (!IS_COPYUP_ERR(err))
5395     + goto out;
5396     + bstart--; /* begin the mknod search one branch to the left */
5397     + } else
5398     + whiteout_unlinked = 1;
5399     + }
5400     +
5401     + for (bindex = bstart; bindex >= 0; bindex--) { /* try branches from bstart down to 0 */
5402     + if (is_robranch_super(dentry->d_sb, bindex))
5403     + continue; /* nonzero result: this branch is skipped */
5404     +
5405     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5406     + if (!lower_dentry) {
5407     + lower_dentry = create_parents(dir, dentry,
5408     + dentry->d_name.name,
5409     + bindex);
5410     + if (!lower_dentry || IS_ERR(lower_dentry)) { /* also reject NULL, as the symlink and mkdir callers do */
5411     + printk(KERN_DEBUG "unionfs: failed to create "
5412     + "parents on %d, err = %ld\n",
5413     + bindex, PTR_ERR(lower_dentry));
5414     + continue;
5415     + }
5416     + }
5417     +
5418     + lower_parent_dentry = lock_parent(lower_dentry);
5419     + if (IS_ERR(lower_parent_dentry)) {
5420     + err = PTR_ERR(lower_parent_dentry);
5421     + goto out;
5422     + }
5423     +
5424     + err = vfs_mknod(lower_parent_dentry->d_inode,
5425     + lower_dentry, mode, dev);
5426     +
5427     + if (err) {
5428     + unlock_dir(lower_parent_dentry);
5429     + break; /* a failed mknod ends the search; err is returned */
5430     + }
5431     +
5432     + /*
5433     + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5434     + * err.
5435     + */
5436     + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5437     + if (!err) {
5438     + fsstack_copy_attr_times(dir,
5439     + lower_parent_dentry->d_inode);
5440     + fsstack_copy_inode_size(dir,
5441     + lower_parent_dentry->d_inode);
5442     + /* update number of links on parent directory */
5443     + dir->i_nlink = unionfs_get_nlinks(dir);
5444     + }
5445     + unlock_dir(lower_parent_dentry); /* released only after the parent's attributes were copied */
5446     +
5447     + break;
5448     + }
5449     +
5450     +out:
5451     + if (!dentry->d_inode)
5452     + d_drop(dentry); /* still negative: unhash so a later lookup starts fresh */
5453     +
5454     + if (!IS_ERR(name)) /* a failed alloc_whname() leaves an ERR_PTR here, which must not be freed */
5455     + kfree(name);
5456     + if (!err)
5457     + unionfs_inherit_mnt(dentry);
5458     + unionfs_unlock_dentry(dentry);
5459     +
5460     + unionfs_check_inode(dir);
5461     + unionfs_check_dentry(dentry);
5462     + unionfs_read_unlock(dentry->d_sb);
5463     +
5464     + return err;
5465     +}
5466     +
5467     +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5468     + int bufsiz)
5469     +{ /* Forward readlink to the lower dentry's ->readlink; returns its result, -ESTALE if revalidation fails, or -EINVAL if the lower inode has no ->readlink. */
5470     + int err;
5471     + struct dentry *lower_dentry;
5472     +
5473     + unionfs_read_lock(dentry->d_sb);
5474     + unionfs_lock_dentry(dentry);
5475     +
5476     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5477     + err = -ESTALE;
5478     + goto out;
5479     + }
5480     +
5481     + lower_dentry = unionfs_lower_dentry(dentry);
5482     +
5483     + if (!lower_dentry->d_inode->i_op ||
5484     + !lower_dentry->d_inode->i_op->readlink) {
5485     + err = -EINVAL; /* lower inode cannot be read as a symlink */
5486     + goto out;
5487     + }
5488     +
5489     + err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5490     + buf, bufsiz);
5491     + if (err > 0) /* something was read: refresh our atime from the lower inode */
5492     + fsstack_copy_attr_atime(dentry->d_inode,
5493     + lower_dentry->d_inode);
5494     +
5495     +out:
5496     + unionfs_unlock_dentry(dentry);
5497     + unionfs_check_dentry(dentry);
5498     + unionfs_read_unlock(dentry->d_sb);
5499     +
5500     + return err;
5501     +}
5502     +
5503     +/*
5504     + * unionfs_follow_link takes a dentry, but it is simple. It only needs to
5505     + * allocate some memory and then call our ->readlink method. Our
5506     + * unionfs_readlink *does* lock our dentry and revalidate the dentry.
5507     + * Therefore, we do not have to lock our dentry here, to prevent a deadlock;
5508     + * nor do we need to revalidate it either. It is safe to not lock our
5509     + * dentry here, nor revalidate it, because unionfs_follow_link does not do
5510     + * anything (prior to calling ->readlink) which could become inconsistent
5511     + * due to branch management. On success the NUL-terminated target is handed to nd_set_link() and NULL is returned.
5512     + */
5513     +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5514     +{
5515     + char *buf;
5516     + int len = PAGE_SIZE, err; /* len is the size of the buffer allocated below */
5517     + mm_segment_t old_fs;
5518     +
5519     + unionfs_read_lock(dentry->d_sb);
5520     +
5521     + /* This is freed by the put_link method assuming a successful call. */
5522     + buf = kmalloc(len, GFP_KERNEL);
5523     + if (!buf) {
5524     + err = -ENOMEM;
5525     + goto out;
5526     + }
5527     +
5528     + /* read the symlink (keeping one byte free for the NUL), and then we will follow it */
5529     + old_fs = get_fs();
5530     + set_fs(KERNEL_DS); /* buf is a kernel buffer passed through a __user parameter */
5531     + err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len - 1);
5532     + set_fs(old_fs);
5533     + if (err < 0) {
5534     + kfree(buf);
5535     + buf = NULL;
5536     + goto out;
5537     + }
5538     + buf[err] = 0; /* err <= len - 1, so the terminator stays inside the allocation */
5539     + nd_set_link(nd, buf);
5540     + err = 0;
5541     +
5542     +out:
5543     + unionfs_check_dentry(dentry);
5544     + unionfs_read_unlock(dentry->d_sb);
5545     + return ERR_PTR(err); /* NULL when err is 0, otherwise the encoded errno */
5546     +}
5547     +
5548     +/* Release the link buffer stored in nd by unionfs_follow_link. FIXME: We may not have to lock here */
5549     +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5550     + void *cookie)
5551     +{ /* cookie is not used; the buffer is fetched with nd_get_link(nd) */
5552     + unionfs_read_lock(dentry->d_sb);
5553     +
5554     + unionfs_lock_dentry(dentry);
5555     + if (!__unionfs_d_revalidate_chain(dentry, nd, 0)) /* a failure is only logged; the buffer is freed regardless */
5556     + printk(KERN_ERR "unionfs: put_link failed to revalidate dentry\n");
5557     + unionfs_unlock_dentry(dentry);
5558     +
5559     + unionfs_check_dentry(dentry);
5560     + kfree(nd_get_link(nd));
5561     + unionfs_read_unlock(dentry->d_sb);
5562     +}
5563     +
5564     +/*
5565     + * Basically copied from the kernel vfs permission(), but we've changed
5566     + * the following:
5567     + * (1) the IS_RDONLY check is applied only to the first branch (bindex 0),
5568     + * (2) We return 0 (success) if the non-leftmost branch is mounted
5569     + * readonly, to allow copyup to work.
5570     + * (3) we do call security_inode_permission, and therefore security inside
5571     + * SELinux, etc. are performed. An -EROFS result from either check is turned into 0.
5572     + */
5573     +static int inode_permission(struct super_block *sb, struct inode *inode, int mask,
5574     + struct nameidata *nd, int bindex)
5575     +{ /* inode is the lower inode found in branch bindex of sb; returns 0 or -errno */
5576     + int retval, submask;
5577     +
5578     + if (mask & MAY_WRITE) {
5579     + umode_t mode = inode->i_mode;
5580     + /* The first branch is allowed to be really readonly. */
5581     + if (bindex == 0 &&
5582     + IS_RDONLY(inode) &&
5583     + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5584     + return -EROFS;
5585     + /*
5586     + * Nobody gets write access to an immutable file.
5587     + */
5588     + if (IS_IMMUTABLE(inode))
5589     + return -EACCES;
5590     + /*
5591     + * For all other branches than the first one, we ignore
5592     + * EROFS or if the branch is mounted as readonly, to let
5593     + * copyup take place.
5594     + */
5595     + if (bindex > 0 &&
5596     + is_robranch_super(sb, bindex) &&
5597     + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5598     + return 0; /* returns before the lower permission and security checks below */
5599     + }
5600     +
5601     + /* Ordinary permission routines do not understand MAY_APPEND. */
5602     + submask = mask & ~MAY_APPEND;
5603     + if (inode->i_op && inode->i_op->permission)
5604     + retval = inode->i_op->permission(inode, submask, nd);
5605     + else
5606     + retval = generic_permission(inode, submask, NULL);
5607     +
5608     + if (retval && retval != -EROFS) /* ignore EROFS */
5609     + return retval;
5610     +
5611     + retval = security_inode_permission(inode, mask, nd); /* gets the full mask, including MAY_APPEND */
5612     + return ((retval == -EROFS) ? 0 : retval); /* ignore EROFS */
5613     +}
5614     +
5615     +/*
5616     + * Don't grab the superblock read-lock in unionfs_permission, which prevents
5617     + * a deadlock with the branch-management "add branch" code (which grabbed
5618     + * the write lock). It is safe to not grab the read lock here, because even
5619     + * with branch management taking place, there is no chance that
5620     + * unionfs_permission, or anything it calls, will use stale branch
5621     + * information.
5622     + */
5623     +static int unionfs_permission(struct inode *inode, int mask,
5624     + struct nameidata *nd)
5625     +{ /* Check 'mask' against the lower inodes from ibstart to ibend; returns 0, the first failing check's error, or -ESTALE. */
5626     + struct inode *lower_inode = NULL;
5627     + int err = 0;
5628     + int bindex, bstart, bend;
5629     + const int is_file = !S_ISDIR(inode->i_mode); /* true for anything that is not a directory */
5630     + const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); /* write requested without read */
5631     +
5632     + bstart = ibstart(inode);
5633     + bend = ibend(inode);
5634     + if (bstart < 0 || bend < 0) {
5635     + /*
5636     + * With branch-management, we can get a stale inode here.
5637     + * If so, we return ESTALE back to link_path_walk, which
5638     + * would discard the dcache entry and re-lookup the
5639     + * dentry+inode. This should be equivalent to issuing
5640     + * __unionfs_d_revalidate_chain on nd.dentry here.
5641     + */
5642     + err = -ESTALE; /* force revalidate */
5643     + goto out;
5644     + }
5645     +
5646     + for (bindex = bstart; bindex <= bend; bindex++) {
5647     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
5648     + if (!lower_inode)
5649     + continue; /* no lower inode stored for this branch */
5650     +
5651     + /*
5652     + * check the condition for D-F-D underlying files/directories,
5653     + * we don't have to check for files, if we are checking for
5654     + * directories.
5655     + */
5656     + if (!is_file && !S_ISDIR(lower_inode->i_mode))
5657     + continue;
5658     +
5659     + /*
5660     + * We use our own special version of permission, such that
5661     + * only the first branch returns -EROFS.
5662     + */
5663     + err = inode_permission(inode->i_sb, lower_inode, mask, nd, bindex);
5664     +
5665     + /*
5666     + * The permissions are an intersection of the overall directory
5667     + * permissions, so we fail if one fails.
5668     + */
5669     + if (err)
5670     + goto out;
5671     +
5672     + /* only the leftmost file matters. */
5673     + if (is_file || write_mask) {
5674     + if (is_file && write_mask) {
5675     + err = get_write_access(lower_inode); /* probe only: released again right away if granted */
5676     + if (!err)
5677     + put_write_access(lower_inode);
5678     + }
5679     + break;
5680     + }
5681     + }
5682     + /* sync times which may have changed (asynchronously) below */
5683     + unionfs_copy_attr_times(inode);
5684     +
5685     +out:
5686     + unionfs_check_inode(inode);
5687     + return err;
5688     +}
5689     +
5690     +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5691     +{ /* Apply 'ia' to the first available lower dentry via notify_change(), copying the file up first when it sits on a read-only branch; returns 0 or -errno. */
5692     + int err = 0;
5693     + struct dentry *lower_dentry;
5694     + struct inode *inode = NULL;
5695     + struct inode *lower_inode = NULL;
5696     + int bstart, bend, bindex;
5697     + int i;
5698     + int copyup = 0; /* set to 1 after a successful copyup_dentry() */
5699     +
5700     + unionfs_read_lock(dentry->d_sb);
5701     + unionfs_lock_dentry(dentry);
5702     +
5703     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
5704     + err = -ESTALE;
5705     + goto out;
5706     + }
5707     +
5708     + bstart = dbstart(dentry);
5709     + bend = dbend(dentry);
5710     + inode = dentry->d_inode;
5711     +
5712     + for (bindex = bstart; (bindex <= bend) || (bindex == bstart); /* "== bstart" admits the first pass even if bend < bstart */
5713     + bindex++) {
5714     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5715     + if (!lower_dentry)
5716     + continue;
5717     + BUG_ON(lower_dentry->d_inode == NULL);
5718     +
5719     + /* If the file is on a read only branch */
5720     + if (is_robranch_super(dentry->d_sb, bindex)
5721     + || IS_RDONLY(lower_dentry->d_inode)) {
5722     + if (copyup || (bindex != bstart))
5723     + continue;
5724     + /* Only if it's the leftmost file, copyup the file */
5725     + for (i = bstart - 1; i >= 0; i--) { /* try each branch to the left, nearest first */
5726     + loff_t size = dentry->d_inode->i_size;
5727     + if (ia->ia_valid & ATTR_SIZE)
5728     + size = ia->ia_size; /* copy up with the requested new size */
5729     + err = copyup_dentry(dentry->d_parent->d_inode,
5730     + dentry, bstart, i,
5731     + dentry->d_name.name,
5732     + dentry->d_name.len,
5733     + NULL, size);
5734     +
5735     + if (!err) {
5736     + copyup = 1;
5737     + lower_dentry =
5738     + unionfs_lower_dentry(dentry);
5739     + break;
5740     + }
5741     + /*
5742     + * if error is in the leftmost branch, pass
5743     + * it up.
5744     + */
5745     + if (i == 0)
5746     + goto out;
5747     + }
5748     +
5749     + }
5750     + err = notify_change(lower_dentry, ia);
5751     + if (err)
5752     + goto out;
5753     + break; /* only one lower object is ever changed */
5754     + }
5755     +
5756     + /* for mmap */
5757     + if (ia->ia_valid & ATTR_SIZE) {
5758     + if (ia->ia_size != i_size_read(inode)) {
5759     + err = vmtruncate(inode, ia->ia_size);
5760     + if (err)
5761     + printk(KERN_ERR "unionfs_setattr: vmtruncate failed\n");
5762     + }
5763     + }
5764     +
5765     + /* get the size from the first lower inode */
5766     + lower_inode = unionfs_lower_inode(inode);
5767     + unionfs_copy_attr_all(inode, lower_inode);
5768     + fsstack_copy_inode_size(inode, lower_inode);
5769     + /* if setattr succeeded, then parent dir may have changed */
5770     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
5771     +out:
5772     + unionfs_unlock_dentry(dentry);
5773     + unionfs_check_dentry(dentry);
5774     + unionfs_check_dentry(dentry->d_parent);
5775     + unionfs_read_unlock(dentry->d_sb);
5776     +
5777     + return err;
5778     +}
5779     +
5780     +struct inode_operations unionfs_symlink_iops = { /* inode operation table with the symlink methods (readlink, follow_link, put_link) */
5781     + .readlink = unionfs_readlink,
5782     + .permission = unionfs_permission,
5783     + .follow_link = unionfs_follow_link,
5784     + .setattr = unionfs_setattr,
5785     + .put_link = unionfs_put_link, /* frees the buffer that follow_link stored in the nameidata */
5786     +};
5787     +
5788     +struct inode_operations unionfs_dir_iops = { /* inode operation table with the directory methods (create, lookup, link, unlink, ...) */
5789     + .create = unionfs_create,
5790     + .lookup = unionfs_lookup,
5791     + .link = unionfs_link,
5792     + .unlink = unionfs_unlink,
5793     + .symlink = unionfs_symlink,
5794     + .mkdir = unionfs_mkdir,
5795     + .rmdir = unionfs_rmdir,
5796     + .mknod = unionfs_mknod,
5797     + .rename = unionfs_rename,
5798     + .permission = unionfs_permission,
5799     + .setattr = unionfs_setattr,
5800     +#ifdef CONFIG_UNION_FS_XATTR
5801     + .setxattr = unionfs_setxattr, /* the xattr methods exist only when CONFIG_UNION_FS_XATTR is defined */
5802     + .getxattr = unionfs_getxattr,
5803     + .removexattr = unionfs_removexattr,
5804     + .listxattr = unionfs_listxattr,
5805     +#endif /* CONFIG_UNION_FS_XATTR */
5806     +};
5807     +
5808     +struct inode_operations unionfs_main_iops = { /* permission/setattr (plus optional xattr) only; presumably for inodes that are neither directories nor symlinks -- TODO confirm where it is installed */
5809     + .permission = unionfs_permission,
5810     + .setattr = unionfs_setattr,
5811     +#ifdef CONFIG_UNION_FS_XATTR
5812     + .setxattr = unionfs_setxattr, /* the xattr methods exist only when CONFIG_UNION_FS_XATTR is defined */
5813     + .getxattr = unionfs_getxattr,
5814     + .removexattr = unionfs_removexattr,
5815     + .listxattr = unionfs_listxattr,
5816     +#endif /* CONFIG_UNION_FS_XATTR */
5817     +};
5818     diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5819     new file mode 100644
5820     index 0000000..da991b3
5821     --- /dev/null
5822     +++ b/fs/unionfs/lookup.c
5823     @@ -0,0 +1,577 @@
5824     +/*
5825     + * Copyright (c) 2003-2007 Erez Zadok
5826     + * Copyright (c) 2003-2006 Charles P. Wright
5827     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5828     + * Copyright (c) 2005-2006 Junjiro Okajima
5829     + * Copyright (c) 2005 Arun M. Krishnakumar
5830     + * Copyright (c) 2004-2006 David P. Quigley
5831     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5832     + * Copyright (c) 2003 Puja Gupta
5833     + * Copyright (c) 2003 Harikesavan Krishnan
5834     + * Copyright (c) 2003-2007 Stony Brook University
5835     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
5836     + *
5837     + * This program is free software; you can redistribute it and/or modify
5838     + * it under the terms of the GNU General Public License version 2 as
5839     + * published by the Free Software Foundation.
5840     + */
5841     +
5842     +#include "union.h"
5843     +
5844     +static int realloc_dentry_private_data(struct dentry *dentry);
5845     +
5846     +/* Return 1 if name is valid, i.e. it starts with neither the whiteout prefix nor the opaque dir marker name; else 0 */
5847     +static int is_validname(const char *name)
5848     +{
5849     + if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) /* prefix match on the whiteout prefix */
5850     + return 0;
5851     + if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
5852     + sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1)) /* prefix match: sizeof - 1 leaves out the NUL */
5853     + return 0;
5854     + return 1;
5855     +}
5856     +
5857     +/* The rest of these are utility functions for lookup. */
5858     +static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
5859     +{ /* Look up UNIONFS_DIR_OPAQUE inside the lower directory of branch bindex; returns 1 if it exists, 0 if not, or a negative errno from the lookup. */
5860     + int err = 0;
5861     + struct dentry *lower_dentry;
5862     + struct dentry *wh_lower_dentry;
5863     + struct inode *lower_inode;
5864     + struct sioq_args args;
5865     +
5866     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5867     + lower_inode = lower_dentry->d_inode;
5868     +
5869     + BUG_ON(!S_ISDIR(lower_inode->i_mode)); /* callers must pass a branch whose lower object is a directory */
5870     +
5871     + mutex_lock(&lower_inode->i_mutex); /* held across the marker lookup below */
5872     +
5873     + if (!permission(lower_inode, MAY_EXEC, NULL)) /* 0 means MAY_EXEC is granted: look up directly */
5874     + wh_lower_dentry =
5875     + lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
5876     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
5877     + else {
5878     + args.is_opaque.dentry = lower_dentry;
5879     + run_sioq(__is_opaque_dir, &args); /* otherwise go through run_sioq(); presumably a more privileged context -- TODO confirm */
5880     + wh_lower_dentry = args.ret;
5881     + }
5882     +
5883     + mutex_unlock(&lower_inode->i_mutex);
5884     +
5885     + if (IS_ERR(wh_lower_dentry)) {
5886     + err = PTR_ERR(wh_lower_dentry);
5887     + goto out;
5888     + }
5889     +
5890     + /* This is an opaque dir iff wh_lower_dentry is positive */
5891     + err = !!wh_lower_dentry->d_inode;
5892     +
5893     + dput(wh_lower_dentry);
5894     +out:
5895     + return err;
5896     +}
5897     +
5898     +/*
5899     + * Main (and complex) driver function for Unionfs's lookup
5900     + *
5901     + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
5902     + * PTR if d_splice returned a different dentry.
5903     + */
5904     +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
5905     + struct nameidata *nd, int lookupmode)
5906     +{
5907     + int err = 0;
5908     + struct dentry *lower_dentry = NULL;
5909     + struct dentry *wh_lower_dentry = NULL;
5910     + struct dentry *lower_dir_dentry = NULL;
5911     + struct dentry *parent_dentry = NULL;
5912     + struct dentry *d_interposed = NULL;
5913     + int bindex, bstart, bend, bopaque;
5914     + int dentry_count = 0; /* Number of positive dentries. */
5915     + int first_dentry_offset = -1; /* -1 is uninitialized */
5916     + struct dentry *first_dentry = NULL;
5917     + struct dentry *first_lower_dentry = NULL;
5918     + struct vfsmount *first_lower_mnt = NULL;
5919     + int locked_parent = 0;
5920     + int locked_child = 0;
5921     + int allocated_new_info = 0;
5922     + int opaque;
5923     + char *whname = NULL;
5924     + const char *name;
5925     + int namelen;
5926     +
5927     + /*
5928     + * We should already have a lock on this dentry in the case of a
5929     + * partial lookup, or a revalidation. Otherwise it is returned from
5930     + * new_dentry_private_data already locked.
5931     + */
5932     + if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
5933     + lookupmode == INTERPOSE_REVAL_NEG)
5934     + verify_locked(dentry);
5935     + else {
5936     + BUG_ON(UNIONFS_D(dentry) != NULL);
5937     + locked_child = 1;
5938     + }
5939     +
5940     + switch(lookupmode) {
5941     + case INTERPOSE_PARTIAL:
5942     + break;
5943     + case INTERPOSE_LOOKUP:
5944     + if ((err = new_dentry_private_data(dentry)))
5945     + goto out;
5946     + allocated_new_info = 1;
5947     + break;
5948     + default:
5949     + if ((err = realloc_dentry_private_data(dentry)))
5950     + goto out;
5951     + allocated_new_info = 1;
5952     + break;
5953     + }
5954     +
5955     + /* must initialize dentry operations */
5956     + dentry->d_op = &unionfs_dops;
5957     +
5958     + parent_dentry = dget_parent(dentry);
5959     + /* We never partial lookup the root directory. */
5960     + if (parent_dentry != dentry) {
5961     + unionfs_lock_dentry(parent_dentry);
5962     + locked_parent = 1;
5963     + } else {
5964     + dput(parent_dentry);
5965     + parent_dentry = NULL;
5966     + goto out;
5967     + }
5968     +
5969     + name = dentry->d_name.name;
5970     + namelen = dentry->d_name.len;
5971     +
5972     + /* No dentries should get created for possible whiteout names. */
5973     + if (!is_validname(name)) {
5974     + err = -EPERM;
5975     + goto out_free;
5976     + }
5977     +
5978     + /* Now start the actual lookup procedure. */
5979     + bstart = dbstart(parent_dentry);
5980     + bend = dbend(parent_dentry);
5981     + bopaque = dbopaque(parent_dentry);
5982     + BUG_ON(bstart < 0);
5983     +
5984     + /*
5985     + * It would be ideal if we could convert partial lookups to only have
5986     + * to do this work when they really need to. It could probably improve
5987     + * performance quite a bit, and maybe simplify the rest of the code.
5988     + */
5989     + if (lookupmode == INTERPOSE_PARTIAL) {
5990     + bstart++;
5991     + if ((bopaque != -1) && (bopaque < bend))
5992     + bend = bopaque;
5993     + }
5994     +
5995     + for (bindex = bstart; bindex <= bend; bindex++) {
5996     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5997     + if (lookupmode == INTERPOSE_PARTIAL && lower_dentry)
5998     + continue;
5999     + BUG_ON(lower_dentry != NULL);
6000     +
6001     + lower_dir_dentry =
6002     + unionfs_lower_dentry_idx(parent_dentry, bindex);
6003     +
6004     + /* if the parent lower dentry does not exist skip this */
6005     + if (!(lower_dir_dentry && lower_dir_dentry->d_inode))
6006     + continue;
6007     +
6008     + /* also skip it if the parent isn't a directory. */
6009     + if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6010     + continue;
6011     +
6012     + /* Reuse the whiteout name because its value doesn't change. */
6013     + if (!whname) {
6014     + whname = alloc_whname(name, namelen);
6015     + if (IS_ERR(whname)) {
6016     + err = PTR_ERR(whname);
6017     + goto out_free;
6018     + }
6019     + }
6020     +
6021     + /* check if whiteout exists in this branch: lookup .wh.foo */
6022     + wh_lower_dentry = lookup_one_len(whname, lower_dir_dentry,
6023     + namelen + UNIONFS_WHLEN);
6024     + if (IS_ERR(wh_lower_dentry)) {
6025     + dput(first_lower_dentry);
6026     + unionfs_mntput(first_dentry, first_dentry_offset);
6027     + err = PTR_ERR(wh_lower_dentry);
6028     + goto out_free;
6029     + }
6030     +
6031     + if (wh_lower_dentry->d_inode) {
6032     + /* We found a whiteout so lets give up. */
6033     + if (S_ISREG(wh_lower_dentry->d_inode->i_mode)) {
6034     + set_dbend(dentry, bindex);
6035     + set_dbopaque(dentry, bindex);
6036     + dput(wh_lower_dentry);
6037     + break;
6038     + }
6039     + err = -EIO;
6040     + printk(KERN_NOTICE "unionfs: EIO: invalid whiteout "
6041     + "entry type %d.\n",
6042     + wh_lower_dentry->d_inode->i_mode);
6043     + dput(wh_lower_dentry);
6044     + dput(first_lower_dentry);
6045     + unionfs_mntput(first_dentry, first_dentry_offset);
6046     + goto out_free;
6047     + }
6048     +
6049     + dput(wh_lower_dentry);
6050     + wh_lower_dentry = NULL;
6051     +
6052     + /* Now do regular lookup; lookup foo */
6053     + nd->dentry = unionfs_lower_dentry_idx(dentry, bindex);
6054     + /* FIXME: fix following line for mount point crossing */
6055     + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
6056     +
6057     + lower_dentry = lookup_one_len_nd(name, lower_dir_dentry,
6058     + namelen, nd);
6059     + if (IS_ERR(lower_dentry)) {
6060     + dput(first_lower_dentry);
6061     + unionfs_mntput(first_dentry, first_dentry_offset);
6062     + err = PTR_ERR(lower_dentry);
6063     + goto out_free;
6064     + }
6065     +
6066     + /*
6067     + * Store the first negative dentry specially, because if they
6068     + * are all negative we need this for future creates.
6069     + */
6070     + if (!lower_dentry->d_inode) {
6071     + if (!first_lower_dentry && (dbstart(dentry) == -1)) {
6072     + first_lower_dentry = lower_dentry;
6073     + /*
6074     + * FIXME: following line needs to be changed
6075     + * to allow mount-point crossing
6076     + */
6077     + first_dentry = parent_dentry;
6078     + first_lower_mnt =
6079     + unionfs_mntget(parent_dentry, bindex);
6080     + first_dentry_offset = bindex;
6081     + } else
6082     + dput(lower_dentry);
6083     +
6084     + continue;
6085     + }
6086     +
6087     + /* number of positive dentries */
6088     + dentry_count++;
6089     +
6090     + /* store underlying dentry */
6091     + if (dbstart(dentry) == -1)
6092     + set_dbstart(dentry, bindex);
6093     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6094     + /*
6095     + * FIXME: the following line needs to get fixed to allow
6096     + * mount-point crossing
6097     + */
6098     + unionfs_set_lower_mnt_idx(dentry, bindex,
6099     + unionfs_mntget(parent_dentry,
6100     + bindex));
6101     + set_dbend(dentry, bindex);
6102     +
6103     + /* update parent directory's atime with the bindex */
6104     + fsstack_copy_attr_atime(parent_dentry->d_inode,
6105     + lower_dir_dentry->d_inode);
6106     +
6107     + /* We terminate file lookups here. */
6108     + if (!S_ISDIR(lower_dentry->d_inode->i_mode)) {
6109     + if (lookupmode == INTERPOSE_PARTIAL)
6110     + continue;
6111     + if (dentry_count == 1)
6112     + goto out_positive;
6113     + /* This can only happen with mixed D-*-F-* */
6114     + BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
6115     + d_inode->i_mode));
6116     + continue;
6117     + }
6118     +
6119     + opaque = is_opaque_dir(dentry, bindex);
6120     + if (opaque < 0) {
6121     + dput(first_lower_dentry);
6122     + unionfs_mntput(first_dentry, first_dentry_offset);
6123     + err = opaque;
6124     + goto out_free;
6125     + } else if (opaque) {
6126     + set_dbend(dentry, bindex);
6127     + set_dbopaque(dentry, bindex);
6128     + break;
6129     + }
6130     + }
6131     +
6132     + if (dentry_count)
6133     + goto out_positive;
6134     + else
6135     + goto out_negative;
6136     +
6137     +out_negative:
6138     + if (lookupmode == INTERPOSE_PARTIAL)
6139     + goto out;
6140     +
6141     + /* If we've only got negative dentries, then use the leftmost one. */
6142     + if (lookupmode == INTERPOSE_REVAL) {
6143     + if (dentry->d_inode)
6144     + UNIONFS_I(dentry->d_inode)->stale = 1;
6145     + goto out;
6146     + }
6147     + /* This should only happen if we found a whiteout. */
6148     + if (first_dentry_offset == -1) {
6149     + nd->dentry = dentry;
6150     + /* FIXME: fix following line for mount point crossing */
6151     + nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
6152     +
6153     + first_lower_dentry =
6154     + lookup_one_len_nd(name, lower_dir_dentry,
6155     + namelen, nd);
6156     + first_dentry_offset = bindex;
6157     + if (IS_ERR(first_lower_dentry)) {
6158     + err = PTR_ERR(first_lower_dentry);
6159     + goto out;
6160     + }
6161     +
6162     + /*
6163     + * FIXME: the following line needs to be changed to allow
6164     + * mount-point crossing
6165     + */
6166     + first_dentry = dentry;
6167     + first_lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6168     + bindex);
6169     + }
6170     + unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
6171     + first_lower_dentry);
6172     + unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
6173     + first_lower_mnt);
6174     + set_dbstart(dentry, first_dentry_offset);
6175     + set_dbend(dentry, first_dentry_offset);
6176     +
6177     + if (lookupmode == INTERPOSE_REVAL_NEG)
6178     + BUG_ON(dentry->d_inode != NULL);
6179     + else
6180     + d_add(dentry, NULL);
6181     + goto out;
6182     +
6183     +/* This part of the code is for positive dentries. */
6184     +out_positive:
6185     + BUG_ON(dentry_count <= 0);
6186     +
6187     + /*
6188     + * If we're holding onto the first negative dentry & corresponding
6189     + * vfsmount - throw it out.
6190     + */
6191     + dput(first_lower_dentry);
6192     + unionfs_mntput(first_dentry, first_dentry_offset);
6193     +
6194     + /* Partial lookups need to re-interpose, or throw away older negs. */
6195     + if (lookupmode == INTERPOSE_PARTIAL) {
6196     + if (dentry->d_inode) {
6197     + unionfs_reinterpose(dentry);
6198     + goto out;
6199     + }
6200     +
6201     + /*
6202     + * This somehow turned positive, so it is as if we had a
6203     + * negative revalidation.
6204     + */
6205     + lookupmode = INTERPOSE_REVAL_NEG;
6206     +
6207     + update_bstart(dentry);
6208     + bstart = dbstart(dentry);
6209     + bend = dbend(dentry);
6210     + }
6211     +
6212     + /*
6213     + * Interpose can return a dentry if d_splice returned a different
6214     + * dentry.
6215     + */
6216     + d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6217     + if (IS_ERR(d_interposed))
6218     + err = PTR_ERR(d_interposed);
6219     + else if (d_interposed)
6220     + dentry = d_interposed;
6221     +
6222     + if (err)
6223     + goto out_drop;
6224     +
6225     + goto out;
6226     +
6227     +out_drop:
6228     + d_drop(dentry);
6229     +
6230     +out_free:
6231     + /* should dput all the underlying dentries on error condition */
6232     + bstart = dbstart(dentry);
6233     + if (bstart >= 0) {
6234     + bend = dbend(dentry);
6235     + for (bindex = bstart; bindex <= bend; bindex++) {
6236     + dput(unionfs_lower_dentry_idx(dentry, bindex));
6237     + unionfs_mntput(dentry, bindex);
6238     + }
6239     + }
6240     + kfree(UNIONFS_D(dentry)->lower_paths);
6241     + UNIONFS_D(dentry)->lower_paths = NULL;
6242     + set_dbstart(dentry, -1);
6243     + set_dbend(dentry, -1);
6244     +
6245     +out:
6246     + if (!err && UNIONFS_D(dentry)) {
6247     + BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
6248     + BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
6249     + BUG_ON(dbstart(dentry) < 0);
6250     + }
6251     + kfree(whname);
6252     + if (locked_parent)
6253     + unionfs_unlock_dentry(parent_dentry);
6254     + dput(parent_dentry);
6255     + if (locked_child || (err && allocated_new_info))
6256     + unionfs_unlock_dentry(dentry);
6257     + if (!err && d_interposed)
6258     + return d_interposed;
6259     + return ERR_PTR(err);
6260     +}
6261     +
6262     +/*
6263     + * This is a utility function that fills in a unionfs dentry.
6264     + *
6265     + * Returns: 0 (ok), or -ERRNO if an error occurred.
6266     + */
6267     +int unionfs_partial_lookup(struct dentry *dentry)
6268     +{
6269     + struct dentry *tmp;
6270     + struct nameidata nd = { .flags = 0 };
6271     + int err = -ENOSYS;
6272     +
6273     + tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
6274     + if (!tmp) {
6275     + err = 0;
6276     + goto out;
6277     + }
6278     + if (IS_ERR(tmp)) {
6279     + err = PTR_ERR(tmp);
6280     + goto out;
6281     + }
6282     + /* need to change the interface */
6283     + BUG_ON(tmp != dentry);
6284     +out:
6285     + return err;
6286     +}
6287     +
6288     +/* The dentry cache is just so we have properly sized dentries. */
6289     +static struct kmem_cache *unionfs_dentry_cachep;
6290     +int unionfs_init_dentry_cache(void)
6291     +{
6292     + unionfs_dentry_cachep =
6293     + kmem_cache_create("unionfs_dentry",
6294     + sizeof(struct unionfs_dentry_info),
6295     + 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
6296     +
6297     + return (unionfs_dentry_cachep ? 0 : -ENOMEM);
6298     +}
6299     +
6300     +void unionfs_destroy_dentry_cache(void)
6301     +{
6302     + if (unionfs_dentry_cachep)
6303     + kmem_cache_destroy(unionfs_dentry_cachep);
6304     +}
6305     +
6306     +void free_dentry_private_data(struct dentry *dentry)
6307     +{
6308     + if (!dentry || !dentry->d_fsdata)
6309     + return;
6310     + kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6311     + dentry->d_fsdata = NULL;
6312     +}
6313     +
6314     +static inline int __realloc_dentry_private_data(struct dentry *dentry)
6315     +{
6316     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6317     + void *p;
6318     + int size;
6319     +
6320     + BUG_ON(!info);
6321     +
6322     + size = sizeof(struct path) * sbmax(dentry->d_sb);
6323     + p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6324     + if (!p)
6325     + return -ENOMEM;
6326     +
6327     + info->lower_paths = p;
6328     +
6329     + info->bstart = -1;
6330     + info->bend = -1;
6331     + info->bopaque = -1;
6332     + info->bcount = sbmax(dentry->d_sb);
6333     + atomic_set(&info->generation,
6334     + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6335     +
6336     + memset(info->lower_paths, 0, size);
6337     +
6338     + return 0;
6339     +}
6340     +
6341     +/* UNIONFS_D(dentry)->lock must be locked */
6342     +static int realloc_dentry_private_data(struct dentry *dentry)
6343     +{
6344     + if (!__realloc_dentry_private_data(dentry))
6345     + return 0;
6346     +
6347     + kfree(UNIONFS_D(dentry)->lower_paths);
6348     + free_dentry_private_data(dentry);
6349     + return -ENOMEM;
6350     +}
6351     +
6352     +/* allocate new dentry private data */
6353     +int new_dentry_private_data(struct dentry *dentry)
6354     +{
6355     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6356     +
6357     + BUG_ON(info);
6358     +
6359     + info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6360     + if (!info)
6361     + return -ENOMEM;
6362     +
6363     + mutex_init(&info->lock);
6364     + mutex_lock(&info->lock);
6365     +
6366     + info->lower_paths = NULL;
6367     +
6368     + dentry->d_fsdata = info;
6369     +
6370     + if (!__realloc_dentry_private_data(dentry))
6371     + return 0;
6372     +
6373     + mutex_unlock(&info->lock);
6374     + free_dentry_private_data(dentry);
6375     + return -ENOMEM;
6376     +}
6377     +
6378     +/*
6379     + * scan through the lower dentry objects, and set bstart to reflect the
6380     + * starting branch
6381     + */
6382     +void update_bstart(struct dentry *dentry)
6383     +{
6384     + int bindex;
6385     + int bstart = dbstart(dentry);
6386     + int bend = dbend(dentry);
6387     + struct dentry *lower_dentry;
6388     +
6389     + for (bindex = bstart; bindex <= bend; bindex++) {
6390     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6391     + if (!lower_dentry)
6392     + continue;
6393     + if (lower_dentry->d_inode) {
6394     + set_dbstart(dentry, bindex);
6395     + break;
6396     + }
6397     + dput(lower_dentry);
6398     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6399     + }
6400     +}
6401     diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6402     new file mode 100644
6403     index 0000000..e437edb
6404     --- /dev/null
6405     +++ b/fs/unionfs/main.c
6406     @@ -0,0 +1,762 @@
6407     +/*
6408     + * Copyright (c) 2003-2007 Erez Zadok
6409     + * Copyright (c) 2003-2006 Charles P. Wright
6410     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6411     + * Copyright (c) 2005-2006 Junjiro Okajima
6412     + * Copyright (c) 2005 Arun M. Krishnakumar
6413     + * Copyright (c) 2004-2006 David P. Quigley
6414     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6415     + * Copyright (c) 2003 Puja Gupta
6416     + * Copyright (c) 2003 Harikesavan Krishnan
6417     + * Copyright (c) 2003-2007 Stony Brook University
6418     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
6419     + *
6420     + * This program is free software; you can redistribute it and/or modify
6421     + * it under the terms of the GNU General Public License version 2 as
6422     + * published by the Free Software Foundation.
6423     + */
6424     +
6425     +#include "union.h"
6426     +#include <linux/module.h>
6427     +#include <linux/moduleparam.h>
6428     +
6429     +/*
6430     + * Connect a unionfs inode dentry/inode with several lower ones. This is
6431     + * the classic stackable file system "vnode interposition" action.
6432     + *
6433     + * @sb: unionfs's super_block
6434     + */
6435     +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6436     + int flag)
6437     +{
6438     + struct inode *lower_inode;
6439     + struct dentry *lower_dentry;
6440     + int err = 0;
6441     + struct inode *inode;
6442     + int is_negative_dentry = 1;
6443     + int bindex, bstart, bend;
6444     + int skipped = 1;
6445     + struct dentry *spliced = NULL;
6446     +
6447     + verify_locked(dentry);
6448     +
6449     + bstart = dbstart(dentry);
6450     + bend = dbend(dentry);
6451     +
6452     + /* Make sure that we didn't get a negative dentry. */
6453     + for (bindex = bstart; bindex <= bend; bindex++) {
6454     + if (unionfs_lower_dentry_idx(dentry, bindex) &&
6455     + unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
6456     + is_negative_dentry = 0;
6457     + break;
6458     + }
6459     + }
6460     + BUG_ON(is_negative_dentry);
6461     +
6462     + /*
6463     + * We allocate our new inode below, by calling iget.
6464     + * iget will call our read_inode which will initialize some
6465     + * of the new inode's fields
6466     + */
6467     +
6468     + /*
6469     + * On revalidate we've already got our own inode and just need
6470     + * to fix it up.
6471     + */
6472     + if (flag == INTERPOSE_REVAL) {
6473     + inode = dentry->d_inode;
6474     + UNIONFS_I(inode)->bstart = -1;
6475     + UNIONFS_I(inode)->bend = -1;
6476     + atomic_set(&UNIONFS_I(inode)->generation,
6477     + atomic_read(&UNIONFS_SB(sb)->generation));
6478     +
6479     + UNIONFS_I(inode)->lower_inodes =
6480     + kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6481     + if (!UNIONFS_I(inode)->lower_inodes) {
6482     + err = -ENOMEM;
6483     + goto out;
6484     + }
6485     + } else {
6486     + /* get unique inode number for unionfs */
6487     + inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6488     + if (!inode) {
6489     + err = -EACCES;
6490     + goto out;
6491     + }
6492     + if (atomic_read(&inode->i_count) > 1)
6493     + goto skip;
6494     + }
6495     +
6496     +fill_i_info:
6497     + skipped = 0;
6498     + for (bindex = bstart; bindex <= bend; bindex++) {
6499     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6500     + if (!lower_dentry) {
6501     + unionfs_set_lower_inode_idx(inode, bindex, NULL);
6502     + continue;
6503     + }
6504     +
6505     + /* Initialize the lower inode to the new lower inode. */
6506     + if (!lower_dentry->d_inode)
6507     + continue;
6508     +
6509     + unionfs_set_lower_inode_idx(inode, bindex,
6510     + igrab(lower_dentry->d_inode));
6511     + }
6512     +
6513     + ibstart(inode) = dbstart(dentry);
6514     + ibend(inode) = dbend(dentry);
6515     +
6516     + /* Use attributes from the first branch. */
6517     + lower_inode = unionfs_lower_inode(inode);
6518     +
6519     + /* Use different set of inode ops for symlinks & directories */
6520     + if (S_ISLNK(lower_inode->i_mode))
6521     + inode->i_op = &unionfs_symlink_iops;
6522     + else if (S_ISDIR(lower_inode->i_mode))
6523     + inode->i_op = &unionfs_dir_iops;
6524     +
6525     + /* Use different set of file ops for directories */
6526     + if (S_ISDIR(lower_inode->i_mode))
6527     + inode->i_fop = &unionfs_dir_fops;
6528     +
6529     + /* properly initialize special inodes */
6530     + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6531     + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6532     + init_special_inode(inode, lower_inode->i_mode,
6533     + lower_inode->i_rdev);
6534     +
6535     + /* all well, copy inode attributes */
6536     + unionfs_copy_attr_all(inode, lower_inode);
6537     + fsstack_copy_inode_size(inode, lower_inode);
6538     +
6539     + if (spliced)
6540     + goto out_spliced;
6541     +skip:
6542     + /* only (our) lookup wants to do a d_add */
6543     + switch (flag) {
6544     + case INTERPOSE_DEFAULT:
6545     + case INTERPOSE_REVAL_NEG:
6546     + d_instantiate(dentry, inode);
6547     + break;
6548     + case INTERPOSE_LOOKUP:
6549     + spliced = d_splice_alias(inode, dentry);
6550     + if (IS_ERR(spliced))
6551     + err = PTR_ERR(spliced);
6552     +
6553     + /*
6554     + * d_splice can return a dentry if it was disconnected and
6555     + * had to be moved. We must ensure that the private data of
6556     + * the new dentry is correct and that the inode info was
6557     + * filled properly. Finally we must return this new dentry.
6558     + */
6559     + else if (spliced && spliced != dentry) {
6560     + spliced->d_op = &unionfs_dops;
6561     + spliced->d_fsdata = dentry->d_fsdata;
6562     + dentry->d_fsdata = NULL;
6563     + dentry = spliced;
6564     + if (skipped)
6565     + goto fill_i_info;
6566     + goto out_spliced;
6567     + }
6568     + break;
6569     + case INTERPOSE_REVAL:
6570     + /* Do nothing. */
6571     + break;
6572     + default:
6573     + printk(KERN_ERR "unionfs: invalid interpose flag passed!");
6574     + BUG();
6575     + }
6576     + goto out;
6577     +
6578     +out_spliced:
6579     + if (!err)
6580     + return spliced;
6581     +out:
6582     + return ERR_PTR(err);
6583     +}
6584     +
6585     +/* like interpose above, but for an already existing dentry */
6586     +void unionfs_reinterpose(struct dentry *dentry)
6587     +{
6588     + struct dentry *lower_dentry;
6589     + struct inode *inode;
6590     + int bindex, bstart, bend;
6591     +
6592     + verify_locked(dentry);
6593     +
6594     + /* This is pre-allocated inode */
6595     + inode = dentry->d_inode;
6596     +
6597     + bstart = dbstart(dentry);
6598     + bend = dbend(dentry);
6599     + for (bindex = bstart; bindex <= bend; bindex++) {
6600     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6601     + if (!lower_dentry)
6602     + continue;
6603     +
6604     + if (!lower_dentry->d_inode)
6605     + continue;
6606     + if (unionfs_lower_inode_idx(inode, bindex))
6607     + continue;
6608     + unionfs_set_lower_inode_idx(inode, bindex,
6609     + igrab(lower_dentry->d_inode));
6610     + }
6611     + ibstart(inode) = dbstart(dentry);
6612     + ibend(inode) = dbend(dentry);
6613     +}
6614     +
6615     +/*
6616     + * make sure the branch we just looked up (nd) makes sense:
6617     + *
6618     + * 1) we're not trying to stack unionfs on top of unionfs
6619     + * 2) it exists
6620     + * 3) is a directory
6621     + */
6622     +int check_branch(struct nameidata *nd)
6623     +{
6624     + /* XXX: remove in ODF code -- stacking unions allowed there */
6625     + if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs"))
6626     + return -EINVAL;
6627     + if (!nd->dentry->d_inode)
6628     + return -ENOENT;
6629     + if (!S_ISDIR(nd->dentry->d_inode->i_mode))
6630     + return -ENOTDIR;
6631     + return 0;
6632     +}
6633     +
6634     +/* checks if two lower_dentries have overlapping branches */
6635     +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6636     +{
6637     + struct dentry *dent = NULL;
6638     +
6639     + dent = dent1;
6640     + while ((dent != dent2) && (dent->d_parent != dent))
6641     + dent = dent->d_parent;
6642     +
6643     + if (dent == dent2)
6644     + return 1;
6645     +
6646     + dent = dent2;
6647     + while ((dent != dent1) && (dent->d_parent != dent))
6648     + dent = dent->d_parent;
6649     +
6650     + return (dent == dent1);
6651     +}
6652     +
6653     +/*
6654     + * Parse branch mode helper function
6655     + */
6656     +int __parse_branch_mode(const char *name)
6657     +{
6658     + if (!name)
6659     + return 0;
6660     + if (!strcmp(name, "ro"))
6661     + return MAY_READ;
6662     + if (!strcmp(name, "rw"))
6663     + return (MAY_READ | MAY_WRITE);
6664     + return 0;
6665     +}
6666     +
6667     +/*
6668     + * Parse "ro" or "rw" options, but default to "rw" of no mode options
6669     + * was specified.
6670     + */
6671     +int parse_branch_mode(const char *name)
6672     +{
6673     + int perms = __parse_branch_mode(name);
6674     +
6675     + if (perms == 0)
6676     + perms = MAY_READ | MAY_WRITE;
6677     + return perms;
6678     +}
6679     +
6680     +/*
6681     + * parse the dirs= mount argument
6682     + *
6683     + * We don't need to lock the superblock private data's rwsem, as we get
6684     + * called only by unionfs_read_super - it is still a long time before anyone
6685     + * can even get a reference to us.
6686     + */
6687     +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6688     + *lower_root_info, char *options)
6689     +{
6690     + struct nameidata nd;
6691     + char *name;
6692     + int err = 0;
6693     + int branches = 1;
6694     + int bindex = 0;
6695     + int i = 0;
6696     + int j = 0;
6697     + struct dentry *dent1;
6698     + struct dentry *dent2;
6699     +
6700     + if (options[0] == '\0') {
6701     + printk(KERN_WARNING "unionfs: no branches specified\n");
6702     + err = -EINVAL;
6703     + goto out;
6704     + }
6705     +
6706     + /*
6707     + * Each colon means we have a separator, this is really just a rough
6708     + * guess, since strsep will handle empty fields for us.
6709     + */
6710     + for (i = 0; options[i]; i++)
6711     + if (options[i] == ':')
6712     + branches++;
6713     +
6714     + /* allocate space for underlying pointers to lower dentry */
6715     + UNIONFS_SB(sb)->data =
6716     + kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6717     + if (!UNIONFS_SB(sb)->data) {
6718     + err = -ENOMEM;
6719     + goto out;
6720     + }
6721     +
6722     + lower_root_info->lower_paths =
6723     + kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6724     + if (!lower_root_info->lower_paths) {
6725     + err = -ENOMEM;
6726     + goto out;
6727     + }
6728     +
6729     + /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6730     + branches = 0;
6731     + while ((name = strsep(&options, ":")) != NULL) {
6732     + int perms;
6733     + char *mode = strchr(name, '=');
6734     +
6735     + if (!name)
6736     + continue;
6737     + if (!*name) { /* bad use of ':' (extra colons) */
6738     + err = -EINVAL;
6739     + goto out;
6740     + }
6741     +
6742     + branches++;
6743     +
6744     + /* strip off '=' if any */
6745     + if (mode)
6746     + *mode++ = '\0';
6747     +
6748     + perms = parse_branch_mode(mode);
6749     + if (!bindex && !(perms & MAY_WRITE)) {
6750     + err = -EINVAL;
6751     + goto out;
6752     + }
6753     +
6754     + err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6755     + if (err) {
6756     + printk(KERN_WARNING "unionfs: error accessing "
6757     + "lower directory '%s' (error %d)\n",
6758     + name, err);
6759     + goto out;
6760     + }
6761     +
6762     + if ((err = check_branch(&nd))) {
6763     + printk(KERN_WARNING "unionfs: lower directory "
6764     + "'%s' is not a valid branch\n", name);
6765     + path_release(&nd);
6766     + goto out;
6767     + }
6768     +
6769     + lower_root_info->lower_paths[bindex].dentry = nd.dentry;
6770     + lower_root_info->lower_paths[bindex].mnt = nd.mnt;
6771     +
6772     + set_branchperms(sb, bindex, perms);
6773     + set_branch_count(sb, bindex, 0);
6774     + new_branch_id(sb, bindex);
6775     +
6776     + if (lower_root_info->bstart < 0)
6777     + lower_root_info->bstart = bindex;
6778     + lower_root_info->bend = bindex;
6779     + bindex++;
6780     + }
6781     +
6782     + if (branches == 0) {
6783     + printk(KERN_WARNING "unionfs: no branches specified\n");
6784     + err = -EINVAL;
6785     + goto out;
6786     + }
6787     +
6788     + BUG_ON(branches != (lower_root_info->bend + 1));
6789     +
6790     + /*
6791     + * Ensure that no overlaps exist in the branches.
6792     + *
6793     + * This test is required because the Linux kernel has no support
6794     + * currently for ensuring coherency between stackable layers and
6795     + * branches. If we were to allow overlapping branches, it would be
6796     + * possible, for example, to delete a file via one branch, which
6797     + * would not be reflected in another branch. Such incoherency could
6798     + * lead to inconsistencies and even kernel oopses. Rather than
6799     + * implement hacks to work around some of these cache-coherency
6800     + * problems, we prevent branch overlapping, for now. A complete
6801     + * solution will involve proper kernel/VFS support for cache
6802     + * coherency, at which time we could safely remove this
6803     + * branch-overlapping test.
6804     + */
6805     + for (i = 0; i < branches; i++) {
6806     + dent1 = lower_root_info->lower_paths[i].dentry;
6807     + for (j = i + 1; j < branches; j++) {
6808     + dent2 = lower_root_info->lower_paths[j].dentry;
6809     + if (is_branch_overlap(dent1, dent2)) {
6810     + printk(KERN_WARNING "unionfs: branches %d and "
6811     + "%d overlap\n", i, j);
6812     + err = -EINVAL;
6813     + goto out;
6814     + }
6815     + }
6816     + }
6817     +
6818     +out:
6819     + if (err) {
6820     + for (i = 0; i < branches; i++)
6821     + if (lower_root_info->lower_paths[i].dentry) {
6822     + dput(lower_root_info->lower_paths[i].dentry);
6823     + /* initialize: can't use unionfs_mntput here */
6824     + mntput(lower_root_info->lower_paths[i].mnt);
6825     + }
6826     +
6827     + kfree(lower_root_info->lower_paths);
6828     + kfree(UNIONFS_SB(sb)->data);
6829     +
6830     + /*
6831     + * MUST clear the pointers to prevent potential double free if
6832     + * the caller dies later on
6833     + */
6834     + lower_root_info->lower_paths = NULL;
6835     + UNIONFS_SB(sb)->data = NULL;
6836     + }
6837     + return err;
6838     +}
6839     +
6840     +/*
6841     + * Parse mount options. See the manual page for usage instructions.
6842     + *
6843     + * Returns the dentry object of the lower-level (lower) directory;
6844     + * We want to mount our stackable file system on top of that lower directory.
6845     + */
6846     +static struct unionfs_dentry_info *unionfs_parse_options(
6847     + struct super_block *sb,
6848     + char *options)
6849     +{
6850     + struct unionfs_dentry_info *lower_root_info;
6851     + char *optname;
6852     + int err = 0;
6853     + int bindex;
6854     + int dirsfound = 0;
6855     +
6856     + /* allocate private data area */
6857     + err = -ENOMEM;
6858     + lower_root_info =
6859     + kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6860     + if (!lower_root_info)
6861     + goto out_error;
6862     + lower_root_info->bstart = -1;
6863     + lower_root_info->bend = -1;
6864     + lower_root_info->bopaque = -1;
6865     +
6866     + while ((optname = strsep(&options, ",")) != NULL) {
6867     + char *optarg;
6868     + char *endptr;
6869     + int intval;
6870     +
6871     + if (!optname || !*optname)
6872     + continue;
6873     +
6874     + optarg = strchr(optname, '=');
6875     + if (optarg)
6876     + *optarg++ = '\0';
6877     +
6878     + /*
6879     + * All of our options take an argument now. Insert ones that
6880     + * don't, above this check.
6881     + */
6882     + if (!optarg) {
6883     + printk("unionfs: %s requires an argument.\n", optname);
6884     + err = -EINVAL;
6885     + goto out_error;
6886     + }
6887     +
6888     + if (!strcmp("dirs", optname)) {
6889     + if (++dirsfound > 1) {
6890     + printk(KERN_WARNING
6891     + "unionfs: multiple dirs specified\n");
6892     + err = -EINVAL;
6893     + goto out_error;
6894     + }
6895     + err = parse_dirs_option(sb, lower_root_info, optarg);
6896     + if (err)
6897     + goto out_error;
6898     + continue;
6899     + }
6900     +
6901     + /* All of these options require an integer argument. */
6902     + intval = simple_strtoul(optarg, &endptr, 0);
6903     + if (*endptr) {
6904     + printk(KERN_WARNING
6905     + "unionfs: invalid %s option '%s'\n",
6906     + optname, optarg);
6907     + err = -EINVAL;
6908     + goto out_error;
6909     + }
6910     +
6911     + err = -EINVAL;
6912     + printk(KERN_WARNING
6913     + "unionfs: unrecognized option '%s'\n", optname);
6914     + goto out_error;
6915     + }
6916     + if (dirsfound != 1) {
6917     + printk(KERN_WARNING "unionfs: dirs option required\n");
6918     + err = -EINVAL;
6919     + goto out_error;
6920     + }
6921     + goto out;
6922     +
6923     +out_error:
6924     + if (lower_root_info && lower_root_info->lower_paths) {
6925     + for (bindex = lower_root_info->bstart;
6926     + bindex >= 0 && bindex <= lower_root_info->bend;
6927     + bindex++) {
6928     + struct dentry *d;
6929     + struct vfsmount *m;
6930     +
6931     + d = lower_root_info->lower_paths[bindex].dentry;
6932     + m = lower_root_info->lower_paths[bindex].mnt;
6933     +
6934     + dput(d);
6935     + /* initializing: can't use unionfs_mntput here */
6936     + mntput(m);
6937     + }
6938     + }
6939     +
6940     + kfree(lower_root_info->lower_paths);
6941     + kfree(lower_root_info);
6942     +
6943     + kfree(UNIONFS_SB(sb)->data);
6944     + UNIONFS_SB(sb)->data = NULL;
6945     +
6946     + lower_root_info = ERR_PTR(err);
6947     +out:
6948     + return lower_root_info;
6949     +}
6950     +
6951     +/*
6952     + * our custom d_alloc_root work-alike
6953     + *
6954     + * we can't use d_alloc_root if we want to use our own interpose function
6955     + * unchanged, so we simply call our own "fake" d_alloc_root
6956     + */
6957     +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
6958     +{
6959     + struct dentry *ret = NULL;
6960     +
6961     + if (sb) {
6962     + static const struct qstr name = {.name = "/",.len = 1 };
6963     +
6964     + ret = d_alloc(NULL, &name);
6965     + if (ret) {
6966     + ret->d_op = &unionfs_dops;
6967     + ret->d_sb = sb;
6968     + ret->d_parent = ret;
6969     + }
6970     + }
6971     + return ret;
6972     +}
6973     +
6974     +/*
6975     + * There is no need to lock the unionfs_super_info's rwsem as there is no
6976     + * way anyone can have a reference to the superblock at this point in time.
6977     + */
6978     +static int unionfs_read_super(struct super_block *sb, void *raw_data,
6979     + int silent)
6980     +{
6981     + int err = 0;
6982     + struct unionfs_dentry_info *lower_root_info = NULL;
6983     + int bindex, bstart, bend;
6984     +
6985     + if (!raw_data) {
6986     + printk(KERN_WARNING
6987     + "unionfs: read_super: missing data argument\n");
6988     + err = -EINVAL;
6989     + goto out;
6990     + }
6991     +
6992     + /* Allocate superblock private data */
6993     + sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
6994     + if (!UNIONFS_SB(sb)) {
6995     + printk(KERN_WARNING "unionfs: read_super: out of memory\n");
6996     + err = -ENOMEM;
6997     + goto out;
6998     + }
6999     +
7000     + UNIONFS_SB(sb)->bend = -1;
7001     + atomic_set(&UNIONFS_SB(sb)->generation, 1);
7002     + init_rwsem(&UNIONFS_SB(sb)->rwsem);
7003     + UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7004     +
7005     + lower_root_info = unionfs_parse_options(sb, raw_data);
7006     + if (IS_ERR(lower_root_info)) {
7007     + printk(KERN_WARNING
7008     + "unionfs: read_super: error while parsing options "
7009     + "(err = %ld)\n", PTR_ERR(lower_root_info));
7010     + err = PTR_ERR(lower_root_info);
7011     + lower_root_info = NULL;
7012     + goto out_free;
7013     + }
7014     + if (lower_root_info->bstart == -1) {
7015     + err = -ENOENT;
7016     + goto out_free;
7017     + }
7018     +
7019     + /* set the lower superblock field of upper superblock */
7020     + bstart = lower_root_info->bstart;
7021     + BUG_ON(bstart != 0);
7022     + sbend(sb) = bend = lower_root_info->bend;
7023     + for (bindex = bstart; bindex <= bend; bindex++) {
7024     + struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7025     + unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7026     + }
7027     +
7028     + /* max Bytes is the maximum bytes from highest priority branch */
7029     + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7030     +
7031     + sb->s_op = &unionfs_sops;
7032     +
7033     + /* See comment next to the definition of unionfs_d_alloc_root */
7034     + sb->s_root = unionfs_d_alloc_root(sb);
7035     + if (!sb->s_root) {
7036     + err = -ENOMEM;
7037     + goto out_dput;
7038     + }
7039     +
7040     + /* link the upper and lower dentries */
7041     + sb->s_root->d_fsdata = NULL;
7042     + if ((err = new_dentry_private_data(sb->s_root)))
7043     + goto out_freedpd;
7044     +
7045     + /* Set the lower dentries for s_root */
7046     + for (bindex = bstart; bindex <= bend; bindex++) {
7047     + struct dentry *d;
7048     + struct vfsmount *m;
7049     +
7050     + d = lower_root_info->lower_paths[bindex].dentry;
7051     + m = lower_root_info->lower_paths[bindex].mnt;
7052     +
7053     + unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7054     + unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7055     + }
7056     + set_dbstart(sb->s_root, bstart);
7057     + set_dbend(sb->s_root, bend);
7058     +
7059     + /* Set the generation number to one, since this is for the mount. */
7060     + atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7061     +
7062     + /*
7063     + * Call interpose to create the upper level inode. Only
7064     + * INTERPOSE_LOOKUP can return a value other than 0 on err.
7065     + */
7066     + err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7067     + unionfs_unlock_dentry(sb->s_root);
7068     + if (!err)
7069     + goto out;
7070     + /* else fall through */
7071     +
7072     +out_freedpd:
7073     + if (UNIONFS_D(sb->s_root)) {
7074     + kfree(UNIONFS_D(sb->s_root)->lower_paths);
7075     + free_dentry_private_data(sb->s_root);
7076     + }
7077     + dput(sb->s_root);
7078     +
7079     +out_dput:
7080     + if (lower_root_info && !IS_ERR(lower_root_info)) {
7081     + for (bindex = lower_root_info->bstart;
7082     + bindex <= lower_root_info->bend; bindex++) {
7083     + struct dentry *d;
7084     + struct vfsmount *m;
7085     +
7086     + d = lower_root_info->lower_paths[bindex].dentry;
7087     + m = lower_root_info->lower_paths[bindex].mnt;
7088     +
7089     + dput(d);
7090     + /* initializing: can't use unionfs_mntput here */
7091     + mntput(m);
7092     + }
7093     + kfree(lower_root_info->lower_paths);
7094     + kfree(lower_root_info);
7095     + lower_root_info = NULL;
7096     + }
7097     +
7098     +out_free:
7099     + kfree(UNIONFS_SB(sb)->data);
7100     + kfree(UNIONFS_SB(sb));
7101     + sb->s_fs_info = NULL;
7102     +
7103     +out:
7104     + if (lower_root_info && !IS_ERR(lower_root_info)) {
7105     + kfree(lower_root_info->lower_paths);
7106     + kfree(lower_root_info);
7107     + }
7108     + return err;
7109     +}
7110     +
7111     +static int unionfs_get_sb(struct file_system_type *fs_type,
7112     + int flags, const char *dev_name,
7113     + void *raw_data, struct vfsmount *mnt)
7114     +{
7115     + return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7116     +}
7117     +
7118     +static struct file_system_type unionfs_fs_type = {
7119     + .owner = THIS_MODULE,
7120     + .name = "unionfs",
7121     + .get_sb = unionfs_get_sb,
7122     + .kill_sb = generic_shutdown_super,
7123     + .fs_flags = FS_REVAL_DOT,
7124     +};
7125     +
7126     +static int __init init_unionfs_fs(void)
7127     +{
7128     + int err;
7129     +
7130     + printk("Registering unionfs " UNIONFS_VERSION "\n");
7131     +
7132     + if ((err = unionfs_init_filldir_cache()))
7133     + goto out;
7134     + if ((err = unionfs_init_inode_cache()))
7135     + goto out;
7136     + if ((err = unionfs_init_dentry_cache()))
7137     + goto out;
7138     + if ((err = init_sioq()))
7139     + goto out;
7140     + err = register_filesystem(&unionfs_fs_type);
7141     +out:
7142     + if (err) {
7143     + stop_sioq();
7144     + unionfs_destroy_filldir_cache();
7145     + unionfs_destroy_inode_cache();
7146     + unionfs_destroy_dentry_cache();
7147     + }
7148     + return err;
7149     +}
7150     +
7151     +static void __exit exit_unionfs_fs(void)
7152     +{
7153     + stop_sioq();
7154     + unionfs_destroy_filldir_cache();
7155     + unionfs_destroy_inode_cache();
7156     + unionfs_destroy_dentry_cache();
7157     + unregister_filesystem(&unionfs_fs_type);
7158     + printk("Completed unionfs module unload.\n");
7159     +}
7160     +
7161     +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7162     + " (http://www.fsl.cs.sunysb.edu)");
7163     +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7164     + " (http://unionfs.filesystems.org)");
7165     +MODULE_LICENSE("GPL");
7166     +
7167     +module_init(init_unionfs_fs);
7168     +module_exit(exit_unionfs_fs);
7169     diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7170     new file mode 100644
7171     index 0000000..5629dcc
7172     --- /dev/null
7173     +++ b/fs/unionfs/mmap.c
7174     @@ -0,0 +1,378 @@
7175     +/*
7176     + * Copyright (c) 2003-2007 Erez Zadok
7177     + * Copyright (c) 2003-2006 Charles P. Wright
7178     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7179     + * Copyright (c) 2005-2006 Junjiro Okajima
7180     + * Copyright (c) 2006 Shaya Potter
7181     + * Copyright (c) 2005 Arun M. Krishnakumar
7182     + * Copyright (c) 2004-2006 David P. Quigley
7183     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7184     + * Copyright (c) 2003 Puja Gupta
7185     + * Copyright (c) 2003 Harikesavan Krishnan
7186     + * Copyright (c) 2003-2007 Stony Brook University
7187     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7188     + *
7189     + * This program is free software; you can redistribute it and/or modify
7190     + * it under the terms of the GNU General Public License version 2 as
7191     + * published by the Free Software Foundation.
7192     + */
7193     +
7194     +#include "union.h"
7195     +
7196     +/*
7197     + * Unionfs doesn't implement ->writepages, which is OK with the VFS and
7198     + * keeps our code simpler and smaller. Nevertheless, somehow, our own
7199     + * ->writepage must be called so we can sync the upper pages with the lower
7200     + * pages: otherwise data changed at the upper layer won't get written to the
7201     + * lower layer.
7202     + *
7203     + * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
7204     + * only, which in turn will call generic_writepages and invoke each of the
7205     + * lower file system's ->writepage. NFS in particular uses the
7206     + * wbc->fs_private field in its nfs_writepage, which is set in its
7207     + * nfs_writepages. So if we don't call the lower nfs_writepages first, then
7208     + * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
7209     + * OOPS. If, however, we implement a unionfs_writepages and then we do call
7210     + * the lower nfs_writepages, then we "lose control" over the pages we're
7211     + * trying to write to the lower file system: we won't be writing our own
7212     + * new/modified data from the upper pages to the lower pages, and any
7213     + * mmap-based changes are lost.
7214     + *
7215     + * This is a fundamental cache-coherency problem in Linux. The kernel isn't
7216     + * able to support such stacking abstractions cleanly. One possible clean
7217     + * way would be that a lower file system's ->writepage method have some sort
7218     + * of a callback to validate if any upper pages for the same file+offset
7219     + * exist and have newer content in them.
7220     + *
7221     + * This whole NULL ptr dereference is triggered at the lower file system
7222     + * (NFS) because the wbc->for_writepages is set to 1. Therefore, to avoid
7223     + * this NULL pointer dereference, we set this flag to 0 and restore it upon
7224     + * exit. This probably means that we're slightly less efficient in writing
7225     + * pages out, doing them one at a time, but at least we avoid the oops until
7226     + * such day as Linux can better support address_space_ops in a stackable
7227     + * fashion.
7228     + */
7229     +static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
7230     +{
7231     + int err = -EIO;
7232     + struct inode *inode;
7233     + struct inode *lower_inode;
7234     + struct page *lower_page;
7235     + char *kaddr, *lower_kaddr;
7236     + int saved_for_writepages = wbc->for_writepages;
7237     +
7238     + inode = page->mapping->host;
7239     + lower_inode = unionfs_lower_inode(inode);
7240     +
7241     + /*
7242     + * find lower page (returns a locked page)
7243     + *
7244     + * NOTE: we used to call grab_cache_page(), but that was unnecessary
7245     + * as it would have tried to create a new lower page if it didn't
7246     + * exist, leading to deadlocks (esp. under memory-pressure
7247     + * conditions, when it is really a bad idea to *consume* more
7248     + * memory). Instead, we assume the lower page exists, and if we can
7249     + * find it, then we ->writepage on it; if we can't find it, then it
7250     + * couldn't have disappeared unless the kernel already flushed it,
7251     + * in which case we're still OK. This is especially correct if
7252     + * wbc->sync_mode is WB_SYNC_NONE (as per
7253     + * Documentation/filesystems/vfs.txt). If we can't flush our page
7254     + * because we can't find a lower page, then at least we re-mark our
7255     + * page as dirty, and return AOP_WRITEPAGE_ACTIVATE as the VFS
7256     + * expects us to. (Note, if in the future it'd turn out that we
7257     + * have to find a lower page no matter what, then we'd have to
7258     + * resort to RAIF's page pointer flipping trick.)
7259     + */
7260     + lower_page = find_lock_page(lower_inode->i_mapping, page->index);
7261     + if (!lower_page) {
7262     + err = AOP_WRITEPAGE_ACTIVATE;
7263     + set_page_dirty(page);
7264     + goto out;
7265     + }
7266     +
7267     + /* get page address, and encode it */
7268     + kaddr = kmap(page);
7269     + lower_kaddr = kmap(lower_page);
7270     +
7271     + memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE);
7272     +
7273     + kunmap(page);
7274     + kunmap(lower_page);
7275     +
7276     + BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
7277     +
7278     + /* workaround for some lower file systems: see big comment on top */
7279     + if (wbc->for_writepages /* && !wbc->fs_private */)
7280     + wbc->for_writepages = 0;
7281     +
7282     + /* call lower writepage (expects locked page) */
7283     + clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
7284     + err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
7285     + wbc->for_writepages = saved_for_writepages; /* restore value */
7286     +
7287     + /* b/c find_lock_page locked it and ->writepage unlocks on success */
7288     + if (err)
7289     + unlock_page(lower_page);
7290     + /* b/c grab_cache_page increased refcnt */
7291     + page_cache_release(lower_page);
7292     +
7293     + if (err < 0) {
7294     + ClearPageUptodate(page);
7295     + goto out;
7296     + }
7297     + if (err == AOP_WRITEPAGE_ACTIVATE) {
7298     + /*
7299     + * Lower file systems such as ramfs and tmpfs, may return
7300     + * AOP_WRITEPAGE_ACTIVATE so that the VM won't try to
7301     + * (pointlessly) write the page again for a while. But
7302     + * those lower file systems also set the page dirty bit back
7303     + * again. So we mimic that behaviour here.
7304     + */
7305     + if (PageDirty(lower_page))
7306     + set_page_dirty(page);
7307     + goto out;
7308     + }
7309     +
7310     + /* all is well */
7311     + SetPageUptodate(page);
7312     + /* lower mtimes has changed: update ours */
7313     + unionfs_copy_attr_times(inode);
7314     +
7315     + unlock_page(page);
7316     +
7317     +out:
7318     + return err;
7319     +}
7320     +
7321     +/*
7322     + * readpage is called from generic_page_read and the fault handler.
7323     + * If your file system uses generic_page_read for the read op, it
7324     + * must implement readpage.
7325     + *
7326     + * Readpage expects a locked page, and must unlock it.
7327     + */
7328     +static int unionfs_do_readpage(struct file *file, struct page *page)
7329     +{
7330     + int err = -EIO;
7331     + struct file *lower_file;
7332     + struct inode *inode;
7333     + mm_segment_t old_fs;
7334     + char *page_data = NULL;
7335     + loff_t offset;
7336     +
7337     + if (UNIONFS_F(file) == NULL) {
7338     + err = -ENOENT;
7339     + goto out;
7340     + }
7341     +
7342     + lower_file = unionfs_lower_file(file);
7343     + /* FIXME: is this assertion right here? */
7344     + BUG_ON(lower_file == NULL);
7345     +
7346     + inode = file->f_path.dentry->d_inode;
7347     +
7348     + page_data = (char *) kmap(page);
7349     + /*
7350     + * Use vfs_read because some lower file systems don't have a
7351     + * readpage method, and some file systems (esp. distributed ones)
7352     + * don't like their pages to be accessed directly. Using vfs_read
7353     + * may be a little slower, but a lot safer, as the VFS does a lot of
7354     + * the necessary magic for us.
7355     + */
7356     + offset = lower_file->f_pos = (page->index << PAGE_CACHE_SHIFT);
7357     + old_fs = get_fs();
7358     + set_fs(KERNEL_DS);
7359     + err = vfs_read(lower_file, page_data, PAGE_CACHE_SIZE,
7360     + &lower_file->f_pos);
7361     + set_fs(old_fs);
7362     +
7363     + kunmap(page);
7364     +
7365     + if (err < 0)
7366     + goto out;
7367     + err = 0;
7368     +
7369     + /* if vfs_read succeeded above, sync up our times */
7370     + unionfs_copy_attr_times(inode);
7371     +
7372     + flush_dcache_page(page);
7373     +
7374     +out:
7375     + if (err == 0)
7376     + SetPageUptodate(page);
7377     + else
7378     + ClearPageUptodate(page);
7379     +
7380     + return err;
7381     +}
7382     +
7383     +static int unionfs_readpage(struct file *file, struct page *page)
7384     +{
7385     + int err;
7386     +
7387     + unionfs_read_lock(file->f_path.dentry->d_sb);
7388     + if ((err = unionfs_file_revalidate(file, 0)))
7389     + goto out;
7390     + unionfs_check_file(file);
7391     +
7392     + err = unionfs_do_readpage(file, page);
7393     +
7394     + if (!err) {
7395     + touch_atime(unionfs_lower_mnt(file->f_path.dentry),
7396     + unionfs_lower_dentry(file->f_path.dentry));
7397     + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7398     + }
7399     +
7400     + /*
7401     + * we have to unlock our page, b/c we _might_ have gotten a locked
7402     + * page. but we no longer have to wakeup on our page here, b/c
7403     + * UnlockPage does it
7404     + */
7405     +out:
7406     + unlock_page(page);
7407     + unionfs_check_file(file);
7408     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7409     +
7410     + return err;
7411     +}
7412     +
7413     +static int unionfs_prepare_write(struct file *file, struct page *page,
7414     + unsigned from, unsigned to)
7415     +{
7416     + int err;
7417     +
7418     + unionfs_read_lock(file->f_path.dentry->d_sb);
7419     + /*
7420     + * This is the only place where we unconditionally copy the lower
7421     + * attribute times before calling unionfs_file_revalidate. The
7422     + * reason is that our ->write calls do_sync_write which in turn will
7423     + * call our ->prepare_write and then ->commit_write. Before our
7424     + * ->write is called, the lower mtimes are in sync, but by the time
7425     + * the VFS calls our ->commit_write, the lower mtimes have changed.
7426     + * Therefore, the only reasonable time for us to sync up from the
7427     + * changed lower mtimes, and avoid an invariant violation warning,
7428     + * is here, in ->prepare_write.
7429     + */
7430     + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7431     + err = unionfs_file_revalidate(file, 1);
7432     + unionfs_check_file(file);
7433     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7434     +
7435     + return err;
7436     +}
7437     +
7438     +static int unionfs_commit_write(struct file *file, struct page *page,
7439     + unsigned from, unsigned to)
7440     +{
7441     + int err = -ENOMEM;
7442     + struct inode *inode, *lower_inode;
7443     + struct file *lower_file = NULL;
7444     + loff_t pos;
7445     + unsigned bytes = to - from;
7446     + char *page_data = NULL;
7447     + mm_segment_t old_fs;
7448     +
7449     + BUG_ON(file == NULL);
7450     +
7451     + unionfs_read_lock(file->f_path.dentry->d_sb);
7452     + if ((err = unionfs_file_revalidate(file, 1)))
7453     + goto out;
7454     + unionfs_check_file(file);
7455     +
7456     + inode = page->mapping->host;
7457     + lower_inode = unionfs_lower_inode(inode);
7458     +
7459     + if (UNIONFS_F(file) != NULL)
7460     + lower_file = unionfs_lower_file(file);
7461     +
7462     + /* FIXME: is this assertion right here? */
7463     + BUG_ON(lower_file == NULL);
7464     +
7465     + page_data = (char *)kmap(page);
7466     + lower_file->f_pos = (page->index << PAGE_CACHE_SHIFT) + from;
7467     +
7468     + /*
7469     + * SP: I use vfs_write instead of copying page data and the
7470     + * prepare_write/commit_write combo because file system's like
7471     + * GFS/OCFS2 don't like things touching those directly,
7472     + * calling the underlying write op, while a little bit slower, will
7473     + * call all the FS specific code as well
7474     + */
7475     + old_fs = get_fs();
7476     + set_fs(KERNEL_DS);
7477     + err = vfs_write(lower_file, page_data + from, bytes,
7478     + &lower_file->f_pos);
7479     + set_fs(old_fs);
7480     +
7481     + kunmap(page);
7482     +
7483     + if (err < 0)
7484     + goto out;
7485     +
7486     + inode->i_blocks = lower_inode->i_blocks;
7487     + /* we may have to update i_size */
7488     + pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
7489     + if (pos > i_size_read(inode))
7490     + i_size_write(inode, pos);
7491     + /* if vfs_write succeeded above, sync up our times */
7492     + unionfs_copy_attr_times(inode);
7493     + mark_inode_dirty_sync(inode);
7494     +
7495     +out:
7496     + if (err < 0)
7497     + ClearPageUptodate(page);
7498     +
7499     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7500     + unionfs_check_file(file);
7501     + return err; /* assume all is ok */
7502     +}
7503     +
7504     +static void unionfs_sync_page(struct page *page)
7505     +{
7506     + struct inode *inode;
7507     + struct inode *lower_inode;
7508     + struct page *lower_page;
7509     + struct address_space *mapping;
7510     +
7511     + inode = page->mapping->host;
7512     + lower_inode = unionfs_lower_inode(inode);
7513     +
7514     + /*
7515     + * Find lower page (returns a locked page).
7516     + *
7517     + * NOTE: we used to call grab_cache_page(), but that was unnecessary
7518     + * as it would have tried to create a new lower page if it didn't
7519     + * exist, leading to deadlocks. All our sync_page method needs to
7520     + * do is ensure that pending I/O gets done.
7521     + */
7522     + lower_page = find_lock_page(lower_inode->i_mapping, page->index);
7523     + if (!lower_page) {
7524     + printk(KERN_DEBUG "unionfs: find_lock_page failed\n");
7525     + goto out;
7526     + }
7527     +
7528     + /* do the actual sync */
7529     + mapping = lower_page->mapping;
7530     + /*
7531     + * XXX: can we optimize ala RAIF and set the lower page to be
7532     + * discarded after a successful sync_page?
7533     + */
7534     + if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
7535     + mapping->a_ops->sync_page(lower_page);
7536     +
7537     + /* b/c find_lock_page locked it */
7538     + unlock_page(lower_page);
7539     + /* b/c find_lock_page increased refcnt */
7540     + page_cache_release(lower_page);
7541     +
7542     +out:
7543     + return;
7544     +}
7545     +
7546     +struct address_space_operations unionfs_aops = {
7547     + .writepage = unionfs_writepage,
7548     + .readpage = unionfs_readpage,
7549     + .prepare_write = unionfs_prepare_write,
7550     + .commit_write = unionfs_commit_write,
7551     + .sync_page = unionfs_sync_page,
7552     +};
7553     diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7554     new file mode 100644
7555     index 0000000..5c9d14b
7556     --- /dev/null
7557     +++ b/fs/unionfs/rdstate.c
7558     @@ -0,0 +1,282 @@
7559     +/*
7560     + * Copyright (c) 2003-2007 Erez Zadok
7561     + * Copyright (c) 2003-2006 Charles P. Wright
7562     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7563     + * Copyright (c) 2005-2006 Junjiro Okajima
7564     + * Copyright (c) 2005 Arun M. Krishnakumar
7565     + * Copyright (c) 2004-2006 David P. Quigley
7566     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7567     + * Copyright (c) 2003 Puja Gupta
7568     + * Copyright (c) 2003 Harikesavan Krishnan
7569     + * Copyright (c) 2003-2007 Stony Brook University
7570     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7571     + *
7572     + * This program is free software; you can redistribute it and/or modify
7573     + * it under the terms of the GNU General Public License version 2 as
7574     + * published by the Free Software Foundation.
7575     + */
7576     +
7577     +#include "union.h"
7578     +
7579     +/* This file contains the routines for maintaining readdir state. */
7580     +
7581     +/*
7582     + * There are two structures here, rdstate which is a hash table
7583     + * of the second structure which is a filldir_node.
7584     + */
7585     +
7586     +/*
7587     + * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7588     + * of them and they shouldn't waste memory. If the node has a small name
7589     + * (as defined by the dentry structure), then we use an inline name to
7590     + * preserve kmalloc space.
7591     + */
7592     +static struct kmem_cache *unionfs_filldir_cachep;
7593     +
7594     +int unionfs_init_filldir_cache(void)
7595     +{
7596     + unionfs_filldir_cachep =
7597     + kmem_cache_create("unionfs_filldir",
7598     + sizeof(struct filldir_node), 0,
7599     + SLAB_RECLAIM_ACCOUNT, NULL, NULL);
7600     +
7601     + return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7602     +}
7603     +
7604     +void unionfs_destroy_filldir_cache(void)
7605     +{
7606     + if (unionfs_filldir_cachep)
7607     + kmem_cache_destroy(unionfs_filldir_cachep);
7608     +}
7609     +
7610     +/*
7611     + * This is a tuning parameter that tells us roughly how big to make the
7612     + * hash table in directory entries per page. This isn't perfect, but
7613     + * at least we get a hash table size that shouldn't be too overloaded.
7614     + * The following averages are based on my home directory.
7615     + * 14.44693 Overall
7616     + * 12.29 Single Page Directories
7617     + * 117.93 Multi-page directories
7618     + */
7619     +#define DENTPAGE 4096
7620     +#define DENTPERONEPAGE 12
7621     +#define DENTPERPAGE 118
7622     +#define MINHASHSIZE 1
7623     +static int guesstimate_hash_size(struct inode *inode)
7624     +{
7625     + struct inode *lower_inode;
7626     + int bindex;
7627     + int hashsize = MINHASHSIZE;
7628     +
7629     + if (UNIONFS_I(inode)->hashsize > 0)
7630     + return UNIONFS_I(inode)->hashsize;
7631     +
7632     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7633     + if (!(lower_inode = unionfs_lower_inode_idx(inode, bindex)))
7634     + continue;
7635     +
7636     + if (lower_inode->i_size == DENTPAGE)
7637     + hashsize += DENTPERONEPAGE;
7638     + else
7639     + hashsize += (lower_inode->i_size / DENTPAGE) *
7640     + DENTPERPAGE;
7641     + }
7642     +
7643     + return hashsize;
7644     +}
7645     +
7646     +int init_rdstate(struct file *file)
7647     +{
7648     + BUG_ON(sizeof(loff_t) !=
7649     + (sizeof(unsigned int) + sizeof(unsigned int)));
7650     + BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7651     +
7652     + UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7653     + fbstart(file));
7654     +
7655     + return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7656     +}
7657     +
7658     +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7659     +{
7660     + struct unionfs_dir_state *rdstate = NULL;
7661     + struct list_head *pos;
7662     +
7663     + spin_lock(&UNIONFS_I(inode)->rdlock);
7664     + list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7665     + struct unionfs_dir_state *r =
7666     + list_entry(pos, struct unionfs_dir_state, cache);
7667     + if (fpos == rdstate2offset(r)) {
7668     + UNIONFS_I(inode)->rdcount--;
7669     + list_del(&r->cache);
7670     + rdstate = r;
7671     + break;
7672     + }
7673     + }
7674     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7675     + return rdstate;
7676     +}
7677     +
7678     +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7679     +{
7680     + int i = 0;
7681     + int hashsize;
7682     + unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7683     + struct unionfs_dir_state *rdstate;
7684     +
7685     + hashsize = guesstimate_hash_size(inode);
7686     + mallocsize += hashsize * sizeof(struct list_head);
7687     + mallocsize = __roundup_pow_of_two(mallocsize);
7688     +
7689     + /* This should give us about 500 entries anyway. */
7690     + if (mallocsize > PAGE_SIZE)
7691     + mallocsize = PAGE_SIZE;
7692     +
7693     + hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7694     + sizeof(struct list_head);
7695     +
7696     + rdstate = kmalloc(mallocsize, GFP_KERNEL);
7697     + if (!rdstate)
7698     + return NULL;
7699     +
7700     + spin_lock(&UNIONFS_I(inode)->rdlock);
7701     + if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7702     + UNIONFS_I(inode)->cookie = 1;
7703     + else
7704     + UNIONFS_I(inode)->cookie++;
7705     +
7706     + rdstate->cookie = UNIONFS_I(inode)->cookie;
7707     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7708     + rdstate->offset = 1;
7709     + rdstate->access = jiffies;
7710     + rdstate->bindex = bindex;
7711     + rdstate->dirpos = 0;
7712     + rdstate->hashentries = 0;
7713     + rdstate->size = hashsize;
7714     + for (i = 0; i < rdstate->size; i++)
7715     + INIT_LIST_HEAD(&rdstate->list[i]);
7716     +
7717     + return rdstate;
7718     +}
7719     +
7720     +static void free_filldir_node(struct filldir_node *node)
7721     +{
7722     + if (node->namelen >= DNAME_INLINE_LEN_MIN)
7723     + kfree(node->name);
7724     + kmem_cache_free(unionfs_filldir_cachep, node);
7725     +}
7726     +
7727     +void free_rdstate(struct unionfs_dir_state *state)
7728     +{
7729     + struct filldir_node *tmp;
7730     + int i;
7731     +
7732     + for (i = 0; i < state->size; i++) {
7733     + struct list_head *head = &(state->list[i]);
7734     + struct list_head *pos, *n;
7735     +
7736     + /* traverse the list and deallocate space */
7737     + list_for_each_safe(pos, n, head) {
7738     + tmp = list_entry(pos, struct filldir_node, file_list);
7739     + list_del(&tmp->file_list);
7740     + free_filldir_node(tmp);
7741     + }
7742     + }
7743     +
7744     + kfree(state);
7745     +}
7746     +
7747     +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7748     + const char *name, int namelen)
7749     +{
7750     + int index;
7751     + unsigned int hash;
7752     + struct list_head *head;
7753     + struct list_head *pos;
7754     + struct filldir_node *cursor = NULL;
7755     + int found = 0;
7756     +
7757     + BUG_ON(namelen <= 0);
7758     +
7759     + hash = full_name_hash(name, namelen);
7760     + index = hash % rdstate->size;
7761     +
7762     + head = &(rdstate->list[index]);
7763     + list_for_each(pos, head) {
7764     + cursor = list_entry(pos, struct filldir_node, file_list);
7765     +
7766     + if (cursor->namelen == namelen && cursor->hash == hash &&
7767     + !strncmp(cursor->name, name, namelen)) {
7768     + /*
7769     + * a duplicate exists, and hence no need to create
7770     + * entry to the list
7771     + */
7772     + found = 1;
7773     +
7774     + /*
7775     + * if the duplicate is in this branch, then the file
7776     + * system is corrupted.
7777     + */
7778     + if (cursor->bindex == rdstate->bindex) {
7779     + printk(KERN_DEBUG "unionfs: filldir: possible "
7780     + "I/O error: a file is duplicated "
7781     + "in the same branch %d: %s\n",
7782     + rdstate->bindex, cursor->name);
7783     + }
7784     + break;
7785     + }
7786     + }
7787     +
7788     + if (!found)
7789     + cursor = NULL;
7790     +
7791     + return cursor;
7792     +}
7793     +
7794     +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7795     + int namelen, int bindex, int whiteout)
7796     +{
7797     + struct filldir_node *new;
7798     + unsigned int hash;
7799     + int index;
7800     + int err = 0;
7801     + struct list_head *head;
7802     +
7803     + BUG_ON(namelen <= 0);
7804     +
7805     + hash = full_name_hash(name, namelen);
7806     + index = hash % rdstate->size;
7807     + head = &(rdstate->list[index]);
7808     +
7809     + new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7810     + if (!new) {
7811     + err = -ENOMEM;
7812     + goto out;
7813     + }
7814     +
7815     + INIT_LIST_HEAD(&new->file_list);
7816     + new->namelen = namelen;
7817     + new->hash = hash;
7818     + new->bindex = bindex;
7819     + new->whiteout = whiteout;
7820     +
7821     + if (namelen < DNAME_INLINE_LEN_MIN)
7822     + new->name = new->iname;
7823     + else {
7824     + new->name = kmalloc(namelen + 1, GFP_KERNEL);
7825     + if (!new->name) {
7826     + kmem_cache_free(unionfs_filldir_cachep, new);
7827     + new = NULL;
7828     + goto out;
7829     + }
7830     + }
7831     +
7832     + memcpy(new->name, name, namelen);
7833     + new->name[namelen] = '\0';
7834     +
7835     + rdstate->hashentries++;
7836     +
7837     + list_add(&(new->file_list), head);
7838     +out:
7839     + return err;
7840     +}
7841     diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7842     new file mode 100644
7843     index 0000000..1761f8b
7844     --- /dev/null
7845     +++ b/fs/unionfs/rename.c
7846     @@ -0,0 +1,521 @@
7847     +/*
7848     + * Copyright (c) 2003-2007 Erez Zadok
7849     + * Copyright (c) 2003-2006 Charles P. Wright
7850     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7851     + * Copyright (c) 2005-2006 Junjiro Okajima
7852     + * Copyright (c) 2005 Arun M. Krishnakumar
7853     + * Copyright (c) 2004-2006 David P. Quigley
7854     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7855     + * Copyright (c) 2003 Puja Gupta
7856     + * Copyright (c) 2003 Harikesavan Krishnan
7857     + * Copyright (c) 2003-2007 Stony Brook University
7858     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7859     + *
7860     + * This program is free software; you can redistribute it and/or modify
7861     + * it under the terms of the GNU General Public License version 2 as
7862     + * published by the Free Software Foundation.
7863     + */
7864     +
7865     +#include "union.h"
7866     +/* Rename old_dentry to new_dentry within lower branch bindex; 0 or -errno. */
7867     +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7868     + struct inode *new_dir, struct dentry *new_dentry,
7869     + int bindex, struct dentry **wh_old)
7870     +{ /* if wh_old is non-NULL, *wh_old receives the source's whiteout dentry */
7871     + int err = 0;
7872     + struct dentry *lower_old_dentry;
7873     + struct dentry *lower_new_dentry;
7874     + struct dentry *lower_old_dir_dentry;
7875     + struct dentry *lower_new_dir_dentry;
7876     + struct dentry *lower_wh_dentry;
7877     + struct dentry *lower_wh_dir_dentry;
7878     + char *wh_name = NULL;
7879     +
7880     + lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7881     + lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7882     + /* no lower destination in this branch yet: create its parent chain */
7883     + if (!lower_new_dentry) {
7884     + lower_new_dentry =
7885     + create_parents(new_dentry->d_parent->d_inode,
7886     + new_dentry, new_dentry->d_name.name,
7887     + bindex);
7888     + if (IS_ERR(lower_new_dentry)) {
7889     + printk(KERN_DEBUG "unionfs: error creating directory "
7890     + "tree for rename, bindex = %d, err = %ld\n",
7891     + bindex, PTR_ERR(lower_new_dentry));
7892     + err = PTR_ERR(lower_new_dentry);
7893     + goto out;
7894     + }
7895     + }
7896     +
7897     + wh_name = alloc_whname(new_dentry->d_name.name,
7898     + new_dentry->d_name.len);
7899     + if (IS_ERR(wh_name)) {
7900     + err = PTR_ERR(wh_name);
7901     + wh_name = NULL; /* out: calls kfree(); never pass it an ERR_PTR */
7902     + goto out;
7903     + }
7904     + lower_wh_dentry = lookup_one_len(wh_name, lower_new_dentry->d_parent,
7905     + new_dentry->d_name.len +
7906     + UNIONFS_WHLEN);
7907     + if (IS_ERR(lower_wh_dentry)) {
7908     + err = PTR_ERR(lower_wh_dentry);
7909     + goto out;
7910     + }
7911     +
7912     + if (lower_wh_dentry->d_inode) {
7913     + /* get rid of the whiteout that is existing */
7914     + if (lower_new_dentry->d_inode) {
7915     + printk(KERN_WARNING "unionfs: both a whiteout and a "
7916     + "dentry exist when doing a rename!\n");
7917     + err = -EIO;
7918     +
7919     + dput(lower_wh_dentry);
7920     + goto out;
7921     + }
7922     +
7923     + lower_wh_dir_dentry = lock_parent(lower_wh_dentry);
7924     + if (!(err = is_robranch_super(old_dentry->d_sb, bindex)))
7925     + err = vfs_unlink(lower_wh_dir_dentry->d_inode,
7926     + lower_wh_dentry);
7927     +
7928     + dput(lower_wh_dentry);
7929     + unlock_dir(lower_wh_dir_dentry);
7930     + if (err)
7931     + goto out;
7932     + } else
7933     + dput(lower_wh_dentry);
7934     + /* take references that are dropped again after out_unlock */
7935     + dget(lower_old_dentry);
7936     + lower_old_dir_dentry = dget_parent(lower_old_dentry);
7937     + lower_new_dir_dentry = dget_parent(lower_new_dentry);
7938     +
7939     + lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7940     +
7941     + err = is_robranch_super(old_dentry->d_sb, bindex);
7942     + if (err)
7943     + goto out_unlock;
7944     +
7945     + /*
7946     + * ready to whiteout for old_dentry. caller will create the actual
7947     + * whiteout, and must dput(*wh_old)
7948     + */
7949     + if (wh_old) {
7950     + char *whname;
7951     + whname = alloc_whname(old_dentry->d_name.name,
7952     + old_dentry->d_name.len);
7953     + err = PTR_ERR(whname);
7954     + if (IS_ERR(whname))
7955     + goto out_unlock;
7956     + *wh_old = lookup_one_len(whname, lower_old_dir_dentry,
7957     + old_dentry->d_name.len +
7958     + UNIONFS_WHLEN);
7959     + kfree(whname);
7960     + err = PTR_ERR(*wh_old);
7961     + if (IS_ERR(*wh_old)) {
7962     + *wh_old = NULL; /* the caller only ever sees NULL or a dentry */
7963     + goto out_unlock;
7964     + }
7965     + }
7966     + /* err is overwritten here with the result of the lower rename */
7967     + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7968     + lower_new_dir_dentry->d_inode, lower_new_dentry);
7969     +
7970     +out_unlock:
7971     + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7972     +
7973     + dput(lower_old_dir_dentry);
7974     + dput(lower_new_dir_dentry);
7975     + dput(lower_old_dentry);
7976     +
7977     +out:
7978     + if (!err) {
7979     + /* Fixup the new_dentry: widen its branch range to include bindex. */
7980     + if (bindex < dbstart(new_dentry))
7981     + set_dbstart(new_dentry, bindex);
7982     + else if (bindex > dbend(new_dentry))
7983     + set_dbend(new_dentry, bindex);
7984     + }
7985     +
7986     + kfree(wh_name);
7987     +
7988     + return err;
7989     +}
7990     +
7991     +/*
7992     + * Main rename code (see Documentation/filesystems/unionfs/rename.txt):
7993     + * rename via __unionfs_rename(), unlink destinations left of the source,
7994     + * copy up on IS_COPYUP_ERR, then whiteout the source. Returns 0 or -errno.
7995     + */
7996     +static int do_unionfs_rename(struct inode *old_dir,
7997     + struct dentry *old_dentry,
7998     + struct inode *new_dir,
7999     + struct dentry *new_dentry)
8000     +{
8001     + int err = 0;
8002     + int bindex, bwh_old; /* bwh_old: branch in which the whiteout is made */
8003     + int old_bstart, old_bend;
8004     + int new_bstart, new_bend;
8005     + int do_copyup = -1; /* -1: no copyup needed; else first branch to try */
8006     + struct dentry *parent_dentry;
8007     + int local_err = 0;
8008     + int eio = 0;
8009     + int revert = 0; /* set once the first lower rename has succeeded */
8010     + struct dentry *wh_old = NULL; /* reference owned here; dput on exit */
8011     +
8012     + old_bstart = dbstart(old_dentry);
8013     + bwh_old = old_bstart;
8014     + old_bend = dbend(old_dentry);
8015     + parent_dentry = old_dentry->d_parent;
8016     +
8017     + new_bstart = dbstart(new_dentry);
8018     + new_bend = dbend(new_dentry);
8019     +
8020     + /* Rename source to destination. */
8021     + err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
8022     + old_bstart, &wh_old);
8023     + if (err) {
8024     + if (!IS_COPYUP_ERR(err))
8025     + goto out;
8026     + do_copyup = old_bstart - 1;
8027     + } else
8028     + revert = 1;
8029     +
8030     + /*
8031     + * Unlink all instances of destination that exist to the left of
8032     + * bstart of source. On error, revert back, goto out.
8033     + */
8034     + for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
8035     + struct dentry *unlink_dentry;
8036     + struct dentry *unlink_dir_dentry;
8037     +
8038     + unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
8039     + if (!unlink_dentry)
8040     + continue;
8041     +
8042     + unlink_dir_dentry = lock_parent(unlink_dentry);
8043     + if (!(err = is_robranch_super(old_dir->i_sb, bindex)))
8044     + err = vfs_unlink(unlink_dir_dentry->d_inode,
8045     + unlink_dentry);
8046     +
8047     + fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
8048     + unlink_dir_dentry->d_inode);
8049     + /* propagate number of hard-links */
8050     + new_dentry->d_parent->d_inode->i_nlink =
8051     + unionfs_get_nlinks(new_dentry->d_parent->d_inode);
8052     +
8053     + unlock_dir(unlink_dir_dentry);
8054     + if (!err) {
8055     + if (bindex != new_bstart) {
8056     + dput(unlink_dentry);
8057     + unionfs_set_lower_dentry_idx(new_dentry,
8058     + bindex, NULL);
8059     + }
8060     + } else if (IS_COPYUP_ERR(err)) {
8061     + do_copyup = bindex - 1;
8062     + } else if (revert) {
8063     + dput(wh_old);
8064     + goto revert;
8065     + }
8066     + }
8067     +
8068     + if (do_copyup != -1) {
8069     + for (bindex = do_copyup; bindex >= 0; bindex--) {
8070     + /*
8071     + * copyup the file into some left directory, so that
8072     + * you can rename it
8073     + */
8074     + err = copyup_dentry(old_dentry->d_parent->d_inode,
8075     + old_dentry, old_bstart, bindex,
8076     + old_dentry->d_name.name,
8077     + old_dentry->d_name.len,
8078     + NULL, old_dentry->d_inode->i_size);
8079     + if (err) /* if copyup failed, try next branch to the left */
8080     + continue;
8081     + dput(wh_old);
8082     + wh_old = NULL; /* the rename below may fail before storing one */
8083     + bwh_old = bindex;
8084     + err = __unionfs_rename(old_dir, old_dentry,
8085     + new_dir, new_dentry,
8086     + bindex, &wh_old);
8087     + break;
8088     + }
8089     + }
8090     +
8091     + if (S_ISDIR(old_dentry->d_inode->i_mode)) { /* make it opaque */
8092     + err = make_dir_opaque(old_dentry, dbstart(old_dentry));
8093     + if (err) {
8094     + dput(wh_old); /* revert returns without reaching out: */
8095     + goto revert;
8096     + }
8097     + }
8098     + /*
8099     + * Create whiteout for source, only if:
8100     + * (1) There is more than one underlying instance of source.
8101     + * (2) We did a copy_up
8102     + */
8103     + if ((old_bstart != old_bend) || (do_copyup != -1)) {
8104     + struct dentry *lower_parent;
8105     + if (!wh_old || wh_old->d_inode || bwh_old < 0) {
8106     + printk(KERN_ERR "unionfs: rename error "
8107     + "(wh_old=%p/%p bwh_old=%d)\n", wh_old,
8108     + (wh_old ? wh_old->d_inode : NULL), bwh_old);
8109     + err = -EIO;
8110     + goto out;
8111     + }
8112     + lower_parent = lock_parent(wh_old);
8113     + local_err = vfs_create(lower_parent->d_inode, wh_old, S_IRUGO,
8114     + NULL);
8115     + unlock_dir(lower_parent);
8116     + if (!local_err)
8117     + set_dbopaque(old_dentry, bwh_old);
8118     + else {
8119     + /*
8120     + * we can't fix anything now, so we cop-out and use
8121     + * -EIO.
8122     + */
8123     + printk(KERN_ERR "unionfs: can't create a whiteout for "
8124     + "the source in rename!\n");
8125     + err = -EIO;
8126     + }
8127     + }
8128     +
8129     +out:
8130     + dput(wh_old);
8131     + return err;
8132     +
8133     +revert:
8134     + /* Do revert here: refresh both dentries, then rename new back to old. */
8135     + local_err = unionfs_refresh_lower_dentry(new_dentry, old_bstart);
8136     + if (local_err) {
8137     + printk(KERN_WARNING "unionfs: revert failed in rename: "
8138     + "the new refresh failed.\n");
8139     + eio = -EIO;
8140     + }
8141     +
8142     + local_err = unionfs_refresh_lower_dentry(old_dentry, old_bstart);
8143     + if (local_err) {
8144     + printk(KERN_WARNING "unionfs: revert failed in rename: "
8145     + "the old refresh failed.\n");
8146     + eio = -EIO;
8147     + goto revert_out;
8148     + }
8149     + /* NOTE(review): bindex is whatever the last loop left it at - confirm */
8150     + if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
8151     + !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
8152     + printk(KERN_WARNING "unionfs: revert failed in rename: "
8153     + "the object disappeared from under us!\n");
8154     + eio = -EIO;
8155     + goto revert_out;
8156     + }
8157     +
8158     + if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
8159     + unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
8160     + printk(KERN_WARNING "unionfs: revert failed in rename: "
8161     + "the object was created underneath us!\n");
8162     + eio = -EIO;
8163     + goto revert_out;
8164     + }
8165     + /* swap the argument order to undo the rename in branch old_bstart */
8166     + local_err = __unionfs_rename(new_dir, new_dentry,
8167     + old_dir, old_dentry, old_bstart, NULL);
8168     +
8169     + /* If we can't fix it, then we cop-out with -EIO. */
8170     + if (local_err) {
8171     + printk(KERN_WARNING "unionfs: revert failed in rename!\n");
8172     + eio = -EIO;
8173     + }
8174     +
8175     + local_err = unionfs_refresh_lower_dentry(new_dentry, bindex);
8176     + if (local_err)
8177     + eio = -EIO;
8178     + local_err = unionfs_refresh_lower_dentry(old_dentry, bindex);
8179     + if (local_err)
8180     + eio = -EIO;
8181     +
8182     +revert_out:
8183     + if (eio) /* a failed revert overrides the original error with -EIO */
8184     + err = eio;
8185     + return err;
8186     +}
8187     +/* Find an existing whiteout for dentry's name in any branch of its parent. */
8188     +static struct dentry *lookup_whiteout(struct dentry *dentry)
8189     +{ /* returns a positive whiteout dentry (caller must dput) or an ERR_PTR */
8190     + char *whname;
8191     + int bindex = -1, bstart = -1, bend = -1;
8192     + struct dentry *parent, *lower_parent, *wh_dentry;
8193     +
8194     + whname = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8195     + if (IS_ERR(whname))
8196     + return (void *)whname; /* pass the allocation error through */
8197     +
8198     + parent = dget_parent(dentry);
8199     + unionfs_lock_dentry(parent);
8200     + bstart = dbstart(parent);
8201     + bend = dbend(parent);
8202     + wh_dentry = ERR_PTR(-ENOENT);
8203     + for (bindex = bstart; bindex <= bend; bindex++) {
8204     + lower_parent = unionfs_lower_dentry_idx(parent, bindex);
8205     + if (!lower_parent)
8206     + continue;
8207     + wh_dentry = lookup_one_len(whname, lower_parent,
8208     + dentry->d_name.len + UNIONFS_WHLEN);
8209     + if (IS_ERR(wh_dentry)) /* error is returned if this was the last branch */
8210     + continue;
8211     + if (wh_dentry->d_inode) /* whiteout exists: keep the reference */
8212     + break;
8213     + dput(wh_dentry); /* negative dentry: drop it and keep looking */
8214     + wh_dentry = ERR_PTR(-ENOENT);
8215     + }
8216     + unionfs_unlock_dentry(parent);
8217     + dput(parent);
8218     + kfree(whname);
8219     + return wh_dentry;
8220     +}
8221     +
8222     +/*
8223     + * We can't copyup a directory, because it may involve huge numbers of
8224     + * children, etc. Doing that in the kernel would be bad, so instead we
8225     + * return EXDEV to the user-space utility that caused this, and let the
8226     + * user-space recurse and ask us to copy up each file separately.
8227     + */
8228     +static int may_rename_dir(struct dentry *dentry)
8229     +{ /* returns 0 if the rename may go ahead, -EXDEV, or a check_empty() error */
8230     + int err, bstart;
8231     +
8232     + err = check_empty(dentry, NULL);
8233     + if (err == -ENOTEMPTY) {
8234     + if (is_robranch(dentry))
8235     + return -EXDEV;
8236     + } else if (err)
8237     + return err;
8238     + /* a single branch, or one opaque at bstart, needs no further check */
8239     + bstart = dbstart(dentry);
8240     + if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
8241     + return 0;
8242     + /* temporarily move dbstart right by one, re-run check_empty, restore */
8243     + set_dbstart(dentry, bstart + 1);
8244     + err = check_empty(dentry, NULL);
8245     + set_dbstart(dentry, bstart);
8246     + if (err == -ENOTEMPTY)
8247     + err = -EXDEV;
8248     + return err;
8249     +}
8250     +/* Top-level rename: revalidate and prepare both dentries, then do the rename. */
8251     +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8252     + struct inode *new_dir, struct dentry *new_dentry)
8253     +{ /* returns 0 or a negative errno; on error new_dentry is d_drop()ed */
8254     + int err = 0;
8255     + struct dentry *wh_dentry;
8256     +
8257     + unionfs_read_lock(old_dentry->d_sb);
8258     + unionfs_double_lock_dentry(old_dentry, new_dentry);
8259     +
8260     + if (!__unionfs_d_revalidate_chain(old_dentry, NULL, 0)) {
8261     + err = -ESTALE;
8262     + goto out;
8263     + }
8264     + if (!d_deleted(new_dentry) && new_dentry->d_inode &&
8265     + !__unionfs_d_revalidate_chain(new_dentry, NULL, 0)) {
8266     + err = -ESTALE;
8267     + goto out;
8268     + }
8269     + /* directories must first pass the may_rename_dir() checks */
8270     + if (!S_ISDIR(old_dentry->d_inode->i_mode))
8271     + err = unionfs_partial_lookup(old_dentry);
8272     + else
8273     + err = may_rename_dir(old_dentry);
8274     +
8275     + if (err)
8276     + goto out;
8277     +
8278     + err = unionfs_partial_lookup(new_dentry);
8279     + if (err)
8280     + goto out;
8281     +
8282     + /*
8283     + * if new_dentry is already lower because of whiteout,
8284     + * simply override it even if the whited-out dir is not empty.
8285     + */
8286     + wh_dentry = lookup_whiteout(new_dentry);
8287     + if (!IS_ERR(wh_dentry))
8288     + dput(wh_dentry); /* whiteout found: the checks below are skipped */
8289     + else if (new_dentry->d_inode) {
8290     + if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8291     + S_ISDIR(new_dentry->d_inode->i_mode)) {
8292     + err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8293     + -ENOTDIR : -EISDIR;
8294     + goto out;
8295     + }
8296     +
8297     + if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8298     + struct unionfs_dir_state *namelist;
8299     + /* check if this unionfs directory is empty or not */
8300     + err = check_empty(new_dentry, &namelist);
8301     + if (err)
8302     + goto out;
8303     +
8304     + if (!is_robranch(new_dentry))
8305     + err = delete_whiteouts(new_dentry,
8306     + dbstart(new_dentry),
8307     + namelist);
8308     +
8309     + free_rdstate(namelist);
8310     +
8311     + if (err)
8312     + goto out;
8313     + }
8314     + }
8315     + err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8316     +out:
8317     + if (err)
8318     + /* clear the new_dentry stuff created */
8319     + d_drop(new_dentry);
8320     + else {
8321     + /*
8322     + * force re-lookup since the dir on ro branch is not renamed,
8323     + * and lower dentries still indicate the un-renamed ones.
8324     + */
8325     + if (S_ISDIR(old_dentry->d_inode->i_mode))
8326     + atomic_dec(&UNIONFS_D(old_dentry)->generation);
8327     + else
8328     + unionfs_purge_extras(old_dentry);
8329     + if (new_dentry->d_inode &&
8330     + !S_ISDIR(new_dentry->d_inode->i_mode)) {
8331     + unionfs_purge_extras(new_dentry);
8332     + unionfs_inherit_mnt(new_dentry);
8333     + if (!unionfs_lower_inode(new_dentry->d_inode)) {
8334     + /*
8335     + * If we get here, it means that no copyup
8336     + * was needed, and that a file by the old
8337     + * name already existing on the destination
8338     + * branch; that file got renamed earlier in
8339     + * this function, so all we need to do here
8340     + * is set the lower inode.
8341     + */
8342     + struct inode *inode;
8343     + inode = unionfs_lower_inode(
8344     + old_dentry->d_inode);
8345     + atomic_inc(&inode->i_count);
8346     + unionfs_set_lower_inode_idx(
8347     + new_dentry->d_inode,
8348     + dbstart(new_dentry), inode);
8349     + }
8350     +
8351     + }
8352     + /* if all of this renaming succeeded, update our times */
8353     + unionfs_copy_attr_times(old_dir);
8354     + unionfs_copy_attr_times(new_dir);
8355     + unionfs_copy_attr_times(old_dentry->d_inode);
8356     + unionfs_copy_attr_times(new_dentry->d_inode);
8357     + unionfs_check_inode(old_dir);
8358     + unionfs_check_inode(new_dir);
8359     + unionfs_check_dentry(old_dentry);
8360     + unionfs_check_dentry(new_dentry);
8361     + }
8362     + /* release the dentry locks and the superblock lock taken on entry */
8363     + unionfs_unlock_dentry(new_dentry);
8364     + unionfs_unlock_dentry(old_dentry);
8365     + unionfs_read_unlock(old_dentry->d_sb);
8366     + return err;
8367     +}
8368     diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8369     new file mode 100644
8370     index 0000000..478041d
8371     --- /dev/null
8372     +++ b/fs/unionfs/sioq.c
8373     @@ -0,0 +1,123 @@
8374     +/*
8375     + * Copyright (c) 2003-2007 Erez Zadok
8376     + * Copyright (c) 2003-2006 Charles P. Wright
8377     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8378     + * Copyright (c) 2005-2006 Junjiro Okajima
8379     + * Copyright (c) 2005 Arun M. Krishnakumar
8380     + * Copyright (c) 2004-2006 David P. Quigley
8381     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8382     + * Copyright (c) 2003 Puja Gupta
8383     + * Copyright (c) 2003 Harikesavan Krishnan
8384     + * Copyright (c) 2003-2007 Stony Brook University
8385     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8386     + *
8387     + * This program is free software; you can redistribute it and/or modify
8388     + * it under the terms of the GNU General Public License version 2 as
8389     + * published by the Free Software Foundation.
8390     + */
8391     +
8392     +#include "union.h"
8393     +
8394     +/*
8395     + * Super-user IO work Queue - sometimes we need to perform actions which
8396     + * would fail due to the unix permissions on the parent directory (e.g.,
8397     + * rmdir a directory which appears empty, but in reality contains
8398     + * whiteouts).
8399     + */
8400     +
8401     +static struct workqueue_struct *superio_workqueue;
8402     +/* Create the "unionfs_siod" workqueue; returns 0 or a negative errno. */
8403     +int __init init_sioq(void)
8404     +{
8405     + int err;
8406     +
8407     + superio_workqueue = create_workqueue("unionfs_siod");
8408     + if (superio_workqueue && !IS_ERR(superio_workqueue))
8409     + return 0;
8410     + /* create_workqueue() reports failure with NULL: map it to -ENOMEM */
8411     + err = superio_workqueue ? PTR_ERR(superio_workqueue) : -ENOMEM;
8412     + printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8413     + superio_workqueue = NULL; /* lets stop_sioq() skip the destroy */
8414     + return err;
8415     +}
8416     +/* Destroy the sioq workqueue, if init_sioq() managed to create one. */
8417     +void stop_sioq(void)
8418     +{
8419     + if (superio_workqueue)
8420     + destroy_workqueue(superio_workqueue);
8421     +}
8422     +/* Queue func with args on the sioq workqueue and wait until it completes. */
8423     +void run_sioq(work_func_t func, struct sioq_args *args)
8424     +{
8425     + INIT_WORK(&args->work, func);
8426     +
8427     + init_completion(&args->comp);
8428     + while (!queue_work(superio_workqueue, &args->work)) {
8429     + /* TODO: do accounting if needed */
8430     + schedule(); /* queueing was refused: yield, then retry */
8431     + }
8432     + wait_for_completion(&args->comp); /* the handlers call complete() */
8433     +}
8434     +/* sioq work handler: vfs_create() with the arguments stored in sioq_args. */
8435     +void __unionfs_create(struct work_struct *work)
8436     +{
8437     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8438     + struct create_args *req = &sa->create;
8439     +
8440     + sa->err = vfs_create(req->parent, req->dentry, req->mode, req->nd);
8441     + complete(&sa->comp); /* run_sioq() waits on this completion */
8442     +}
8443     +/* sioq work handler: vfs_mkdir() with the arguments stored in sioq_args. */
8444     +void __unionfs_mkdir(struct work_struct *work)
8445     +{
8446     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8447     + struct mkdir_args *req = &sa->mkdir;
8448     +
8449     + sa->err = vfs_mkdir(req->parent, req->dentry, req->mode);
8450     + complete(&sa->comp); /* run_sioq() waits on this completion */
8451     +}
8452     +/* sioq work handler: vfs_mknod() with the arguments stored in sioq_args. */
8453     +void __unionfs_mknod(struct work_struct *work)
8454     +{
8455     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8456     + struct mknod_args *req = &sa->mknod;
8457     +
8458     + sa->err = vfs_mknod(req->parent, req->dentry, req->mode, req->dev);
8459     + complete(&sa->comp); /* run_sioq() waits on this completion */
8460     +}
8461     +/* sioq work handler: vfs_symlink() with the arguments stored in sioq_args. */
8462     +void __unionfs_symlink(struct work_struct *work)
8463     +{
8464     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8465     + struct symlink_args *req = &sa->symlink;
8466     +
8467     + sa->err = vfs_symlink(req->parent, req->dentry, req->symbuf, req->mode);
8468     + complete(&sa->comp); /* run_sioq() waits on this completion */
8469     +}
8470     +/* sioq work handler: vfs_unlink() with the arguments stored in sioq_args. */
8471     +void __unionfs_unlink(struct work_struct *work)
8472     +{
8473     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8474     + struct unlink_args *req = &sa->unlink;
8475     +
8476     + sa->err = vfs_unlink(req->parent, req->dentry);
8477     + complete(&sa->comp); /* run_sioq() waits on this completion */
8478     +}
8479     +/* sioq work handler: do_delete_whiteouts() with the stored arguments. */
8480     +void __delete_whiteouts(struct work_struct *work)
8481     +{
8482     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8483     + struct deletewh_args *req = &sa->deletewh;
8484     +
8485     + sa->err = do_delete_whiteouts(req->dentry, req->bindex, req->namelist);
8486     + complete(&sa->comp); /* run_sioq() waits on this completion */
8487     +}
8488     +/* sioq work handler: look up UNIONFS_DIR_OPAQUE under the given dentry. */
8489     +void __is_opaque_dir(struct work_struct *work)
8490     +{
8491     + struct sioq_args *sa = container_of(work, struct sioq_args, work);
8492     +
8493     + sa->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, sa->is_opaque.dentry,
8494     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
8495     + complete(&sa->comp); /* the lookup result is left in sa->ret */
8496     +}
8497     diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8498     new file mode 100644
8499     index 0000000..e180756
8500     --- /dev/null
8501     +++ b/fs/unionfs/sioq.h
8502     @@ -0,0 +1,96 @@
8503     +/*
8504     + * Copyright (c) 2003-2007 Erez Zadok
8505     + * Copyright (c) 2003-2006 Charles P. Wright
8506     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8507     + * Copyright (c) 2005-2006 Junjiro Okajima
8508     + * Copyright (c) 2005 Arun M. Krishnakumar
8509     + * Copyright (c) 2004-2006 David P. Quigley
8510     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8511     + * Copyright (c) 2003 Puja Gupta
8512     + * Copyright (c) 2003 Harikesavan Krishnan
8513     + * Copyright (c) 2003-2007 Stony Brook University
8514     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8515     + *
8516     + * This program is free software; you can redistribute it and/or modify
8517     + * it under the terms of the GNU General Public License version 2 as
8518     + * published by the Free Software Foundation.
8519     + */
8520     +
8521     +#ifndef _SIOQ_H
8522     +#define _SIOQ_H
8523     +/* Arguments read by __delete_whiteouts(). */
8524     +struct deletewh_args {
8525     + struct unionfs_dir_state *namelist;
8526     + struct dentry *dentry;
8527     + int bindex;
8528     +};
8529     +/* Argument read by __is_opaque_dir(): the dentry to look under. */
8530     +struct is_opaque_args {
8531     + struct dentry *dentry;
8532     +};
8533     +/* Arguments passed to vfs_create() by __unionfs_create(). */
8534     +struct create_args {
8535     + struct inode *parent;
8536     + struct dentry *dentry;
8537     + umode_t mode;
8538     + struct nameidata *nd;
8539     +};
8540     +/* Arguments passed to vfs_mkdir() by __unionfs_mkdir(). */
8541     +struct mkdir_args {
8542     + struct inode *parent;
8543     + struct dentry *dentry;
8544     + umode_t mode;
8545     +};
8546     +/* Arguments passed to vfs_mknod() by __unionfs_mknod(). */
8547     +struct mknod_args {
8548     + struct inode *parent;
8549     + struct dentry *dentry;
8550     + umode_t mode;
8551     + dev_t dev;
8552     +};
8553     +/* Arguments passed to vfs_symlink() by __unionfs_symlink(). */
8554     +struct symlink_args {
8555     + struct inode *parent;
8556     + struct dentry *dentry;
8557     + char *symbuf;
8558     + umode_t mode;
8559     +};
8560     +/* Arguments passed to vfs_unlink() by __unionfs_unlink(). */
8561     +struct unlink_args {
8562     + struct inode *parent;
8563     + struct dentry *dentry;
8564     +};
8565     +
8566     +/* One sioq request: work item, completion, result, per-operation args. */
8567     +struct sioq_args {
8568     + struct completion comp; /* completed by the handler; run_sioq() waits */
8569     + struct work_struct work; /* initialised and queued by run_sioq() */
8570     + int err; /* result of the int-returning handlers */
8571     + void *ret; /* pointer result; set by __is_opaque_dir() */
8572     +
8573     + union { /* only the member matching the queued handler is read */
8574     + struct deletewh_args deletewh;
8575     + struct is_opaque_args is_opaque;
8576     + struct create_args create;
8577     + struct mkdir_args mkdir;
8578     + struct mknod_args mknod;
8579     + struct symlink_args symlink;
8580     + struct unlink_args unlink;
8581     + };
8582     +};
8583     +
8584     +/* Extern definitions for SIOQ functions */
8585     +extern int __init init_sioq(void);
8586     +extern void stop_sioq(void);
8587     +extern void run_sioq(work_func_t func, struct sioq_args *args);
8588     +
8589     +/* Extern definitions for our privilege escalation helpers */
8590     +extern void __unionfs_create(struct work_struct *work);
8591     +extern void __unionfs_mkdir(struct work_struct *work);
8592     +extern void __unionfs_mknod(struct work_struct *work);
8593     +extern void __unionfs_symlink(struct work_struct *work);
8594     +extern void __unionfs_unlink(struct work_struct *work);
8595     +extern void __delete_whiteouts(struct work_struct *work);
8596     +extern void __is_opaque_dir(struct work_struct *work);
8597     +
8598     +#endif /* not _SIOQ_H */
8599     diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8600     new file mode 100644
8601     index 0000000..5db9e62
8602     --- /dev/null
8603     +++ b/fs/unionfs/subr.c
8604     @@ -0,0 +1,240 @@
8605     +/*
8606     + * Copyright (c) 2003-2007 Erez Zadok
8607     + * Copyright (c) 2003-2006 Charles P. Wright
8608     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8609     + * Copyright (c) 2005-2006 Junjiro Okajima
8610     + * Copyright (c) 2005 Arun M. Krishnakumar
8611     + * Copyright (c) 2004-2006 David P. Quigley
8612     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8613     + * Copyright (c) 2003 Puja Gupta
8614     + * Copyright (c) 2003 Harikesavan Krishnan
8615     + * Copyright (c) 2003-2007 Stony Brook University
8616     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8617     + *
8618     + * This program is free software; you can redistribute it and/or modify
8619     + * it under the terms of the GNU General Public License version 2 as
8620     + * published by the Free Software Foundation.
8621     + */
8622     +
8623     +#include "union.h"
8624     +
8625     +/*
8626     + * Create a whiteout for dentry's name, trying lower branch 'start' first
8627     + * and, when a branch fails with a copyup-class error, each branch to its
8628     + * left. Returns 0 if a whiteout exists or was created, else -errno.
8629     + */
8630     +int create_whiteout(struct dentry *dentry, int start)
8631     +{
8632     + int bstart, bend, bindex;
8633     + struct dentry *lower_dir_dentry;
8634     + struct dentry *lower_dentry;
8635     + struct dentry *lower_wh_dentry;
8636     + char *name = NULL;
8637     + int err = -EINVAL; /* returned if no branch gets as far as vfs_create */
8638     +
8639     + verify_locked(dentry);
8640     +
8641     + bstart = dbstart(dentry);
8642     + bend = dbend(dentry);
8643     +
8644     + /* create dentry's whiteout equivalent */
8645     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8646     + if (IS_ERR(name)) {
8647     + err = PTR_ERR(name);
8648     + name = NULL; /* out: calls kfree(); never pass it an ERR_PTR */
8649     + goto out;
8650     + }
8651     + for (bindex = start; bindex >= 0; bindex--) {
8652     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8653     +
8654     + if (!lower_dentry) {
8655     + /*
8656     + * if lower dentry is not present, create the
8657     + * entire lower dentry directory structure and go
8658     + * ahead. Since we want to just create whiteout, we
8659     + * only want the parent dentry, and hence get rid of
8660     + * this dentry.
8661     + */
8662     + lower_dentry = create_parents(dentry->d_inode,
8663     + dentry,
8664     + dentry->d_name.name,
8665     + bindex);
8666     + if (!lower_dentry || IS_ERR(lower_dentry)) {
8667     + printk(KERN_DEBUG "unionfs: create_parents "
8668     + "failed for bindex = %d\n", bindex);
8669     + continue;
8670     + }
8671     + }
8672     +
8673     + lower_wh_dentry =
8674     + lookup_one_len(name, lower_dentry->d_parent,
8675     + dentry->d_name.len + UNIONFS_WHLEN);
8676     + if (IS_ERR(lower_wh_dentry))
8677     + continue;
8678     +
8679     + /*
8680     + * The whiteout already exists. This used to be impossible,
8681     + * but now is possible because of opaqueness.
8682     + */
8683     + if (lower_wh_dentry->d_inode) {
8684     + dput(lower_wh_dentry);
8685     + err = 0;
8686     + goto out;
8687     + }
8688     +
8689     + lower_dir_dentry = lock_parent(lower_wh_dentry);
8690     + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
8691     + err = vfs_create(lower_dir_dentry->d_inode,
8692     + lower_wh_dentry,
8693     + ~current->fs->umask & S_IRWXUGO,
8694     + NULL);
8695     + unlock_dir(lower_dir_dentry);
8696     + dput(lower_wh_dentry);
8697     + /* stop on success or on any error that is not a copyup error */
8698     + if (!err || !IS_COPYUP_ERR(err))
8699     + break;
8700     + }
8701     +
8702     + /* set dbopaque so that lookup will not proceed after this branch */
8703     + if (!err)
8704     + set_dbopaque(dentry, bindex);
8705     +
8706     +out:
8707     + kfree(name);
8708     + return err;
8709     +}
8710     +
8711     +/*
8712     + * Helper for rename, which ends up with hosed over dentries when it needs
8713     + * to revert: re-looks up dentry's name in branch bindex of its parent.
8714     + */
8715     +int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex)
8716     +{ /* returns 0, or the error from lookup_one_len() */
8717     + struct dentry *lower_dentry;
8718     + struct dentry *lower_parent;
8719     + int err = 0;
8720     +
8721     + verify_locked(dentry);
8722     +
8723     + unionfs_lock_dentry(dentry->d_parent);
8724     + lower_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
8725     + unionfs_unlock_dentry(dentry->d_parent);
8726     + /* NOTE(review): lower_parent is dereferenced unchecked - confirm non-NULL */
8727     + BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
8728     +
8729     + lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
8730     + dentry->d_name.len);
8731     + if (IS_ERR(lower_dentry)) {
8732     + err = PTR_ERR(lower_dentry);
8733     + goto out;
8734     + }
8735     + /* drop the references currently cached for this branch */
8736     + dput(unionfs_lower_dentry_idx(dentry, bindex));
8737     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
8738     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
8739     +
8740     + if (!lower_dentry->d_inode) { /* nothing there: clear the slot */
8741     + dput(lower_dentry);
8742     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
8743     + } else { /* install the fresh lower dentry and its inode */
8744     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
8745     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
8746     + igrab(lower_dentry->d_inode));
8747     + }
8748     +
8749     +out:
8750     + return err;
8751     +}
8752     +/* Ensure UNIONFS_DIR_OPAQUE exists in dentry's lower dir at branch bindex. */
8753     +int make_dir_opaque(struct dentry *dentry, int bindex)
8754     +{ /* returns 0 (and sets dbopaque) on success, else a negative errno */
8755     + int err = 0;
8756     + struct dentry *lower_dentry, *diropq;
8757     + struct inode *lower_dir;
8758     +
8759     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8760     + lower_dir = lower_dentry->d_inode;
8761     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
8762     + !S_ISDIR(lower_dir->i_mode));
8763     + /* i_mutex is held across both the lookup and the create */
8764     + mutex_lock(&lower_dir->i_mutex);
8765     + diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
8766     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
8767     + if (IS_ERR(diropq)) {
8768     + err = PTR_ERR(diropq);
8769     + goto out;
8770     + }
8771     + /* an already existing marker counts as success (err stays 0) */
8772     + if (!diropq->d_inode)
8773     + err = vfs_create(lower_dir, diropq, S_IRUGO, NULL);
8774     + if (!err)
8775     + set_dbopaque(dentry, bindex);
8776     +
8777     + dput(diropq);
8778     +
8779     +out:
8780     + mutex_unlock(&lower_dir->i_mutex);
8781     + return err;
8782     +}
8783     +
8784     +/*
8785     + * Link count for a unionfs inode: a non-directory reports its lower inode's
8786     + * i_nlink; a directory sums (i_nlink - 2) over its lower dirs, then adds 2.
8787     + */
8788     +int unionfs_get_nlinks(const struct inode *inode)
8789     +{
8790     + int sum_nlinks = 0;
8791     + int dirs = 0; /* number of live lower directories seen */
8792     + int bindex;
8793     + struct inode *lower_inode;
8794     +
8795     + /* don't bother to do all the work since we're unlinked */
8796     + if (inode->i_nlink == 0)
8797     + return 0;
8798     +
8799     + if (!S_ISDIR(inode->i_mode))
8800     + return unionfs_lower_inode(inode)->i_nlink;
8801     +
8802     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
8803     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
8804     +
8805     + /* ignore files */
8806     + if (!lower_inode || !S_ISDIR(lower_inode->i_mode))
8807     + continue;
8808     +
8809     + BUG_ON(lower_inode->i_nlink < 0);
8810     +
8811     + /* A deleted directory. */
8812     + if (lower_inode->i_nlink == 0)
8813     + continue;
8814     + dirs++;
8815     +
8816     + /*
8817     + * A broken directory...
8818     + *
8819     + * Some filesystems don't properly set the number of links
8820     + * on empty directories
8821     + */
8822     + if (lower_inode->i_nlink == 1)
8823     + sum_nlinks += 2;
8824     + else
8825     + sum_nlinks += (lower_inode->i_nlink - 2);
8826     + }
8827     + /* no live lower directory at all yields 0 */
8828     + return (!dirs ? 0 : sum_nlinks + 2);
8829     +}
8830     +
8831     +/* construct whiteout filename */
8832     +char *alloc_whname(const char *name, int len)
8833     +{
8834     + char *buf;
8835     +
8836     + buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
8837     + if (!buf)
8838     + return ERR_PTR(-ENOMEM);
8839     +
8840     + strcpy(buf, UNIONFS_WHPFX);
8841     + strlcat(buf, name, len + UNIONFS_WHLEN + 1);
8842     +
8843     + return buf;
8844     +}
8845     diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8846     new file mode 100644
8847     index 0000000..f4118df
8848     --- /dev/null
8849     +++ b/fs/unionfs/super.c
8850     @@ -0,0 +1,1007 @@
8851     +/*
8852     + * Copyright (c) 2003-2007 Erez Zadok
8853     + * Copyright (c) 2003-2006 Charles P. Wright
8854     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8855     + * Copyright (c) 2005-2006 Junjiro Okajima
8856     + * Copyright (c) 2005 Arun M. Krishnakumar
8857     + * Copyright (c) 2004-2006 David P. Quigley
8858     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8859     + * Copyright (c) 2003 Puja Gupta
8860     + * Copyright (c) 2003 Harikesavan Krishnan
8861     + * Copyright (c) 2003-2007 Stony Brook University
8862     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8863     + *
8864     + * This program is free software; you can redistribute it and/or modify
8865     + * it under the terms of the GNU General Public License version 2 as
8866     + * published by the Free Software Foundation.
8867     + */
8868     +
8869     +#include "union.h"
8870     +
8871     +/*
8872     + * The inode cache is used with alloc_inode for both our inode info and the
8873     + * vfs inode.
8874     + */
8875     +static struct kmem_cache *unionfs_inode_cachep;
8876     +
8877     +static void unionfs_read_inode(struct inode *inode)
8878     +{
8879     + extern struct address_space_operations unionfs_aops;
8880     + int size;
8881     + struct unionfs_inode_info *info = UNIONFS_I(inode);
8882     +
8883     + unionfs_read_lock(inode->i_sb);
8884     +
8885     + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
8886     + info->bstart = -1;
8887     + info->bend = -1;
8888     + atomic_set(&info->generation,
8889     + atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8890     + spin_lock_init(&info->rdlock);
8891     + info->rdcount = 1;
8892     + info->hashsize = -1;
8893     + INIT_LIST_HEAD(&info->readdircache);
8894     +
8895     + size = sbmax(inode->i_sb) * sizeof(struct inode *);
8896     + info->lower_inodes = kzalloc(size, GFP_KERNEL);
8897     + if (!info->lower_inodes) {
8898     + printk(KERN_ERR "unionfs: no kernel memory when allocating "
8899     + "lower-pointer array!\n");
8900     + BUG();
8901     + }
8902     +
8903     + inode->i_version++;
8904     + inode->i_op = &unionfs_main_iops;
8905     + inode->i_fop = &unionfs_main_fops;
8906     +
8907     + inode->i_mapping->a_ops = &unionfs_aops;
8908     +
8909     + unionfs_read_unlock(inode->i_sb);
8910     +}
8911     +
8912     +/*
8913     + * we now define delete_inode, because there are two VFS paths that may
8914     + * destroy an inode: one of them calls clear inode before doing everything
8915     + * else that's needed, and the other is fine. This way we truncate the inode
8916     + * size (and its pages) and then clear our own inode, which will do an iput
8917     + * on our and the lower inode.
8918     + *
8919     + * No need to lock sb info's rwsem.
8920     + */
8921     +static void unionfs_delete_inode(struct inode *inode)
8922     +{
8923     + inode->i_size = 0; /* every f/s seems to do that */
8924     +
8925     + if (inode->i_data.nrpages)
8926     + truncate_inode_pages(&inode->i_data, 0);
8927     +
8928     + clear_inode(inode);
8929     +}
8930     +
8931     +/*
8932     + * final actions when unmounting a file system
8933     + *
8934     + * No need to lock rwsem.
8935     + */
8936     +static void unionfs_put_super(struct super_block *sb)
8937     +{
8938     + int bindex, bstart, bend;
8939     + struct unionfs_sb_info *spd;
8940     + int leaks = 0;
8941     +
8942     + spd = UNIONFS_SB(sb);
8943     + if (!spd)
8944     + return;
8945     +
8946     + bstart = sbstart(sb);
8947     + bend = sbend(sb);
8948     +
8949     + /* Make sure we have no leaks of branchget/branchput. */
8950     + for (bindex = bstart; bindex <= bend; bindex++)
8951     + if (branch_count(sb, bindex) != 0) {
8952     + printk("unionfs: branch %d has %d references left!\n",
8953     + bindex, branch_count(sb, bindex));
8954     + leaks = 1;
8955     + }
8956     + BUG_ON(leaks != 0);
8957     +
8958     + kfree(spd->data);
8959     + kfree(spd);
8960     + sb->s_fs_info = NULL;
8961     +}
8962     +
8963     +/*
8964     + * Since people use this to answer the "How big of a file can I write?"
8965     + * question, we report the size of the highest priority branch as the size of
8966     + * the union.
8967     + */
8968     +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8969     +{
8970     + int err = 0;
8971     + struct super_block *sb;
8972     + struct dentry *lower_dentry;
8973     +
8974     + sb = dentry->d_sb;
8975     +
8976     + unionfs_read_lock(sb);
8977     + unionfs_lock_dentry(dentry);
8978     +
8979     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
8980     + err = -ESTALE;
8981     + goto out;
8982     + }
8983     + unionfs_check_dentry(dentry);
8984     +
8985     + lower_dentry = unionfs_lower_dentry(sb->s_root);
8986     + err = vfs_statfs(lower_dentry, buf);
8987     +
8988     + /* set return buf to our f/s to avoid confusing user-level utils */
8989     + buf->f_type = UNIONFS_SUPER_MAGIC;
8990     + /*
8991     + * Our maximum file name can is shorter by a few bytes because every
8992     + * file name could potentially be whited-out.
8993     + *
8994     + * XXX: this restriction goes away with ODF.
8995     + */
8996     + buf->f_namelen -= UNIONFS_WHLEN;
8997     +
8998     + /*
8999     + * reset two fields to avoid confusing user-land.
9000     + * XXX: is this still necessary?
9001     + */
9002     + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
9003     + memset(&buf->f_spare, 0, sizeof(buf->f_spare));
9004     +
9005     +out:
9006     + unionfs_unlock_dentry(dentry);
9007     + unionfs_check_dentry(dentry);
9008     + unionfs_read_unlock(sb);
9009     + return err;
9010     +}
9011     +
9012     +/* handle mode changing during remount */
9013     +static noinline int do_remount_mode_option(char *optarg, int cur_branches,
9014     + struct unionfs_data *new_data,
9015     + struct path *new_lower_paths)
9016     +{
9017     + int err = -EINVAL;
9018     + int perms, idx;
9019     + char *modename = strchr(optarg, '=');
9020     + struct nameidata nd;
9021     +
9022     + /* by now, optarg contains the branch name */
9023     + if (!*optarg) {
9024     + printk("unionfs: no branch specified for mode change.\n");
9025     + goto out;
9026     + }
9027     + if (!modename) {
9028     + printk("unionfs: branch \"%s\" requires a mode.\n", optarg);
9029     + goto out;
9030     + }
9031     + *modename++ = '\0';
9032     + perms = __parse_branch_mode(modename);
9033     + if (perms == 0) {
9034     + printk("unionfs: invalid mode \"%s\" for \"%s\".\n",
9035     + modename, optarg);
9036     + goto out;
9037     + }
9038     +
9039     + /*
9040     + * Find matching branch index. For now, this assumes that nothing
9041     + * has been mounted on top of this Unionfs stack. Once we have /odf
9042     + * and cache-coherency resolved, we'll address the branch-path
9043     + * uniqueness.
9044     + */
9045     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9046     + if (err) {
9047     + printk(KERN_WARNING "unionfs: error accessing "
9048     + "lower directory \"%s\" (error %d)\n",
9049     + optarg, err);
9050     + goto out;
9051     + }
9052     + for (idx=0; idx<cur_branches; idx++)
9053     + if (nd.mnt == new_lower_paths[idx].mnt &&
9054     + nd.dentry == new_lower_paths[idx].dentry)
9055     + break;
9056     + path_release(&nd); /* no longer needed */
9057     + if (idx == cur_branches) {
9058     + err = -ENOENT; /* err may have been reset above */
9059     + printk(KERN_WARNING "unionfs: branch \"%s\" "
9060     + "not found\n", optarg);
9061     + goto out;
9062     + }
9063     + /* check/change mode for existing branch */
9064     + /* we don't warn if perms==branchperms */
9065     + new_data[idx].branchperms = perms;
9066     + err = 0;
9067     +out:
9068     + return err;
9069     +}
9070     +
9071     +/* handle branch deletion during remount */
9072     +static noinline int do_remount_del_option(char *optarg, int cur_branches,
9073     + struct unionfs_data *new_data,
9074     + struct path *new_lower_paths)
9075     +{
9076     + int err = -EINVAL;
9077     + int idx;
9078     + struct nameidata nd;
9079     +
9080     + /* optarg contains the branch name to delete */
9081     +
9082     + /*
9083     + * Find matching branch index. For now, this assumes that nothing
9084     + * has been mounted on top of this Unionfs stack. Once we have /odf
9085     + * and cache-coherency resolved, we'll address the branch-path
9086     + * uniqueness.
9087     + */
9088     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9089     + if (err) {
9090     + printk(KERN_WARNING "unionfs: error accessing "
9091     + "lower directory \"%s\" (error %d)\n",
9092     + optarg, err);
9093     + goto out;
9094     + }
9095     + for (idx=0; idx < cur_branches; idx++)
9096     + if (nd.mnt == new_lower_paths[idx].mnt &&
9097     + nd.dentry == new_lower_paths[idx].dentry)
9098     + break;
9099     + path_release(&nd); /* no longer needed */
9100     + if (idx == cur_branches) {
9101     + printk(KERN_WARNING "unionfs: branch \"%s\" "
9102     + "not found\n", optarg);
9103     + err = -ENOENT;
9104     + goto out;
9105     + }
9106     + /* check if there are any open files on the branch to be deleted */
9107     + if (atomic_read(&new_data[idx].open_files) > 0) {
9108     + err = -EBUSY;
9109     + goto out;
9110     + }
9111     +
9112     + /*
9113     + * Now we have to delete the branch. First, release any handles it
9114     + * has. Then, move the remaining array indexes past "idx" in
9115     + * new_data and new_lower_paths one to the left. Finally, adjust
9116     + * cur_branches.
9117     + */
9118     + pathput(&new_lower_paths[idx]);
9119     +
9120     + if (idx < cur_branches - 1) {
9121     + /* if idx==cur_branches-1, we delete last branch: easy */
9122     + memmove(&new_data[idx], &new_data[idx+1],
9123     + (cur_branches - 1 - idx) *
9124     + sizeof(struct unionfs_data));
9125     + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
9126     + (cur_branches - 1 - idx) * sizeof(struct path));
9127     + }
9128     +
9129     + err = 0;
9130     +out:
9131     + return err;
9132     +}
9133     +
9134     +/* handle branch insertion during remount */
9135     +static noinline int do_remount_add_option(char *optarg, int cur_branches,
9136     + struct unionfs_data *new_data,
9137     + struct path *new_lower_paths,
9138     + int *high_branch_id)
9139     +{
9140     + int err = -EINVAL;
9141     + int perms;
9142     + int idx = 0; /* default: insert at beginning */
9143     + char *new_branch , *modename = NULL;
9144     + struct nameidata nd;
9145     +
9146     + /*
9147     + * optarg can be of several forms:
9148     + *
9149     + * /bar:/foo insert /foo before /bar
9150     + * /bar:/foo=ro insert /foo in ro mode before /bar
9151     + * /foo insert /foo in the beginning (prepend)
9152     + * :/foo insert /foo at the end (append)
9153     + */
9154     + if (*optarg == ':') { /* append? */
9155     + new_branch = optarg + 1; /* skip ':' */
9156     + idx = cur_branches;
9157     + goto found_insertion_point;
9158     + }
9159     + new_branch = strchr(optarg, ':');
9160     + if (!new_branch) { /* prepend? */
9161     + new_branch = optarg;
9162     + goto found_insertion_point;
9163     + }
9164     + *new_branch++ = '\0'; /* holds path+mode of new branch */
9165     +
9166     + /*
9167     + * Find matching branch index. For now, this assumes that nothing
9168     + * has been mounted on top of this Unionfs stack. Once we have /odf
9169     + * and cache-coherency resolved, we'll address the branch-path
9170     + * uniqueness.
9171     + */
9172     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9173     + if (err) {
9174     + printk(KERN_WARNING "unionfs: error accessing "
9175     + "lower directory \"%s\" (error %d)\n",
9176     + optarg, err);
9177     + goto out;
9178     + }
9179     + for (idx=0; idx < cur_branches; idx++)
9180     + if (nd.mnt == new_lower_paths[idx].mnt &&
9181     + nd.dentry == new_lower_paths[idx].dentry)
9182     + break;
9183     + path_release(&nd); /* no longer needed */
9184     + if (idx == cur_branches) {
9185     + printk(KERN_WARNING "unionfs: branch \"%s\" "
9186     + "not found\n", optarg);
9187     + err = -ENOENT;
9188     + goto out;
9189     + }
9190     +
9191     + /*
9192     + * At this point idx will hold the index where the new branch should
9193     + * be inserted before.
9194     + */
9195     +found_insertion_point:
9196     + /* find the mode for the new branch */
9197     + if (new_branch)
9198     + modename = strchr(new_branch, '=');
9199     + if (modename)
9200     + *modename++ = '\0';
9201     + perms = parse_branch_mode(modename);
9202     +
9203     + if (!new_branch || !*new_branch) {
9204     + printk(KERN_WARNING "unionfs: null new branch\n");
9205     + err = -EINVAL;
9206     + goto out;
9207     + }
9208     + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
9209     + if (err) {
9210     + printk(KERN_WARNING "unionfs: error accessing "
9211     + "lower directory \"%s\" (error %d)\n",
9212     + new_branch, err);
9213     + goto out;
9214     + }
9215     + /*
9216     + * It's probably safe to check_mode the new branch to insert. Note:
9217     + * we don't allow inserting branches which are unionfs's by
9218     + * themselves (check_branch returns EINVAL in that case). This is
9219     + * because this code base doesn't support stacking unionfs: the ODF
9220     + * code base supports that correctly.
9221     + */
9222     + if ((err = check_branch(&nd))) {
9223     + printk(KERN_WARNING "unionfs: lower directory "
9224     + "\"%s\" is not a valid branch\n", optarg);
9225     + path_release(&nd);
9226     + goto out;
9227     + }
9228     +
9229     + /*
9230     + * Now we have to insert the new branch. But first, move the bits
9231     + * to make space for the new branch, if needed. Finally, adjust
9232     + * cur_branches.
9233     + * We don't release nd here; it's kept until umount/remount.
9234     + */
9235     + if (idx < cur_branches) {
9236     + /* if idx==cur_branches, we append: easy */
9237     + memmove(&new_data[idx+1], &new_data[idx],
9238     + (cur_branches - idx) * sizeof(struct unionfs_data));
9239     + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
9240     + (cur_branches - idx) * sizeof(struct path));
9241     + }
9242     + new_lower_paths[idx].dentry = nd.dentry;
9243     + new_lower_paths[idx].mnt = nd.mnt;
9244     +
9245     + new_data[idx].sb = nd.dentry->d_sb;
9246     + atomic_set(&new_data[idx].open_files, 0);
9247     + new_data[idx].branchperms = perms;
9248     + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
9249     +
9250     + err = 0;
9251     +out:
9252     + return err;
9253     +}
9254     +
9255     +
9256     +/*
9257     + * Support branch management options on remount.
9258     + *
9259     + * See Documentation/filesystems/unionfs/ for details.
9260     + *
9261     + * @flags: numeric mount options
9262     + * @options: mount options string
9263     + *
9264     + * This function can rearrange a mounted union dynamically, adding and
9265     + * removing branches, including changing branch modes. Clearly this has to
9266     + * be done safely and atomically. Luckily, the VFS already calls this
9267     + * function with lock_super(sb) and lock_kernel() held, preventing
9268     + * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
9269     + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
9270     + * to purge dentries/inodes from our superblock, and also called
9271     + * fsync_super(sb) to purge any dirty pages. So we're good.
9272     + *
9273     + * XXX: however, our remount code may also need to invalidate mapped pages
9274     + * so as to force them to be re-gotten from the (newly reconfigured) lower
9275     + * branches. This has to wait for proper mmap and cache coherency support
9276     + * in the VFS.
9277     + *
9278     + */
9279     +static int unionfs_remount_fs(struct super_block *sb, int *flags,
9280     + char *options)
9281     +{
9282     + int err = 0;
9283     + int i;
9284     + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
9285     + char *optname;
9286     + int cur_branches = 0; /* no. of current branches */
9287     + int new_branches = 0; /* no. of branches actually left in the end */
9288     + int add_branches; /* est. no. of branches to add */
9289     + int del_branches; /* est. no. of branches to del */
9290     + int max_branches; /* max possible no. of branches */
9291     + struct unionfs_data *new_data = NULL, *tmp_data = NULL;
9292     + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
9293     + struct inode **new_lower_inodes = NULL;
9294     + int new_high_branch_id; /* new high branch ID */
9295     + int old_ibstart, old_ibend;
9296     + int size; /* memory allocation size, temp var */
9297     +
9298     + unionfs_write_lock(sb);
9299     +
9300     + /*
9301     + * The VFS will take care of "ro" and "rw" flags, so anything else
9302     + * is an error. So we need to check if any other flags may have
9303     + * been passed (none are allowed/supported as of now).
9304     + */
9305     + if ((*flags & ~MS_RDONLY) != 0) {
9306     + printk(KERN_WARNING
9307     + "unionfs: remount flags 0x%x unsupported\n", *flags);
9308     + err = -EINVAL;
9309     + goto out_error;
9310     + }
9311     +
9312     + /*
9313     + * If 'options' is NULL, it's probably because the user just changed
9314     + * the union to a "ro" or "rw" and the VFS took care of it. So
9315     + * nothing to do and we're done.
9316     + */
9317     + if (!options || options[0] == '\0')
9318     + goto out_error;
9319     +
9320     + /*
9321     + * Find out how many branches we will have in the end, counting
9322     + * "add" and "del" commands. Copy the "options" string because
9323     + * strsep modifies the string and we need it later.
9324     + */
9325     + optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL);
9326     + if (!optionstmp) {
9327     + err = -ENOMEM;
9328     + goto out_free;
9329     + }
9330     + new_branches = cur_branches = sbmax(sb); /* current no. branches */
9331     + add_branches = del_branches = 0;
9332     + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9333     + while ((optname = strsep(&optionstmp, ",")) != NULL) {
9334     + char *optarg;
9335     +
9336     + if (!optname || !*optname)
9337     + continue;
9338     +
9339     + optarg = strchr(optname, '=');
9340     + if (optarg)
9341     + *optarg++ = '\0';
9342     +
9343     + if (!strcmp("add", optname))
9344     + add_branches++;
9345     + else if (!strcmp("del", optname))
9346     + del_branches++;
9347     + }
9348     + kfree(tmp_to_free);
9349     + /* after all changes, will we have at least one branch left? */
9350     + if ((new_branches + add_branches - del_branches) < 1) {
9351     + printk(KERN_WARNING
9352     + "unionfs: no branches left after remount\n");
9353     + err = -EINVAL;
9354     + goto out_free;
9355     + }
9356     +
9357     + /*
9358     + * Since we haven't actually parsed all the add/del options, nor
9359     + * have we checked them for errors, we don't know for sure how many
9360     + * branches we will have after all changes have taken place. In
9361     + * fact, the total number of branches left could be less than what
9362     + * we have now. So we need to allocate space for a temporary
9363     + * placeholder that is at least as large as the maximum number of
9364     + * branches we *could* have, which is the current number plus all
9365     + * the additions. Once we're done with these temp placeholders, we
9366     + * may have to re-allocate the final size, copy over from the temp,
9367     + * and then free the temps (done near the end of this function).
9368     + */
9369     + max_branches = cur_branches + add_branches;
9370     + /* allocate space for new pointers to lower dentry */
9371     + tmp_data = kcalloc(max_branches,
9372     + sizeof(struct unionfs_data), GFP_KERNEL);
9373     + if (!tmp_data) {
9374     + err = -ENOMEM;
9375     + goto out_free;
9376     + }
9377     + /* allocate space for new pointers to lower paths */
9378     + tmp_lower_paths = kcalloc(max_branches,
9379     + sizeof(struct path), GFP_KERNEL);
9380     + if (!tmp_lower_paths) {
9381     + err = -ENOMEM;
9382     + goto out_free;
9383     + }
9384     + /* copy current info into new placeholders, incrementing refcnts */
9385     + memcpy(tmp_data, UNIONFS_SB(sb)->data,
9386     + cur_branches * sizeof(struct unionfs_data));
9387     + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9388     + cur_branches * sizeof(struct path));
9389     + for (i=0; i<cur_branches; i++)
9390     + pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9391     +
9392     + /*******************************************************************
9393     + * For each branch command, do path_lookup on the requested branch,
9394     + * and apply the change to a temp branch list. To handle errors, we
9395     + * already dup'ed the old arrays (above), and increased the refcnts
9396     + * on various f/s objects. So now we can do all the path_lookups
9397     + * and branch-management commands on the new arrays. If it fail mid
9398     + * way, we free the tmp arrays and *put all objects. If we succeed,
9399     + * then we free old arrays and *put its objects, and then replace
9400     + * the arrays with the new tmp list (we may have to re-allocate the
9401     + * memory because the temp lists could have been larger than what we
9402     + * actually needed).
9403     + *******************************************************************/
9404     +
9405     + while ((optname = strsep(&options, ",")) != NULL) {
9406     + char *optarg;
9407     +
9408     + if (!optname || !*optname)
9409     + continue;
9410     + /*
9411     + * At this stage optname holds a comma-delimited option, but
9412     + * without the commas. Next, we need to break the string on
9413     + * the '=' symbol to separate CMD=ARG, where ARG itself can
9414     + * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
9415     + * KEY is "/foo", and VAL is "rw".
9416     + */
9417     + optarg = strchr(optname, '=');
9418     + if (optarg)
9419     + *optarg++ = '\0';
9420     + /* incgen remount option (instead of old ioctl) */
9421     + if (!strcmp("incgen", optname)) {
9422     + err = 0;
9423     + goto out_no_change;
9424     + }
9425     +
9426     + /*
9427     + * All of our options take an argument now. (Insert ones
9428     + * that don't above this check.) So at this stage optname
9429     + * contains the CMD part and optarg contains the ARG part.
9430     + */
9431     + if (!optarg || !*optarg) {
9432     + printk("unionfs: all remount options require "
9433     + "an argument (%s).\n", optname);
9434     + err = -EINVAL;
9435     + goto out_release;
9436     + }
9437     +
9438     + if (!strcmp("add", optname)) {
9439     + err = do_remount_add_option(optarg, new_branches,
9440     + tmp_data,
9441     + tmp_lower_paths,
9442     + &new_high_branch_id);
9443     + if (err)
9444     + goto out_release;
9445     + new_branches++;
9446     + if (new_branches > UNIONFS_MAX_BRANCHES) {
9447     + printk("unionfs: command exceeds "
9448     + "%d branches\n", UNIONFS_MAX_BRANCHES);
9449     + err = -E2BIG;
9450     + goto out_release;
9451     + }
9452     + continue;
9453     + }
9454     + if (!strcmp("del", optname)) {
9455     + err = do_remount_del_option(optarg, new_branches,
9456     + tmp_data,
9457     + tmp_lower_paths);
9458     + if (err)
9459     + goto out_release;
9460     + new_branches--;
9461     + continue;
9462     + }
9463     + if (!strcmp("mode", optname)) {
9464     + err = do_remount_mode_option(optarg, new_branches,
9465     + tmp_data,
9466     + tmp_lower_paths);
9467     + if (err)
9468     + goto out_release;
9469     + continue;
9470     + }
9471     +
9472     + /*
9473     + * When you use "mount -o remount,ro", mount(8) will
9474     + * reportedly pass the original dirs= string from
9475     + * /proc/mounts. So for now, we have to ignore dirs= and
9476     + * not consider it an error, unless we want to allow users
9477     + * to pass dirs= in remount. Note that to allow the VFS to
9478     + * actually process the ro/rw remount options, we have to
9479     + * return 0 from this function.
9480     + */
9481     + if (!strcmp("dirs", optname)) {
9482     + printk(KERN_WARNING
9483     + "unionfs: remount ignoring option \"%s\".\n",
9484     + optname);
9485     + continue;
9486     + }
9487     +
9488     + err = -EINVAL;
9489     + printk(KERN_WARNING
9490     + "unionfs: unrecognized option \"%s\"\n", optname);
9491     + goto out_release;
9492     + }
9493     +
9494     +out_no_change:
9495     +
9496     + /******************************************************************
9497     + * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9498     + * see if we need to allocate a small-sized new vector, copy the
9499     + * vectors to their correct place, release the refcnt of the older
9500     + * ones, and return. Also handle invalidating any pages that will
9501     + * have to be re-read.
9502     + *******************************************************************/
9503     +
9504     + if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9505     + printk("unionfs: leftmost branch cannot be read-only "
9506     + "(use \"remount,ro\" to create a read-only union)\n");
9507     + err = -EINVAL;
9508     + goto out_release;
9509     + }
9510     +
9511     + /* (re)allocate space for new pointers to lower dentry */
9512     + size = new_branches * sizeof(struct unionfs_data);
9513     + new_data = krealloc(tmp_data, size, GFP_KERNEL);
9514     + if (!new_data) {
9515     + err = -ENOMEM;
9516     + goto out_release;
9517     + }
9518     + /* allocate space for new pointers to lower paths */
9519     + size = new_branches * sizeof(struct path);
9520     + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9521     + if (!new_lower_paths) {
9522     + err = -ENOMEM;
9523     + goto out_release;
9524     + }
9525     + /* allocate space for new pointers to lower inodes */
9526     + new_lower_inodes = kcalloc(new_branches,
9527     + sizeof(struct inode *), GFP_KERNEL);
9528     + if (!new_lower_inodes) {
9529     + err = -ENOMEM;
9530     + goto out_release;
9531     + }
9532     +
9533     + /*
9534     + * OK, just before we actually put the new set of branches in place,
9535     + * we need to ensure that our own f/s has no dirty objects left.
9536     + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9537     + * fsync_super(sb), taking care of dentries, inodes, and dirty
9538     + * pages. So all that's left is for us to invalidate any leftover
9539     + * (non-dirty) pages to ensure that they will be re-read from the
9540     + * new lower branches (and to support mmap).
9541     + */
9542     +
9543     + /*
9544     + * Now we call drop_pagecache_sb() to invalidate all pages in this
9545     + * super. This function calls invalidate_inode_pages(mapping),
9546     + * which calls invalidate_mapping_pages(): the latter, however, will
9547     + * not invalidate pages which are dirty, locked, under writeback, or
9548     + * mapped into page tables. We shouldn't have to worry about dirty
9549     + * or under-writeback pages, because do_remount_sb() called
9550     + * fsync_super() which would not have returned until all dirty pages
9551     + * were flushed.
9552     + *
9553     + * But do we have to worry about locked pages? Is there any chance
9554     + * that in here we'll get locked pages?
9555     + *
9556     + * XXX: what about pages mapped into pagetables? Are these pages
9557     + * which user processes may have mmap(2)'ed? If so, then we need to
9558     + * invalidate those too, no? Maybe we'll have to write our own
9559     + * version of invalidate_mapping_pages() which also handled mapped
9560     + * pages.
9561     + *
9562     + * XXX: Alternatively, maybe we should call truncate_inode_pages(),
9563     + * which use two passes over the pages list, and will truncate all
9564     + * pages.
9565     + */
9566     + drop_pagecache_sb(sb);
9567     +
9568     + /* copy new vectors into their correct place */
9569     + tmp_data = UNIONFS_SB(sb)->data;
9570     + UNIONFS_SB(sb)->data = new_data;
9571     + new_data = NULL; /* so don't free good pointers below */
9572     + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9573     + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9574     + new_lower_paths = NULL; /* so don't free good pointers below */
9575     +
9576     + /* update our unionfs_sb_info and root dentry index of last branch */
9577     + i = sbmax(sb); /* save no. of branches to release at end */
9578     + sbend(sb) = new_branches - 1;
9579     + set_dbend(sb->s_root, new_branches - 1);
9580     + old_ibstart = ibstart(sb->s_root->d_inode);
9581     + old_ibend = ibend(sb->s_root->d_inode);
9582     + ibend(sb->s_root->d_inode) = new_branches - 1;
9583     + UNIONFS_D(sb->s_root)->bcount = new_branches;
9584     + new_branches = i; /* no. of branches to release below */
9585     +
9586     + /*
9587     + * Update lower inodes: 3 steps
9588     + * 1. grab ref on all new lower inodes
9589     + */
9590     + for (i=dbstart(sb->s_root); i<=dbend(sb->s_root); i++) {
9591     + struct dentry *lower_dentry =
9592     + unionfs_lower_dentry_idx(sb->s_root, i);
9593     + igrab(lower_dentry->d_inode);
9594     + new_lower_inodes[i] = lower_dentry->d_inode;
9595     + }
9596     + /* 2. release reference on all older lower inodes */
9597     + for (i=old_ibstart; i<=old_ibend; i++) {
9598     + iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
9599     + unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
9600     + }
9601     + kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
9602     + /* 3. update root dentry's inode to new lower_inodes array */
9603     + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9604     + new_lower_inodes = NULL;
9605     +
9606     + /* maxbytes may have changed */
9607     + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9608     + /* update high branch ID */
9609     + sbhbid(sb) = new_high_branch_id;
9610     +
9611     + /* update our sb->generation for revalidating objects */
9612     + i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9613     + atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9614     + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9615     + if (!(*flags & MS_SILENT))
9616     + printk("unionfs: new generation number %d\n", i);
9617     + /* finally, update the root dentry's times */
9618     + unionfs_copy_attr_times(sb->s_root->d_inode);
9619     + err = 0; /* reset to success */
9620     +
9621     + /*
9622     + * The code above falls through to the next label, and releases the
9623     + * refcnts of the older ones (stored in tmp_*): if we fell through
9624     + * here, it means success. However, if we jump directly to this
9625     + * label from any error above, then an error occurred after we
9626     + * grabbed various refcnts, and so we have to release the
9627     + * temporarily constructed structures.
9628     + */
9629     +out_release:
9630     + /* no need to cleanup/release anything in tmp_data */
9631     + if (tmp_lower_paths)
9632     + for (i=0; i<new_branches; i++)
9633     + pathput(&tmp_lower_paths[i]);
9634     +out_free:
9635     + kfree(tmp_lower_paths);
9636     + kfree(tmp_data);
9637     + kfree(new_lower_paths);
9638     + kfree(new_data);
9639     + kfree(new_lower_inodes);
9640     +out_error:
9641     + unionfs_write_unlock(sb);
9642     + unionfs_check_dentry(sb->s_root);
9643     + return err;
9644     +}
9645     +
9646     +/*
9647     + * Called by iput() when the inode reference count reached zero
9648     + * and the inode is not hashed anywhere. Used to clear anything
9649     + * that needs to be, before the inode is completely destroyed and put
9650     + * on the inode free list.
9651     + *
9652     + * No need to lock sb info's rwsem.
9653     + */
9654     +static void unionfs_clear_inode(struct inode *inode)
9655     +{
9656     + int bindex, bstart, bend;
9657     + struct inode *lower_inode;
9658     + struct list_head *pos, *n;
9659     + struct unionfs_dir_state *rdstate;
9660     +
9661     + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9662     + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9663     + list_del(&rdstate->cache);
9664     + free_rdstate(rdstate);
9665     + }
9666     +
9667     + /*
9668     + * Decrement a reference to a lower_inode, which was incremented
9669     + * by our read_inode when it was created initially.
9670     + */
9671     + bstart = ibstart(inode);
9672     + bend = ibend(inode);
9673     + if (bstart >= 0) {
9674     + for (bindex = bstart; bindex <= bend; bindex++) {
9675     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
9676     + if (!lower_inode)
9677     + continue;
9678     + iput(lower_inode);
9679     + }
9680     + }
9681     +
9682     + kfree(UNIONFS_I(inode)->lower_inodes);
9683     + UNIONFS_I(inode)->lower_inodes = NULL;
9684     +}
9685     +
9686     +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9687     +{
9688     + struct unionfs_inode_info *i;
9689     +
9690     + i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9691     + if (!i)
9692     + return NULL;
9693     +
9694     + /* memset everything up to the inode to 0 */
9695     + memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9696     +
9697     + i->vfs_inode.i_version = 1;
9698     + return &i->vfs_inode;
9699     +}
9700     +
9701     +static void unionfs_destroy_inode(struct inode *inode)
9702     +{
9703     + kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9704     +}
9705     +
9706     +/* unionfs inode cache constructor */
9707     +static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
9708     +{
9709     + struct unionfs_inode_info *i = v;
9710     +
9711     + inode_init_once(&i->vfs_inode);
9712     +}
9713     +
9714     +int unionfs_init_inode_cache(void)
9715     +{
9716     + int err = 0;
9717     +
9718     + unionfs_inode_cachep =
9719     + kmem_cache_create("unionfs_inode_cache",
9720     + sizeof(struct unionfs_inode_info), 0,
9721     + SLAB_RECLAIM_ACCOUNT, init_once, NULL);
9722     + if (!unionfs_inode_cachep)
9723     + err = -ENOMEM;
9724     + return err;
9725     +}
9726     +
9727     +/* unionfs inode cache destructor */
9728     +void unionfs_destroy_inode_cache(void)
9729     +{
9730     + if (unionfs_inode_cachep)
9731     + kmem_cache_destroy(unionfs_inode_cachep);
9732     +}
9733     +
9734     +/*
9735     + * Called when we have a dirty inode. Here we only throw out the
9736     + * parts of our readdir list that are too old.
9737     + * The sync argument is unused and the return value is always 0.
9738     + * No need to grab sb info's rwsem.
9739     + */
9740     +static int unionfs_write_inode(struct inode *inode, int sync)
9741     +{
9742     + struct list_head *pos, *n;
9743     + struct unionfs_dir_state *rdstate;
9744     +
9745     + spin_lock(&UNIONFS_I(inode)->rdlock);
9746     + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9747     + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9748     + /* LRU order: stop at first unexpired entry (wrap-safe compare). */
9749     + if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9750     + break;
9751     + UNIONFS_I(inode)->rdcount--;
9752     + list_del(&rdstate->cache);
9753     + free_rdstate(rdstate);
9754     + }
9755     + spin_unlock(&UNIONFS_I(inode)->rdlock);
9756     +
9757     + return 0;
9758     +}
9759     +
9760     +/*
9761     + * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9762     + * code can actually succeed and won't leave tasks that need handling.
9763     + */
9764     +static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
9765     +{
9766     + struct super_block *sb, *lower_sb;
9767     + struct vfsmount *lower_mnt;
9768     + int bindex, bstart, bend;
9769     +
9770     + if (!(flags & MNT_FORCE))
9771     + /*
9772     + * we are not being MNT_FORCE'd, therefore we should emulate
9773     + * old behavior
9774     + */
9775     + return;
9776     +
9777     + sb = mnt->mnt_sb;
9778     +
9779     + unionfs_read_lock(sb);
9780     +
9781     + bstart = sbstart(sb);
9782     + bend = sbend(sb);
9783     + for (bindex = bstart; bindex <= bend; bindex++) {
9784     + lower_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
9785     + lower_sb = unionfs_lower_super_idx(sb, bindex);
9786     +
9787     + if (lower_mnt && lower_sb && lower_sb->s_op &&
9788     + lower_sb->s_op->umount_begin)
9789     + lower_sb->s_op->umount_begin(lower_mnt, flags);
9790     + }
9791     +
9792     + unionfs_read_unlock(sb);
9793     +}
9794     +
9795     +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9796     +{
9797     + struct super_block *sb = mnt->mnt_sb;
9798     + int ret = 0;
9799     + char *tmp_page;
9800     + char *path;
9801     + int bindex, bstart, bend;
9802     + int perms;
9803     +
9804     + unionfs_read_lock(sb);
9805     +
9806     + unionfs_lock_dentry(sb->s_root);
9807     +
9808     + tmp_page = (char*) __get_free_page(GFP_KERNEL);
9809     + if (!tmp_page) {
9810     + ret = -ENOMEM;
9811     + goto out;
9812     + }
9813     +
9814     + bstart = sbstart(sb);
9815     + bend = sbend(sb);
9816     +
9817     + seq_printf(m, ",dirs=");
9818     + for (bindex = bstart; bindex <= bend; bindex++) {
9819     + path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex),
9820     + unionfs_lower_mnt_idx(sb->s_root, bindex),
9821     + tmp_page, PAGE_SIZE);
9822     + if (IS_ERR(path)) {
9823     + ret = PTR_ERR(path);
9824     + goto out;
9825     + }
9826     +
9827     + perms = branchperms(sb, bindex);
9828     +
9829     + seq_printf(m, "%s=%s", path,
9830     + perms & MAY_WRITE ? "rw" : "ro");
9831     + if (bindex != bend)
9832     + seq_printf(m, ":");
9833     + }
9834     +
9835     +out:
9836     + free_page((unsigned long) tmp_page);
9837     +
9838     + unionfs_unlock_dentry(sb->s_root);
9839     +
9840     + unionfs_read_unlock(sb);
9841     +
9842     + return ret;
9843     +}
9844     +
9845     +struct super_operations unionfs_sops = {
9846     + .read_inode = unionfs_read_inode,
9847     + .delete_inode = unionfs_delete_inode,
9848     + .put_super = unionfs_put_super,
9849     + .statfs = unionfs_statfs,
9850     + .remount_fs = unionfs_remount_fs,
9851     + .clear_inode = unionfs_clear_inode,
9852     + .umount_begin = unionfs_umount_begin,
9853     + .show_options = unionfs_show_options,
9854     + .write_inode = unionfs_write_inode,
9855     + .alloc_inode = unionfs_alloc_inode,
9856     + .destroy_inode = unionfs_destroy_inode,
9857     +};
9858     diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9859     new file mode 100644
9860     index 0000000..ba0ff50
9861     --- /dev/null
9862     +++ b/fs/unionfs/union.h
9863     @@ -0,0 +1,581 @@
9864     +/*
9865     + * Copyright (c) 2003-2007 Erez Zadok
9866     + * Copyright (c) 2003-2006 Charles P. Wright
9867     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9868     + * Copyright (c) 2005 Arun M. Krishnakumar
9869     + * Copyright (c) 2004-2006 David P. Quigley
9870     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9871     + * Copyright (c) 2003 Puja Gupta
9872     + * Copyright (c) 2003 Harikesavan Krishnan
9873     + * Copyright (c) 2003-2007 Stony Brook University
9874     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
9875     + *
9876     + * This program is free software; you can redistribute it and/or modify
9877     + * it under the terms of the GNU General Public License version 2 as
9878     + * published by the Free Software Foundation.
9879     + */
9880     +
9881     +#ifndef _UNION_H_
9882     +#define _UNION_H_
9883     +
9884     +#include <linux/dcache.h>
9885     +#include <linux/file.h>
9886     +#include <linux/list.h>
9887     +#include <linux/fs.h>
9888     +#include <linux/mm.h>
9889     +#include <linux/module.h>
9890     +#include <linux/mount.h>
9891     +#include <linux/namei.h>
9892     +#include <linux/page-flags.h>
9893     +#include <linux/pagemap.h>
9894     +#include <linux/poll.h>
9895     +#include <linux/security.h>
9896     +#include <linux/seq_file.h>
9897     +#include <linux/slab.h>
9898     +#include <linux/spinlock.h>
9899     +#include <linux/smp_lock.h>
9900     +#include <linux/statfs.h>
9901     +#include <linux/string.h>
9902     +#include <linux/vmalloc.h>
9903     +#include <linux/writeback.h>
9904     +#include <linux/buffer_head.h>
9905     +#include <linux/xattr.h>
9906     +#include <linux/fs_stack.h>
9907     +#include <linux/magic.h>
9908     +#include <linux/log2.h>
9909     +
9910     +#include <asm/mman.h>
9911     +#include <asm/system.h>
9912     +
9913     +#include <linux/union_fs.h>
9914     +
9915     +/* the file system name */
9916     +#define UNIONFS_NAME "unionfs"
9917     +
9918     +/* unionfs root inode number */
9919     +#define UNIONFS_ROOT_INO 1
9920     +
9921     +/* number of times we try to get a unique temporary file name */
9922     +#define GET_TMPNAM_MAX_RETRY 5
9923     +
9924     +/* maximum number of branches we support, to avoid memory blowup */
9925     +#define UNIONFS_MAX_BRANCHES 128
9926     +
9927     +/* Operations vectors defined in specific files. */
9928     +extern struct file_operations unionfs_main_fops;
9929     +extern struct file_operations unionfs_dir_fops;
9930     +extern struct inode_operations unionfs_main_iops;
9931     +extern struct inode_operations unionfs_dir_iops;
9932     +extern struct inode_operations unionfs_symlink_iops;
9933     +extern struct super_operations unionfs_sops;
9934     +extern struct dentry_operations unionfs_dops;
9935     +
9936     +/* How long should an entry be allowed to persist */
9937     +#define RDCACHE_JIFFIES (5*HZ)
9938     +
9939     +/* file private data. */
9940     +struct unionfs_file_info {
9941     + int bstart;
9942     + int bend;
9943     + atomic_t generation;
9944     +
9945     + struct unionfs_dir_state *rdstate;
9946     + struct file **lower_files;
9947     + int *saved_branch_ids; /* IDs of branches when file was opened */
9948     +};
9949     +
9950     +/* unionfs inode data in memory */
9951     +struct unionfs_inode_info {
9952     + int bstart;
9953     + int bend;
9954     + atomic_t generation;
9955     + int stale;
9956     + /* Stuff for readdir over NFS. */
9957     + spinlock_t rdlock;
9958     + struct list_head readdircache;
9959     + int rdcount;
9960     + int hashsize;
9961     + int cookie;
9962     +
9963     + /* The lower inodes */
9964     + struct inode **lower_inodes;
9965     + /* to keep track of reads/writes for unlinks before closes */
9966     + atomic_t totalopens;
9967     +
9968     + struct inode vfs_inode;
9969     +};
9970     +
9971     +/* unionfs dentry data in memory */
9972     +struct unionfs_dentry_info {
9973     + /*
9974     + * This mutex is used to lock the dentry as soon as we get into a
9975     + * unionfs function from the VFS. Our lock ordering is that children
9976     + * go before their parents.
9977     + */
9978     + struct mutex lock;
9979     + int bstart;
9980     + int bend;
9981     + int bopaque;
9982     + int bcount;
9983     + atomic_t generation;
9984     + struct path *lower_paths;
9985     +};
9986     +
9987     +/* These are the pointers to our various objects. */
9988     +struct unionfs_data {
9989     + struct super_block *sb;
9990     + atomic_t open_files; /* number of open files on branch */
9991     + int branchperms;
9992     + int branch_id; /* unique branch ID at re/mount time */
9993     +};
9994     +
9995     +/* unionfs super-block data in memory */
9996     +struct unionfs_sb_info {
9997     + int bend;
9998     +
9999     + atomic_t generation;
10000     +
10001     + /*
10002     + * This rwsem is used to make sure that a branch management
10003     + * operation...
10004     + * 1) will not begin before all currently in-flight operations
10005     + * complete
10006     + * 2) any new operations do not execute until the currently
10007     + * running branch management operation completes
10008     + */
10009     + struct rw_semaphore rwsem;
10010     + int high_branch_id; /* last unique branch ID given */
10011     + struct unionfs_data *data;
10012     +};
10013     +
10014     +/*
10015     + * structure for making the linked list of entries by readdir on left branch
10016     + * to compare with entries on right branch
10017     + */
10018     +struct filldir_node {
10019     + struct list_head file_list; /* list for directory entries */
10020     + char *name; /* name entry */
10021     + int hash; /* name hash */
10022     + int namelen; /* name len since name is not 0 terminated */
10023     +
10024     + /*
10025     + * we can check for duplicate whiteouts and files in the same branch
10026     + * in order to return -EIO.
10027     + */
10028     + int bindex;
10029     +
10030     + /* is this a whiteout entry? */
10031     + int whiteout;
10032     +
10033     + /* Inline name, so we don't need to separately kmalloc small ones */
10034     + char iname[DNAME_INLINE_LEN_MIN];
10035     +};
10036     +
10037     +/* Directory hash table. */
10038     +struct unionfs_dir_state {
10039     + unsigned int cookie; /* the cookie, based off of rdversion */
10040     + unsigned int offset; /* The entry we have returned. */
10041     + int bindex;
10042     + loff_t dirpos; /* offset within the lower level directory */
10043     + int size; /* How big is the hash table? */
10044     + int hashentries; /* How many entries have been inserted? */
10045     + unsigned long access;
10046     +
10047     + /* This cache list is used when the inode keeps us around. */
10048     + struct list_head cache;
10049     + struct list_head list[0];
10050     +};
10051     +
10052     +/* externs needed for fanout.h or sioq.h */
10053     +extern int unionfs_get_nlinks(const struct inode *inode);
10054     +
10055     +/* include miscellaneous macros */
10056     +#include "fanout.h"
10057     +#include "sioq.h"
10058     +
10059     +/* externs for cache creation/deletion routines */
10060     +extern void unionfs_destroy_filldir_cache(void);
10061     +extern int unionfs_init_filldir_cache(void);
10062     +extern int unionfs_init_inode_cache(void);
10063     +extern void unionfs_destroy_inode_cache(void);
10064     +extern int unionfs_init_dentry_cache(void);
10065     +extern void unionfs_destroy_dentry_cache(void);
10066     +
10067     +/* Initialize and free readdir-specific state. */
10068     +extern int init_rdstate(struct file *file);
10069     +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
10070     + int bindex);
10071     +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
10072     + loff_t fpos);
10073     +extern void free_rdstate(struct unionfs_dir_state *state);
10074     +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
10075     + const char *name, int namelen, int bindex,
10076     + int whiteout);
10077     +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
10078     + const char *name, int namelen);
10079     +
10080     +extern struct dentry **alloc_new_dentries(int objs);
10081     +extern struct unionfs_data *alloc_new_data(int objs);
10082     +
10083     +/* We can only use 32-bits of offset for rdstate --- blech! */
10084     +#define DIREOF (0xfffff)
10085     +#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
10086     +#define MAXRDCOOKIE (0xfff)
10087     +/* Turn an rdstate into an offset. */
10088     +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
10089     +{
10090     + off_t tmp;
10091     +
10092     + tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
10093     + | (buf->offset & DIREOF);
10094     + return tmp;
10095     +}
10096     +
10097     +#define unionfs_read_lock(sb) down_read(&UNIONFS_SB(sb)->rwsem)
10098     +#define unionfs_read_unlock(sb) up_read(&UNIONFS_SB(sb)->rwsem)
10099     +#define unionfs_write_lock(sb) down_write(&UNIONFS_SB(sb)->rwsem)
10100     +#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem)
10101     +
10102     +static inline void unionfs_double_lock_dentry(struct dentry *d1,
10103     + struct dentry *d2)
10104     +{
10105     + if (d2 < d1) {
10106     + struct dentry *tmp = d1;
10107     + d1 = d2;
10108     + d2 = tmp;
10109     + }
10110     + unionfs_lock_dentry(d1);
10111     + unionfs_lock_dentry(d2);
10112     +}
10113     +
10114     +extern int new_dentry_private_data(struct dentry *dentry);
10115     +extern void free_dentry_private_data(struct dentry *dentry);
10116     +extern void update_bstart(struct dentry *dentry);
10117     +
10118     +/*
10119     + * EXTERNALS:
10120     + */
10121     +
10122     +/* replicates the directory structure up to given dentry in given branch */
10123     +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
10124     + const char *name, int bindex);
10125     +extern int make_dir_opaque(struct dentry *dir, int bindex);
10126     +
10127     +/* partial lookup */
10128     +extern int unionfs_partial_lookup(struct dentry *dentry);
10129     +
10130     +/*
10131     + * Pass a unionfs dentry and an index and it will try to create a whiteout
10132     + * in branch 'index'.
10133     + *
10134     + * On error, it will proceed to a branch to the left
10135     + */
10136     +extern int create_whiteout(struct dentry *dentry, int start);
10137     +/* copies a file from dbstart to newbindex branch */
10138     +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
10139     + int newbindex, loff_t size);
10140     +extern int copyup_named_file(struct inode *dir, struct file *file,
10141     + char *name, int bstart, int new_bindex,
10142     + loff_t len);
10143     +/* copies a dentry from dbstart to newbindex branch */
10144     +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
10145     + int bstart, int new_bindex, const char *name,
10146     + int namelen, struct file **copyup_file, loff_t len);
10147     +/* helper functions for post-copyup cleanup */
10148     +extern void unionfs_inherit_mnt(struct dentry *dentry);
10149     +extern void unionfs_purge_extras(struct dentry *dentry);
10150     +
10151     +extern int remove_whiteouts(struct dentry *dentry,
10152     + struct dentry *lower_dentry, int bindex);
10153     +
10154     +extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
10155     + struct unionfs_dir_state *namelist);
10156     +
10157     +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
10158     +extern int check_empty(struct dentry *dentry,
10159     + struct unionfs_dir_state **namelist);
10160     +/* Delete whiteouts from this directory in branch bindex. */
10161     +extern int delete_whiteouts(struct dentry *dentry, int bindex,
10162     + struct unionfs_dir_state *namelist);
10163     +
10164     +/* Re-lookup a lower dentry. */
10165     +extern int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex);
10166     +
10167     +extern void unionfs_reinterpose(struct dentry *this_dentry);
10168     +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
10169     +
10170     +/* Locking functions. */
10171     +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
10172     +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
10173     +
10174     +/* Common file operations. */
10175     +extern int unionfs_file_revalidate(struct file *file, int willwrite);
10176     +extern int unionfs_open(struct inode *inode, struct file *file);
10177     +extern int unionfs_file_release(struct inode *inode, struct file *file);
10178     +extern int unionfs_flush(struct file *file, fl_owner_t id);
10179     +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
10180     + unsigned long arg);
10181     +extern int unionfs_fsync(struct file *file, struct dentry *dentry,
10182     + int datasync);
10183     +extern int unionfs_fasync(int fd, struct file *file, int flag);
10184     +
10185     +/* Inode operations */
10186     +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
10187     + struct inode *new_dir, struct dentry *new_dentry);
10188     +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
10189     +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
10190     +
10191     +extern int __unionfs_d_revalidate_chain(struct dentry *dentry,
10192     + struct nameidata *nd, int willwrite);
10193     +extern int is_newer_lower(const struct dentry *dentry);
10194     +
10195     +/* The values for unionfs_interpose's flag. */
10196     +#define INTERPOSE_DEFAULT 0
10197     +#define INTERPOSE_LOOKUP 1
10198     +#define INTERPOSE_REVAL 2
10199     +#define INTERPOSE_REVAL_NEG 3
10200     +#define INTERPOSE_PARTIAL 4
10201     +
10202     +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
10203     + struct super_block *sb, int flag);
10204     +
10205     +#ifdef CONFIG_UNION_FS_XATTR
10206     +/* Extended attribute functions. */
10207     +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
10208     +
10209     +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
10210     + void *value, size_t size);
10211     +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
10212     +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
10213     + size_t size);
10214     +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10215     + const void *value, size_t size, int flags);
10216     +#endif /* CONFIG_UNION_FS_XATTR */
10217     +
10218     +/* The root directory is unhashed, but isn't deleted. */
10219     +static inline int d_deleted(struct dentry *d)
10220     +{
10221     + return d_unhashed(d) && (d != d->d_sb->s_root);
10222     +}
10223     +
10224     +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
10225     + struct nameidata *nd, int lookupmode);
10226     +
10227     +/* unionfs_permission, check if we should bypass error to facilitate copyup */
10228     +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10229     +
10230     +/* unionfs_open, check if we need to copyup the file */
10231     +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10232     +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10233     +
10234     +static inline int branchperms(const struct super_block *sb, int index)
10235     +{
10236     + BUG_ON(index < 0);
10237     + return UNIONFS_SB(sb)->data[index].branchperms;
10238     +}
10239     +
10240     +static inline int set_branchperms(struct super_block *sb, int index, int perms)
10241     +{
10242     + BUG_ON(index < 0);
10243     + UNIONFS_SB(sb)->data[index].branchperms = perms;
10244     + return perms;
10245     +}
10246     +
10247     +/* Is the branch at this index read-only, per its branch permissions? */
10248     +static inline int is_robranch_super(const struct super_block *sb, int index)
10249     +{
10250     + int ret;
10251     +
10252     + ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
10253     + return ret;
10254     +}
10255     +
10256     +/* Is this file on a read-only branch? */
10257     +static inline int is_robranch_idx(const struct dentry *dentry, int index)
10258     +{
10259     + int err = 0;
10260     +
10261     + BUG_ON(index < 0);
10262     +
10263     + if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ||
10264     + IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode))
10265     + err = -EROFS;
10266     + return err;
10267     +}
10268     +
10269     +static inline int is_robranch(const struct dentry *dentry)
10270     +{
10271     + int index;
10272     +
10273     + index = UNIONFS_D(dentry)->bstart;
10274     + BUG_ON(index < 0);
10275     +
10276     + return is_robranch_idx(dentry, index);
10277     +}
10278     +
10279     +/* What do we use for whiteouts. */
10280     +#define UNIONFS_WHPFX ".wh."
10281     +#define UNIONFS_WHLEN 4
10282     +/*
10283     + * If a directory contains this file, then it is opaque. We start with the
10284     + * .wh. flag so that it is blocked by lookup.
10285     + */
10286     +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10287     +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10288     +
10289     +#ifndef DEFAULT_POLLMASK
10290     +#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
10291     +#endif /* not DEFAULT_POLLMASK */
10292     +
10293     +/*
10294     + * EXTERNALS:
10295     + */
10296     +extern char *alloc_whname(const char *name, int len);
10297     +extern int check_branch(struct nameidata *nd);
10298     +extern int __parse_branch_mode(const char *name);
10299     +extern int parse_branch_mode(const char *name);
10300     +
10301     +/*
10302     + * These two functions are here because it is kind of daft to copy and paste
10303     + * the contents of the two functions to 32+ places in unionfs
10304     + */
10305     +static inline struct dentry *lock_parent(struct dentry *dentry)
10306     +{
10307     + struct dentry *dir = dget(dentry->d_parent);
10308     +
10309     + mutex_lock(&dir->d_inode->i_mutex);
10310     + return dir;
10311     +}
10312     +
10313     +static inline void unlock_dir(struct dentry *dir)
10314     +{
10315     + mutex_unlock(&dir->d_inode->i_mutex);
10316     + dput(dir);
10317     +}
10318     +
10319     +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10320     + int bindex)
10321     +{
10322     + struct vfsmount *mnt;
10323     +
10324     + if (!dentry) {
10325     + if (bindex < 0)
10326     + return NULL;
10327     + if (!dentry && bindex >= 0) {
10328     +#ifdef UNIONFS_DEBUG
10329     + printk(KERN_DEBUG
10330     + "unionfs_mntget: dentry=%p bindex=%d\n",
10331     + dentry, bindex);
10332     +#endif /* UNIONFS_DEBUG */
10333     + return NULL;
10334     + }
10335     + }
10336     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10337     + if (!mnt) {
10338     + if (bindex < 0)
10339     + return NULL;
10340     + if (!mnt && bindex >= 0) {
10341     +#ifdef UNIONFS_DEBUG
10342     + printk(KERN_DEBUG
10343     + "unionfs_mntget: mnt=%p bindex=%d\n",
10344     + mnt, bindex);
10345     +#endif /* UNIONFS_DEBUG */
10346     + return NULL;
10347     + }
10348     + }
10349     + mnt = mntget(mnt);
10350     + return mnt;
10351     +}
10352     +
10353     +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10354     +{
10355     + struct vfsmount *mnt;
10356     +
10357     + if (!dentry) {
10358     + if (bindex < 0)
10359     + return;
10360     + if (!dentry && bindex >= 0) {
10361     +#ifdef UNIONFS_DEBUG
10362     + printk(KERN_DEBUG
10363     + "unionfs_mntput: dentry=%p bindex=%d\n",
10364     + dentry, bindex);
10365     +#endif /* UNIONFS_DEBUG */
10366     + return;
10367     + }
10368     + }
10369     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10370     + if (!mnt) {
10371     + if (bindex < 0)
10372     + return;
10373     + if (!mnt && bindex >= 0) {
10374     +#ifdef UNIONFS_DEBUG
10375     + /*
10376     + * Directories can have NULL lower objects in
10377     + * between start/end, but NOT if at the start/end
10378     + * range. We cannot verify that this dentry is a
10379     + * type=DIR, because it may already be a negative
10380     + * dentry. But if dbstart is greater than dbend, we
10381     + * know that this couldn't have been a regular file:
10382     + * it had to have been a directory.
10383     + */
10384     + if (!(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10385     + printk(KERN_WARNING
10386     + "unionfs_mntput: mnt=%p bindex=%d\n",
10387     + mnt, bindex);
10388     +#endif /* UNIONFS_DEBUG */
10389     + return;
10390     + }
10391     + }
10392     + mntput(mnt);
10393     +}
10394     +
10395     +#ifdef UNIONFS_DEBUG
10396     +
10397     +/* useful for tracking code reachability */
10398     +#define UDBG printk("DBG:%s:%s:%d\n",__FILE__,__FUNCTION__,__LINE__)
10399     +
10400     +#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10401     + __FILE__,__FUNCTION__,__LINE__)
10402     +#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10403     + __FILE__,__FUNCTION__,__LINE__)
10404     +#define unionfs_check_file(f) __unionfs_check_file((f), \
10405     + __FILE__,__FUNCTION__,__LINE__)
10406     +#define show_branch_counts(sb) __show_branch_counts((sb), \
10407     + __FILE__,__FUNCTION__,__LINE__)
10408     +#define show_inode_times(i) __show_inode_times((i), \
10409     + __FILE__,__FUNCTION__,__LINE__)
10410     +#define show_dinode_times(d) __show_dinode_times((d), \
10411     + __FILE__,__FUNCTION__,__LINE__)
10412     +#define show_inode_counts(i) __show_inode_counts((i), \
10413     + __FILE__,__FUNCTION__,__LINE__)
10414     +
10415     +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10416     + const char *fxn, int line);
10417     +extern void __unionfs_check_dentry(const struct dentry *dentry,
10418     + const char *fname, const char *fxn,
10419     + int line);
10420     +extern void __unionfs_check_file(const struct file *file,
10421     + const char *fname, const char *fxn, int line);
10422     +extern void __show_branch_counts(const struct super_block *sb,
10423     + const char *file, const char *fxn, int line);
10424     +extern void __show_inode_times(const struct inode *inode,
10425     + const char *file, const char *fxn, int line);
10426     +extern void __show_dinode_times(const struct dentry *dentry,
10427     + const char *file, const char *fxn, int line);
10428     +extern void __show_inode_counts(const struct inode *inode,
10429     + const char *file, const char *fxn, int line);
10430     +
10431     +#else /* not UNIONFS_DEBUG */
10432     +
10433     +/* we leave useful hooks for these check functions throughout the code */
10434     +#define unionfs_check_inode(i)
10435     +#define unionfs_check_dentry(d)
10436     +#define unionfs_check_file(f)
10437     +#define show_branch_counts(sb)
10438     +#define show_inode_times(i)
10439     +#define show_dinode_times(d)
10440     +#define show_inode_counts(i)
10441     +
10442     +#endif /* not UNIONFS_DEBUG */
10443     +
10444     +#endif /* not _UNION_H_ */
10445     diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10446     new file mode 100644
10447     index 0000000..47bebab
10448     --- /dev/null
10449     +++ b/fs/unionfs/unlink.c
10450     @@ -0,0 +1,192 @@
10451     +/*
10452     + * Copyright (c) 2003-2007 Erez Zadok
10453     + * Copyright (c) 2003-2006 Charles P. Wright
10454     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10455     + * Copyright (c) 2005-2006 Junjiro Okajima
10456     + * Copyright (c) 2005 Arun M. Krishnakumar
10457     + * Copyright (c) 2004-2006 David P. Quigley
10458     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10459     + * Copyright (c) 2003 Puja Gupta
10460     + * Copyright (c) 2003 Harikesavan Krishnan
10461     + * Copyright (c) 2003-2007 Stony Brook University
10462     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10463     + *
10464     + * This program is free software; you can redistribute it and/or modify
10465     + * it under the terms of the GNU General Public License version 2 as
10466     + * published by the Free Software Foundation.
10467     + */
10468     +
10469     +#include "union.h"
10470     +
10471     +/* unlink a file by creating a whiteout */
10472     +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
10473     +{
10474     + struct dentry *lower_dentry;
10475     + struct dentry *lower_dir_dentry;
10476     + int bindex;
10477     + int err = 0;
10478     +
10479     + if ((err = unionfs_partial_lookup(dentry)))
10480     + goto out;
10481     +
10482     + bindex = dbstart(dentry);
10483     +
10484     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10485     + if (!lower_dentry)
10486     + goto out;
10487     +
10488     + lower_dir_dentry = lock_parent(lower_dentry);
10489     +
10490     + /* avoid destroying the lower inode if the file is in use */
10491     + dget(lower_dentry);
10492     + if (!(err = is_robranch_super(dentry->d_sb, bindex)))
10493     + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
10494     + /* if vfs_unlink succeeded, update our inode's times */
10495     + if (!err)
10496     + unionfs_copy_attr_times(dentry->d_inode);
10497     + dput(lower_dentry);
10498     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10499     + unlock_dir(lower_dir_dentry);
10500     +
10501     + if (err && !IS_COPYUP_ERR(err))
10502     + goto out;
10503     +
10504     + if (err) {
10505     + if (dbstart(dentry) == 0)
10506     + goto out;
10507     + err = create_whiteout(dentry, dbstart(dentry) - 1);
10508     + } else if (dbopaque(dentry) != -1)
10509     + /* There is a lower-priority file with the same name. */
10510     + err = create_whiteout(dentry, dbopaque(dentry));
10511     + else
10512     + err = create_whiteout(dentry, dbstart(dentry));
10513     +
10514     +out:
10515     + if (!err)
10516     + dentry->d_inode->i_nlink--;
10517     +
10518     + /* We don't want to leave negative leftover dentries for revalidate. */
10519     + if (!err && (dbopaque(dentry) != -1))
10520     + update_bstart(dentry);
10521     +
10522     + return err;
10523     +}
10524     +
10525     +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10526     +{
10527     + int err = 0;
10528     +
10529     + unionfs_read_lock(dentry->d_sb);
10530     + unionfs_lock_dentry(dentry);
10531     +
10532     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10533     + err = -ESTALE;
10534     + goto out;
10535     + }
10536     + unionfs_check_dentry(dentry);
10537     +
10538     + err = unionfs_unlink_whiteout(dir, dentry);
10539     + /* call d_drop so the system "forgets" about us */
10540     + if (!err) {
10541     + if (!S_ISDIR(dentry->d_inode->i_mode))
10542     + unionfs_purge_extras(dentry);
10543     + d_drop(dentry);
10544     + /*
10545     + * if unlink/whiteout succeeded, parent dir mtime has
10546     + * changed
10547     + */
10548     + unionfs_copy_attr_times(dir);
10549     + }
10550     +
10551     +out:
10552     + if (!err) {
10553     + unionfs_check_dentry(dentry);
10554     + unionfs_check_inode(dir);
10555     + }
10556     + unionfs_unlock_dentry(dentry);
10557     + unionfs_read_unlock(dentry->d_sb);
10558     + return err;
10559     +}
10560     +
10561     +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10562     + struct unionfs_dir_state *namelist)
10563     +{
10564     + int err;
10565     + struct dentry *lower_dentry;
10566     + struct dentry *lower_dir_dentry = NULL;
10567     +
10568     + /* Here we need to remove whiteout entries. */
10569     + err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10570     + if (err)
10571     + goto out;
10572     +
10573     + lower_dentry = unionfs_lower_dentry(dentry);
10574     +
10575     + lower_dir_dentry = lock_parent(lower_dentry);
10576     +
10577     + /* avoid destroying the lower inode if the file is in use */
10578     + dget(lower_dentry);
10579     + if (!(err = is_robranch(dentry)))
10580     + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10581     + dput(lower_dentry);
10582     +
10583     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10584     + /* propagate number of hard-links */
10585     + dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10586     +
10587     +out:
10588     + if (lower_dir_dentry)
10589     + unlock_dir(lower_dir_dentry);
10590     + return err;
10591     +}
10592     +
10593     +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10594     +{
10595     + int err = 0;
10596     + struct unionfs_dir_state *namelist = NULL;
10597     +
10598     + unionfs_read_lock(dentry->d_sb);
10599     + unionfs_lock_dentry(dentry);
10600     +
10601     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10602     + err = -ESTALE;
10603     + goto out;
10604     + }
10605     + unionfs_check_dentry(dentry);
10606     +
10607     + /* check if this unionfs directory is empty or not */
10608     + err = check_empty(dentry, &namelist);
10609     + if (err)
10610     + goto out;
10611     +
10612     + err = unionfs_rmdir_first(dir, dentry, namelist);
10613     + /* create whiteout */
10614     + if (!err)
10615     + err = create_whiteout(dentry, dbstart(dentry));
10616     + else {
10617     + int new_err;
10618     +
10619     + if (dbstart(dentry) == 0)
10620     + goto out;
10621     +
10622     + /* exit if the error returned was NOT -EROFS */
10623     + if (!IS_COPYUP_ERR(err))
10624     + goto out;
10625     +
10626     + new_err = create_whiteout(dentry, dbstart(dentry) - 1);
10627     + if (new_err != -EEXIST)
10628     + err = new_err;
10629     + }
10630     +
10631     +out:
10632     + /* call d_drop so the system "forgets" about us */
10633     + if (!err)
10634     + d_drop(dentry);
10635     +
10636     + if (namelist)
10637     + free_rdstate(namelist);
10638     +
10639     + unionfs_unlock_dentry(dentry);
10640     + unionfs_read_unlock(dentry->d_sb);
10641     + return err;
10642     +}
10643     diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
10644     new file mode 100644
10645     index 0000000..ee7da13
10646     --- /dev/null
10647     +++ b/fs/unionfs/xattr.c
10648     @@ -0,0 +1,153 @@
10649     +/*
10650     + * Copyright (c) 2003-2007 Erez Zadok
10651     + * Copyright (c) 2003-2006 Charles P. Wright
10652     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10653     + * Copyright (c) 2005-2006 Junjiro Okajima
10654     + * Copyright (c) 2005 Arun M. Krishnakumar
10655     + * Copyright (c) 2004-2006 David P. Quigley
10656     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10657     + * Copyright (c) 2003 Puja Gupta
10658     + * Copyright (c) 2003 Harikesavan Krishnan
10659     + * Copyright (c) 2003-2007 Stony Brook University
10660     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10661     + *
10662     + * This program is free software; you can redistribute it and/or modify
10663     + * it under the terms of the GNU General Public License version 2 as
10664     + * published by the Free Software Foundation.
10665     + */
10666     +
10667     +#include "union.h"
10668     +
10669     +/* This is lifted from fs/xattr.c */
10670     +void *unionfs_xattr_alloc(size_t size, size_t limit)
10671     +{
10672     + void *ptr;
10673     +
10674     + if (size > limit)
10675     + return ERR_PTR(-E2BIG);
10676     +
10677     + if (!size) /* size request, no buffer is needed */
10678     + return NULL;
10679     +
10680     + ptr = kmalloc(size, GFP_KERNEL);
10681     + if (!ptr)
10682     + return ERR_PTR(-ENOMEM);
10683     + return ptr;
10684     +}
10685     +
10686     +/*
10687     + * BKL held by caller.
10688     + * dentry->d_inode->i_mutex locked
10689     + */
10690     +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
10691     + size_t size)
10692     +{
10693     + struct dentry *lower_dentry = NULL;
10694     + int err = -EOPNOTSUPP;
10695     +
10696     + unionfs_read_lock(dentry->d_sb);
10697     + unionfs_lock_dentry(dentry);
10698     +
10699     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10700     + err = -ESTALE;
10701     + goto out;
10702     + }
10703     +
10704     + lower_dentry = unionfs_lower_dentry(dentry);
10705     +
10706     + err = vfs_getxattr(lower_dentry, (char*) name, value, size);
10707     +
10708     +out:
10709     + unionfs_unlock_dentry(dentry);
10710     + unionfs_check_dentry(dentry);
10711     + unionfs_read_unlock(dentry->d_sb);
10712     + return err;
10713     +}
10714     +
10715     +/*
10716     + * BKL held by caller.
10717     + * dentry->d_inode->i_mutex locked
10718     + */
10719     +int unionfs_setxattr(struct dentry *dentry, const char *name,
10720     + const void *value, size_t size, int flags)
10721     +{
10722     + struct dentry *lower_dentry = NULL;
10723     + int err = -EOPNOTSUPP;
10724     +
10725     + unionfs_read_lock(dentry->d_sb);
10726     + unionfs_lock_dentry(dentry);
10727     +
10728     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10729     + err = -ESTALE;
10730     + goto out;
10731     + }
10732     +
10733     + lower_dentry = unionfs_lower_dentry(dentry);
10734     +
10735     + err = vfs_setxattr(lower_dentry, (char*) name, (void*) value,
10736     + size, flags);
10737     +
10738     +out:
10739     + unionfs_unlock_dentry(dentry);
10740     + unionfs_check_dentry(dentry);
10741     + unionfs_read_unlock(dentry->d_sb);
10742     + return err;
10743     +}
10744     +
10745     +/*
10746     + * BKL held by caller.
10747     + * dentry->d_inode->i_mutex locked
10748     + */
10749     +int unionfs_removexattr(struct dentry *dentry, const char *name)
10750     +{
10751     + struct dentry *lower_dentry = NULL;
10752     + int err = -EOPNOTSUPP;
10753     +
10754     + unionfs_read_lock(dentry->d_sb);
10755     + unionfs_lock_dentry(dentry);
10756     +
10757     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10758     + err = -ESTALE;
10759     + goto out;
10760     + }
10761     +
10762     + lower_dentry = unionfs_lower_dentry(dentry);
10763     +
10764     + err = vfs_removexattr(lower_dentry, (char*) name);
10765     +
10766     +out:
10767     + unionfs_unlock_dentry(dentry);
10768     + unionfs_check_dentry(dentry);
10769     + unionfs_read_unlock(dentry->d_sb);
10770     + return err;
10771     +}
10772     +
10773     +/*
10774     + * BKL held by caller.
10775     + * dentry->d_inode->i_mutex locked
10776     + */
10777     +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
10778     +{
10779     + struct dentry *lower_dentry = NULL;
10780     + int err = -EOPNOTSUPP;
10781     + char *encoded_list = NULL;
10782     +
10783     + unionfs_read_lock(dentry->d_sb);
10784     + unionfs_lock_dentry(dentry);
10785     +
10786     + if (!__unionfs_d_revalidate_chain(dentry, NULL, 0)) {
10787     + err = -ESTALE;
10788     + goto out;
10789     + }
10790     +
10791     + lower_dentry = unionfs_lower_dentry(dentry);
10792     +
10793     + encoded_list = list;
10794     + err = vfs_listxattr(lower_dentry, encoded_list, size);
10795     +
10796     +out:
10797     + unionfs_unlock_dentry(dentry);
10798     + unionfs_check_dentry(dentry);
10799     + unionfs_read_unlock(dentry->d_sb);
10800     + return err;
10801     +}
10802     diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
10803     index bb516ce..2599c5b 100644
10804     --- a/include/linux/fs_stack.h
10805     +++ b/include/linux/fs_stack.h
10806     @@ -1,17 +1,28 @@
10807     +/*
10808     + * Copyright (c) 2003-2007 Erez Zadok
10809     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10810     + * Copyright (c) 2003-2007 Stony Brook University
10811     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10812     + *
10813     + * This program is free software; you can redistribute it and/or modify
10814     + * it under the terms of the GNU General Public License version 2 as
10815     + * published by the Free Software Foundation.
10816     + */
10817     +
10818     #ifndef _LINUX_FS_STACK_H
10819     #define _LINUX_FS_STACK_H
10820    
10821     -/* This file defines generic functions used primarily by stackable
10822     +/*
10823     + * This file defines generic functions used primarily by stackable
10824     * filesystems; none of these functions require i_mutex to be held.
10825     */
10826    
10827     #include <linux/fs.h>
10828    
10829     /* externs for fs/stack.c */
10830     -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
10831     - int (*get_nlinks)(struct inode *));
10832     -
10833     -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
10834     +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
10835     +extern void fsstack_copy_inode_size(struct inode *dst,
10836     + const struct inode *src);
10837    
10838     /* inlines */
10839     static inline void fsstack_copy_attr_atime(struct inode *dest,
10840     @@ -28,4 +39,4 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
10841     dest->i_ctime = src->i_ctime;
10842     }
10843    
10844     -#endif /* _LINUX_FS_STACK_H */
10845     +#endif /* not _LINUX_FS_STACK_H */
10846     diff --git a/include/linux/magic.h b/include/linux/magic.h
10847     index a9c6567..a6751f6 100644
10848     --- a/include/linux/magic.h
10849     +++ b/include/linux/magic.h
10850     @@ -35,6 +35,8 @@
10851     #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
10852     #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
10853    
10854     +#define UNIONFS_SUPER_MAGIC 0xf15f083d
10855     +
10856     #define SMB_SUPER_MAGIC 0x517B
10857     #define USBDEVICE_SUPER_MAGIC 0x9fa2
10858    
10859     diff --git a/include/linux/mm.h b/include/linux/mm.h
10860     index 60e0e4a..c680669 100644
10861     --- a/include/linux/mm.h
10862     +++ b/include/linux/mm.h
10863     @@ -1157,6 +1157,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
10864     void __user *, size_t *, loff_t *);
10865     unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
10866     unsigned long lru_pages);
10867     +extern void drop_pagecache_sb(struct super_block *);
10868     void drop_pagecache(void);
10869     void drop_slab(void);
10870    
10871     diff --git a/include/linux/namei.h b/include/linux/namei.h
10872     index d39a5a6..5e4a22d 100644
10873     --- a/include/linux/namei.h
10874     +++ b/include/linux/namei.h
10875     @@ -3,6 +3,7 @@
10876    
10877     #include <linux/dcache.h>
10878     #include <linux/linkage.h>
10879     +#include <linux/mount.h>
10880    
10881     struct vfsmount;
10882    
10883     @@ -47,6 +48,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
10884     * - internal "there are more path compnents" flag
10885     * - locked when lookup done with dcache_lock held
10886     * - dentry cache is untrusted; force a real lookup
10887     + * - lookup path from given dentry/vfsmount pair
10888     */
10889     #define LOOKUP_FOLLOW 1
10890     #define LOOKUP_DIRECTORY 2
10891     @@ -54,6 +56,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
10892     #define LOOKUP_PARENT 16
10893     #define LOOKUP_NOALT 32
10894     #define LOOKUP_REVAL 64
10895     +#define LOOKUP_ONE 128
10896     /*
10897     * Intent data
10898     */
10899     @@ -81,7 +84,14 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry
10900     extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
10901     extern void release_open_intent(struct nameidata *);
10902    
10903     -extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
10904     +extern struct dentry * lookup_one_len_nd(const char *, struct dentry *,
10905     + int, struct nameidata *);
10906     +
10907     +static inline struct dentry *lookup_one_len(const char *name,
10908     + struct dentry *dir, int len)
10909     +{
10910     + return lookup_one_len_nd(name, dir, len, NULL);
10911     +}
10912    
10913     extern int follow_down(struct vfsmount **, struct dentry **);
10914     extern int follow_up(struct vfsmount **, struct dentry **);
10915     @@ -99,4 +109,16 @@ static inline char *nd_get_link(struct nameidata *nd)
10916     return nd->saved_names[nd->depth];
10917     }
10918    
10919     +static inline void pathget(struct path *path)
10920     +{
10921     + mntget(path->mnt);
10922     + dget(path->dentry);
10923     +}
10924     +
10925     +static inline void pathput(struct path *path)
10926     +{
10927     + dput(path->dentry);
10928     + mntput(path->mnt);
10929     +}
10930     +
10931     #endif /* _LINUX_NAMEI_H */
10932     diff --git a/include/linux/slab.h b/include/linux/slab.h
10933     index 1ef822e..5f54979 100644
10934     --- a/include/linux/slab.h
10935     +++ b/include/linux/slab.h
10936     @@ -72,6 +72,7 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
10937     */
10938     void *__kmalloc(size_t, gfp_t);
10939     void *__kzalloc(size_t, gfp_t);
10940     +void * __must_check krealloc(const void *, size_t, gfp_t);
10941     void kfree(const void *);
10942     unsigned int ksize(const void *);
10943    
10944     diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
10945     new file mode 100644
10946     index 0000000..d13eb48
10947     --- /dev/null
10948     +++ b/include/linux/union_fs.h
10949     @@ -0,0 +1,25 @@
10950     +/*
10951     + * Copyright (c) 2003-2007 Erez Zadok
10952     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10953     + * Copyright (c) 2003-2007 Stony Brook University
10954     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10955     + *
10956     + * This program is free software; you can redistribute it and/or modify
10957     + * it under the terms of the GNU General Public License version 2 as
10958     + * published by the Free Software Foundation.
10959     + */
10960     +
10961     +#ifndef _LINUX_UNION_FS_H
10962     +#define _LINUX_UNION_FS_H
10963     +
10964     +/*
10965     + * DEFINITIONS FOR USER AND KERNEL CODE:
10966     + */
10967     +# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
10968     +# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
10969     +
10970     +/* We don't support normal remount, but unionctl uses it. */
10971     +# define UNIONFS_REMOUNT_MAGIC 0x4a5a4380
10972     +
10973     +#endif /* _LINUX_UNION_FS_H */
10974     +
10975     diff --git a/mm/slab.c b/mm/slab.c
10976     index 4cbac24..d288d8c 100644
10977     --- a/mm/slab.c
10978     +++ b/mm/slab.c
10979     @@ -3736,6 +3736,53 @@ void *__kmalloc(size_t size, gfp_t flags)
10980     EXPORT_SYMBOL(__kmalloc);
10981     #endif
10982    
10983     +
10984     +/**
10985     + * krealloc - reallocate memory. The contents will remain unchanged.
10986     + * @p: object to reallocate memory for.
10987     + * @new_size: how many bytes of memory are required.
10988     + * @flags: the type of memory to allocate.
10989     + *
10990     + * The contents of the object pointed to are preserved up to the
10991     + * lesser of the new and old sizes. If @p is %NULL, krealloc()
10992     + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
10993     + * %NULL pointer, the object pointed to is freed.
10994     + */
10995     +void *krealloc(const void *p, size_t new_size, gfp_t flags)
10996     +{
10997     + struct kmem_cache *cache, *new_cache;
10998     + void *ret;
10999     +
11000     + if (unlikely(!p))
11001     + return kmalloc_track_caller(new_size, flags);
11002     +
11003     + if (unlikely(!new_size)) {
11004     + kfree(p);
11005     + return NULL;
11006     + }
11007     +
11008     + cache = virt_to_cache(p);
11009     + new_cache = __find_general_cachep(new_size, flags);
11010     +
11011     + /*
11012     + * If new size fits in the current cache, bail out.
11013     + */
11014     + if (likely(cache == new_cache))
11015     + return (void *)p;
11016     +
11017     + /*
11018     + * We are on the slow-path here so do not use __cache_alloc
11019     + * because it bloats kernel text.
11020     + */
11021     + ret = kmalloc_track_caller(new_size, flags);
11022     + if (ret) {
11023     + memcpy(ret, p, min(new_size, ksize(p)));
11024     + kfree(p);
11025     + }
11026     + return ret;
11027     +}
11028     +EXPORT_SYMBOL(krealloc);
11029     +
11030     /**
11031     * kmem_cache_free - Deallocate an object
11032     * @cachep: The cache the allocation was from.
11033     diff --git a/mm/slob.c b/mm/slob.c
11034     index c683d35..fec651b 100644
11035     --- a/mm/slob.c
11036     +++ b/mm/slob.c
11037     @@ -181,6 +181,39 @@ void *__kmalloc(size_t size, gfp_t gfp)
11038     }
11039     EXPORT_SYMBOL(__kmalloc);
11040    
11041     +/**
11042     + * krealloc - reallocate memory. The contents will remain unchanged.
11043     + *
11044     + * @p: object to reallocate memory for.
11045     + * @new_size: how many bytes of memory are required.
11046     + * @flags: the type of memory to allocate.
11047     + *
11048     + * The contents of the object pointed to are preserved up to the
11049     + * lesser of the new and old sizes. If @p is %NULL, krealloc()
11050     + * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
11051     + * %NULL pointer, the object pointed to is freed.
11052     + */
11053     +void *krealloc(const void *p, size_t new_size, gfp_t flags)
11054     +{
11055     + void *ret;
11056     +
11057     + if (unlikely(!p))
11058     + return kmalloc_track_caller(new_size, flags);
11059     +
11060     + if (unlikely(!new_size)) {
11061     + kfree(p);
11062     + return NULL;
11063     + }
11064     +
11065     + ret = kmalloc_track_caller(new_size, flags);
11066     + if (ret) {
11067     + memcpy(ret, p, min(new_size, ksize(p)));
11068     + kfree(p);
11069     + }
11070     + return ret;
11071     +}
11072     +EXPORT_SYMBOL(krealloc);
11073     +
11074     void kfree(const void *block)
11075     {
11076     bigblock_t *bb, **last = &bigblocks;