Magellan Linux

Annotation of /trunk/kernel26-magellan-server/patches-2.6.24-r5/0153-2.6.24-unionfs-2.2.3.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 574 - (hide annotations) (download)
Mon Apr 21 17:56:37 2008 UTC (16 years ago) by niro
File size: 324460 byte(s)
- 2.6.24-magellan-r5: updated to linux-2.6.24.5

1 niro 574 diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
2     index 1de155e..b168331 100644
3     --- a/Documentation/filesystems/00-INDEX
4     +++ b/Documentation/filesystems/00-INDEX
5     @@ -96,6 +96,8 @@ udf.txt
6     - info and mount options for the UDF filesystem.
7     ufs.txt
8     - info on the ufs filesystem.
9     +unionfs/
10     + - info on the unionfs filesystem
11     vfat.txt
12     - info on using the VFAT filesystem used in Windows NT and Windows 95
13     vfs.txt
14     diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
15     new file mode 100644
16     index 0000000..96fdf67
17     --- /dev/null
18     +++ b/Documentation/filesystems/unionfs/00-INDEX
19     @@ -0,0 +1,10 @@
20     +00-INDEX
21     + - this file.
22     +concepts.txt
23     + - A brief introduction of concepts.
24     +issues.txt
25     + - A summary of known issues with unionfs.
26     +rename.txt
27     + - Information regarding rename operations.
28     +usage.txt
29     + - Usage information and examples.
30     diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
31     new file mode 100644
32     index 0000000..bed69bd
33     --- /dev/null
34     +++ b/Documentation/filesystems/unionfs/concepts.txt
35     @@ -0,0 +1,213 @@
36     +Unionfs 2.x CONCEPTS:
37     +=====================
38     +
39     +This file describes the concepts needed by a namespace unification file
40     +system.
41     +
42     +
43     +Branch Priority:
44     +================
45     +
46     +Each branch is assigned a unique priority - starting from 0 (highest
47     +priority). No two branches can have the same priority.
48     +
49     +
50     +Branch Mode:
51     +============
52     +
53     +Each branch is assigned a mode - read-write or read-only. This allows
54     +directories on media mounted read-write to be used in a read-only manner.
55     +
56     +
57     +Whiteouts:
58     +==========
59     +
60     +A whiteout removes a file name from the namespace. Whiteouts are needed when
61     +one attempts to remove a file on a read-only branch.
62     +
63     +Suppose we have a two-branch union, where branch 0 is read-write and branch
64     +1 is read-only. And a file 'foo' on branch 1:
65     +
66     +./b0/
67     +./b1/
68     +./b1/foo
69     +
70     +The unified view would simply be:
71     +
72     +./union/
73     +./union/foo
74     +
75     +Since 'foo' is stored on a read-only branch, it cannot be removed. A
76     +whiteout is used to remove the name 'foo' from the unified namespace. Again,
77     +since branch 1 is read-only, the whiteout cannot be created there. So, we
78     +try on a higher priority (lower numerically) branch and create the whiteout
79     +there.
80     +
81     +./b0/
82     +./b0/.wh.foo
83     +./b1/
84     +./b1/foo
85     +
86     +Later, when Unionfs traverses branches (due to lookup or readdir), it
87     +eliminates 'foo' from the namespace (as well as the whiteout itself.)
88     +
89     +
90     +Duplicate Elimination:
91     +======================
92     +
93     +It is possible for files on different branches to have the same name.
94     +Unionfs then has to select which instance of the file to show to the user.
95     +Given the fact that each branch has a priority associated with it, the
96     +simplest solution is to take the instance from the highest priority
97     +(numerically lowest value) and "hide" the others.
98     +
99     +
100     +Copyup:
101     +=======
102     +
103     +When a change is made to the contents of a file's data or meta-data, they
104     +have to be stored somewhere. The best way is to create a copy of the
105     +original file on a branch that is writable, and then redirect the write
106     +through to this copy. The copy must be made on a higher priority branch so
107     +that lookup and readdir return this newer "version" of the file rather than
108     +the original (see duplicate elimination).
109     +
110     +An entire unionfs mount can be read-only or read-write. If it's read-only,
111     +then none of the branches will be written to, even if some of the branches
112     +are physically writeable. If the unionfs mount is read-write, then the
113     +leftmost (highest priority) branch must be writeable (for copyup to take
114     +place); the remaining branches can be any mix of read-write and read-only.
115     +
116     +In a writeable mount, unionfs will create new files/dir in the leftmost
117     +branch. If one tries to modify a file in a read-only branch/media, unionfs
118     +will copyup the file to the leftmost branch and modify it there. If you try
119     +to modify a file from a writeable branch which is not the leftmost branch,
120     +then unionfs will modify it in that branch; this is useful if you, say,
121     +unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
122     +changes to specific package files to remain logically in the directory where
123     +they came from.
124     +
125     +Cache Coherency:
126     +================
127     +
128     +Unionfs users often want to be able to modify files and directories directly
129     +on the lower branches, and have those changes be visible at the Unionfs
130     +level. This means that data (e.g., pages) and meta-data (dentries, inodes,
131     +open files, etc.) have to be synchronized between the upper and lower
132     +layers. In other words, the newest changes from a layer below have to be
133     +propagated to the Unionfs layer above. If the two layers are not in sync, a
134     +cache incoherency ensues, which could lead to application failures and even
135     +oopses. The Linux kernel, however, has a rather limited set of mechanisms
136     +to ensure this inter-layer cache coherency---so Unionfs has to do most of
137     +the hard work on its own.
138     +
139     +Maintaining Invariants:
140     +
141     +The way Unionfs ensures cache coherency is as follows. At each entry point
142     +to a Unionfs file system method, we call a utility function to validate the
143     +primary objects of this method. Generally, we call unionfs_file_revalidate
144     +on open files, and __unionfs_d_revalidate_chain on dentries (which also
145     +validates inodes). These utility functions check to see whether the upper
146     +Unionfs object is in sync with any of the lower objects that it represents.
147     +The checks we perform include whether the Unionfs superblock has a newer
148     +generation number, or if any of the lower objects mtime's or ctime's are
149     +newer. (Note: generation numbers change when branch-management commands are
150     +issued, so in a way, maintaining cache coherency is also very important for
151     +branch-management.) If indeed we determine that any Unionfs object is no
152     +longer in sync with its lower counterparts, then we rebuild that object
153     +similarly to how we do so for branch-management.
154     +
155     +While rebuilding Unionfs's objects, we also purge any page mappings and
156     +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
157     +ensure that Unionfs will re-get the newer data from the lower branches. We
158     +perform this purging only if the Unionfs operation in question is a reading
159     +operation; if Unionfs is performing a data writing operation (e.g., ->write,
160     +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
161     +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
162     +considered more authoritative anyway, as they are newer and will overwrite
163     +any lower pages.
164     +
165     +Unionfs maintains the following important invariant regarding mtime's,
166     +ctime's, and atime's: the upper inode object's times are the max() of all of
167     +the lower ones. For non-directory objects, there's only one object below,
168     +so the mapping is simple; for directory objects, there could be multiple
169     +lower objects and we have to sync up with the newest one of all the lower
170     +ones. This invariant is important to maintain, especially for directories
171     +(besides, we need this to be POSIX compliant). A union could comprise
172     +multiple writable branches, each of which could change. If we don't reflect
173     +the newest possible mtime/ctime, some applications could fail. For example,
174     +NFSv2/v3 exports check for newer directory mtimes on the server to determine
175     +if the client-side attribute cache should be purged.
176     +
177     +To maintain these important invariants, of course, Unionfs carefully
178     +synchronizes upper and lower times in various places. For example, if we
179     +copy-up a file to a top-level branch, the parent directory where the file
180     +was copied up to will now have a new mtime: so after a successful copy-up,
181     +we sync up with the new top-level branch's parent directory mtime.
182     +
183     +Implementation:
184     +
185     +This cache-coherency implementation is efficient because it defers any
186     +synchronizing between the upper and lower layers until absolutely needed.
187     +Consider, for example, a common situation where users perform a lot of lower
188     +changes, such as untarring a whole package. While these take place,
189     +typically the user doesn't access the files via Unionfs; only after the
190     +lower changes are done, does the user try to access the lower files. With
191     +our cache-coherency implementation, the entirety of the changes to the lower
192     +branches will not result in a single CPU cycle spent at the Unionfs level
193     +until the user invokes a system call that goes through Unionfs.
194     +
195     +We have considered two alternate cache-coherency designs. (1) Using the
196     +dentry/inode notify functionality to register interest in finding out about
197     +any lower changes. This is a somewhat limited and also a heavy-handed
198     +approach which could result in many notifications to the Unionfs layer upon
199     +each small change at the lower layer (imagine a file being modified multiple
200     +times in rapid succession). (2) Rewriting the VFS to support explicit
201     +callbacks from lower objects to upper objects. We began exploring such an
202     +implementation, but found it to be very complicated--it would have resulted
203     +in massive VFS/MM changes which are unlikely to be accepted by the LKML
204     +community. We therefore believe that our current cache-coherency design and
205     +implementation represent the best approach at this time.
206     +
207     +Limitations:
208     +
209     +Our implementation works in that as long as a user process will have caused
210     +Unionfs to be called, directly or indirectly, even to just do
211     +->d_revalidate; then we will have purged the current Unionfs data and the
212     +process will see the new data. For example, a process that continually
213     +re-reads the same file's data will see the NEW data as soon as the lower
214     +file had changed, upon the next read(2) syscall (even if the file is still
215     +open!) However, this doesn't work when the process re-reads the open file's
216     +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
217     +it). Once we respond to ->readpage(s), then the kernel maps the page into
218     +the process's address space and there doesn't appear to be a way to force
219     +the kernel to invalidate those pages/mappings, and force the process to
220     +re-issue ->readpage. If there's a way to invalidate active mappings and
221     +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
222     +the trick).
223     +
224     +Our current Unionfs code has to perform many file-revalidation calls. It
225     +would be really nice if the VFS would export an optional file system hook
226     +->file_revalidate (similarly to dentry->d_revalidate) that will be called
227     +before each VFS op that has a "struct file" in it.
228     +
229     +Certain file systems have micro-second granularity (or better) for inode
230     +times, and asynchronous actions could cause those times to change with some
231     +small delay. In such cases, Unionfs may see a changed inode time that only
232     +differs by a tiny fraction of a second: such a change may be a false
233     +positive indication that the lower object has changed, whereas if unionfs
234     +waits a little longer, that false indication will not be seen. (These false
235     +positives are harmless, because they would at most cause unionfs to
236     +re-validate an object that may need no revalidation, and print a debugging
237     +message that clutters the console/logs.) Therefore, to minimize the chances
238     +of these situations, we delay the detection of changed times by a small
239     +factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
240     +seconds, as does NFS). This means that we will detect the change, only a
241     +couple of seconds later, if indeed the time change persists in the lower
242     +file object. This delayed detection has an added performance benefit: we
243     +reduce the number of times that unionfs has to revalidate objects, in case
244     +there's a lot of concurrent activity on both the upper and lower objects,
245     +for the same file(s). Lastly, this delayed time attribute detection is
246     +similar to how NFS clients operate (e.g., acregmin).
247     +
248     +For more information, see <http://unionfs.filesystems.org/>.
249     diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
250     new file mode 100644
251     index 0000000..f4b7e7e
252     --- /dev/null
253     +++ b/Documentation/filesystems/unionfs/issues.txt
254     @@ -0,0 +1,28 @@
255     +KNOWN Unionfs 2.x ISSUES:
256     +=========================
257     +
258     +1. Unionfs should not use lookup_one_len() on the underlying f/s as it
259     + confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
260     + lower file-system, this eliminates part of the problem. The remaining
261     + calls to lookup_one_len may need to be changed to pass an intent. We are
262     + currently introducing VFS changes to fs/namei.c's do_path_lookup() to
263     + allow proper file lookup and opening in stackable file systems.
264     +
265     +2. Lockdep (a debugging feature) isn't aware of stacking, and so it
266     + incorrectly complains about locking problems. The problem boils down to
267     + this: Lockdep considers all objects of a certain type to be in the same
268     + class, for example, all inodes. Lockdep doesn't like to see a lock held
269     + on two inodes within the same task, and warns that it could lead to a
270     + deadlock. However, stackable file systems do precisely that: they lock
271     + an upper object, and then a lower object, in a strict order to avoid
272     + locking problems; in addition, Unionfs, as a fan-out file system, may
273     + have to lock several lower inodes. We are currently looking into Lockdep
274     + to see how to make it aware of stackable file systems. For now, we
275     + temporarily disable lockdep when calling vfs methods on lower objects,
276     + but only for those places where lockdep complained. While this solution
277     + may seem unclean, it is not without precedent: other places in the kernel
278     + also do similar temporary disabling, of course after carefully having
279     + checked that it is the right thing to do. Anyway, if you get any warnings
280     + from Lockdep, please report them to the Unionfs maintainers.
281     +
282     +For more information, see <http://unionfs.filesystems.org/>.
283     diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
284     new file mode 100644
285     index 0000000..e20bb82
286     --- /dev/null
287     +++ b/Documentation/filesystems/unionfs/rename.txt
288     @@ -0,0 +1,31 @@
289     +Rename is a complex beast. The following table shows which rename(2) operations
290     +should succeed and which should fail.
291     +
292     +o: success
293     +E: error (either unionfs or vfs)
294     +X: EXDEV
295     +
296     +none = file does not exist
297     +file = file is a file
298     +dir = file is an empty directory
299     +child= file is a non-empty directory
300     +wh = file is a directory containing only whiteouts; this makes it logically
301     + empty
302     +
303     + none file dir child wh
304     +file o o E E E
305     +dir o E o E o
306     +child X E X E X
307     +wh o E o E o
308     +
309     +
310     +Renaming directories:
311     +=====================
312     +
313     +Whenever an empty (either physically or logically) directory is being renamed,
314     +the following sequence of events should take place:
315     +
316     +1) Remove whiteouts from both source and destination directory
317     +2) Rename source to destination
318     +3) Make destination opaque to prevent anything under it from showing up
319     +
320     diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
321     new file mode 100644
322     index 0000000..1adde69
323     --- /dev/null
324     +++ b/Documentation/filesystems/unionfs/usage.txt
325     @@ -0,0 +1,134 @@
326     +Unionfs is a stackable unification file system, which can appear to merge
327     +the contents of several directories (branches), while keeping their physical
328     +content separate. Unionfs is useful for unified source tree management,
329     +merged contents of split CD-ROM, merged separate software package
330     +directories, data grids, and more. Unionfs allows any mix of read-only and
331     +read-write branches, as well as insertion and deletion of branches anywhere
332     +in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
333     +duplicates, partial-error conditions, and more.
334     +
335     +GENERAL SYNTAX
336     +==============
337     +
338     +# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
339     +
340     +OPTIONS can be any legal combination of:
341     +
342     +- ro # mount file system read-only
343     +- rw # mount file system read-write
344     +- remount # remount the file system (see Branch Management below)
345     +- incgen # increment generation no. (see Cache Consistency below)
346     +
347     +BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
348     +option, or (2) a list of individual branch manipulation commands, combined
349     +with the "remount" option, and is further described in the "Branch
350     +Management" section below.
351     +
352     +The syntax for the "dirs=" mount option is:
353     +
354     + dirs=branch[=ro|=rw][:...]
355     +
356     +The "dirs=" option takes a colon-delimited list of directories to compose
357     +the union, with an optional branch mode for each of those directories.
358     +Directories that come earlier (specified first, on the left) in the list
359     +have a higher precedence than those which come later. Additionally,
360     +read-only or read-write permissions of the branch can be specified by
361     +appending =ro or =rw (default) to each directory. See the Copyup section in
362     +concepts.txt, for a description of Unionfs's behavior when mixing read-only
363     +and read-write branches and mounts.
364     +
365     +Syntax:
366     +
367     + dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
368     +
369     +Example:
370     +
371     + dirs=/writable_branch=rw:/read-only_branch=ro
372     +
373     +
374     +BRANCH MANAGEMENT
375     +=================
376     +
377     +Once you mount your union for the first time, using the "dirs=" option, you
378     +can then change the union's overall mode or reconfigure the branches, using
379     +the remount option, as follows.
380     +
381     +To downgrade a union from read-write to read-only:
382     +
383     +# mount -t unionfs -o remount,ro none MOUNTPOINT
384     +
385     +To upgrade a union from read-only to read-write:
386     +
387     +# mount -t unionfs -o remount,rw none MOUNTPOINT
388     +
389     +To delete a branch /foo, regardless where it is in the current union:
390     +
391     +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
392     +
393     +To insert (add) a branch /foo before /bar:
394     +
395     +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
396     +
397     +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
398     +
399     +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
400     +
401     +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
402     +new highest-priority branch), you can use the above syntax, or use a
403     +shorthand version as follows:
404     +
405     +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
406     +
407     +To append a branch to the very end (new lowest-priority branch):
408     +
409     +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
410     +
411     +To append a branch to the very end (new lowest-priority branch), in
412     +read-only mode:
413     +
414     +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
415     +
416     +Finally, to change the mode of one existing branch, say /foo, from read-only
417     +to read-write, and change /bar from read-write to read-only:
418     +
419     +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
420     +
421     +Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
422     +then Unionfs won't have any writable place for copyups to take place.
423     +Moreover, the VFS can get confused when it tries to modify something in a
424     +file system mounted read-write, but isn't permitted to write to it.
425     +Instead, you should set the whole union as readonly, as described above.
426     +If, however, you must set the leftmost branch as readonly, perhaps so you
427     +can get a snapshot of it at a point in time, then you should insert a new
428     +writable top-level branch, and mark the one you want as readonly. This can
429     +be accomplished as follows, assuming that /foo is your current leftmost
430     +branch:
431     +
432     +# mount -t tmpfs -o size=NNN /new
433     +# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
434     +<do what you want safely in /foo>
435     +# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
436     +<check if there's anything in /new you want to preserve>
437     +# umount /new
438     +
439     +CACHE CONSISTENCY
440     +=================
441     +
442     +If you modify any file on any of the lower branches directly, while there is
443     +a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
444     +to purge its caches and re-get the objects. To do that, you have to
445     +increment the generation number of the superblock using the following
446     +command:
447     +
448     +# mount -t unionfs -o remount,incgen none MOUNTPOINT
449     +
450     +Note that the older way of incrementing the generation number using an
451     +ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general
452     +are not encouraged. Plus, an ioctl is a per-file concept, whereas the
453     +generation number is a per-file-system concept. Worse, such an ioctl
454     +requires an open file, which then has to be invalidated by the very nature
455     +of the generation number increase (read: the old generation increase ioctl
456     +was pretty racy).
457     +
458     +
459     +For more information, see <http://unionfs.filesystems.org/>.
460     diff --git a/MAINTAINERS b/MAINTAINERS
461     index 2340cfb..19cbc87 100644
462     --- a/MAINTAINERS
463     +++ b/MAINTAINERS
464     @@ -3812,6 +3812,15 @@ L: linux-kernel@vger.kernel.org
465     W: http://www.kernel.dk
466     S: Maintained
467    
468     +UNIONFS
469     +P: Erez Zadok
470     +M: ezk@cs.sunysb.edu
471     +P: Josef "Jeff" Sipek
472     +M: jsipek@cs.sunysb.edu
473     +L: unionfs@filesystems.org
474     +W: http://unionfs.filesystems.org
475     +S: Maintained
476     +
477     USB ACM DRIVER
478     P: Oliver Neukum
479     M: oliver@neukum.name
480     diff --git a/fs/Kconfig b/fs/Kconfig
481     index 781b47d..7941db2 100644
482     --- a/fs/Kconfig
483     +++ b/fs/Kconfig
484     @@ -1041,6 +1041,47 @@ config CONFIGFS_FS
485    
486     endmenu
487    
488     +menu "Layered filesystems"
489     +
490     +config ECRYPT_FS
491     + tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
492     + depends on EXPERIMENTAL && KEYS && CRYPTO && NET
493     + help
494     + Encrypted filesystem that operates on the VFS layer. See
495     + <file:Documentation/filesystems/ecryptfs.txt> to learn more about
496     + eCryptfs. Userspace components are required and can be
497     + obtained from <http://ecryptfs.sf.net>.
498     +
499     + To compile this file system support as a module, choose M here: the
500     + module will be called ecryptfs.
501     +
502     +config UNION_FS
503     + tristate "Union file system (EXPERIMENTAL)"
504     + depends on EXPERIMENTAL
505     + help
506     + Unionfs is a stackable unification file system, which appears to
507     + merge the contents of several directories (branches), while keeping
508     + their physical content separate.
509     +
510     + See <http://unionfs.filesystems.org> for details.
511     +
512     +config UNION_FS_XATTR
513     + bool "Unionfs extended attributes"
514     + depends on UNION_FS
515     + help
516     + Extended attributes are name:value pairs associated with inodes by
517     + the kernel or by users (see the attr(5) manual page).
518     +
519     + If unsure, say N.
520     +
521     +config UNION_FS_DEBUG
522     + bool "Debug Unionfs"
523     + depends on UNION_FS
524     + help
525     + If you say Y here, you can turn on debugging output from Unionfs.
526     +
527     +endmenu
528     +
529     menu "Miscellaneous filesystems"
530    
531     config ADFS_FS
532     @@ -1093,18 +1134,6 @@ config AFFS_FS
533     To compile this file system support as a module, choose M here: the
534     module will be called affs. If unsure, say N.
535    
536     -config ECRYPT_FS
537     - tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
538     - depends on EXPERIMENTAL && KEYS && CRYPTO && NET
539     - help
540     - Encrypted filesystem that operates on the VFS layer. See
541     - <file:Documentation/filesystems/ecryptfs.txt> to learn more about
542     - eCryptfs. Userspace components are required and can be
543     - obtained from <http://ecryptfs.sf.net>.
544     -
545     - To compile this file system support as a module, choose M here: the
546     - module will be called ecryptfs.
547     -
548     config HFS_FS
549     tristate "Apple Macintosh file system support (EXPERIMENTAL)"
550     depends on BLOCK && EXPERIMENTAL
551     diff --git a/fs/Makefile b/fs/Makefile
552     index 500cf15..e202288 100644
553     --- a/fs/Makefile
554     +++ b/fs/Makefile
555     @@ -118,3 +118,4 @@ obj-$(CONFIG_HPPFS) += hppfs/
556     obj-$(CONFIG_DEBUG_FS) += debugfs/
557     obj-$(CONFIG_OCFS2_FS) += ocfs2/
558     obj-$(CONFIG_GFS2_FS) += gfs2/
559     +obj-$(CONFIG_UNION_FS) += unionfs/
560     diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
561     index cb20b96..a8c1686 100644
562     --- a/fs/ecryptfs/dentry.c
563     +++ b/fs/ecryptfs/dentry.c
564     @@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
565     struct inode *lower_inode =
566     ecryptfs_inode_to_lower(dentry->d_inode);
567    
568     - fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
569     + fsstack_copy_attr_all(dentry->d_inode, lower_inode);
570     }
571     out:
572     return rc;
573     diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
574     index 5a71918..89e8560 100644
575     --- a/fs/ecryptfs/inode.c
576     +++ b/fs/ecryptfs/inode.c
577     @@ -576,9 +576,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
578     lower_new_dir_dentry->d_inode, lower_new_dentry);
579     if (rc)
580     goto out_lock;
581     - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
582     + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
583     if (new_dir != old_dir)
584     - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
585     + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
586     out_lock:
587     unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
588     dput(lower_new_dentry->d_parent);
589     @@ -912,7 +912,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
590    
591     rc = notify_change(lower_dentry, ia);
592     out:
593     - fsstack_copy_attr_all(inode, lower_inode, NULL);
594     + fsstack_copy_attr_all(inode, lower_inode);
595     return rc;
596     }
597    
598     diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
599     index e5580bc..6276cdf 100644
600     --- a/fs/ecryptfs/main.c
601     +++ b/fs/ecryptfs/main.c
602     @@ -211,7 +211,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
603     d_add(dentry, inode);
604     else
605     d_instantiate(dentry, inode);
606     - fsstack_copy_attr_all(inode, lower_inode, NULL);
607     + fsstack_copy_attr_all(inode, lower_inode);
608     /* This size will be overwritten for real files w/ headers and
609     * other metadata */
610     fsstack_copy_inode_size(inode, lower_inode);
611     diff --git a/fs/namei.c b/fs/namei.c
612     index 73e2e66..784ef51 100644
613     --- a/fs/namei.c
614     +++ b/fs/namei.c
615     @@ -389,6 +389,7 @@ void release_open_intent(struct nameidata *nd)
616     else
617     fput(nd->intent.open.file);
618     }
619     +EXPORT_SYMBOL(release_open_intent);
620    
621     static inline struct dentry *
622     do_revalidate(struct dentry *dentry, struct nameidata *nd)
623     diff --git a/fs/stack.c b/fs/stack.c
624     index 67716f6..4336f2b 100644
625     --- a/fs/stack.c
626     +++ b/fs/stack.c
627     @@ -1,24 +1,42 @@
628     +/*
629     + * Copyright (c) 2006-2007 Erez Zadok
630     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
631     + * Copyright (c) 2006-2007 Stony Brook University
632     + * Copyright (c) 2006-2007 The Research Foundation of SUNY
633     + *
634     + * This program is free software; you can redistribute it and/or modify
635     + * it under the terms of the GNU General Public License version 2 as
636     + * published by the Free Software Foundation.
637     + */
638     +
639     #include <linux/module.h>
640     #include <linux/fs.h>
641     #include <linux/fs_stack.h>
642    
643     -/* does _NOT_ require i_mutex to be held.
644     +/*
645     + * does _NOT_ require i_mutex to be held.
646     *
647     * This function cannot be inlined since i_size_{read,write} is rather
648     * heavy-weight on 32-bit systems
649     */
650     void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
651     {
652     - i_size_write(dst, i_size_read((struct inode *)src));
653     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
654     + spin_lock(&dst->i_lock);
655     +#endif
656     + i_size_write(dst, i_size_read(src));
657     dst->i_blocks = src->i_blocks;
658     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
659     + spin_unlock(&dst->i_lock);
660     +#endif
661     }
662     EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
663    
664     -/* copy all attributes; get_nlinks is optional way to override the i_nlink
665     +/*
666     + * copy all attributes from src to dest; this includes plain i_nlink
667     * copying
668     */
669     -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
670     - int (*get_nlinks)(struct inode *))
671     +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
672     {
673     dest->i_mode = src->i_mode;
674     dest->i_uid = src->i_uid;
675     @@ -29,14 +47,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
676     dest->i_ctime = src->i_ctime;
677     dest->i_blkbits = src->i_blkbits;
678     dest->i_flags = src->i_flags;
679     -
680     - /*
681     - * Update the nlinks AFTER updating the above fields, because the
682     - * get_links callback may depend on them.
683     - */
684     - if (!get_nlinks)
685     - dest->i_nlink = src->i_nlink;
686     - else
687     - dest->i_nlink = (*get_nlinks)(dest);
688     + dest->i_nlink = src->i_nlink;
689     }
690     EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
691     diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
692     new file mode 100644
693     index 0000000..92b281b
694     --- /dev/null
695     +++ b/fs/unionfs/Makefile
696     @@ -0,0 +1,17 @@
697     +UNIONFS_VERSION="2.2.3 (for 2.6.24)"
698     +
699     +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
700     +
701     +obj-$(CONFIG_UNION_FS) += unionfs.o
702     +
703     +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
704     + rdstate.o copyup.o dirhelper.o rename.o unlink.o \
705     + lookup.o commonfops.o dirfops.o sioq.o mmap.o
706     +
707     +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
708     +
709     +unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
710     +
711     +ifeq ($(CONFIG_UNION_FS_DEBUG),y)
712     +EXTRA_CFLAGS += -DDEBUG
713     +endif
714     diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
715     new file mode 100644
716     index 0000000..f37192f
717     --- /dev/null
718     +++ b/fs/unionfs/commonfops.c
719     @@ -0,0 +1,835 @@
720     +/*
721     + * Copyright (c) 2003-2007 Erez Zadok
722     + * Copyright (c) 2003-2006 Charles P. Wright
723     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
724     + * Copyright (c) 2005-2006 Junjiro Okajima
725     + * Copyright (c) 2005 Arun M. Krishnakumar
726     + * Copyright (c) 2004-2006 David P. Quigley
727     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
728     + * Copyright (c) 2003 Puja Gupta
729     + * Copyright (c) 2003 Harikesavan Krishnan
730     + * Copyright (c) 2003-2007 Stony Brook University
731     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
732     + *
733     + * This program is free software; you can redistribute it and/or modify
734     + * it under the terms of the GNU General Public License version 2 as
735     + * published by the Free Software Foundation.
736     + */
737     +
738     +#include "union.h"
739     +
740     +/*
741     + * 1) Copyup the file
742     + * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously
743     + * stolen from NFS's silly rename
744     + */
745     +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
746     + int bstart, int bindex)
747     +{
748     + static unsigned int counter;
749     + const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
750     + const int countersize = sizeof(counter) * 2;
751     + const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
752     + char name[nlen + 1];
753     + int err;
754     + struct dentry *tmp_dentry = NULL;
755     + struct dentry *lower_dentry;
756     + struct dentry *lower_dir_dentry = NULL;
757     +
758     + lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
759     +
760     + sprintf(name, ".unionfs%*.*lx",
761     + i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
762     +
763     + /*
764     + * Loop, looking for an unused temp name to copyup to.
765     + *
766     + * It's somewhat silly that we look for a free temp name in the
767     + * source branch (bstart) instead of the dest branch (bindex), where
768     + * the final name will be created. We _will_ catch it if somehow
769     + * the name exists in the dest branch, but it'd be nice to catch it
770     + * sooner than later.
771     + */
772     +retry:
773     + tmp_dentry = NULL;
774     + do {
775     + char *suffix = name + nlen - countersize;
776     +
777     + dput(tmp_dentry);
778     + counter++;
779     + sprintf(suffix, "%*.*x", countersize, countersize, counter);
780     +
781     + pr_debug("unionfs: trying to rename %s to %s\n",
782     + dentry->d_name.name, name);
783     +
784     + tmp_dentry = lookup_one_len(name, lower_dentry->d_parent,
785     + nlen);
786     + if (IS_ERR(tmp_dentry)) {
787     + err = PTR_ERR(tmp_dentry);
788     + goto out;
789     + }
790     + } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
791     + dput(tmp_dentry);
792     +
793     + err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
794     + bindex,
795     + i_size_read(file->f_path.dentry->d_inode));
796     + if (err) {
797     + if (unlikely(err == -EEXIST))
798     + goto retry;
799     + goto out;
800     + }
801     +
802     + /* bring it to the same state as an unlinked file */
803     + lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
804     + if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
805     + atomic_inc(&lower_dentry->d_inode->i_count);
806     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
807     + lower_dentry->d_inode);
808     + }
809     + lower_dir_dentry = lock_parent(lower_dentry);
810     + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
811     + unlock_dir(lower_dir_dentry);
812     +
813     +out:
814     + if (!err)
815     + unionfs_check_dentry(dentry);
816     + return err;
817     +}
818     +
819     +/*
820     + * put all references held by upper struct file and free lower file pointer
821     + * array
822     + */
823     +static void cleanup_file(struct file *file)
824     +{
825     + int bindex, bstart, bend;
826     + struct file **lower_files;
827     + struct file *lower_file;
828     + struct super_block *sb = file->f_path.dentry->d_sb;
829     +
830     + lower_files = UNIONFS_F(file)->lower_files;
831     + bstart = fbstart(file);
832     + bend = fbend(file);
833     +
834     + for (bindex = bstart; bindex <= bend; bindex++) {
835     + int i; /* holds (possibly) updated branch index */
836     + int old_bid;
837     +
838     + lower_file = unionfs_lower_file_idx(file, bindex);
839     + if (!lower_file)
840     + continue;
841     +
842     + /*
843     + * Find new index of matching branch with an open
844     + * file, since branches could have been added or
845     + * deleted causing the one with open files to shift.
846     + */
847     + old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
848     + i = branch_id_to_idx(sb, old_bid);
849     + if (unlikely(i < 0)) {
850     + printk(KERN_ERR "unionfs: no superblock for "
851     + "file %p\n", file);
852     + continue;
853     + }
854     +
855     + /* decrement count of open files */
856     + branchput(sb, i);
857     + /*
858     + * fput will perform an mntput for us on the correct branch.
859     + * Although we're using the file's old branch configuration,
860     + * bindex, which is the old index, correctly points to the
861     + * right branch in the file's branch list. In other words,
862     + * we're going to mntput the correct branch even if branches
863     + * have been added/removed.
864     + */
865     + fput(lower_file);
866     + UNIONFS_F(file)->lower_files[bindex] = NULL;
867     + UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
868     + }
869     +
870     + UNIONFS_F(file)->lower_files = NULL;
871     + kfree(lower_files);
872     + kfree(UNIONFS_F(file)->saved_branch_ids);
873     + /* set to NULL because caller needs to know whether to kfree on error */
874     + UNIONFS_F(file)->saved_branch_ids = NULL;
875     +}
876     +
877     +/* open all lower files for a given file */
878     +static int open_all_files(struct file *file)
879     +{
880     + int bindex, bstart, bend, err = 0;
881     + struct file *lower_file;
882     + struct dentry *lower_dentry;
883     + struct dentry *dentry = file->f_path.dentry;
884     + struct super_block *sb = dentry->d_sb;
885     +
886     + bstart = dbstart(dentry);
887     + bend = dbend(dentry);
888     +
889     + for (bindex = bstart; bindex <= bend; bindex++) {
890     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
891     + if (!lower_dentry)
892     + continue;
893     +
894     + dget(lower_dentry);
895     + unionfs_mntget(dentry, bindex);
896     + branchget(sb, bindex);
897     +
898     + lower_file =
899     + dentry_open(lower_dentry,
900     + unionfs_lower_mnt_idx(dentry, bindex),
901     + file->f_flags);
902     + if (IS_ERR(lower_file)) {
903     + err = PTR_ERR(lower_file);
904     + goto out;
905     + } else {
906     + unionfs_set_lower_file_idx(file, bindex, lower_file);
907     + }
908     + }
909     +out:
910     + return err;
911     +}
912     +
913     +/* open the highest priority file for a given upper file */
914     +static int open_highest_file(struct file *file, bool willwrite)
915     +{
916     + int bindex, bstart, bend, err = 0;
917     + struct file *lower_file;
918     + struct dentry *lower_dentry;
919     + struct dentry *dentry = file->f_path.dentry;
920     + struct inode *parent_inode = dentry->d_parent->d_inode;
921     + struct super_block *sb = dentry->d_sb;
922     +
923     + bstart = dbstart(dentry);
924     + bend = dbend(dentry);
925     +
926     + lower_dentry = unionfs_lower_dentry(dentry);
927     + if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
928     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
929     + err = copyup_file(parent_inode, file, bstart, bindex,
930     + i_size_read(dentry->d_inode));
931     + if (!err)
932     + break;
933     + }
934     + atomic_set(&UNIONFS_F(file)->generation,
935     + atomic_read(&UNIONFS_I(dentry->d_inode)->
936     + generation));
937     + goto out;
938     + }
939     +
940     + dget(lower_dentry);
941     + unionfs_mntget(dentry, bstart);
942     + lower_file = dentry_open(lower_dentry,
943     + unionfs_lower_mnt_idx(dentry, bstart),
944     + file->f_flags);
945     + if (IS_ERR(lower_file)) {
946     + err = PTR_ERR(lower_file);
947     + goto out;
948     + }
949     + branchget(sb, bstart);
950     + unionfs_set_lower_file(file, lower_file);
951     + /* Fix up the position. */
952     + lower_file->f_pos = file->f_pos;
953     +
954     + memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
955     +out:
956     + return err;
957     +}
958     +
959     +/* perform a delayed copyup of a read-write file on a read-only branch */
960     +static int do_delayed_copyup(struct file *file)
961     +{
962     + int bindex, bstart, bend, err = 0;
963     + struct dentry *dentry = file->f_path.dentry;
964     + struct inode *parent_inode = dentry->d_parent->d_inode;
965     +
966     + bstart = fbstart(file);
967     + bend = fbend(file);
968     +
969     + BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
970     +
971     + unionfs_check_file(file);
972     + unionfs_check_dentry(dentry);
973     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
974     + if (!d_deleted(dentry))
975     + err = copyup_file(parent_inode, file, bstart,
976     + bindex,
977     + i_size_read(dentry->d_inode));
978     + else
979     + err = copyup_deleted_file(file, dentry, bstart,
980     + bindex);
981     +
982     + if (!err)
983     + break;
984     + }
985     + if (err || (bstart <= fbstart(file)))
986     + goto out;
987     + bend = fbend(file);
988     + for (bindex = bstart; bindex <= bend; bindex++) {
989     + if (unionfs_lower_file_idx(file, bindex)) {
990     + branchput(dentry->d_sb, bindex);
991     + fput(unionfs_lower_file_idx(file, bindex));
992     + unionfs_set_lower_file_idx(file, bindex, NULL);
993     + }
994     + if (unionfs_lower_mnt_idx(dentry, bindex)) {
995     + unionfs_mntput(dentry, bindex);
996     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
997     + }
998     + if (unionfs_lower_dentry_idx(dentry, bindex)) {
999     + BUG_ON(!dentry->d_inode);
1000     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1001     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1002     + NULL);
1003     + dput(unionfs_lower_dentry_idx(dentry, bindex));
1004     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1005     + }
1006     + }
1007     + /* for reg file, we only open it "once" */
1008     + fbend(file) = fbstart(file);
1009     + set_dbend(dentry, dbstart(dentry));
1010     + ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1011     +
1012     +out:
1013     + unionfs_check_file(file);
1014     + unionfs_check_dentry(dentry);
1015     + return err;
1016     +}
1017     +
1018     +/*
1019     + * Revalidate the struct file
1020     + * @file: file to revalidate
1021     + * @willwrite: true if caller may cause changes to the file; false otherwise.
1022     + */
1023     +int unionfs_file_revalidate(struct file *file, bool willwrite)
1024     +{
1025     + struct super_block *sb;
1026     + struct dentry *dentry;
1027     + int sbgen, fgen, dgen;
1028     + int bstart, bend;
1029     + int size;
1030     + int err = 0;
1031     +
1032     + dentry = file->f_path.dentry;
1033     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1034     + sb = dentry->d_sb;
1035     +
1036     + /*
1037     + * First revalidate the dentry inside struct file,
1038     + * but not unhashed dentries.
1039     + */
1040     +reval_dentry:
1041     + if (unlikely(!d_deleted(dentry) &&
1042     + !__unionfs_d_revalidate_chain(dentry, NULL, willwrite))) {
1043     + err = -ESTALE;
1044     + goto out_nofree;
1045     + }
1046     +
1047     + sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1048     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1049     + fgen = atomic_read(&UNIONFS_F(file)->generation);
1050     +
1051     + if (unlikely(sbgen > dgen)) {
1052     + pr_debug("unionfs: retry dentry revalidation\n");
1053     + schedule();
1054     + goto reval_dentry;
1055     + }
1056     + BUG_ON(sbgen > dgen);
1057     +
1058     + /*
1059     + * There are two cases we are interested in. The first is if the
1060     + * generation is lower than the super-block. The second is if
1061     + * someone has copied up this file from underneath us, we also need
1062     + * to refresh things.
1063     + */
1064     + if (unlikely(!d_deleted(dentry) &&
1065     + (sbgen > fgen || dbstart(dentry) != fbstart(file)))) {
1066     + /* save orig branch ID */
1067     + int orig_brid =
1068     + UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1069     +
1070     + /* First we throw out the existing files. */
1071     + cleanup_file(file);
1072     +
1073     + /* Now we reopen the file(s) as in unionfs_open. */
1074     + bstart = fbstart(file) = dbstart(dentry);
1075     + bend = fbend(file) = dbend(dentry);
1076     +
1077     + size = sizeof(struct file *) * sbmax(sb);
1078     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1079     + if (unlikely(!UNIONFS_F(file)->lower_files)) {
1080     + err = -ENOMEM;
1081     + goto out;
1082     + }
1083     + size = sizeof(int) * sbmax(sb);
1084     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1085     + if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1086     + err = -ENOMEM;
1087     + goto out;
1088     + }
1089     +
1090     + if (S_ISDIR(dentry->d_inode->i_mode)) {
1091     + /* We need to open all the files. */
1092     + err = open_all_files(file);
1093     + if (err)
1094     + goto out;
1095     + } else {
1096     + int new_brid;
1097     + /* We only open the highest priority branch. */
1098     + err = open_highest_file(file, willwrite);
1099     + if (err)
1100     + goto out;
1101     + new_brid = UNIONFS_F(file)->
1102     + saved_branch_ids[fbstart(file)];
1103     + if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1104     + /*
1105     + * If we re-opened the file on a different
1106     + * branch than the original one, and this
1107     + * was due to a new branch inserted, then
1108     + * update the mnt counts of the old and new
1109     + * branches accordingly.
1110     + */
1111     + unionfs_mntget(dentry, bstart);
1112     + unionfs_mntput(sb->s_root,
1113     + branch_id_to_idx(sb, orig_brid));
1114     + }
1115     + }
1116     + atomic_set(&UNIONFS_F(file)->generation,
1117     + atomic_read(
1118     + &UNIONFS_I(dentry->d_inode)->generation));
1119     + }
1120     +
1121     + /* Copyup on the first write to a file on a readonly branch. */
1122     + if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1123     + !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1124     + is_robranch(dentry)) {
1125     + pr_debug("unionfs: do delay copyup of \"%s\"\n",
1126     + dentry->d_name.name);
1127     + err = do_delayed_copyup(file);
1128     + }
1129     +
1130     +out:
1131     + if (err) {
1132     + kfree(UNIONFS_F(file)->lower_files);
1133     + kfree(UNIONFS_F(file)->saved_branch_ids);
1134     + }
1135     +out_nofree:
1136     + if (!err)
1137     + unionfs_check_file(file);
1138     + unionfs_unlock_dentry(dentry);
1139     + return err;
1140     +}
1141     +
1142     +/* unionfs_open helper function: open a directory */
1143     +static int __open_dir(struct inode *inode, struct file *file)
1144     +{
1145     + struct dentry *lower_dentry;
1146     + struct file *lower_file;
1147     + int bindex, bstart, bend;
1148     + struct vfsmount *mnt;
1149     +
1150     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1151     + bend = fbend(file) = dbend(file->f_path.dentry);
1152     +
1153     + for (bindex = bstart; bindex <= bend; bindex++) {
1154     + lower_dentry =
1155     + unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1156     + if (!lower_dentry)
1157     + continue;
1158     +
1159     + dget(lower_dentry);
1160     + unionfs_mntget(file->f_path.dentry, bindex);
1161     + mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex);
1162     + lower_file = dentry_open(lower_dentry, mnt, file->f_flags);
1163     + if (IS_ERR(lower_file))
1164     + return PTR_ERR(lower_file);
1165     +
1166     + unionfs_set_lower_file_idx(file, bindex, lower_file);
1167     +
1168     + /*
1169     + * The branchget goes after the open, because otherwise
1170     + * we would miss the reference on release.
1171     + */
1172     + branchget(inode->i_sb, bindex);
1173     + }
1174     +
1175     + return 0;
1176     +}
1177     +
1178     +/* unionfs_open helper function: open a file */
1179     +static int __open_file(struct inode *inode, struct file *file)
1180     +{
1181     + struct dentry *lower_dentry;
1182     + struct file *lower_file;
1183     + int lower_flags;
1184     + int bindex, bstart, bend;
1185     +
1186     + lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1187     + lower_flags = file->f_flags;
1188     +
1189     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1190     + bend = fbend(file) = dbend(file->f_path.dentry);
1191     +
1192     + /*
1193     + * check for the permission for lower file. If the error is
1194     + * COPYUP_ERR, copyup the file.
1195     + */
1196     + if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1197     + /*
1198     + * if the open will change the file, copy it up; otherwise
1199     + * defer it.
1200     + */
1201     + if (lower_flags & O_TRUNC) {
1202     + int size = 0;
1203     + int err = -EROFS;
1204     +
1205     + /* copyup the file */
1206     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
1207     + err = copyup_file(
1208     + file->f_path.dentry->d_parent->d_inode,
1209     + file, bstart, bindex, size);
1210     + if (!err)
1211     + break;
1212     + }
1213     + return err;
1214     + } else {
1215     + lower_flags &= ~(OPEN_WRITE_FLAGS);
1216     + }
1217     + }
1218     +
1219     + dget(lower_dentry);
1220     +
1221     + /*
1222     + * dentry_open will decrement mnt refcnt if err.
1223     + * otherwise fput() will do an mntput() for us upon file close.
1224     + */
1225     + unionfs_mntget(file->f_path.dentry, bstart);
1226     + lower_file =
1227     + dentry_open(lower_dentry,
1228     + unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1229     + lower_flags);
1230     + if (IS_ERR(lower_file))
1231     + return PTR_ERR(lower_file);
1232     +
1233     + unionfs_set_lower_file(file, lower_file);
1234     + branchget(inode->i_sb, bstart);
1235     +
1236     + return 0;
1237     +}
1238     +
1239     +int unionfs_open(struct inode *inode, struct file *file)
1240     +{
1241     + int err = 0;
1242     + struct file *lower_file = NULL;
1243     + struct dentry *dentry = NULL;
1244     + int bindex = 0, bstart = 0, bend = 0;
1245     + int size;
1246     +
1247     + unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1248     +
1249     + file->private_data =
1250     + kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1251     + if (unlikely(!UNIONFS_F(file))) {
1252     + err = -ENOMEM;
1253     + goto out_nofree;
1254     + }
1255     + fbstart(file) = -1;
1256     + fbend(file) = -1;
1257     + atomic_set(&UNIONFS_F(file)->generation,
1258     + atomic_read(&UNIONFS_I(inode)->generation));
1259     +
1260     + size = sizeof(struct file *) * sbmax(inode->i_sb);
1261     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1262     + if (unlikely(!UNIONFS_F(file)->lower_files)) {
1263     + err = -ENOMEM;
1264     + goto out;
1265     + }
1266     + size = sizeof(int) * sbmax(inode->i_sb);
1267     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1268     + if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1269     + err = -ENOMEM;
1270     + goto out;
1271     + }
1272     +
1273     + dentry = file->f_path.dentry;
1274     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1275     +
1276     + bstart = fbstart(file) = dbstart(dentry);
1277     + bend = fbend(file) = dbend(dentry);
1278     +
1279     + /*
1280     + * open all directories and make the unionfs file struct point to
1281     + * these lower file structs
1282     + */
1283     + if (S_ISDIR(inode->i_mode))
1284     + err = __open_dir(inode, file); /* open a dir */
1285     + else
1286     + err = __open_file(inode, file); /* open a file */
1287     +
1288     + /* freeing the allocated resources, and fput the opened files */
1289     + if (err) {
1290     + for (bindex = bstart; bindex <= bend; bindex++) {
1291     + lower_file = unionfs_lower_file_idx(file, bindex);
1292     + if (!lower_file)
1293     + continue;
1294     +
1295     + branchput(file->f_path.dentry->d_sb, bindex);
1296     + /* fput calls dput for lower_dentry */
1297     + fput(lower_file);
1298     + }
1299     + }
1300     +
1301     + /* XXX: should this unlock be moved to the function bottom? */
1302     + unionfs_unlock_dentry(dentry);
1303     +
1304     +out:
1305     + if (err) {
1306     + kfree(UNIONFS_F(file)->lower_files);
1307     + kfree(UNIONFS_F(file)->saved_branch_ids);
1308     + kfree(UNIONFS_F(file));
1309     + }
1310     +out_nofree:
1311     + if (!err) {
1312     + dentry = file->f_path.dentry;
1313     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
1314     + unionfs_copy_attr_times(inode);
1315     + unionfs_check_file(file);
1316     + unionfs_check_inode(inode);
1317     + }
1318     + unionfs_read_unlock(inode->i_sb);
1319     + return err;
1320     +}
1321     +
1322     +/*
1323     + * release all lower object references & free the file info structure
1324     + *
1325     + * No need to grab sb info's rwsem.
1326     + */
1327     +int unionfs_file_release(struct inode *inode, struct file *file)
1328     +{
1329     + struct file *lower_file = NULL;
1330     + struct unionfs_file_info *fileinfo;
1331     + struct unionfs_inode_info *inodeinfo;
1332     + struct super_block *sb = inode->i_sb;
1333     + struct dentry *dentry = file->f_path.dentry;
1334     + int bindex, bstart, bend;
1335     + int fgen, err = 0;
1336     +
1337     + unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1338     + /*
1339     + * Yes, we have to revalidate this file even if it's being released.
1340     + * This is important for open-but-unlinked files, as well as mmap
1341     + * support.
1342     + */
1343     + err = unionfs_file_revalidate(file, true);
1344     + if (unlikely(err))
1345     + goto out;
1346     + unionfs_check_file(file);
1347     + fileinfo = UNIONFS_F(file);
1348     + BUG_ON(file->f_path.dentry->d_inode != inode);
1349     + inodeinfo = UNIONFS_I(inode);
1350     +
1351     + /* fput all the lower files */
1352     + fgen = atomic_read(&fileinfo->generation);
1353     + bstart = fbstart(file);
1354     + bend = fbend(file);
1355     +
1356     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1357     + for (bindex = bstart; bindex <= bend; bindex++) {
1358     + lower_file = unionfs_lower_file_idx(file, bindex);
1359     +
1360     + if (lower_file) {
1361     + fput(lower_file);
1362     + branchput(sb, bindex);
1363     + }
1364     +
1365     + /* if there are no more refs to the dentry, dput it */
1366     + if (d_deleted(dentry)) {
1367     + dput(unionfs_lower_dentry_idx(dentry, bindex));
1368     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1369     + }
1370     + }
1371     + unionfs_unlock_dentry(dentry);
1372     +
1373     + kfree(fileinfo->lower_files);
1374     + kfree(fileinfo->saved_branch_ids);
1375     +
1376     + if (fileinfo->rdstate) {
1377     + fileinfo->rdstate->access = jiffies;
1378     + spin_lock(&inodeinfo->rdlock);
1379     + inodeinfo->rdcount++;
1380     + list_add_tail(&fileinfo->rdstate->cache,
1381     + &inodeinfo->readdircache);
1382     + mark_inode_dirty(inode);
1383     + spin_unlock(&inodeinfo->rdlock);
1384     + fileinfo->rdstate = NULL;
1385     + }
1386     + kfree(fileinfo);
1387     +
1388     +out:
1389     + unionfs_read_unlock(sb);
1390     + return err;
1391     +}
1392     +
1393     +/* pass the ioctl to the lower fs */
1394     +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1395     +{
1396     + struct file *lower_file;
1397     + int err;
1398     +
1399     + lower_file = unionfs_lower_file(file);
1400     +
1401     + err = -ENOTTY;
1402     + if (!lower_file || !lower_file->f_op)
1403     + goto out;
1404     + if (lower_file->f_op->unlocked_ioctl) {
1405     + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1406     + } else if (lower_file->f_op->ioctl) {
1407     + lock_kernel();
1408     + err = lower_file->f_op->ioctl(
1409     + lower_file->f_path.dentry->d_inode,
1410     + lower_file, cmd, arg);
1411     + unlock_kernel();
1412     + }
1413     +
1414     +out:
1415     + return err;
1416     +}
1417     +
1418     +/*
1419     + * return to user-space the branch indices containing the file in question
1420     + *
1421     + * We use fd_set, and therefore the number of branches is limited
1422     + * to FD_SETSIZE, which is currently 1024 - plenty for most people
1423     + */
1424     +static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
1425     + unsigned long arg)
1426     +{
1427     + int err = 0;
1428     + fd_set branchlist;
1429     + int bstart = 0, bend = 0, bindex = 0;
1430     + int orig_bstart, orig_bend;
1431     + struct dentry *dentry, *lower_dentry;
1432     + struct vfsmount *mnt;
1433     +
1434     + dentry = file->f_path.dentry;
1435     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1436     + orig_bstart = dbstart(dentry);
1437     + orig_bend = dbend(dentry);
1438     + err = unionfs_partial_lookup(dentry);
1439     + if (err)
1440     + goto out;
1441     + bstart = dbstart(dentry);
1442     + bend = dbend(dentry);
1443     +
1444     + FD_ZERO(&branchlist);
1445     +
1446     + for (bindex = bstart; bindex <= bend; bindex++) {
1447     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1448     + if (!lower_dentry)
1449     + continue;
1450     + if (likely(lower_dentry->d_inode))
1451     + FD_SET(bindex, &branchlist);
1452     + /* purge any lower objects after partial_lookup */
1453     + if (bindex < orig_bstart || bindex > orig_bend) {
1454     + dput(lower_dentry);
1455     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1456     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1457     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1458     + NULL);
1459     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
1460     + if (!mnt)
1461     + continue;
1462     + unionfs_mntput(dentry, bindex);
1463     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1464     + }
1465     + }
1466     + /* restore original dentry's offsets */
1467     + set_dbstart(dentry, orig_bstart);
1468     + set_dbend(dentry, orig_bend);
1469     + ibstart(dentry->d_inode) = orig_bstart;
1470     + ibend(dentry->d_inode) = orig_bend;
1471     +
1472     + err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1473     + if (unlikely(err))
1474     + err = -EFAULT;
1475     +
1476     +out:
1477     + unionfs_unlock_dentry(dentry);
1478     + return err < 0 ? err : bend;
1479     +}
1480     +
1481     +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1482     +{
1483     + long err;
1484     +
1485     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1486     +
1487     + err = unionfs_file_revalidate(file, true);
1488     + if (unlikely(err))
1489     + goto out;
1490     +
1491     + /* check if asked for local commands */
1492     + switch (cmd) {
1493     + case UNIONFS_IOCTL_INCGEN:
1494     + /* Increment the superblock generation count */
1495     + pr_info("unionfs: incgen ioctl deprecated; "
1496     + "use \"-o remount,incgen\"\n");
1497     + err = -ENOSYS;
1498     + break;
1499     +
1500     + case UNIONFS_IOCTL_QUERYFILE:
1501     + /* Return list of branches containing the given file */
1502     + err = unionfs_ioctl_queryfile(file, cmd, arg);
1503     + break;
1504     +
1505     + default:
1506     + /* pass the ioctl down */
1507     + err = do_ioctl(file, cmd, arg);
1508     + break;
1509     + }
1510     +
1511     +out:
1512     + unionfs_check_file(file);
1513     + unionfs_read_unlock(file->f_path.dentry->d_sb);
1514     + return err;
1515     +}
1516     +/* call ->flush on each lower file in [fbstart, fbend]; stop at first error */
1517     +int unionfs_flush(struct file *file, fl_owner_t id)
1518     +{
1519     + int err = 0;
1520     + struct file *lower_file = NULL;
1521     + struct dentry *dentry = file->f_path.dentry;
1522     + int bindex, bstart, bend;
1523     +
1524     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1525     +
1526     + err = unionfs_file_revalidate(file, true);
1527     + if (unlikely(err))
1528     + goto out;
1529     + unionfs_check_file(file);
1530     + /* walk every branch index this file spans */
1531     + bstart = fbstart(file);
1532     + bend = fbend(file);
1533     + for (bindex = bstart; bindex <= bend; bindex++) {
1534     + lower_file = unionfs_lower_file_idx(file, bindex);
1535     + /* skip empty slots and lower files without a flush op */
1536     + if (lower_file && lower_file->f_op &&
1537     + lower_file->f_op->flush) {
1538     + err = lower_file->f_op->flush(lower_file, id);
1539     + if (err)
1540     + goto out; /* times are not updated on failure */
1541     + }
1542     +
1543     + }
1544     +
1545     + /* on success, update our times */
1546     + unionfs_copy_attr_times(dentry->d_inode);
1547     + /* parent time could have changed too (async) */
1548     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
1549     +
1550     +out:
1551     + unionfs_check_file(file);
1552     + unionfs_read_unlock(dentry->d_sb);
1553     + return err;
1554     +}
1555     diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1556     new file mode 100644
1557     index 0000000..9beac01
1558     --- /dev/null
1559     +++ b/fs/unionfs/copyup.c
1560     @@ -0,0 +1,885 @@
1561     +/*
1562     + * Copyright (c) 2003-2007 Erez Zadok
1563     + * Copyright (c) 2003-2006 Charles P. Wright
1564     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1565     + * Copyright (c) 2005-2006 Junjiro Okajima
1566     + * Copyright (c) 2005 Arun M. Krishnakumar
1567     + * Copyright (c) 2004-2006 David P. Quigley
1568     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1569     + * Copyright (c) 2003 Puja Gupta
1570     + * Copyright (c) 2003 Harikesavan Krishnan
1571     + * Copyright (c) 2003-2007 Stony Brook University
1572     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
1573     + *
1574     + * This program is free software; you can redistribute it and/or modify
1575     + * it under the terms of the GNU General Public License version 2 as
1576     + * published by the Free Software Foundation.
1577     + */
1578     +
1579     +#include "union.h"
1580     +
1581     +/*
1582     + * For detailed explanation of copyup see:
1583     + * Documentation/filesystems/unionfs/concepts.txt
1584     + */
1585     +
1586     +#ifdef CONFIG_UNION_FS_XATTR
1587     +/* copy all xattrs from old_lower_dentry to new_lower_dentry; 0 or -errno */
1588     +static int copyup_xattrs(struct dentry *old_lower_dentry,
1589     + struct dentry *new_lower_dentry)
1590     +{
1591     + int err = 0;
1592     + ssize_t list_size = -1;
1593     + char *name_list = NULL;
1594     + char *attr_value = NULL;
1595     + char *name_list_buf = NULL;
1596     +
1597     + /* query the actual size of the xattr list */
1598     + list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1599     + if (list_size <= 0) {
1600     + err = list_size;
1601     + goto out;
1602     + }
1603     +
1604     + /* allocate space for the actual list */
1605     + name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1606     + if (unlikely(!name_list || IS_ERR(name_list))) {
1607     + err = PTR_ERR(name_list);
1608     + goto out;
1609     + }
1610     +
1611     + name_list_buf = name_list; /* save for kfree at end */
1612     +
1613     + /* now get the actual xattr list of the source file */
1614     + list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1615     + if (list_size <= 0) {
1616     + err = list_size;
1617     + goto out;
1618     + }
1619     +
1620     + /* allocate space to hold each xattr's value */
1621     + attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1622     + if (unlikely(!attr_value || IS_ERR(attr_value))) {
1623     + err = PTR_ERR(attr_value); /* not name_list: that one is valid */
1624     + attr_value = NULL; /* never pass an ERR_PTR to the free at out: */
1625     + goto out;
1626     + }
1627     + /* in a loop, get and set each xattr from src to dst file */
1628     + while (*name_list) { /* ends at an empty name (assumes zeroed buffer; TODO confirm) */
1629     + ssize_t size;
1630     +
1631     + /* Lock here since vfs_getxattr doesn't lock for us */
1632     + mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1633     + size = vfs_getxattr(old_lower_dentry, name_list,
1634     + attr_value, XATTR_SIZE_MAX);
1635     + mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1636     + if (size < 0) {
1637     + err = size;
1638     + goto out;
1639     + }
1640     + if (size > XATTR_SIZE_MAX) {
1641     + err = -E2BIG;
1642     + goto out;
1643     + }
1644     + /* Don't lock here since vfs_setxattr does it for us. */
1645     + err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1646     + size, 0);
1647     + /*
1648     + * Selinux depends on "security.*" xattrs, so to maintain
1649     + * the security of copied-up files, if Selinux is active,
1650     + * then we must copy these xattrs as well. So we need to
1651     + * temporarily get FOWNER privileges.
1652     + * XXX: move entire copyup code to SIOQ.
1653     + */
1654     + if (err == -EPERM && !capable(CAP_FOWNER)) {
1655     + cap_raise(current->cap_effective, CAP_FOWNER);
1656     + err = vfs_setxattr(new_lower_dentry, name_list,
1657     + attr_value, size, 0);
1658     + cap_lower(current->cap_effective, CAP_FOWNER);
1659     + }
1660     + if (err < 0)
1661     + goto out;
1662     + name_list += strlen(name_list) + 1; /* step to the next name */
1663     + }
1664     +out:
1665     + unionfs_xattr_kfree(name_list_buf);
1666     + unionfs_xattr_kfree(attr_value);
1667     + /* Ignore if xattr isn't supported */
1668     + if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1669     + err = 0;
1670     + return err;
1671     +}
1672     +#endif /* CONFIG_UNION_FS_XATTR */
1673     +
1674     +/*
1675     + * Copy atime/mtime/ctime, uid/gid and then the mode of old_lower_dentry's
1676     + * inode onto new_lower_dentry (two notify_change calls); @sb is unused.
1677     + * Handle file systems which may not support certain options. For example
1678     + * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1679     + * errors, rather than propagating them up, which results in copyup errors
1680     + * and errors returned back to users.
1681     + */
1682     +static int copyup_permissions(struct super_block *sb,
1683     + struct dentry *old_lower_dentry,
1684     + struct dentry *new_lower_dentry)
1685     +{
1686     + struct inode *i = old_lower_dentry->d_inode;
1687     + struct iattr newattrs;
1688     + int err;
1689     +
1690     + newattrs.ia_atime = i->i_atime;
1691     + newattrs.ia_mtime = i->i_mtime;
1692     + newattrs.ia_ctime = i->i_ctime;
1693     + newattrs.ia_gid = i->i_gid;
1694     + newattrs.ia_uid = i->i_uid;
1695     + newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1696     + ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1697     + ATTR_GID | ATTR_UID;
1698     + err = notify_change(new_lower_dentry, &newattrs);
1699     + if (err)
1700     + goto out; /* times/owner failed: the mode is not attempted */
1701     +
1702     + /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1703     + newattrs.ia_mode = i->i_mode;
1704     + newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1705     + err = notify_change(new_lower_dentry, &newattrs);
1706     + if (err == -EOPNOTSUPP &&
1707     + S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1708     + printk(KERN_WARNING
1709     + "unionfs: changing \"%s\" symlink mode unsupported\n",
1710     + new_lower_dentry->d_name.name);
1711     + err = 0; /* harmless: warn and report success */
1712     + }
1713     +
1714     +out:
1715     + return err;
1716     +}
1717     +
1718     +/*
1719     + * Create new_lower_dentry as the same kind of object as old_lower_dentry
1720     + * (dir, symlink, device/fifo/socket node or regular file) via run_sioq().
1721     + * Returns 0 or the error reported in args.err; unknown types hit BUG().
1722     + * if the object being copied up is a regular file, the file is only created,
1723     + * the contents have to be copied up separately
1724     + */
1725     +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1726     + struct dentry *new_lower_dentry,
1727     + struct dentry *new_lower_parent_dentry,
1728     + char *symbuf)
1729     +{
1730     + int err = 0;
1731     + umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1732     + struct sioq_args args;
1733     +
1734     + if (S_ISDIR(old_mode)) {
1735     + args.mkdir.parent = new_lower_parent_dentry->d_inode;
1736     + args.mkdir.dentry = new_lower_dentry;
1737     + args.mkdir.mode = old_mode;
1738     +
1739     + run_sioq(__unionfs_mkdir, &args);
1740     + err = args.err;
1741     + } else if (S_ISLNK(old_mode)) {
1742     + args.symlink.parent = new_lower_parent_dentry->d_inode;
1743     + args.symlink.dentry = new_lower_dentry;
1744     + args.symlink.symbuf = symbuf; /* only this branch uses symbuf */
1745     + args.symlink.mode = old_mode;
1746     +
1747     + run_sioq(__unionfs_symlink, &args);
1748     + err = args.err;
1749     + } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1750     + S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1751     + args.mknod.parent = new_lower_parent_dentry->d_inode;
1752     + args.mknod.dentry = new_lower_dentry;
1753     + args.mknod.mode = old_mode;
1754     + args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1755     +
1756     + run_sioq(__unionfs_mknod, &args);
1757     + err = args.err;
1758     + } else if (S_ISREG(old_mode)) {
1759     + struct nameidata nd;
1760     + err = init_lower_nd(&nd, LOOKUP_CREATE);
1761     + if (unlikely(err < 0))
1762     + goto out;
1763     + args.create.nd = &nd;
1764     + args.create.parent = new_lower_parent_dentry->d_inode;
1765     + args.create.dentry = new_lower_dentry;
1766     + args.create.mode = old_mode;
1767     +
1768     + run_sioq(__unionfs_create, &args);
1769     + err = args.err;
1770     + release_lower_nd(&nd, err); /* pairs with init_lower_nd above */
1771     + } else {
1772     + printk(KERN_CRIT "unionfs: unknown inode type %d\n",
1773     + old_mode);
1774     + BUG();
1775     + }
1776     +
1777     +out:
1778     + return err;
1779     +}
1780     +/* copy up to len bytes of data from the old lower file into the new one */
1781     +static int __copyup_reg_data(struct dentry *dentry,
1782     + struct dentry *new_lower_dentry, int new_bindex,
1783     + struct dentry *old_lower_dentry, int old_bindex,
1784     + struct file **copyup_file, loff_t len)
1785     +{
1786     + struct super_block *sb = dentry->d_sb;
1787     + struct file *input_file;
1788     + struct file *output_file;
1789     + struct vfsmount *output_mnt;
1790     + mm_segment_t old_fs;
1791     + char *buf = NULL;
1792     + ssize_t read_bytes, write_bytes;
1793     + loff_t size;
1794     + int err = 0;
1795     +
1796     + /* open old file */
1797     + unionfs_mntget(dentry, old_bindex);
1798     + branchget(sb, old_bindex);
1799     + /* dentry_open calls dput and mntput if it returns an error */
1800     + input_file = dentry_open(old_lower_dentry,
1801     + unionfs_lower_mnt_idx(dentry, old_bindex),
1802     + O_RDONLY | O_LARGEFILE);
1803     + if (IS_ERR(input_file)) {
1804     + dput(old_lower_dentry);
1805     + err = PTR_ERR(input_file);
1806     + goto out;
1807     + }
1808     + if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
1809     + err = -EINVAL;
1810     + goto out_close_in;
1811     + }
1812     +
1813     + /* open new file */
1814     + dget(new_lower_dentry);
1815     + output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1816     + branchget(sb, new_bindex);
1817     + output_file = dentry_open(new_lower_dentry, output_mnt,
1818     + O_RDWR | O_LARGEFILE);
1819     + if (IS_ERR(output_file)) {
1820     + err = PTR_ERR(output_file);
1821     + goto out_close_in2;
1822     + }
1823     + if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
1824     + err = -EINVAL;
1825     + goto out_close_out;
1826     + }
1827     +
1828     + /* allocating a buffer */
1829     + buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1830     + if (unlikely(!buf)) {
1831     + err = -ENOMEM;
1832     + goto out_close_out;
1833     + }
1834     +
1835     + input_file->f_pos = 0;
1836     + output_file->f_pos = 0;
1837     + /* buf is a kernel buffer passed through __user read/write ops */
1838     + old_fs = get_fs();
1839     + set_fs(KERNEL_DS);
1840     +
1841     + size = len;
1842     + err = 0;
1843     + do {
1844     + if (len >= PAGE_SIZE)
1845     + size = PAGE_SIZE;
1846     + else if ((len < PAGE_SIZE) && (len > 0))
1847     + size = len;
1848     +
1849     + len -= PAGE_SIZE;
1850     +
1851     + read_bytes =
1852     + input_file->f_op->read(input_file,
1853     + (char __user *)buf, size,
1854     + &input_file->f_pos);
1855     + if (read_bytes <= 0) {
1856     + err = read_bytes; /* 0 at EOF, negative on error */
1857     + break;
1858     + }
1859     +
1860     + /* see Documentation/filesystems/unionfs/issues.txt */
1861     + lockdep_off();
1862     + write_bytes =
1863     + output_file->f_op->write(output_file,
1864     + (char __user *)buf,
1865     + read_bytes,
1866     + &output_file->f_pos);
1867     + lockdep_on();
1868     + if ((write_bytes < 0) || (write_bytes < read_bytes)) {
1869     + err = (write_bytes < 0) ? write_bytes : -EIO; /* short write */
1870     + break;
1871     + }
1872     + } while ((read_bytes > 0) && (len > 0));
1873     +
1874     + set_fs(old_fs);
1875     +
1876     + kfree(buf);
1877     + /* sync the copy, but only if the lower fs provides ->fsync */
1878     + if (!err && output_file->f_op->fsync)
1879     + err = output_file->f_op->fsync(output_file,
1880     + new_lower_dentry, 0);
1881     +
1882     + if (err)
1883     + goto out_close_out;
1884     + /* hand the open file (and its branch reference) to the caller */
1885     + if (copyup_file) {
1886     + *copyup_file = output_file;
1887     + goto out_close_in;
1888     + }
1889     +
1890     +out_close_out:
1891     + fput(output_file);
1892     + /* output file closed (or never opened): drop its branch reference */
1893     +out_close_in2:
1894     + branchput(sb, new_bindex);
1895     +
1896     +out_close_in:
1897     + fput(input_file);
1898     +
1899     +out:
1900     + branchput(sb, old_bindex);
1901     +
1902     + return err;
1903     +}
1904     +
1905     +/*
1906     + * Undo a failed copyup: clear dentry's lower pointer at new_bindex, restore
1907     + * its saved bstart/bend, then dput both the new and the old lower dentry.
1908     + */
1909     +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
1910     + int old_bstart, int old_bend,
1911     + struct dentry *new_lower_dentry, int new_bindex)
1912     +{
1913     + /* get rid of the lower dentry and all its traces */
1914     + unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
1915     + set_dbstart(dentry, old_bstart);
1916     + set_dbend(dentry, old_bend);
1917     + /* drop one reference on each lower dentry */
1918     + dput(new_lower_dentry);
1919     + dput(old_lower_dentry);
1920     +}
1921     +
1922     +/*
1923     + * Copy up a dentry to a file of specified name.
1924     + *
1925     + * @dir: used to pull the ->i_sb to access other branches
1926     + * @dentry: the non-negative dentry whose lower_inode we should copy
1927     + * @bstart: the branch of the lower_inode to copy from
1928     + * @new_bindex: the branch to create the new file in
1929     + * @name: the name of the file to create
1930     + * @namelen: length of @name
1931     + * @copyup_file: the "struct file" to return (optional)
1932     + * @len: how many bytes to copy-up?
1933     + */
1934     +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
1935     + int new_bindex, const char *name, int namelen,
1936     + struct file **copyup_file, loff_t len)
1937     +{
1938     + struct dentry *new_lower_dentry;
1939     + struct dentry *old_lower_dentry = NULL;
1940     + struct super_block *sb;
1941     + int err = 0;
1942     + int old_bindex;
1943     + int old_bstart;
1944     + int old_bend;
1945     + struct dentry *new_lower_parent_dentry = NULL;
1946     + mm_segment_t oldfs;
1947     + char *symbuf = NULL;
1948     +
1949     + verify_locked(dentry);
1950     +
1951     + old_bindex = bstart;
1952     + old_bstart = dbstart(dentry);
1953     + old_bend = dbend(dentry);
1954     +
1955     + BUG_ON(new_bindex < 0);
1956     + BUG_ON(new_bindex >= old_bindex);
1957     +
1958     + sb = dir->i_sb;
1959     +
1960     + err = is_robranch_super(sb, new_bindex);
1961     + if (err)
1962     + goto out;
1963     +
1964     + /* Create the directory structure above this dentry. */
1965     + new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
1966     + if (IS_ERR(new_lower_dentry)) {
1967     + err = PTR_ERR(new_lower_dentry);
1968     + goto out;
1969     + }
1970     +
1971     + old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
1972     + /* we conditionally dput this old_lower_dentry at end of function */
1973     + dget(old_lower_dentry);
1974     +
1975     + /* For symlinks, we must read the link before we lock the directory. */
1976     + if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
1977     +
1978     + symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
1979     + if (unlikely(!symbuf)) {
1980     + __clear(dentry, old_lower_dentry,
1981     + old_bstart, old_bend,
1982     + new_lower_dentry, new_bindex);
1983     + err = -ENOMEM;
1984     + goto out_free;
1985     + }
1986     +
1987     + oldfs = get_fs();
1988     + set_fs(KERNEL_DS);
1989     + err = old_lower_dentry->d_inode->i_op->readlink(
1990     + old_lower_dentry,
1991     + (char __user *)symbuf,
1992     + PATH_MAX);
1993     + set_fs(oldfs);
1994     + if (err < 0) {
1995     + __clear(dentry, old_lower_dentry,
1996     + old_bstart, old_bend,
1997     + new_lower_dentry, new_bindex);
1998     + goto out_free;
1999     + }
2000     + symbuf[err] = '\0';
2001     + }
2002     +
2003     + /* Now we lock the parent, and create the object in the new branch. */
2004     + new_lower_parent_dentry = lock_parent(new_lower_dentry);
2005     +
2006     + /* create the new inode */
2007     + err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2008     + new_lower_parent_dentry, symbuf);
2009     +
2010     + if (err) {
2011     + __clear(dentry, old_lower_dentry,
2012     + old_bstart, old_bend,
2013     + new_lower_dentry, new_bindex);
2014     + goto out_unlock;
2015     + }
2016     +
2017     + /* We actually copyup the file here. */
2018     + if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2019     + err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2020     + old_lower_dentry, old_bindex,
2021     + copyup_file, len);
2022     + if (err)
2023     + goto out_unlink;
2024     +
2025     + /* Set permissions. */
2026     + err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2027     + if (err)
2028     + goto out_unlink;
2029     +
2030     +#ifdef CONFIG_UNION_FS_XATTR
2031     + /* Selinux uses extended attributes for permissions. */
2032     + err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2033     + if (err)
2034     + goto out_unlink;
2035     +#endif /* CONFIG_UNION_FS_XATTR */
2036     +
2037     + /* do not allow files getting deleted to be re-interposed */
2038     + if (!d_deleted(dentry))
2039     + unionfs_reinterpose(dentry);
2040     +
2041     + goto out_unlock;
2042     +
2043     +out_unlink:
2044     + /*
2045     + * copyup failed, because we possibly ran out of space or
2046     + * quota, or something else happened so let's unlink; we don't
2047     + * really care about the return value of vfs_unlink
2048     + */
2049     + vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2050     +
2051     + if (copyup_file) {
2052     + /* need to close the file */
2053     +
2054     + fput(*copyup_file);
2055     + branchput(sb, new_bindex);
2056     + }
2057     +
2058     + /*
2059     + * TODO: should we reset the error to something like -EIO?
2060     + *
2061     + * If we don't reset, the user may get some nonsensical errors, but
2062     + * on the other hand, if we reset to EIO, we guarantee that the user
2063     + * will get a "confusing" error message.
2064     + */
2065     +
2066     +out_unlock:
2067     + unlock_dir(new_lower_parent_dentry);
2068     +
2069     +out_free:
2070     + /*
2071     + * If old_lower_dentry was not a file, then we need to dput it. If
2072     + * it was a file, then it was already dput indirectly by other
2073     + * functions we call above which operate on regular files.
2074     + */
2075     + if (old_lower_dentry && old_lower_dentry->d_inode &&
2076     + !S_ISREG(old_lower_dentry->d_inode->i_mode))
2077     + dput(old_lower_dentry);
2078     + kfree(symbuf);
2079     +
2080     + if (err)
2081     + goto out;
2082     + if (!S_ISDIR(dentry->d_inode->i_mode)) {
2083     + unionfs_postcopyup_release(dentry);
2084     + if (!unionfs_lower_inode(dentry->d_inode)) {
2085     + /*
2086     + * If we got here, then we copied up to an
2087     + * unlinked-open file, whose name is .unionfsXXXXX.
2088     + */
2089     + struct inode *inode = new_lower_dentry->d_inode;
2090     + atomic_inc(&inode->i_count);
2091     + unionfs_set_lower_inode_idx(dentry->d_inode,
2092     + ibstart(dentry->d_inode),
2093     + inode);
2094     + }
2095     + }
2096     + unionfs_postcopyup_setmnt(dentry);
2097     + /* sync inode times from copied-up inode to our inode */
2098     + unionfs_copy_attr_times(dentry->d_inode);
2099     + unionfs_check_inode(dir);
2100     + unionfs_check_dentry(dentry);
2101     +out:
2102     + return err;
2103     +}
2104     +
2105     +/*
2106     + * Copy up 'file' from branch 'bstart' to branch 'new_bindex' under the name
2107     + * "name". On success, fbstart(file) becomes new_bindex and the opened copy
2108     + * is installed as the lower file at that index. Returns 0 or an error.
2109     + */
2110     +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2111     + int bstart, int new_bindex, loff_t len)
2112     +{
2113     + int err = 0;
2114     + struct file *output_file = NULL;
2115     +
2116     + err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2117     + name, strlen(name), &output_file, len);
2118     + if (!err) {
2119     + fbstart(file) = new_bindex; /* the file now starts at the copy */
2120     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2121     + }
2122     +
2123     + return err;
2124     +}
2125     +
2126     +/*
2127     + * Like copyup_named_file(), but the copy keeps the dentry's own name: copy
2128     + * 'file' from branch 'bstart' to branch 'new_bindex'; returns 0 or an error.
2129     + */
2130     +int copyup_file(struct inode *dir, struct file *file, int bstart,
2131     + int new_bindex, loff_t len)
2132     +{
2133     + int err = 0;
2134     + struct file *output_file = NULL;
2135     + struct dentry *dentry = file->f_path.dentry;
2136     +
2137     + err = copyup_dentry(dir, dentry, bstart, new_bindex,
2138     + dentry->d_name.name, dentry->d_name.len,
2139     + &output_file, len);
2140     + if (!err) {
2141     + fbstart(file) = new_bindex; /* the file now starts at the copy */
2142     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2143     + }
2144     +
2145     + return err;
2146     +}
2147     +
2148     +/* drop negative lower dentries (and mnts) except at bindex; reset bstart/bend */
2149     +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2150     + int old_bstart, int old_bend)
2151     +{
2152     + int loop_start;
2153     + int loop_end;
2154     + int new_bstart = -1;
2155     + int new_bend = -1;
2156     + int i;
2157     +
2158     + loop_start = min(old_bstart, bindex);
2159     + loop_end = max(old_bend, bindex);
2160     +
2161     + /*
2162     + * This loop sets the bstart and bend for the new dentry by
2163     + * traversing from left to right. It also dputs all negative
2164     + * dentries except bindex
2165     + */
2166     + for (i = loop_start; i <= loop_end; i++) {
2167     + if (!unionfs_lower_dentry_idx(dentry, i))
2168     + continue;
2169     + /* the slot at bindex is always kept and counted in the range */
2170     + if (i == bindex) {
2171     + new_bend = i;
2172     + if (new_bstart < 0)
2173     + new_bstart = i;
2174     + continue;
2175     + }
2176     + /* negative lower dentry: release it and its mnt; else extend range */
2177     + if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2178     + dput(unionfs_lower_dentry_idx(dentry, i));
2179     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
2180     +
2181     + unionfs_mntput(dentry, i);
2182     + unionfs_set_lower_mnt_idx(dentry, i, NULL);
2183     + } else {
2184     + if (new_bstart < 0)
2185     + new_bstart = i;
2186     + new_bend = i;
2187     + }
2188     + }
2189     + /* no slot was counted at all: collapse the range onto bindex */
2190     + if (new_bstart < 0)
2191     + new_bstart = bindex;
2192     + if (new_bend < 0)
2193     + new_bend = bindex;
2194     + set_dbstart(dentry, new_bstart);
2195     + set_dbend(dentry, new_bend);
2196     +
2197     +}
2198     +
2199     +/* store igrab(lower's inode) at bindex; widen ibstart/ibend to cover bindex */
2200     +static void __set_inode(struct dentry *upper, struct dentry *lower,
2201     + int bindex)
2202     +{
2203     + unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2204     + igrab(lower->d_inode));
2205     + if (likely(ibstart(upper->d_inode) > bindex))
2206     + ibstart(upper->d_inode) = bindex; /* range only ever grows */
2207     + if (likely(ibend(upper->d_inode) < bindex))
2208     + ibend(upper->d_inode) = bindex;
2209     +
2210     +}
2211     +
2212     +/* store lower at bindex (no extra dget); widen dbstart/dbend to cover bindex */
2213     +static void __set_dentry(struct dentry *upper, struct dentry *lower,
2214     + int bindex)
2215     +{
2216     + unionfs_set_lower_dentry_idx(upper, bindex, lower);
2217     + if (likely(dbstart(upper) > bindex))
2218     + set_dbstart(upper, bindex); /* range only ever grows */
2219     + if (likely(dbend(upper) < bindex))
2220     + set_dbend(upper, bindex);
2221     +}
2222     +
2223     +/*
2224     + * Replicate the directory structure up to the given dentry in branch bindex;
2225     + * returns the lower dentry looked up for @name there, or an ERR_PTR.
2226     + */
2227     +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2228     + const char *name, int bindex)
2229     +{
2230     + int err;
2231     + struct dentry *child_dentry;
2232     + struct dentry *parent_dentry;
2233     + struct dentry *lower_parent_dentry = NULL;
2234     + struct dentry *lower_dentry = NULL;
2235     + const char *childname;
2236     + unsigned int childnamelen;
2237     + int nr_dentry;
2238     + int count = 0;
2239     + int old_bstart;
2240     + int old_bend;
2241     + struct dentry **path = NULL;
2242     + struct super_block *sb;
2243     +
2244     + verify_locked(dentry);
2245     +
2246     + err = is_robranch_super(dir->i_sb, bindex);
2247     + if (err) {
2248     + lower_dentry = ERR_PTR(err);
2249     + goto out;
2250     + }
2251     +
2252     + old_bstart = dbstart(dentry);
2253     + old_bend = dbend(dentry);
2254     +
2255     + lower_dentry = ERR_PTR(-ENOMEM);
2256     +
2257     + /* There is no sense allocating any less than the minimum. */
2258     + nr_dentry = 1;
2259     + path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2260     + if (unlikely(!path))
2261     + goto out;
2262     +
2263     + /* assume the negative dentry of unionfs as the parent dentry */
2264     + parent_dentry = dentry;
2265     +
2266     + /*
2267     + * This loop finds the first parent that exists in the given branch.
2268     + * We start building the directory structure from there. At the end
2269     + * of the loop, the following should hold:
2270     + * - child_dentry is the first nonexistent child
2271     + * - parent_dentry is the first existent parent
2272     + * - path[0] is the deepest child
2273     + * - path[count] is the first child to create
2274     + */
2275     + do {
2276     + child_dentry = parent_dentry;
2277     + /* find the parent directory dentry in unionfs */
2278     + parent_dentry = dget_parent(child_dentry);
2279     + /* find out the lower_parent_dentry in the given branch */
2280     + lower_parent_dentry =
2281     + unionfs_lower_dentry_idx(parent_dentry, bindex);
2282     +
2283     + /* grow path table */
2284     + if (count == nr_dentry) {
2285     + void *p;
2286     +
2287     + nr_dentry *= 2;
2288     + p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2289     + GFP_KERNEL);
2290     + if (unlikely(!p)) {
2291     + dput(parent_dentry); /* not in path[] yet */
2292     + dput(child_dentry); /* ditto (count >= 1 here) */
2293     + count--; /* out: dputs path[count]..path[1] */
2294     + lower_dentry = ERR_PTR(-ENOMEM);
2295     + goto out;
2296     + }
2297     + path = p;
2298     + }
2299     + /* store the child dentry */
2300     + path[count++] = child_dentry;
2301     + } while (!lower_parent_dentry);
2302     + count--;
2303     +
2304     + sb = dentry->d_sb;
2305     +
2306     + /*
2307     + * This code goes between the begin/end labels and basically
2308     + * emulates a while(child_dentry != dentry), only cleaner and
2309     + * shorter than what would be a much longer while loop.
2310     + */
2311     +begin:
2312     + /* get lower parent dir in the current branch */
2313     + lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2314     + dput(parent_dentry);
2315     +
2316     + /* init the values to lookup */
2317     + childname = child_dentry->d_name.name;
2318     + childnamelen = child_dentry->d_name.len;
2319     +
2320     + if (child_dentry != dentry) {
2321     + /* lookup child in the underlying file system */
2322     + lower_dentry = lookup_one_len(childname, lower_parent_dentry,
2323     + childnamelen);
2324     + if (IS_ERR(lower_dentry))
2325     + goto out;
2326     + } else {
2327     + /*
2328     + * Is the name a whiteout of the child name ? lookup the
2329     + * whiteout child in the underlying file system
2330     + */
2331     + lower_dentry = lookup_one_len(name, lower_parent_dentry,
2332     + strlen(name));
2333     + if (IS_ERR(lower_dentry))
2334     + goto out;
2335     +
2336     + /* Replace the current dentry (if any) with the new one */
2337     + dput(unionfs_lower_dentry_idx(dentry, bindex));
2338     + unionfs_set_lower_dentry_idx(dentry, bindex,
2339     + lower_dentry);
2340     +
2341     + __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2342     + goto out;
2343     + }
2344     +
2345     + if (lower_dentry->d_inode) {
2346     + /*
2347     + * since this already exists we dput to avoid
2348     + * multiple references on the same dentry
2349     + */
2350     + dput(lower_dentry);
2351     + } else {
2352     + struct sioq_args args;
2353     +
2354     + /* it's a negative dentry, create a new dir */
2355     + lower_parent_dentry = lock_parent(lower_dentry);
2356     +
2357     + args.mkdir.parent = lower_parent_dentry->d_inode;
2358     + args.mkdir.dentry = lower_dentry;
2359     + args.mkdir.mode = child_dentry->d_inode->i_mode;
2360     +
2361     + run_sioq(__unionfs_mkdir, &args);
2362     + err = args.err;
2363     +
2364     + if (!err)
2365     + err = copyup_permissions(dir->i_sb, child_dentry,
2366     + lower_dentry);
2367     + unlock_dir(lower_parent_dentry);
2368     + if (err) {
2369     + dput(lower_dentry);
2370     + lower_dentry = ERR_PTR(err);
2371     + goto out;
2372     + }
2373     +
2374     + }
2375     +
2376     + __set_inode(child_dentry, lower_dentry, bindex);
2377     + __set_dentry(child_dentry, lower_dentry, bindex);
2378     + /*
2379     + * update times of this dentry, but also the parent, because if
2380     + * we changed, the parent may have changed too.
2381     + */
2382     + unionfs_copy_attr_times(parent_dentry->d_inode);
2383     + unionfs_copy_attr_times(child_dentry->d_inode);
2384     +
2385     + parent_dentry = child_dentry;
2386     + child_dentry = path[--count];
2387     + goto begin;
2388     +out:
2389     + /* on error, drop the references still held on path[count]..path[1] */
2390     + if (IS_ERR(lower_dentry))
2391     + while (count)
2392     + dput(path[count--]);
2393     + kfree(path);
2394     + return lower_dentry;
2395     +}
2396     +
2397     +/*
2398     + * Post-copyup helper to ensure we have valid mnts: set lower mnt of
2399     + * dentry+parents to the first parent node that has an mnt.
2400     + */
2401     +void unionfs_postcopyup_setmnt(struct dentry *dentry)
2402     +{
2403     + struct dentry *parent, *hasone;
2404     + int bindex = dbstart(dentry);
2405     +
2406     + if (unionfs_lower_mnt_idx(dentry, bindex))
2407     + return; /* already has a lower mnt at its start branch */
2408     + hasone = dentry->d_parent;
2409     + /* this loop should stop at root dentry */
2410     + while (!unionfs_lower_mnt_idx(hasone, bindex))
2411     + hasone = hasone->d_parent;
2412     + parent = dentry; /* now fill every mnt-less level below hasone */
2413     + while (!unionfs_lower_mnt_idx(parent, bindex)) {
2414     + unionfs_set_lower_mnt_idx(parent, bindex,
2415     + unionfs_mntget(hasone, bindex));
2416     + parent = parent->d_parent;
2417     + }
2418     +}
2419     +
2420     +/*
2421     + * Post-copyup helper to release all non-directory source objects of a
2422     + * copied-up file. Regular files should have only one lower object.
2423     + */
2424     +void unionfs_postcopyup_release(struct dentry *dentry)
2425     +{
2426     + int bindex;
2427     +
2428     + BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2429     + for (bindex = dbstart(dentry)+1; bindex <= dbend(dentry); bindex++) {
2430     + if (unionfs_lower_mnt_idx(dentry, bindex)) {
2431     + unionfs_mntput(dentry, bindex);
2432     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
2433     + }
2434     + if (unionfs_lower_dentry_idx(dentry, bindex)) {
2435     + dput(unionfs_lower_dentry_idx(dentry, bindex));
2436     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
2437     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
2438     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
2439     + NULL);
2440     + }
2441     + }
2442     + bindex = dbstart(dentry); /* only the start branch remains */
2443     + set_dbend(dentry, bindex);
2444     + ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bindex;
2445     +}
2446     diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2447     new file mode 100644
2448     index 0000000..d154c32
2449     --- /dev/null
2450     +++ b/fs/unionfs/debug.c
2451     @@ -0,0 +1,533 @@
2452     +/*
2453     + * Copyright (c) 2003-2007 Erez Zadok
2454     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2455     + * Copyright (c) 2003-2007 Stony Brook University
2456     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2457     + *
2458     + * This program is free software; you can redistribute it and/or modify
2459     + * it under the terms of the GNU General Public License version 2 as
2460     + * published by the Free Software Foundation.
2461     + */
2462     +
2463     +#include "union.h"
2464     +
2465     +/*
2466     + * Helper debugging functions for maintainers (and for users to report back
2467     + * useful information back to maintainers)
2468     + */
2469     +
/*
 * Report which part of the code called us, at most once per checker run.
 *
 * The macro reads and sets an int named "printed_caller" that must exist in
 * the scope where it is expanded; once that flag is non-zero the macro does
 * nothing.
 */
#define PRINT_CALLER(fname, fxn, line)					\
	do {								\
		if (printed_caller)					\
			break;						\
		pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line));	\
		printed_caller = 1;					\
	} while (0)
2478     +
2479     +/*
2480     + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2481     + * the fan-out of various Unionfs objects. We check that no lower objects
2482     + * exist outside the start/end branch range; that all objects within are
2483     + * non-NULL (with some allowed exceptions); that for every lower file
2484     + * there's a lower dentry+inode; that the start/end ranges match for all
2485     + * corresponding lower objects; that open files/symlinks have only one lower
2486     + * objects, but directories can have several; and more.
2487     + */
2488     +void __unionfs_check_inode(const struct inode *inode,
2489     + const char *fname, const char *fxn, int line)
2490     +{
2491     + int bindex;
2492     + int istart, iend;
2493     + struct inode *lower_inode;
2494     + struct super_block *sb;
2495     + int printed_caller = 0;
2496     + void *poison_ptr;
2497     +
2498     + /* for inodes now */
2499     + BUG_ON(!inode);
2500     + sb = inode->i_sb;
2501     + istart = ibstart(inode);
2502     + iend = ibend(inode);
2503     + /* don't check inode if no lower branches */
2504     + if (istart < 0 && iend < 0)
2505     + return;
2506     + if (unlikely(istart > iend)) {
2507     + PRINT_CALLER(fname, fxn, line);
2508     + pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2509     + inode, istart, iend);
2510     + }
2511     + if (unlikely((istart == -1 && iend != -1) ||
2512     + (istart != -1 && iend == -1))) {
2513     + PRINT_CALLER(fname, fxn, line);
2514     + pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2515     + inode, istart, iend);
2516     + }
2517     + if (!S_ISDIR(inode->i_mode)) {
2518     + if (unlikely(iend != istart)) {
2519     + PRINT_CALLER(fname, fxn, line);
2520     + pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2521     + inode, istart, iend);
2522     + }
2523     + }
2524     +
2525     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2526     + if (unlikely(!UNIONFS_I(inode))) {
2527     + PRINT_CALLER(fname, fxn, line);
2528     + pr_debug(" Ci3: no inode_info %p\n", inode);
2529     + return;
2530     + }
2531     + if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2532     + PRINT_CALLER(fname, fxn, line);
2533     + pr_debug(" Ci4: no lower_inodes %p\n", inode);
2534     + return;
2535     + }
2536     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2537     + if (lower_inode) {
2538     + memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2539     + if (unlikely(bindex < istart || bindex > iend)) {
2540     + PRINT_CALLER(fname, fxn, line);
2541     + pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2542     + "istart/end=%d:%d\n", inode,
2543     + lower_inode, bindex, istart, iend);
2544     + } else if (unlikely(lower_inode == poison_ptr)) {
2545     + /* freed inode! */
2546     + PRINT_CALLER(fname, fxn, line);
2547     + pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2548     + "istart/end=%d:%d\n", inode,
2549     + lower_inode, bindex, istart, iend);
2550     + }
2551     + continue;
2552     + }
2553     + /* if we get here, then lower_inode == NULL */
2554     + if (bindex < istart || bindex > iend)
2555     + continue;
2556     + /*
2557     + * directories can have NULL lower inodes in b/t start/end,
2558     + * but NOT if at the start/end range.
2559     + */
2560     + if (unlikely(S_ISDIR(inode->i_mode) &&
2561     + bindex > istart && bindex < iend))
2562     + continue;
2563     + PRINT_CALLER(fname, fxn, line);
2564     + pr_debug(" Ci7: inode/linode=%p:%p "
2565     + "bindex=%d istart/end=%d:%d\n",
2566     + inode, lower_inode, bindex, istart, iend);
2567     + }
2568     +}
2569     +
2570     +void __unionfs_check_dentry(const struct dentry *dentry,
2571     + const char *fname, const char *fxn, int line)
2572     +{
2573     + int bindex;
2574     + int dstart, dend, istart, iend;
2575     + struct dentry *lower_dentry;
2576     + struct inode *inode, *lower_inode;
2577     + struct super_block *sb;
2578     + struct vfsmount *lower_mnt;
2579     + int printed_caller = 0;
2580     + void *poison_ptr;
2581     +
2582     + BUG_ON(!dentry);
2583     + sb = dentry->d_sb;
2584     + inode = dentry->d_inode;
2585     + dstart = dbstart(dentry);
2586     + dend = dbend(dentry);
2587     + /* don't check dentry/mnt if no lower branches */
2588     + if (dstart < 0 && dend < 0)
2589     + goto check_inode;
2590     + BUG_ON(dstart > dend);
2591     +
2592     + if (unlikely((dstart == -1 && dend != -1) ||
2593     + (dstart != -1 && dend == -1))) {
2594     + PRINT_CALLER(fname, fxn, line);
2595     + pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2596     + dentry, dstart, dend);
2597     + }
2598     + /*
2599     + * check for NULL dentries inside the start/end range, or
2600     + * non-NULL dentries outside the start/end range.
2601     + */
2602     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2603     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2604     + if (lower_dentry) {
2605     + if (unlikely(bindex < dstart || bindex > dend)) {
2606     + PRINT_CALLER(fname, fxn, line);
2607     + pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2608     + "bindex=%d dstart/end=%d:%d\n",
2609     + dentry, lower_dentry,
2610     + (lower_dentry ? lower_dentry->d_inode :
2611     + (void *) -1L),
2612     + bindex, dstart, dend);
2613     + }
2614     + } else { /* lower_dentry == NULL */
2615     + if (bindex < dstart || bindex > dend)
2616     + continue;
2617     + /*
2618     + * Directories can have NULL lower inodes in b/t
2619     + * start/end, but NOT if at the start/end range.
2620     + * Ignore this rule, however, if this is a NULL
2621     + * dentry or a deleted dentry.
2622     + */
2623     + if (unlikely(!d_deleted((struct dentry *) dentry) &&
2624     + inode &&
2625     + !(inode && S_ISDIR(inode->i_mode) &&
2626     + bindex > dstart && bindex < dend))) {
2627     + PRINT_CALLER(fname, fxn, line);
2628     + pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2629     + "bindex=%d dstart/end=%d:%d\n",
2630     + dentry, lower_dentry,
2631     + (lower_dentry ?
2632     + lower_dentry->d_inode :
2633     + (void *) -1L),
2634     + bindex, dstart, dend);
2635     + }
2636     + }
2637     + }
2638     +
2639     + /* check for vfsmounts same as for dentries */
2640     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2641     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2642     + if (lower_mnt) {
2643     + if (unlikely(bindex < dstart || bindex > dend)) {
2644     + PRINT_CALLER(fname, fxn, line);
2645     + pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2646     + "dstart/end=%d:%d\n", dentry,
2647     + lower_mnt, bindex, dstart, dend);
2648     + }
2649     + } else { /* lower_mnt == NULL */
2650     + if (bindex < dstart || bindex > dend)
2651     + continue;
2652     + /*
2653     + * Directories can have NULL lower inodes in b/t
2654     + * start/end, but NOT if at the start/end range.
2655     + * Ignore this rule, however, if this is a NULL
2656     + * dentry.
2657     + */
2658     + if (unlikely(inode &&
2659     + !(inode && S_ISDIR(inode->i_mode) &&
2660     + bindex > dstart && bindex < dend))) {
2661     + PRINT_CALLER(fname, fxn, line);
2662     + pr_debug(" CM1: dentry/lmnt=%p:%p "
2663     + "bindex=%d dstart/end=%d:%d\n",
2664     + dentry, lower_mnt, bindex,
2665     + dstart, dend);
2666     + }
2667     + }
2668     + }
2669     +
2670     +check_inode:
2671     + /* for inodes now */
2672     + if (!inode)
2673     + return;
2674     + istart = ibstart(inode);
2675     + iend = ibend(inode);
2676     + /* don't check inode if no lower branches */
2677     + if (istart < 0 && iend < 0)
2678     + return;
2679     + BUG_ON(istart > iend);
2680     + if (unlikely((istart == -1 && iend != -1) ||
2681     + (istart != -1 && iend == -1))) {
2682     + PRINT_CALLER(fname, fxn, line);
2683     + pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2684     + dentry, inode, istart, iend);
2685     + }
2686     + if (unlikely(istart != dstart)) {
2687     + PRINT_CALLER(fname, fxn, line);
2688     + pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2689     + dentry, inode, istart, dstart);
2690     + }
2691     + if (unlikely(iend != dend)) {
2692     + PRINT_CALLER(fname, fxn, line);
2693     + pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2694     + dentry, inode, iend, dend);
2695     + }
2696     +
2697     + if (!S_ISDIR(inode->i_mode)) {
2698     + if (unlikely(dend != dstart)) {
2699     + PRINT_CALLER(fname, fxn, line);
2700     + pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2701     + dentry, inode, dstart, dend);
2702     + }
2703     + if (unlikely(iend != istart)) {
2704     + PRINT_CALLER(fname, fxn, line);
2705     + pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2706     + dentry, inode, istart, iend);
2707     + }
2708     + }
2709     +
2710     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2711     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2712     + if (lower_inode) {
2713     + memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2714     + if (unlikely(bindex < istart || bindex > iend)) {
2715     + PRINT_CALLER(fname, fxn, line);
2716     + pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2717     + "istart/end=%d:%d\n", dentry,
2718     + lower_inode, bindex, istart, iend);
2719     + } else if (unlikely(lower_inode == poison_ptr)) {
2720     + /* freed inode! */
2721     + PRINT_CALLER(fname, fxn, line);
2722     + pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2723     + "istart/end=%d:%d\n", dentry,
2724     + lower_inode, bindex, istart, iend);
2725     + }
2726     + continue;
2727     + }
2728     + /* if we get here, then lower_inode == NULL */
2729     + if (bindex < istart || bindex > iend)
2730     + continue;
2731     + /*
2732     + * directories can have NULL lower inodes in b/t start/end,
2733     + * but NOT if at the start/end range.
2734     + */
2735     + if (unlikely(S_ISDIR(inode->i_mode) &&
2736     + bindex > istart && bindex < iend))
2737     + continue;
2738     + PRINT_CALLER(fname, fxn, line);
2739     + pr_debug(" CI7: dentry/linode=%p:%p "
2740     + "bindex=%d istart/end=%d:%d\n",
2741     + dentry, lower_inode, bindex, istart, iend);
2742     + }
2743     +
2744     + /*
2745     + * If it's a directory, then intermediate objects b/t start/end can
2746     + * be NULL. But, check that all three are NULL: lower dentry, mnt,
2747     + * and inode.
2748     + */
2749     + if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2750     + for (bindex = dstart+1; bindex < dend; bindex++) {
2751     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2752     + lower_dentry = unionfs_lower_dentry_idx(dentry,
2753     + bindex);
2754     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2755     + if (unlikely(!((lower_inode && lower_dentry &&
2756     + lower_mnt) ||
2757     + (!lower_inode &&
2758     + !lower_dentry && !lower_mnt)))) {
2759     + PRINT_CALLER(fname, fxn, line);
2760     + pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2761     + "bindex=%d dstart/end=%d:%d\n",
2762     + lower_mnt, lower_dentry, lower_inode,
2763     + bindex, dstart, dend);
2764     + }
2765     + }
2766     + /* check if lower inode is newer than upper one (it shouldn't) */
2767     + if (unlikely(is_newer_lower(dentry))) {
2768     + PRINT_CALLER(fname, fxn, line);
2769     + for (bindex = ibstart(inode); bindex <= ibend(inode);
2770     + bindex++) {
2771     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2772     + if (unlikely(!lower_inode))
2773     + continue;
2774     + pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2775     + "ctime/lctime=%lu.%lu/%lu.%lu\n",
2776     + bindex,
2777     + inode->i_mtime.tv_sec,
2778     + inode->i_mtime.tv_nsec,
2779     + lower_inode->i_mtime.tv_sec,
2780     + lower_inode->i_mtime.tv_nsec,
2781     + inode->i_ctime.tv_sec,
2782     + inode->i_ctime.tv_nsec,
2783     + lower_inode->i_ctime.tv_sec,
2784     + lower_inode->i_ctime.tv_nsec);
2785     + }
2786     + }
2787     +}
2788     +
2789     +void __unionfs_check_file(const struct file *file,
2790     + const char *fname, const char *fxn, int line)
2791     +{
2792     + int bindex;
2793     + int dstart, dend, fstart, fend;
2794     + struct dentry *dentry;
2795     + struct file *lower_file;
2796     + struct inode *inode;
2797     + struct super_block *sb;
2798     + int printed_caller = 0;
2799     +
2800     + BUG_ON(!file);
2801     + dentry = file->f_path.dentry;
2802     + sb = dentry->d_sb;
2803     + dstart = dbstart(dentry);
2804     + dend = dbend(dentry);
2805     + BUG_ON(dstart > dend);
2806     + fstart = fbstart(file);
2807     + fend = fbend(file);
2808     + BUG_ON(fstart > fend);
2809     +
2810     + if (unlikely((fstart == -1 && fend != -1) ||
2811     + (fstart != -1 && fend == -1))) {
2812     + PRINT_CALLER(fname, fxn, line);
2813     + pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2814     + file, dentry, fstart, fend);
2815     + }
2816     + if (unlikely(fstart != dstart)) {
2817     + PRINT_CALLER(fname, fxn, line);
2818     + pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2819     + file, dentry, fstart, dstart);
2820     + }
2821     + if (unlikely(fend != dend)) {
2822     + PRINT_CALLER(fname, fxn, line);
2823     + pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2824     + file, dentry, fend, dend);
2825     + }
2826     + inode = dentry->d_inode;
2827     + if (!S_ISDIR(inode->i_mode)) {
2828     + if (unlikely(fend != fstart)) {
2829     + PRINT_CALLER(fname, fxn, line);
2830     + pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2831     + file, inode, fstart, fend);
2832     + }
2833     + if (unlikely(dend != dstart)) {
2834     + PRINT_CALLER(fname, fxn, line);
2835     + pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
2836     + file, dentry, dstart, dend);
2837     + }
2838     + }
2839     +
2840     + /*
2841     + * check for NULL dentries inside the start/end range, or
2842     + * non-NULL dentries outside the start/end range.
2843     + */
2844     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2845     + lower_file = unionfs_lower_file_idx(file, bindex);
2846     + if (lower_file) {
2847     + if (unlikely(bindex < fstart || bindex > fend)) {
2848     + PRINT_CALLER(fname, fxn, line);
2849     + pr_debug(" CF5: file/lower=%p:%p bindex=%d "
2850     + "fstart/end=%d:%d\n", file,
2851     + lower_file, bindex, fstart, fend);
2852     + }
2853     + } else { /* lower_file == NULL */
2854     + if (bindex >= fstart && bindex <= fend) {
2855     + /*
2856     + * directories can have NULL lower inodes in
2857     + * b/t start/end, but NOT if at the
2858     + * start/end range.
2859     + */
2860     + if (unlikely(!(S_ISDIR(inode->i_mode) &&
2861     + bindex > fstart &&
2862     + bindex < fend))) {
2863     + PRINT_CALLER(fname, fxn, line);
2864     + pr_debug(" CF6: file/lower=%p:%p "
2865     + "bindex=%d fstart/end=%d:%d\n",
2866     + file, lower_file, bindex,
2867     + fstart, fend);
2868     + }
2869     + }
2870     + }
2871     + }
2872     +
2873     + __unionfs_check_dentry(dentry, fname, fxn, line);
2874     +}
2875     +
2876     +void __unionfs_check_nd(const struct nameidata *nd,
2877     + const char *fname, const char *fxn, int line)
2878     +{
2879     + struct file *file;
2880     + int printed_caller = 0;
2881     +
2882     + if (unlikely(!nd))
2883     + return;
2884     + if (nd->flags & LOOKUP_OPEN) {
2885     + file = nd->intent.open.file;
2886     + if (unlikely(file->f_path.dentry &&
2887     + strcmp(file->f_path.dentry->d_sb->s_type->name,
2888     + UNIONFS_NAME))) {
2889     + PRINT_CALLER(fname, fxn, line);
2890     + pr_debug(" CND1: lower_file of type %s\n",
2891     + file->f_path.dentry->d_sb->s_type->name);
2892     + BUG();
2893     + }
2894     + }
2895     +}
2896     +
2897     +/* useful to track vfsmount leaks that could cause EBUSY on unmount */
2898     +void __show_branch_counts(const struct super_block *sb,
2899     + const char *file, const char *fxn, int line)
2900     +{
2901     + int i;
2902     + struct vfsmount *mnt;
2903     +
2904     + pr_debug("BC:");
2905     + for (i = 0; i < sbmax(sb); i++) {
2906     + if (likely(sb->s_root))
2907     + mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
2908     + else
2909     + mnt = NULL;
2910     + printk(KERN_CONT "%d:",
2911     + (mnt ? atomic_read(&mnt->mnt_count) : -99));
2912     + }
2913     + printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
2914     +}
2915     +
2916     +void __show_inode_times(const struct inode *inode,
2917     + const char *file, const char *fxn, int line)
2918     +{
2919     + struct inode *lower_inode;
2920     + int bindex;
2921     +
2922     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
2923     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2924     + if (unlikely(!lower_inode))
2925     + continue;
2926     + pr_debug("IT(%lu:%d): %s:%s:%d "
2927     + "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
2928     + inode->i_ino, bindex,
2929     + file, fxn, line,
2930     + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2931     + lower_inode->i_mtime.tv_sec,
2932     + lower_inode->i_mtime.tv_nsec,
2933     + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2934     + lower_inode->i_ctime.tv_sec,
2935     + lower_inode->i_ctime.tv_nsec);
2936     + }
2937     +}
2938     +
2939     +void __show_dinode_times(const struct dentry *dentry,
2940     + const char *file, const char *fxn, int line)
2941     +{
2942     + struct inode *inode = dentry->d_inode;
2943     + struct inode *lower_inode;
2944     + int bindex;
2945     +
2946     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
2947     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2948     + if (!lower_inode)
2949     + continue;
2950     + pr_debug("DT(%s:%lu:%d): %s:%s:%d "
2951     + "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
2952     + dentry->d_name.name, inode->i_ino, bindex,
2953     + file, fxn, line,
2954     + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2955     + lower_inode->i_mtime.tv_sec,
2956     + lower_inode->i_mtime.tv_nsec,
2957     + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2958     + lower_inode->i_ctime.tv_sec,
2959     + lower_inode->i_ctime.tv_nsec);
2960     + }
2961     +}
2962     +
2963     +void __show_inode_counts(const struct inode *inode,
2964     + const char *file, const char *fxn, int line)
2965     +{
2966     + struct inode *lower_inode;
2967     + int bindex;
2968     +
2969     + if (unlikely(!inode)) {
2970     + pr_debug("SiC: Null inode\n");
2971     + return;
2972     + }
2973     + for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb);
2974     + bindex++) {
2975     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2976     + if (unlikely(!lower_inode))
2977     + continue;
2978     + pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
2979     + inode->i_ino, bindex,
2980     + atomic_read(&(inode)->i_count),
2981     + atomic_read(&(lower_inode)->i_count),
2982     + file, fxn, line);
2983     + }
2984     +}
2985     diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
2986     new file mode 100644
2987     index 0000000..cd15243
2988     --- /dev/null
2989     +++ b/fs/unionfs/dentry.c
2990     @@ -0,0 +1,548 @@
2991     +/*
2992     + * Copyright (c) 2003-2007 Erez Zadok
2993     + * Copyright (c) 2003-2006 Charles P. Wright
2994     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2995     + * Copyright (c) 2005-2006 Junjiro Okajima
2996     + * Copyright (c) 2005 Arun M. Krishnakumar
2997     + * Copyright (c) 2004-2006 David P. Quigley
2998     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
2999     + * Copyright (c) 2003 Puja Gupta
3000     + * Copyright (c) 2003 Harikesavan Krishnan
3001     + * Copyright (c) 2003-2007 Stony Brook University
3002     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3003     + *
3004     + * This program is free software; you can redistribute it and/or modify
3005     + * it under the terms of the GNU General Public License version 2 as
3006     + * published by the Free Software Foundation.
3007     + */
3008     +
3009     +#include "union.h"
3010     +
3011     +/*
3012     + * Revalidate a single dentry.
3013     + * Assume that dentry's info node is locked.
3014     + * Assume that parent(s) are all valid already, but
3015     + * the child may not yet be valid.
3016     + * Returns true if valid, false otherwise.
3017     + */
3018     +static bool __unionfs_d_revalidate_one(struct dentry *dentry,
3019     + struct nameidata *nd)
3020     +{
3021     + bool valid = true; /* default is valid */
3022     + struct dentry *lower_dentry;
3023     + int bindex, bstart, bend;
3024     + int sbgen, dgen;
3025     + int positive = 0;
3026     + int interpose_flag;
3027     + struct nameidata lowernd; /* TODO: be gentler to the stack */
3028     +
3029     + if (nd)
3030     + memcpy(&lowernd, nd, sizeof(struct nameidata));
3031     + else
3032     + memset(&lowernd, 0, sizeof(struct nameidata));
3033     +
3034     + verify_locked(dentry);
3035     + verify_locked(dentry->d_parent);
3036     +
3037     + /* if the dentry is unhashed, do NOT revalidate */
3038     + if (d_deleted(dentry))
3039     + goto out;
3040     +
3041     + BUG_ON(dbstart(dentry) == -1);
3042     + if (dentry->d_inode)
3043     + positive = 1;
3044     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3045     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3046     + /*
3047     + * If we are working on an unconnected dentry, then there is no
3048     + * revalidation to be done, because this file does not exist within
3049     + * the namespace, and Unionfs operates on the namespace, not data.
3050     + */
3051     + if (unlikely(sbgen != dgen)) {
3052     + struct dentry *result;
3053     + int pdgen;
3054     +
3055     + /* The root entry should always be valid */
3056     + BUG_ON(IS_ROOT(dentry));
3057     +
3058     + /* We can't work correctly if our parent isn't valid. */
3059     + pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
3060     + BUG_ON(pdgen != sbgen); /* should never happen here */
3061     +
3062     + /* Free the pointers for our inodes and this dentry. */
3063     + bstart = dbstart(dentry);
3064     + bend = dbend(dentry);
3065     + if (bstart >= 0) {
3066     + struct dentry *lower_dentry;
3067     + for (bindex = bstart; bindex <= bend; bindex++) {
3068     + lower_dentry =
3069     + unionfs_lower_dentry_idx(dentry,
3070     + bindex);
3071     + dput(lower_dentry);
3072     + }
3073     + }
3074     + set_dbstart(dentry, -1);
3075     + set_dbend(dentry, -1);
3076     +
3077     + interpose_flag = INTERPOSE_REVAL_NEG;
3078     + if (positive) {
3079     + interpose_flag = INTERPOSE_REVAL;
3080     +
3081     + bstart = ibstart(dentry->d_inode);
3082     + bend = ibend(dentry->d_inode);
3083     + if (bstart >= 0) {
3084     + struct inode *lower_inode;
3085     + for (bindex = bstart; bindex <= bend;
3086     + bindex++) {
3087     + lower_inode =
3088     + unionfs_lower_inode_idx(
3089     + dentry->d_inode,
3090     + bindex);
3091     + iput(lower_inode);
3092     + }
3093     + }
3094     + kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
3095     + UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
3096     + ibstart(dentry->d_inode) = -1;
3097     + ibend(dentry->d_inode) = -1;
3098     + }
3099     +
3100     + result = unionfs_lookup_backend(dentry, &lowernd,
3101     + interpose_flag);
3102     + if (result) {
3103     + if (IS_ERR(result)) {
3104     + valid = false;
3105     + goto out;
3106     + }
3107     + /*
3108     + * current unionfs_lookup_backend() doesn't return
3109     + * a valid dentry
3110     + */
3111     + dput(dentry);
3112     + dentry = result;
3113     + }
3114     +
3115     + if (unlikely(positive && UNIONFS_I(dentry->d_inode)->stale)) {
3116     + make_bad_inode(dentry->d_inode);
3117     + d_drop(dentry);
3118     + valid = false;
3119     + goto out;
3120     + }
3121     + goto out;
3122     + }
3123     +
3124     + /* The revalidation must occur across all branches */
3125     + bstart = dbstart(dentry);
3126     + bend = dbend(dentry);
3127     + BUG_ON(bstart == -1);
3128     + for (bindex = bstart; bindex <= bend; bindex++) {
3129     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3130     + if (!lower_dentry || !lower_dentry->d_op
3131     + || !lower_dentry->d_op->d_revalidate)
3132     + continue;
3133     + /*
3134     + * Don't pass nameidata to lower file system, because we
3135     + * don't want an arbitrary lower file being opened or
3136     + * returned to us: it may be useless to us because of the
3137     + * fanout nature of unionfs (cf. file/directory open-file
3138     + * invariants). We will open lower files as and when needed
3139     + * later on.
3140     + */
3141     + if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL))
3142     + valid = false;
3143     + }
3144     +
3145     + if (!dentry->d_inode ||
3146     + ibstart(dentry->d_inode) < 0 ||
3147     + ibend(dentry->d_inode) < 0) {
3148     + valid = false;
3149     + goto out;
3150     + }
3151     +
3152     + if (valid) {
3153     + /*
3154     + * If we get here, and we copy the meta-data from the lower
3155     + * inode to our inode, then it is vital that we have already
3156     + * purged all unionfs-level file data. We do that in the
3157     + * caller (__unionfs_d_revalidate_chain) by calling
3158     + * purge_inode_data.
3159     + */
3160     + unionfs_copy_attr_all(dentry->d_inode,
3161     + unionfs_lower_inode(dentry->d_inode));
3162     + fsstack_copy_inode_size(dentry->d_inode,
3163     + unionfs_lower_inode(dentry->d_inode));
3164     + }
3165     +
3166     +out:
3167     + return valid;
3168     +}
3169     +
3170     +/*
3171     + * Determine if the lower inode objects have changed from below the unionfs
3172     + * inode. Return true if changed, false otherwise.
3173     + *
3174     + * We check if the mtime or ctime have changed. However, the inode times
3175     + * can be changed by anyone without much protection, including
3176     + * asynchronously. This can sometimes cause unionfs to find that the lower
3177     + * file system doesn't change its inode times quick enough, resulting in a
3178     + * false positive indication (which is harmless, it just makes unionfs do
3179     + * extra work in re-validating the objects). To minimize the chances of
3180     + * these situations, we still consider such small time changes valid, but we
3181     + * don't print debugging messages unless the time changes are greater than
3182     + * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
3183     + * because significant changes are more likely due to users manually
3184     + * touching lower files.
3185     + */
3186     +bool is_newer_lower(const struct dentry *dentry)
3187     +{
3188     + int bindex;
3189     + struct inode *inode;
3190     + struct inode *lower_inode;
3191     +
3192     + /* ignore if we're called on semi-initialized dentries/inodes */
3193     + if (!dentry || !UNIONFS_D(dentry))
3194     + return false;
3195     + inode = dentry->d_inode;
3196     + if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3197     + ibstart(inode) < 0 || ibend(inode) < 0)
3198     + return false;
3199     +
3200     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3201     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
3202     + if (!lower_inode)
3203     + continue;
3204     +
3205     + /* check if mtime/ctime have changed */
3206     + if (unlikely(timespec_compare(&inode->i_mtime,
3207     + &lower_inode->i_mtime) < 0)) {
3208     + if ((lower_inode->i_mtime.tv_sec -
3209     + inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3210     + pr_info("unionfs: new lower inode mtime "
3211     + "(bindex=%d, name=%s)\n", bindex,
3212     + dentry->d_name.name);
3213     + show_dinode_times(dentry);
3214     + }
3215     + return true;
3216     + }
3217     + if (unlikely(timespec_compare(&inode->i_ctime,
3218     + &lower_inode->i_ctime) < 0)) {
3219     + if ((lower_inode->i_ctime.tv_sec -
3220     + inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3221     + pr_info("unionfs: new lower inode ctime "
3222     + "(bindex=%d, name=%s)\n", bindex,
3223     + dentry->d_name.name);
3224     + show_dinode_times(dentry);
3225     + }
3226     + return true;
3227     + }
3228     + }
3229     + return false; /* default: lower is not newer */
3230     +}
3231     +
3232     +/*
3233     + * Purge and invalidate as many data pages of a unionfs inode. This is
3234     + * called when the lower inode has changed, and we want to force processes
3235     + * to re-get the new data.
3236     + */
3237     +static inline void purge_inode_data(struct inode *inode)
3238     +{
3239     + /* remove all non-private mappings */
3240     + unmap_mapping_range(inode->i_mapping, 0, 0, 0);
3241     + /* invalidate as many pages as possible */
3242     + invalidate_mapping_pages(inode->i_mapping, 0, -1);
3243     + /*
3244     + * Don't try to truncate_inode_pages here, because this could lead
3245     + * to a deadlock between some of address_space ops and dentry
3246     + * revalidation: the address space op is invoked with a lock on our
3247     + * own page, and truncate_inode_pages will block on locked pages.
3248     + */
3249     +}
3250     +
3251     +void purge_sb_data(struct super_block *sb)
3252     +{
3253     + struct inode *inode;
3254     +
3255     + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
3256     + if (inode->i_state & (I_FREEING|I_WILL_FREE))
3257     + continue;
3258     + purge_inode_data(inode);
3259     + }
3260     +}
3261     +
3262     +/*
3263     + * Revalidate a parent chain of dentries, then the actual node.
3264     + * Assumes that dentry is locked, but will lock all parents if/when needed.
3265     + *
3266     + * If 'willwrite' is true, and the lower inode times are not in sync, then
3267     + * *don't* purge_inode_data, as it could deadlock if ->write calls us and we
3268     + * try to truncate a locked page. Besides, if unionfs is about to write
3269     + * data to a file, then the data unionfs is about to write is more
3270     + * authoritative than what's below, therefore we can safely overwrite the
3271     + * lower inode times and data. Returns true if the dentry is valid.
3272     + */
3273     +bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
3274     + bool willwrite)
3275     +{
3276     + bool valid = false; /* default is invalid */
3277     + struct dentry **chain = NULL; /* chain of dentries to reval */
3278     + int chain_len = 0;
3279     + struct dentry *dtmp;
3280     + int sbgen, dgen, i;
3281     + int saved_bstart, saved_bend, bindex;
3282     +
3283     + /* find length of chain needed to revalidate */
3284     + /* XXX: should I grab some global (dcache?) lock? */
3285     + chain_len = 0;
3286     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3287     + dtmp = dentry->d_parent;
3288     + if (dentry != dtmp)
3289     + unionfs_lock_dentry(dtmp, UNIONFS_DMUTEX_REVAL_PARENT);
3290     + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3291     + /* XXX: should we check if is_newer_lower all the way up? */
3292     + if (unlikely(is_newer_lower(dtmp))) {
3293     + /*
3294     + * Special case: the root dentry's generation number must
3295     + * always be valid, but its lower inode times don't have to
3296     + * be, so sync up the times only.
3297     + */
3298     + if (IS_ROOT(dtmp)) {
3299     + unionfs_copy_attr_times(dtmp->d_inode);
3300     + } else {
3301     + /*
3302     + * reset generation number to zero, guaranteed to be
3303     + * "old"
3304     + */
3305     + dgen = 0;
3306     + atomic_set(&UNIONFS_D(dtmp)->generation, dgen);
3307     + }
3308     + purge_inode_data(dtmp->d_inode);
3309     + }
3310     + if (dentry != dtmp)
3311     + unionfs_unlock_dentry(dtmp);
3312     + while (sbgen != dgen) {
3313     + /* The root entry should always be valid */
3314     + BUG_ON(IS_ROOT(dtmp));
3315     + chain_len++;
3316     + dtmp = dtmp->d_parent;
3317     + dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3318     + }
3319     + if (chain_len == 0)
3320     + goto out_this; /* shortcut if parents are OK */
3321     +
3322     + /*
3323     + * Allocate array of dentries to reval. We could use linked lists,
3324     + * but the number of entries we need to alloc here is often small,
3325     + * and short lived, so locality will be better.
3326     + */
3327     + chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
3328     + if (unlikely(!chain)) {
3329     + printk(KERN_CRIT "unionfs: no more memory in %s\n",
3330     + __FUNCTION__);
3331     + goto out;
3332     + }
3333     +
3334     + /*
3335     + * take a reference on each dentry in the chain, walking from child
3336     + * to parent and filling the array back to front; locking is done below.
3337     + */
3338     + dtmp = dentry->d_parent;
3339     + for (i = chain_len-1; i >= 0; i--) {
3340     + chain[i] = dget(dtmp);
3341     + dtmp = dtmp->d_parent;
3342     + }
3343     +
3344     + /*
3345     + * call __unionfs_d_revalidate_one() on each dentry, but in parent
3346     + * to child order.
3347     + */
3348     + for (i = 0; i < chain_len; i++) {
3349     + unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
3350     + if (chain[i] != chain[i]->d_parent)
3351     + unionfs_lock_dentry(chain[i]->d_parent,
3352     + UNIONFS_DMUTEX_REVAL_PARENT);
3353     + saved_bstart = dbstart(chain[i]);
3354     + saved_bend = dbend(chain[i]);
3355     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3356     + dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
3357     +
3358     + valid = __unionfs_d_revalidate_one(chain[i], nd);
3359     + /* XXX: is this the correct mntput condition?! */
3360     + if (valid && chain_len > 0 &&
3361     + sbgen != dgen && chain[i]->d_inode &&
3362     + S_ISDIR(chain[i]->d_inode->i_mode)) {
3363     + for (bindex = saved_bstart; bindex <= saved_bend;
3364     + bindex++)
3365     + unionfs_mntput(chain[i], bindex);
3366     + }
3367     + if (chain[i] != chain[i]->d_parent)
3368     + unionfs_unlock_dentry(chain[i]->d_parent);
3369     + unionfs_unlock_dentry(chain[i]);
3370     +
3371     + if (unlikely(!valid))
3372     + goto out_free;
3373     + }
3374     +
3375     +
3376     +out_this:
3377     + /* finally, revalidate this dentry, which the caller already locked */
3378     + verify_locked(dentry);
3379     + if (dentry != dentry->d_parent)
3380     + unionfs_lock_dentry(dentry->d_parent,
3381     + UNIONFS_DMUTEX_REVAL_PARENT);
3382     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3383     +
3384     + if (unlikely(is_newer_lower(dentry))) {
3385     + /* root dentry special case as aforementioned */
3386     + if (IS_ROOT(dentry)) {
3387     + unionfs_copy_attr_times(dentry->d_inode);
3388     + } else {
3389     + /*
3390     + * reset generation number to zero, guaranteed to be
3391     + * "old"
3392     + */
3393     + dgen = 0;
3394     + atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3395     + }
3396     + if (!willwrite)
3397     + purge_inode_data(dentry->d_inode);
3398     + }
3399     + valid = __unionfs_d_revalidate_one(dentry, nd);
3400     + if (dentry != dentry->d_parent)
3401     + unionfs_unlock_dentry(dentry->d_parent);
3402     +
3403     + /*
3404     + * If __unionfs_d_revalidate_one() succeeded above, then it will
3405     + * have incremented the refcnt of the mnt's, but also the branch
3406     + * indices of the dentry will have been updated (to take into
3407     + * account any branch insertions/deletions). So the current
3408     + * dbstart/dbend match the current, and new, indices of the mnts
3409     + * which __unionfs_d_revalidate_one has incremented. Note: the "if"
3410     + * test below does not depend on whether chain_len was 0 or greater.
3411     + */
3412     + if (valid && sbgen != dgen)
3413     + for (bindex = dbstart(dentry);
3414     + bindex <= dbend(dentry);
3415     + bindex++)
3416     + unionfs_mntput(dentry, bindex);
3417     +
3418     +out_free:
3419     + /* dput all dentries in chain, free the array, and return status */
3420     + if (chain_len > 0) {
3421     + for (i = 0; i < chain_len; i++)
3422     + dput(chain[i]);
3423     + kfree(chain);
3424     + }
3425     +out:
3426     + return valid;
3427     +}
3428     +
3429     +static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
3430     +{ /* installed as .d_revalidate in unionfs_dops */
3431     + int err;
3432     +
3433     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3434     +
3435     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3436     + err = __unionfs_d_revalidate_chain(dentry, nd, false); /* willwrite=false */
3437     + if (likely(err > 0)) { /* true==1: dentry is valid */
3438     + unionfs_check_dentry(dentry);
3439     + unionfs_check_nd(nd);
3440     + }
3441     + unionfs_unlock_dentry(dentry);
3442     +
3443     + unionfs_read_unlock(dentry->d_sb);
3444     +
3445     + return err; /* the bool result of the chain revalidation, as an int */
3446     +}
3447     +
3448     +/*
3449     + * At this point no one can reference this dentry, so we don't have to be
3450     + * careful about concurrent access.
3451     + */
3452     +static void unionfs_d_release(struct dentry *dentry)
3453     +{
3454     + int bindex, bstart, bend;
3455     +
3456     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3457     +
3458     + unionfs_check_dentry(dentry);
3459     + /* this could be a negative dentry, so check first */
3460     + if (unlikely(!UNIONFS_D(dentry))) {
3461     + printk(KERN_ERR "unionfs: dentry without private data: %.*s\n",
3462     + dentry->d_name.len, dentry->d_name.name);
3463     + goto out;
3464     + } else if (dbstart(dentry) < 0)
3465     + goto out_free; /* due to a (normal) failed lookup */
3466     +
3467     + /* Release all the lower dentries */
3468     + bstart = dbstart(dentry);
3469     + bend = dbend(dentry);
3470     + for (bindex = bstart; bindex <= bend; bindex++) {
3471     + dput(unionfs_lower_dentry_idx(dentry, bindex));
3472     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3473     + /* NULL lower mnt is ok if this is a negative dentry */
3474     + if (!dentry->d_inode && !unionfs_lower_mnt_idx(dentry, bindex))
3475     + continue;
3476     + unionfs_mntput(dentry, bindex);
3477     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3478     + }
3479     + /* free the lower_paths array held by the private data */
3480     + kfree(UNIONFS_D(dentry)->lower_paths);
3481     + UNIONFS_D(dentry)->lower_paths = NULL;
3482     +
3483     +out_free:
3484     + /* No need to unlock it, because the dentry is going away. */
3485     + free_dentry_private_data(dentry);
3486     +
3487     +out:
3488     + unionfs_read_unlock(dentry->d_sb);
3489     + return;
3490     +}
3491     +
3492     +/*
3493     + * Called when we're removing the last reference to our dentry. So we
3494     + * should drop all lower references (mnts, dentries, maybe the inode) too.
3495     + */
3496     +static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3497     +{
3498     + int bindex, rc;
3499     +
3500     + BUG_ON(!dentry);
3501     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3502     +
3503     + if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3504     + goto drop_lower_inodes; /* skip the lower mnt/dentry release loop */
3505     + for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3506     + if (unionfs_lower_mnt_idx(dentry, bindex)) {
3507     + unionfs_mntput(dentry, bindex);
3508     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3509     + }
3510     + if (unionfs_lower_dentry_idx(dentry, bindex)) {
3511     + dput(unionfs_lower_dentry_idx(dentry, bindex));
3512     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3513     + }
3514     + }
3515     + set_dbstart(dentry, -1);
3516     + set_dbend(dentry, -1);
3517     +
3518     +drop_lower_inodes:
3519     + rc = atomic_read(&inode->i_count);
3520     + if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3521     + /* see Documentation/filesystems/unionfs/issues.txt */
3522     + lockdep_off();
3523     + iput(unionfs_lower_inode(inode));
3524     + lockdep_on();
3525     + unionfs_set_lower_inode(inode, NULL);
3526     + /* XXX: may need to set start/end to -1? */
3527     + }
3528     +
3529     + iput(inode); /* always drop our own reference on the unionfs inode */
3530     +
3531     + unionfs_read_unlock(dentry->d_sb);
3532     +}
3533     +
3534     +struct dentry_operations unionfs_dops = { /* unionfs dentry callbacks */
3535     + .d_revalidate = unionfs_d_revalidate,
3536     + .d_release = unionfs_d_release,
3537     + .d_iput = unionfs_d_iput,
3538     +};
3539     diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3540     new file mode 100644
3541     index 0000000..a613862
3542     --- /dev/null
3543     +++ b/fs/unionfs/dirfops.c
3544     @@ -0,0 +1,290 @@
3545     +/*
3546     + * Copyright (c) 2003-2007 Erez Zadok
3547     + * Copyright (c) 2003-2006 Charles P. Wright
3548     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3549     + * Copyright (c) 2005-2006 Junjiro Okajima
3550     + * Copyright (c) 2005 Arun M. Krishnakumar
3551     + * Copyright (c) 2004-2006 David P. Quigley
3552     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3553     + * Copyright (c) 2003 Puja Gupta
3554     + * Copyright (c) 2003 Harikesavan Krishnan
3555     + * Copyright (c) 2003-2007 Stony Brook University
3556     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3557     + *
3558     + * This program is free software; you can redistribute it and/or modify
3559     + * it under the terms of the GNU General Public License version 2 as
3560     + * published by the Free Software Foundation.
3561     + */
3562     +
3563     +#include "union.h"
3564     +
3565     +/* Make sure our rdstate is playing by the rules (BUGs if it is not). */
3566     +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3567     +{
3568     + BUG_ON(rdstate->offset >= DIREOF); /* offset must stay below DIREOF */
3569     + BUG_ON(rdstate->cookie >= MAXRDCOOKIE); /* cookie must stay below MAXRDCOOKIE */
3570     +}
3571     +
3572     +struct unionfs_getdents_callback { /* context passed to unionfs_filldir */
3573     + struct unionfs_dir_state *rdstate; /* readdir state being filled */
3574     + void *dirent; /* opaque buffer forwarded to ->filldir */
3575     + int entries_written; /* entries accepted by unionfs_filldir this pass */
3576     + int filldir_called; /* number of unionfs_filldir invocations */
3577     + int filldir_error; /* error recorded by unionfs_filldir, 0 if none */
3578     + filldir_t filldir; /* the caller's filldir callback */
3579     + struct super_block *sb; /* set from the directory inode's i_sb */
3580     +};
3581     +
3582     +/* based on generic filldir in fs/readdir.c */
3583     +static int unionfs_filldir(void *dirent, const char *name, int namelen,
3584     + loff_t offset, u64 ino, unsigned int d_type)
3585     +{
3586     + struct unionfs_getdents_callback *buf = dirent;
3587     + struct filldir_node *found = NULL;
3588     + int err = 0;
3589     + int is_wh_entry = 0;
3590     +
3591     + buf->filldir_called++;
3592     +
3593     + if ((namelen > UNIONFS_WHLEN) &&
3594     + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3595     + name += UNIONFS_WHLEN; /* strip the whiteout prefix from the name */
3596     + namelen -= UNIONFS_WHLEN;
3597     + is_wh_entry = 1;
3598     + }
3599     +
3600     + found = find_filldir_node(buf->rdstate, name, namelen, is_wh_entry);
3601     +
3602     + if (found) {
3603     + /*
3604     + * If we had a non-whiteout entry in the dir cache, then mark
3605     + * it as a whiteout but leave it in the dir cache.
3606     + */
3607     + if (is_wh_entry && !found->whiteout)
3608     + found->whiteout = is_wh_entry;
3609     + goto out;
3610     + }
3611     +
3612     + /* if 'name' isn't a whiteout, filldir it. */
3613     + if (!is_wh_entry) {
3614     + off_t pos = rdstate2offset(buf->rdstate);
3615     + u64 unionfs_ino = ino;
3616     +
3617     + err = buf->filldir(buf->dirent, name, namelen, pos,
3618     + unionfs_ino, d_type);
3619     + buf->rdstate->offset++;
3620     + verify_rdstate_offset(buf->rdstate);
3621     + }
3622     + /*
3623     + * If we did fill it, stuff it in our hash, otherwise return an
3624     + * error. (Whiteouts skip filldir, so err is 0 and they are hashed.)
3625     + */
3626     + if (err) {
3627     + buf->filldir_error = err;
3628     + goto out;
3629     + }
3630     + buf->entries_written++;
3631     + err = add_filldir_node(buf->rdstate, name, namelen,
3632     + buf->rdstate->bindex, is_wh_entry);
3633     + if (err)
3634     + buf->filldir_error = err;
3635     +
3636     +out:
3637     + return err;
3638     +}
3639     +
3640     +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3641     +{ /* read the lower directories branch by branch through unionfs_filldir */
3642     + int err = 0;
3643     + struct file *lower_file = NULL;
3644     + struct inode *inode = NULL;
3645     + struct unionfs_getdents_callback buf = { .filldir_error = 0 }; /* read after the loop even if the loop body never filled it */
3646     + struct unionfs_dir_state *uds;
3647     + int bend;
3648     + loff_t offset;
3649     +
3650     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3651     +
3652     + err = unionfs_file_revalidate(file, false);
3653     + if (unlikely(err))
3654     + goto out;
3655     +
3656     + inode = file->f_path.dentry->d_inode;
3657     +
3658     + uds = UNIONFS_F(file)->rdstate;
3659     + if (!uds) {
3660     + if (file->f_pos == DIREOF) {
3661     + goto out; /* already at end of directory: nothing to return */
3662     + } else if (file->f_pos > 0) {
3663     + uds = find_rdstate(inode, file->f_pos);
3664     + if (unlikely(!uds)) {
3665     + err = -ESTALE;
3666     + goto out;
3667     + }
3668     + UNIONFS_F(file)->rdstate = uds;
3669     + } else {
3670     + init_rdstate(file);
3671     + uds = UNIONFS_F(file)->rdstate;
3672     + }
3673     + }
3674     + bend = fbend(file);
3675     +
3676     + while (uds->bindex <= bend) {
3677     + lower_file = unionfs_lower_file_idx(file, uds->bindex);
3678     + if (!lower_file) {
3679     + uds->bindex++; /* no lower file in this branch: try the next */
3680     + uds->dirpos = 0;
3681     + continue;
3682     + }
3683     +
3684     + /* prepare callback buffer */
3685     + buf.filldir_called = 0;
3686     + buf.filldir_error = 0;
3687     + buf.entries_written = 0;
3688     + buf.dirent = dirent;
3689     + buf.filldir = filldir;
3690     + buf.rdstate = uds;
3691     + buf.sb = inode->i_sb;
3692     +
3693     + /* Read starting from where we last left off. */
3694     + offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3695     + if (offset < 0) {
3696     + err = offset;
3697     + goto out;
3698     + }
3699     + err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3700     +
3701     + /* Save the position for when we continue. */
3702     + offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3703     + if (offset < 0) {
3704     + err = offset;
3705     + goto out;
3706     + }
3707     + uds->dirpos = offset;
3708     +
3709     + /* Copy the atime. */
3710     + fsstack_copy_attr_atime(inode,
3711     + lower_file->f_path.dentry->d_inode);
3712     +
3713     + if (err < 0)
3714     + goto out;
3715     +
3716     + if (buf.filldir_error)
3717     + break;
3718     +
3719     + if (!buf.entries_written) {
3720     + uds->bindex++; /* nothing new from this branch: move on */
3721     + uds->dirpos = 0;
3722     + }
3723     + }
3724     +
3725     + if (!buf.filldir_error && uds->bindex >= bend) {
3726     + /* Save the number of hash entries for next time. */
3727     + UNIONFS_I(inode)->hashsize = uds->hashentries;
3728     + free_rdstate(uds);
3729     + UNIONFS_F(file)->rdstate = NULL;
3730     + file->f_pos = DIREOF;
3731     + } else {
3732     + file->f_pos = rdstate2offset(uds);
3733     + }
3734     +
3735     +out:
3736     + unionfs_read_unlock(file->f_path.dentry->d_sb);
3737     + return err;
3738     +}
3739     +
3740     +/*
3741     + * This is not meant to be a generic repositioning function. If you do
3742     + * things that aren't supported, then we return EINVAL.
3743     + *
3744     + * What is allowed:
3745     + * (1) seeking to the same position that you are currently at
3746     + * This really has no effect, but returns where you are.
3747     + * (2) seeking to the beginning of the file
3748     + * This throws out all state, and lets you begin again.
3749     + */
3750     +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3751     +{
3752     + struct unionfs_dir_state *rdstate;
3753     + loff_t err;
3754     +
3755     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3756     +
3757     + err = unionfs_file_revalidate(file, false);
3758     + if (unlikely(err))
3759     + goto out;
3760     +
3761     + rdstate = UNIONFS_F(file)->rdstate;
3762     +
3763     + /*
3764     + * a zero offset either rewinds (SEEK_SET) or reports the current
3765     + * position (SEEK_CUR); non-zero offsets are handled further below.
3766     + */
3767     + if (!offset) {
3768     + switch (origin) { /* no default: other origins leave err at 0 */
3769     + case SEEK_SET:
3770     + if (rdstate) {
3771     + free_rdstate(rdstate);
3772     + UNIONFS_F(file)->rdstate = NULL;
3773     + }
3774     + init_rdstate(file);
3775     + err = 0;
3776     + break;
3777     + case SEEK_CUR:
3778     + err = file->f_pos;
3779     + break;
3780     + case SEEK_END:
3781     + /* Unsupported, because we would break everything. */
3782     + err = -EINVAL;
3783     + break;
3784     + }
3785     + } else {
3786     + switch (origin) { /* no default: other origins leave err at 0 */
3787     + case SEEK_SET:
3788     + if (rdstate) {
3789     + if (offset == rdstate2offset(rdstate))
3790     + err = offset;
3791     + else if (file->f_pos == DIREOF)
3792     + err = DIREOF;
3793     + else
3794     + err = -EINVAL;
3795     + } else {
3796     + struct inode *inode;
3797     + inode = file->f_path.dentry->d_inode;
3798     + rdstate = find_rdstate(inode, offset);
3799     + if (rdstate) {
3800     + UNIONFS_F(file)->rdstate = rdstate;
3801     + err = rdstate->offset;
3802     + } else {
3803     + err = -EINVAL;
3804     + }
3805     + }
3806     + break;
3807     + case SEEK_CUR:
3808     + case SEEK_END:
3809     + /* Unsupported, because we would break everything. */
3810     + err = -EINVAL;
3811     + break;
3812     + }
3813     + }
3814     +
3815     +out:
3816     + unionfs_read_unlock(file->f_path.dentry->d_sb);
3817     + return err;
3818     +}
3819     +
3820     +/*
3821     + * Trimmed directory operations, we shouldn't pass everything down since
3822     + * we don't want to operate on partial directories.
3823     + */
3824     +struct file_operations unionfs_dir_fops = {
3825     + .llseek = unionfs_dir_llseek,
3826     + .read = generic_read_dir,
3827     + .readdir = unionfs_readdir,
3828     + .unlocked_ioctl = unionfs_ioctl,
3829     + .open = unionfs_open,
3830     + .release = unionfs_file_release,
3831     + .flush = unionfs_flush,
3832     + .fsync = unionfs_fsync,
3833     + .fasync = unionfs_fasync,
3834     +};
3835     diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3836     new file mode 100644
3837     index 0000000..4b73bb6
3838     --- /dev/null
3839     +++ b/fs/unionfs/dirhelper.c
3840     @@ -0,0 +1,267 @@
3841     +/*
3842     + * Copyright (c) 2003-2007 Erez Zadok
3843     + * Copyright (c) 2003-2006 Charles P. Wright
3844     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3845     + * Copyright (c) 2005-2006 Junjiro Okajima
3846     + * Copyright (c) 2005 Arun M. Krishnakumar
3847     + * Copyright (c) 2004-2006 David P. Quigley
3848     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3849     + * Copyright (c) 2003 Puja Gupta
3850     + * Copyright (c) 2003 Harikesavan Krishnan
3851     + * Copyright (c) 2003-2007 Stony Brook University
3852     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3853     + *
3854     + * This program is free software; you can redistribute it and/or modify
3855     + * it under the terms of the GNU General Public License version 2 as
3856     + * published by the Free Software Foundation.
3857     + */
3858     +
3859     +#include "union.h"
3860     +
3861     +/*
3862     + * Delete all of the whiteouts in a given directory for rmdir.
3863     + * Returns 0 on success or a negative errno (-ENOMEM, lookup/unlink error).
3864     + * lower directory inode should be locked
3865     + */
3866     +int do_delete_whiteouts(struct dentry *dentry, int bindex,
3867     + struct unionfs_dir_state *namelist)
3868     +{
3869     + int err = 0;
3870     + struct dentry *lower_dir_dentry = NULL;
3871     + struct dentry *lower_dentry;
3872     + char *name = NULL, *p;
3873     + struct inode *lower_dir;
3874     + int i;
3875     + struct list_head *pos;
3876     + struct filldir_node *cursor;
3877     +
3878     + /* Find out lower parent dentry */
3879     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3880     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3881     + lower_dir = lower_dir_dentry->d_inode;
3882     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3883     +
3884     + err = -ENOMEM;
3885     + name = __getname();
3886     + if (unlikely(!name))
3887     + goto out;
3888     + strcpy(name, UNIONFS_WHPFX); /* every looked-up name starts with the prefix */
3889     + p = name + UNIONFS_WHLEN; /* p: where each entry name gets appended */
3890     +
3891     + err = 0;
3892     + for (i = 0; !err && i < namelist->size; i++) {
3893     + list_for_each(pos, &namelist->list[i]) {
3894     + cursor =
3895     + list_entry(pos, struct filldir_node,
3896     + file_list);
3897     + /* Only operate on whiteouts in this branch. */
3898     + if (cursor->bindex != bindex)
3899     + continue;
3900     + if (!cursor->whiteout)
3901     + continue;
3902     +
3903     + strcpy(p, cursor->name);
3904     + lower_dentry =
3905     + lookup_one_len(name, lower_dir_dentry,
3906     + cursor->namelen +
3907     + UNIONFS_WHLEN);
3908     + if (IS_ERR(lower_dentry)) {
3909     + err = PTR_ERR(lower_dentry);
3910     + break;
3911     + }
3912     + if (lower_dentry->d_inode)
3913     + err = vfs_unlink(lower_dir, lower_dentry);
3914     + dput(lower_dentry);
3915     + if (err)
3916     + break; /* the outer loop stops too, via its !err test */
3917     + }
3918     + }
3919     +
3920     + __putname(name);
3921     +
3922     + /* After all of the removals, we should copy the attributes once. */
3923     + fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
3924     +
3925     +out:
3926     + return err;
3927     +}
3928     +
3929     +/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
3930     +int delete_whiteouts(struct dentry *dentry, int bindex,
3931     + struct unionfs_dir_state *namelist)
3932     +{
3933     + int err;
3934     + struct super_block *sb;
3935     + struct dentry *lower_dir_dentry;
3936     + struct inode *lower_dir;
3937     + struct sioq_args args;
3938     +
3939     + sb = dentry->d_sb;
3940     +
3941     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3942     + BUG_ON(bindex < dbstart(dentry));
3943     + BUG_ON(bindex > dbend(dentry));
3944     + err = is_robranch_super(sb, bindex);
3945     + if (err)
3946     + goto out; /* is_robranch_super() reported an error for this branch */
3947     +
3948     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3949     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3950     + lower_dir = lower_dir_dentry->d_inode;
3951     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
3952     +
3953     + if (!permission(lower_dir, MAY_WRITE | MAY_EXEC, NULL)) {
3954     + err = do_delete_whiteouts(dentry, bindex, namelist); /* permission() returned 0: do it directly */
3955     + } else {
3956     + args.deletewh.namelist = namelist;
3957     + args.deletewh.dentry = dentry;
3958     + args.deletewh.bindex = bindex;
3959     + run_sioq(__delete_whiteouts, &args); /* otherwise hand the work to sioq */
3960     + err = args.err;
3961     + }
3962     +
3963     +out:
3964     + return err;
3965     +}
3966     +
3967     +#define RD_NONE 0
3968     +#define RD_CHECK_EMPTY 1
3969     +/* The callback structure for check_empty. */
3970     +struct unionfs_rdutil_callback {
3971     + int err; /* last value returned by readdir_util_callback */
3972     + int filldir_called; /* set to 1 each time the callback runs */
3973     + struct unionfs_dir_state *rdstate; /* name table searched and filled by the callback */
3974     + int mode; /* RD_CHECK_EMPTY: a new non-whiteout entry yields -ENOTEMPTY */
3975     +};
3976     +
3977     +/* This filldir function makes sure only whiteouts exist within a directory. */
3978     +static int readdir_util_callback(void *dirent, const char *name, int namelen,
3979     + loff_t offset, u64 ino, unsigned int d_type)
3980     +{
3981     + int err = 0;
3982     + struct unionfs_rdutil_callback *buf = dirent;
3983     + int whiteout = 0;
3984     + struct filldir_node *found;
3985     +
3986     + buf->filldir_called = 1;
3987     +
3988     + if (name[0] == '.' && (namelen == 1 ||
3989     + (name[1] == '.' && namelen == 2)))
3990     + goto out; /* "." and ".." never count as directory content */
3991     +
3992     + if (namelen > UNIONFS_WHLEN &&
3993     + !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3994     + namelen -= UNIONFS_WHLEN;
3995     + name += UNIONFS_WHLEN; /* strip the whiteout prefix */
3996     + whiteout = 1;
3997     + }
3998     +
3999     + found = find_filldir_node(buf->rdstate, name, namelen, whiteout);
4000     + /* If it was found in the table there was a previous whiteout. */
4001     + if (found)
4002     + goto out;
4003     +
4004     + /*
4005     + * if it wasn't found and isn't a whiteout, the directory isn't
4006     + * empty.
4007     + */
4008     + err = -ENOTEMPTY;
4009     + if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
4010     + goto out;
4011     +
4012     + err = add_filldir_node(buf->rdstate, name, namelen,
4013     + buf->rdstate->bindex, whiteout);
4014     +
4015     +out:
4016     + buf->err = err; /* also recorded here so check_empty can read it */
4017     + return err;
4018     +}
4019     +
4020     +/* Is a directory logically empty? Returns 0 if so, negative errno otherwise. */
4021     +int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
4022     +{
4023     + int err = 0;
4024     + struct dentry *lower_dentry = NULL;
4025     + struct vfsmount *mnt;
4026     + struct super_block *sb;
4027     + struct file *lower_file;
4028     + struct unionfs_rdutil_callback *buf = NULL;
4029     + int bindex, bstart, bend, bopaque;
4030     +
4031     + sb = dentry->d_sb;
4032     +
4033     +
4034     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
4035     +
4036     + err = unionfs_partial_lookup(dentry);
4037     + if (err)
4038     + goto out;
4039     +
4040     + bstart = dbstart(dentry);
4041     + bend = dbend(dentry);
4042     + bopaque = dbopaque(dentry);
4043     + if (0 <= bopaque && bopaque < bend)
4044     + bend = bopaque; /* stop the scan at the opaque branch index, if any */
4045     +
4046     + buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
4047     + if (unlikely(!buf)) {
4048     + err = -ENOMEM;
4049     + goto out;
4050     + }
4051     + buf->err = 0;
4052     + buf->mode = RD_CHECK_EMPTY;
4053     + buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
4054     + if (unlikely(!buf->rdstate)) {
4055     + err = -ENOMEM;
4056     + goto out;
4057     + }
4058     +
4059     + /* Process the lower directories with readdir_util_callback as a filldir. */
4060     + for (bindex = bstart; bindex <= bend; bindex++) {
4061     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4062     + if (!lower_dentry)
4063     + continue;
4064     + if (!lower_dentry->d_inode)
4065     + continue;
4066     + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
4067     + continue;
4068     +
4069     + dget(lower_dentry);
4070     + mnt = unionfs_mntget(dentry, bindex);
4071     + branchget(sb, bindex);
4072     + lower_file = dentry_open(lower_dentry, mnt, O_RDONLY);
4073     + if (IS_ERR(lower_file)) {
4074     + err = PTR_ERR(lower_file);
4075     + branchput(sb, bindex);
4076     + goto out;
4077     + }
4078     +
4079     + do {
4080     + buf->filldir_called = 0;
4081     + buf->rdstate->bindex = bindex;
4082     + err = vfs_readdir(lower_file,
4083     + readdir_util_callback, buf);
4084     + if (buf->err)
4085     + err = buf->err;
4086     + } while ((err >= 0) && buf->filldir_called); /* repeat until a pass makes no callback */
4087     +
4088     + /* fput calls dput for lower_dentry */
4089     + fput(lower_file);
4090     + branchput(sb, bindex);
4091     +
4092     + if (err < 0)
4093     + goto out;
4094     + }
4095     +
4096     +out:
4097     + if (buf) {
4098     + if (namelist && !err)
4099     + *namelist = buf->rdstate; /* on success the caller receives the rdstate */
4100     + else if (buf->rdstate)
4101     + free_rdstate(buf->rdstate);
4102     + kfree(buf);
4103     + }
4104     +
4105     +
4106     + return err;
4107     +}
4108     diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4109     new file mode 100644
4110     index 0000000..4d9a45f
4111     --- /dev/null
4112     +++ b/fs/unionfs/fanout.h
4113     @@ -0,0 +1,366 @@
4114     +/*
4115     + * Copyright (c) 2003-2007 Erez Zadok
4116     + * Copyright (c) 2003-2006 Charles P. Wright
4117     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4118     + * Copyright (c) 2005 Arun M. Krishnakumar
4119     + * Copyright (c) 2004-2006 David P. Quigley
4120     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4121     + * Copyright (c) 2003 Puja Gupta
4122     + * Copyright (c) 2003 Harikesavan Krishnan
4123     + * Copyright (c) 2003-2007 Stony Brook University
4124     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4125     + *
4126     + * This program is free software; you can redistribute it and/or modify
4127     + * it under the terms of the GNU General Public License version 2 as
4128     + * published by the Free Software Foundation.
4129     + */
4130     +
4131     +#ifndef _FANOUT_H_
4132     +#define _FANOUT_H_
4133     +
4134     +/*
4135     + * Inode to private data
4136     + *
4137     + * Since we use containers and the struct inode is _inside_ the
4138     + * unionfs_inode_info structure (as its vfs_inode member), UNIONFS_I will
4139     + * always (given a non-NULL inode pointer) return a valid non-NULL pointer.
4140     + */
4141     +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4142     +{
4143     + return container_of(inode, struct unionfs_inode_info, vfs_inode);
4144     +}
4145     +
4146     +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4147     +#define ibend(ino) (UNIONFS_I(ino)->bend)
4148     +
4149     +/* Superblock to private data (stored in s_fs_info) */
4150     +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4151     +#define sbstart(sb) 0
4152     +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4153     +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4154     +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4155     +
4156     +/* File to private data (stored in the file's private_data) */
4157     +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4158     +#define fbstart(file) (UNIONFS_F(file)->bstart)
4159     +#define fbend(file) (UNIONFS_F(file)->bend)
4160     +
4161     +/* helpers to manipulate branch IDs stored in our superblock */
4162     +static inline int branch_id(struct super_block *sb, int index)
4163     +{
4164     + BUG_ON(!sb || index < 0);
4165     + return UNIONFS_SB(sb)->data[index].branch_id;
4166     +}
4167     +
4168     +static inline void set_branch_id(struct super_block *sb, int index, int val)
4169     +{
4170     + BUG_ON(!sb || index < 0);
4171     + UNIONFS_SB(sb)->data[index].branch_id = val;
4172     +}
4173     +
4174     +static inline void new_branch_id(struct super_block *sb, int index)
4175     +{
4176     + BUG_ON(!sb || index < 0);
4177     + set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4178     +}
4179     +
4180     +/*
4181     + * Find new index of matching branch with an existing superblock of a known
4182     + * (possibly old) id. This is needed because branches could have been
4183     + * added/deleted causing the branches of any open files to shift.
4184     + *
4185     + * @sb: the new superblock which may have new/different branch IDs
4186     + * @id: the old/existing id we're looking for
4187     + * Returns index of newly found branch (0 or greater), -1 otherwise.
4188     + */
4189     +static inline int branch_id_to_idx(struct super_block *sb, int id)
4190     +{
4191     + int i;
4192     + for (i = 0; i < sbmax(sb); i++) {
4193     + if (branch_id(sb, i) == id)
4194     + return i;
4195     + }
4196     + /* in the non-ODF code, this should really never happen */
4197     + printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4198     + return -1;
4199     +}
4200     +
4201     +/* File to lower file. */
4202     +static inline struct file *unionfs_lower_file(const struct file *f)
4203     +{
4204     + BUG_ON(!f);
4205     + return UNIONFS_F(f)->lower_files[fbstart(f)];
4206     +}
4207     +
4208     +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4209     + int index)
4210     +{
4211     + BUG_ON(!f || index < 0);
4212     + return UNIONFS_F(f)->lower_files[index];
4213     +}
4214     +
4215     +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4216     + struct file *val)
4217     +{
4218     + BUG_ON(!f || index < 0);
4219     + UNIONFS_F(f)->lower_files[index] = val;
4220     + /* save branch ID (may be redundant?) */
4221     + UNIONFS_F(f)->saved_branch_ids[index] =
4222     + branch_id((f)->f_path.dentry->d_sb, index);
4223     +}
4224     +
4225     +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4226     +{
4227     + BUG_ON(!f);
4228     + unionfs_set_lower_file_idx((f), fbstart(f), (val));
4229     +}
4230     +
4231     +/* Inode to lower inode. */
4232     +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4233     +{
4234     + BUG_ON(!i);
4235     + return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4236     +}
4237     +
4238     +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4239     + int index)
4240     +{
4241     + BUG_ON(!i || index < 0);
4242     + return UNIONFS_I(i)->lower_inodes[index];
4243     +}
4244     +
4245     +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4246     + struct inode *val)
4247     +{
4248     + BUG_ON(!i || index < 0);
4249     + UNIONFS_I(i)->lower_inodes[index] = val;
4250     +}
4251     +
4252     +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4253     +{
4254     + BUG_ON(!i);
4255     + UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4256     +}
4257     +
4258     +/* Superblock to lower superblock. */
4259     +static inline struct super_block *unionfs_lower_super(
4260     + const struct super_block *sb)
4261     +{
4262     + BUG_ON(!sb);
4263     + return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4264     +}
4265     +
4266     +static inline struct super_block *unionfs_lower_super_idx(
4267     + const struct super_block *sb,
4268     + int index)
4269     +{
4270     + BUG_ON(!sb || index < 0);
4271     + return UNIONFS_SB(sb)->data[index].sb;
4272     +}
4273     +
4274     +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4275     + int index,
4276     + struct super_block *val)
4277     +{
4278     + BUG_ON(!sb || index < 0);
4279     + UNIONFS_SB(sb)->data[index].sb = val;
4280     +}
4281     +
4282     +static inline void unionfs_set_lower_super(struct super_block *sb,
4283     + struct super_block *val)
4284     +{
4285     + BUG_ON(!sb);
4286     + UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4287     +}
4288     +
4289     +/* Per-branch open-file count helpers. */
4290     +static inline int branch_count(const struct super_block *sb, int index)
4291     +{
4292     + BUG_ON(!sb || index < 0);
4293     + return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4294     +}
4295     +
4296     +static inline void set_branch_count(struct super_block *sb, int index, int val)
4297     +{
4298     + BUG_ON(!sb || index < 0);
4299     + atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4300     +}
4301     +
4302     +static inline void branchget(struct super_block *sb, int index)
4303     +{
4304     + BUG_ON(!sb || index < 0);
4305     + atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4306     +}
4307     +
4308     +static inline void branchput(struct super_block *sb, int index)
4309     +{
4310     + BUG_ON(!sb || index < 0);
4311     + atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4312     +}
4313     +
4314     +/* Dentry to private data and related accessors */
4315     +static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
4316     +{
4317     + BUG_ON(!dent);
4318     + return dent->d_fsdata;
4319     +}
4320     +
4321     +static inline int dbstart(const struct dentry *dent)
4322     +{
4323     + BUG_ON(!dent);
4324     + return UNIONFS_D(dent)->bstart;
4325     +}
4326     +
4327     +static inline void set_dbstart(struct dentry *dent, int val)
4328     +{
4329     + BUG_ON(!dent);
4330     + UNIONFS_D(dent)->bstart = val;
4331     +}
4332     +
4333     +static inline int dbend(const struct dentry *dent)
4334     +{
4335     + BUG_ON(!dent);
4336     + return UNIONFS_D(dent)->bend;
4337     +}
4338     +
4339     +static inline void set_dbend(struct dentry *dent, int val)
4340     +{
4341     + BUG_ON(!dent);
4342     + UNIONFS_D(dent)->bend = val;
4343     +}
4344     +
4345     +static inline int dbopaque(const struct dentry *dent)
4346     +{
4347     + BUG_ON(!dent);
4348     + return UNIONFS_D(dent)->bopaque;
4349     +}
4350     +
4351     +static inline void set_dbopaque(struct dentry *dent, int val)
4352     +{
4353     + BUG_ON(!dent);
4354     + UNIONFS_D(dent)->bopaque = val;
4355     +}
4356     +
4357     +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4358     + struct dentry *val)
4359     +{
4360     + BUG_ON(!dent || index < 0);
4361     + UNIONFS_D(dent)->lower_paths[index].dentry = val;
4362     +}
4363     +
4364     +static inline struct dentry *unionfs_lower_dentry_idx(
4365     + const struct dentry *dent,
4366     + int index)
4367     +{
4368     + BUG_ON(!dent || index < 0);
4369     + return UNIONFS_D(dent)->lower_paths[index].dentry;
4370     +}
4371     +
4372     +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4373     +{
4374     + BUG_ON(!dent);
4375     + return unionfs_lower_dentry_idx(dent, dbstart(dent));
4376     +}
4377     +
4378     +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4379     + struct vfsmount *mnt)
4380     +{
4381     + BUG_ON(!dent || index < 0);
4382     + UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4383     +}
4384     +
4385     +static inline struct vfsmount *unionfs_lower_mnt_idx(
4386     + const struct dentry *dent,
4387     + int index)
4388     +{
4389     + BUG_ON(!dent || index < 0);
4390     + return UNIONFS_D(dent)->lower_paths[index].mnt;
4391     +}
4392     +
4393     +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4394     +{
4395     + BUG_ON(!dent);
4396     + return unionfs_lower_mnt_idx(dent, dbstart(dent));
4397     +}
4398     +
4399     +/* Lock classes and helpers for locking a dentry. */
4400     +enum unionfs_dentry_lock_class {
4401     + UNIONFS_DMUTEX_NORMAL,
4402     + UNIONFS_DMUTEX_ROOT,
4403     + UNIONFS_DMUTEX_PARENT,
4404     + UNIONFS_DMUTEX_CHILD,
4405     + UNIONFS_DMUTEX_WHITEOUT,
4406     + UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
4407     + UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */
4408     +};
4409     +
4410     +static inline void unionfs_lock_dentry(struct dentry *d,
4411     + unsigned int subclass)
4412     +{
4413     + BUG_ON(!d);
4414     + mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);
4415     +}
4416     +
4417     +static inline void unionfs_unlock_dentry(struct dentry *d)
4418     +{
4419     + BUG_ON(!d);
4420     + mutex_unlock(&UNIONFS_D(d)->lock);
4421     +}
4422     +
4423     +static inline void verify_locked(struct dentry *d)
4424     +{
4425     + BUG_ON(!d);
4426     + BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4427     +}
4428     +
4429     +/* copy a/m/ctime from the lower branch with the newest times */
4430     +static inline void unionfs_copy_attr_times(struct inode *upper)
4431     +{
4432     + int bindex;
4433     + struct inode *lower;
4434     +
4435     + if (!upper || ibstart(upper) < 0)
4436     + return;
4437     + for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
4438     + lower = unionfs_lower_inode_idx(upper, bindex);
4439     + if (!lower)
4440     + continue; /* not all lower dir objects may exist */
4441     + if (unlikely(timespec_compare(&upper->i_mtime,
4442     + &lower->i_mtime) < 0))
4443     + upper->i_mtime = lower->i_mtime;
4444     + if (unlikely(timespec_compare(&upper->i_ctime,
4445     + &lower->i_ctime) < 0))
4446     + upper->i_ctime = lower->i_ctime;
4447     + if (unlikely(timespec_compare(&upper->i_atime,
4448     + &lower->i_atime) < 0))
4449     + upper->i_atime = lower->i_atime;
4450     + }
4451     +}
4452     +
4453     +/*
4454     + * A unionfs/fanout version of fsstack_copy_attr_all. Uses
4455     + * unionfs_get_nlinks to properly calculate the number of links to a file.
4456     + * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
4457     + * important if the lower inode is a directory type)
4458     + */
4459     +static inline void unionfs_copy_attr_all(struct inode *dest,
4460     + const struct inode *src)
4461     +{
4462     + dest->i_mode = src->i_mode;
4463     + dest->i_uid = src->i_uid;
4464     + dest->i_gid = src->i_gid;
4465     + dest->i_rdev = src->i_rdev;
4466     +
4467     + unionfs_copy_attr_times(dest);
4468     +
4469     + dest->i_blkbits = src->i_blkbits;
4470     + dest->i_flags = src->i_flags;
4471     +
4472     + /*
4473     + * Update the nlinks AFTER updating the above fields, because the
4474     + * get_links callback may depend on them.
4475     + */
4476     + dest->i_nlink = unionfs_get_nlinks(dest);
4477     +}
4478     +
4479     +#endif /* not _FANOUT_H_ */
4480     diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4481     new file mode 100644
4482     index 0000000..0c424f6
4483     --- /dev/null
4484     +++ b/fs/unionfs/file.c
4485     @@ -0,0 +1,184 @@
4486     +/*
4487     + * Copyright (c) 2003-2007 Erez Zadok
4488     + * Copyright (c) 2003-2006 Charles P. Wright
4489     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4490     + * Copyright (c) 2005-2006 Junjiro Okajima
4491     + * Copyright (c) 2005 Arun M. Krishnakumar
4492     + * Copyright (c) 2004-2006 David P. Quigley
4493     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4494     + * Copyright (c) 2003 Puja Gupta
4495     + * Copyright (c) 2003 Harikesavan Krishnan
4496     + * Copyright (c) 2003-2007 Stony Brook University
4497     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4498     + *
4499     + * This program is free software; you can redistribute it and/or modify
4500     + * it under the terms of the GNU General Public License version 2 as
4501     + * published by the Free Software Foundation.
4502     + */
4503     +
4504     +#include "union.h"
4505     +
4506     +static int unionfs_file_readdir(struct file *file, void *dirent,
4507     + filldir_t filldir)
4508     +{
4509     + return -ENOTDIR;
4510     +}
4511     +
4512     +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4513     +{
4514     + int err = 0;
4515     + bool willwrite;
4516     + struct file *lower_file;
4517     +
4518     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4519     +
4520     + /* This might be deferred to mmap's writepage */
4521     + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4522     + err = unionfs_file_revalidate(file, willwrite);
4523     + if (unlikely(err))
4524     + goto out;
4525     + unionfs_check_file(file);
4526     +
4527     + /*
4528     + * File systems which do not implement ->writepage may use
4529     + * generic_file_readonly_mmap as their ->mmap op. If you call
4530     + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4531     + * But we cannot call the lower ->mmap op, so we can't tell that
4532     + * writeable mappings won't work. Therefore, our only choice is to
4533     + * check if the lower file system supports the ->writepage, and if
4534     + * not, return EINVAL (the same error that
4535     + * generic_file_readonly_mmap returns in that case).
4536     + */
4537     + lower_file = unionfs_lower_file(file);
4538     + if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4539     + err = -EINVAL;
4540     + printk(KERN_ERR "unionfs: branch %d file system does not "
4541     + "support writeable mmap\n", fbstart(file));
4542     + } else {
4543     + err = generic_file_mmap(file, vma);
4544     + if (err)
4545     + printk(KERN_ERR
4546     + "unionfs: generic_file_mmap failed %d\n", err);
4547     + }
4548     +
4549     +out:
4550     + if (!err) {
4551     + /* copyup could cause parent dir times to change */
4552     + unionfs_copy_attr_times(file->f_path.dentry->d_parent->d_inode);
4553     + unionfs_check_file(file);
4554     + }
4555     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4556     + return err;
4557     +}
4558     +
4559     +int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync)
4560     +{
4561     + int bindex, bstart, bend;
4562     + struct file *lower_file;
4563     + struct dentry *lower_dentry;
4564     + struct inode *lower_inode, *inode;
4565     + int err = -EINVAL;
4566     +
4567     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4568     + err = unionfs_file_revalidate(file, true);
4569     + if (unlikely(err))
4570     + goto out;
4571     + unionfs_check_file(file);
4572     +
4573     + bstart = fbstart(file);
4574     + bend = fbend(file);
4575     + if (bstart < 0 || bend < 0)
4576     + goto out;
4577     +
4578     + inode = dentry->d_inode;
4579     + if (unlikely(!inode)) {
4580     + printk(KERN_ERR
4581     + "unionfs: null lower inode in unionfs_fsync\n");
4582     + goto out;
4583     + }
4584     + for (bindex = bstart; bindex <= bend; bindex++) {
4585     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4586     + if (!lower_inode || !lower_inode->i_fop->fsync)
4587     + continue;
4588     + lower_file = unionfs_lower_file_idx(file, bindex);
4589     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4590     + mutex_lock(&lower_inode->i_mutex);
4591     + err = lower_inode->i_fop->fsync(lower_file,
4592     + lower_dentry,
4593     + datasync);
4594     + mutex_unlock(&lower_inode->i_mutex);
4595     + if (err)
4596     + goto out;
4597     + }
4598     +
4599     + unionfs_copy_attr_times(inode);
4600     +
4601     +out:
4602     + unionfs_check_file(file);
4603     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4604     + return err;
4605     +}
4606     +
4607     +int unionfs_fasync(int fd, struct file *file, int flag)
4608     +{
4609     + int bindex, bstart, bend;
4610     + struct file *lower_file;
4611     + struct dentry *dentry;
4612     + struct inode *lower_inode, *inode;
4613     + int err = 0;
4614     +
4615     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4616     + err = unionfs_file_revalidate(file, true);
4617     + if (unlikely(err))
4618     + goto out;
4619     + unionfs_check_file(file);
4620     +
4621     + bstart = fbstart(file);
4622     + bend = fbend(file);
4623     + if (bstart < 0 || bend < 0)
4624     + goto out;
4625     +
4626     + dentry = file->f_path.dentry;
4627     + inode = dentry->d_inode;
4628     + if (unlikely(!inode)) {
4629     + printk(KERN_ERR
4630     + "unionfs: null lower inode in unionfs_fasync\n");
4631     + goto out;
4632     + }
4633     + for (bindex = bstart; bindex <= bend; bindex++) {
4634     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4635     + if (!lower_inode || !lower_inode->i_fop->fasync)
4636     + continue;
4637     + lower_file = unionfs_lower_file_idx(file, bindex);
4638     + mutex_lock(&lower_inode->i_mutex);
4639     + err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4640     + mutex_unlock(&lower_inode->i_mutex);
4641     + if (err)
4642     + goto out;
4643     + }
4644     +
4645     + unionfs_copy_attr_times(inode);
4646     +
4647     +out:
4648     + unionfs_check_file(file);
4649     + unionfs_read_unlock(file->f_path.dentry->d_sb);
4650     + return err;
4651     +}
4652     +
4653     +struct file_operations unionfs_main_fops = {
4654     + .llseek = generic_file_llseek,
4655     + .read = do_sync_read,
4656     + .aio_read = generic_file_aio_read,
4657     + .write = do_sync_write,
4658     + .aio_write = generic_file_aio_write,
4659     + .readdir = unionfs_file_readdir,
4660     + .unlocked_ioctl = unionfs_ioctl,
4661     + .mmap = unionfs_mmap,
4662     + .open = unionfs_open,
4663     + .flush = unionfs_flush,
4664     + .release = unionfs_file_release,
4665     + .fsync = unionfs_fsync,
4666     + .fasync = unionfs_fasync,
4667     + .splice_read = generic_file_splice_read,
4668     + .splice_write = generic_file_splice_write,
4669     +};
4670     diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4671     new file mode 100644
4672     index 0000000..0b92da2
4673     --- /dev/null
4674     +++ b/fs/unionfs/inode.c
4675     @@ -0,0 +1,1091 @@
4676     +/*
4677     + * Copyright (c) 2003-2007 Erez Zadok
4678     + * Copyright (c) 2003-2006 Charles P. Wright
4679     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4680     + * Copyright (c) 2005-2006 Junjiro Okajima
4681     + * Copyright (c) 2005 Arun M. Krishnakumar
4682     + * Copyright (c) 2004-2006 David P. Quigley
4683     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4684     + * Copyright (c) 2003 Puja Gupta
4685     + * Copyright (c) 2003 Harikesavan Krishnan
4686     + * Copyright (c) 2003-2007 Stony Brook University
4687     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4688     + *
4689     + * This program is free software; you can redistribute it and/or modify
4690     + * it under the terms of the GNU General Public License version 2 as
4691     + * published by the Free Software Foundation.
4692     + */
4693     +
4694     +#include "union.h"
4695     +
4696     +/*
4697     + * Helper function when creating new objects (create, symlink, and mknod).
4698     + * Checks to see if there's a whiteout in @lower_dentry's parent directory,
4699     + * whose name is taken from @dentry. Then tries to remove that whiteout, if
4700     + * found.
4701     + *
4702     + * Return 0 if no whiteout was found, or if one was found and successfully
4703     + * removed (a zero tells the caller that @lower_dentry belongs to a good
4704     + * branch to create the new object in). Return -ERRNO if an error occurred
4705     + * during whiteout lookup or in trying to unlink the whiteout.
4706     + */
4707     +static int check_for_whiteout(struct dentry *dentry,
4708     + struct dentry *lower_dentry)
4709     +{
4710     + int err = 0;
4711     + struct dentry *wh_dentry = NULL;
4712     + struct dentry *lower_dir_dentry;
4713     + char *name = NULL;
4714     +
4715     + /*
4716     + * check if whiteout exists in this branch, i.e. lookup .wh.foo first.
4717     + */
4718     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
4719     + if (unlikely(IS_ERR(name))) {
4720     + err = PTR_ERR(name);
4721     + name = NULL; /* "out" must not kfree an ERR_PTR */
4722     + goto out;
4723     + }
4724     +
4725     + wh_dentry = lookup_one_len(name, lower_dentry->d_parent,
4726     + dentry->d_name.len + UNIONFS_WHLEN);
4727     + if (IS_ERR(wh_dentry)) {
4728     + err = PTR_ERR(wh_dentry);
4729     + wh_dentry = NULL;
4730     + goto out;
4731     + }
4732     +
4733     + if (!wh_dentry->d_inode) /* no whiteout exists */
4734     + goto out;
4735     +
4736     + /* .wh.foo has been found, so let's unlink it */
4737     + lower_dir_dentry = lock_parent_wh(wh_dentry);
4738     + /* see Documentation/filesystems/unionfs/issues.txt */
4739     + lockdep_off();
4740     + err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
4741     + lockdep_on();
4742     +
4743     + /*
4744     + * Whiteouts are special files and should be deleted no matter what
4745     + * (as if they never existed), in order to allow this create
4746     + * operation to succeed. This is especially important in sticky
4747     + * directories: a whiteout may have been created by one user, but
4748     + * the newly created file may be created by another user.
4749     + * Therefore, in order to maintain Unix semantics, if the vfs_unlink
4750     + * above failed with EPERM, we directly unlink the whiteout before
4751     + * calling unlock_dir() on the lower parent directory. Note: in the
4752     + * ODF version of unionfs, whiteouts are handled much more cleanly.
4753     + */
4754     + if (err == -EPERM) {
4755     + struct inode *inode = lower_dir_dentry->d_inode;
4756     + err = inode->i_op->unlink(inode, wh_dentry);
4757     + }
4758     + unlock_dir(lower_dir_dentry);
4759     + if (err)
4760     + printk(KERN_ERR "unionfs: could not "
4761     + "unlink whiteout, err = %d\n", err);
4762     +
4763     +out:
4764     + dput(wh_dentry);
4765     + kfree(name);
4766     + return err;
4767     +}
4768     +
4769     +/*
4770     + * Find a writeable branch to create a new object in. Checks all writeable
4771     + * branches of the parent inode, in istart to iend order; if none are
4772     + * suitable, also tries branch 0 (which may require a copyup).
4773     + *
4774     + * Return a lower_dentry we can use to create object in, or ERR_PTR.
4775     + */
4776     +static struct dentry *find_writeable_branch(struct inode *parent,
4777     + struct dentry *dentry)
4778     +{
4779     + int err = -EINVAL;
4780     + int bindex, istart, iend;
4781     + struct dentry *lower_dentry = NULL;
4782     +
4783     + istart = ibstart(parent);
4784     + iend = ibend(parent);
4785     + if (istart < 0)
4786     + goto out;
4787     +
4788     +begin:
4789     + for (bindex = istart; bindex <= iend; bindex++) {
4790     + /* skip non-writeable branches */
4791     + err = is_robranch_super(dentry->d_sb, bindex);
4792     + if (err) {
4793     + err = -EROFS;
4794     + continue;
4795     + }
4796     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4797     + if (!lower_dentry)
4798     + continue;
4799     + /*
4800     + * check for whiteouts in writeable branch, and remove them
4801     + * if necessary.
4802     + */
4803     + err = check_for_whiteout(dentry, lower_dentry);
4804     + if (err)
4805     + continue;
4806     + }
4807     + /*
4808     + * If istart wasn't already branch 0, and we got any error, then try
4809     + * branch 0 (which may require copyup)
4810     + */
4811     + if (err && istart > 0) {
4812     + istart = iend = 0;
4813     + goto begin;
4814     + }
4815     +
4816     + /*
4817     + * If we tried even branch 0, and still got an error, abort. But if
4818     + * the error was an EROFS, then we should try to copyup.
4819     + */
4820     + if (err && err != -EROFS)
4821     + goto out;
4822     +
4823     + /*
4824     + * If we get here, then check if copyup needed. If lower_dentry is
4825     + * NULL, create the entire dentry directory structure in branch 0.
4826     + */
4827     + if (!lower_dentry) {
4828     + bindex = 0;
4829     + lower_dentry = create_parents(parent, dentry,
4830     + dentry->d_name.name, bindex);
4831     + if (IS_ERR(lower_dentry)) {
4832     + err = PTR_ERR(lower_dentry);
4833     + goto out;
4834     + }
4835     + }
4836     + err = 0; /* all's well */
4837     +out:
4838     + if (err)
4839     + return ERR_PTR(err);
4840     + return lower_dentry;
4841     +}
4842     +
4843     +/* Create a new file in a writeable branch; returns 0 or a negative errno. */
4844     +static int unionfs_create(struct inode *parent, struct dentry *dentry,
4845     + int mode, struct nameidata *nd)
4846     +{
4847     + int err = 0, valid = 0;
4848     + struct dentry *lower_dentry = NULL;
4849     + struct dentry *lower_parent_dentry = NULL;
4850     + struct nameidata lower_nd;
4851     +
4852     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4853     + unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
4854     + valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, false);
4855     + unionfs_unlock_dentry(dentry->d_parent);
4856     + /* lock dentry now: every "goto out" path below unlocks it */
4857     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4858     + if (unlikely(!valid)) {
4859     + err = -ESTALE; /* same as what real_lookup does */
4860     + goto out;
4861     + }
4862     + valid = __unionfs_d_revalidate_chain(dentry, nd, false);
4863     + /*
4864     + * It's only a bug if this dentry was not negative and couldn't be
4865     + * revalidated (shouldn't happen).
4866     + */
4867     + BUG_ON(!valid && dentry->d_inode);
4868     +
4869     + lower_dentry = find_writeable_branch(parent, dentry);
4870     + if (IS_ERR(lower_dentry)) {
4871     + err = PTR_ERR(lower_dentry);
4872     + goto out;
4873     + }
4874     +
4875     + lower_parent_dentry = lock_parent(lower_dentry);
4876     + if (IS_ERR(lower_parent_dentry)) {
4877     + err = PTR_ERR(lower_parent_dentry);
4878     + goto out;
4879     + }
4880     +
4881     + err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
4882     + if (unlikely(err < 0))
4883     + goto out_unlock; /* undo lock_parent() */
4884     + err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
4885     + &lower_nd);
4886     + release_lower_nd(&lower_nd, err);
4887     +
4888     + if (!err) {
4889     + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
4890     + if (!err) {
4891     + unionfs_copy_attr_times(parent);
4892     + fsstack_copy_inode_size(parent,
4893     + lower_parent_dentry->d_inode);
4894     + /* update no. of links on parent directory */
4895     + parent->i_nlink = unionfs_get_nlinks(parent);
4896     + }
4897     + }
4898     +out_unlock:
4899     + unlock_dir(lower_parent_dentry);
4900     +
4901     +out:
4902     + if (!err)
4903     + unionfs_postcopyup_setmnt(dentry);
4904     +
4905     + unionfs_check_inode(parent);
4906     + if (!err) {
4907     + unionfs_check_dentry(dentry);
4908     + unionfs_check_nd(nd);
4909     + }
4910     + unionfs_unlock_dentry(dentry);
4911     + unionfs_read_unlock(dentry->d_sb);
4912     + return err;
4913     +}
4914     +
4915     +/*
4916     + * unionfs_lookup is the only special function which takes a dentry, yet we
4917     + * do NOT want to call __unionfs_d_revalidate_chain because by definition,
4918     + * we don't have a valid dentry here yet.
4919     + */
4920     +static struct dentry *unionfs_lookup(struct inode *parent,
4921     + struct dentry *dentry,
4922     + struct nameidata *nd)
4923     +{
4924     + struct path path_save;
4925     + struct dentry *ret;
4926     +
4927     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4928     + if (dentry != dentry->d_parent)
4929     + unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
4930     +
4931     + /* save the dentry & vfsmnt from namei */
4932     + if (nd) {
4933     + path_save.dentry = nd->dentry;
4934     + path_save.mnt = nd->mnt;
4935     + }
4936     +
4937     + /*
4938     + * unionfs_lookup_backend returns a locked dentry upon success,
4939     + * so we'll have to unlock it below.
4940     + */
4941     + ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
4942     +
4943     + /* restore the dentry & vfsmnt in namei */
4944     + if (nd) {
4945     + nd->dentry = path_save.dentry;
4946     + nd->mnt = path_save.mnt;
4947     + }
4948     + if (!IS_ERR(ret)) {
4949     + if (ret)
4950     + dentry = ret;
4951     + /* parent times may have changed */
4952     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
4953     + }
4954     +
4955     + unionfs_check_inode(parent);
4956     + if (!IS_ERR(ret)) {
4957     + unionfs_check_dentry(dentry);
4958     + unionfs_check_nd(nd);
4959     + unionfs_unlock_dentry(dentry);
4960     + }
4961     +
4962     + if (dentry != dentry->d_parent) {
4963     + unionfs_check_dentry(dentry->d_parent);
4964     + unionfs_unlock_dentry(dentry->d_parent);
4965     + }
4966     + unionfs_read_unlock(dentry->d_sb);
4967     +
4968     + return ret;
4969     +}
4970     +
4971     +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
4972     + struct dentry *new_dentry)
4973     +{
4974     + int err = 0;
4975     + struct dentry *lower_old_dentry = NULL;
4976     + struct dentry *lower_new_dentry = NULL;
4977     + struct dentry *lower_dir_dentry = NULL;
4978     + struct dentry *whiteout_dentry;
4979     + char *name = NULL;
4980     +
4981     + unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4982     + unionfs_double_lock_dentry(new_dentry, old_dentry);
4983     +
4984     + if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) {
4985     + err = -ESTALE;
4986     + goto out;
4987     + }
4988     + if (unlikely(new_dentry->d_inode &&
4989     + !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) {
4990     + err = -ESTALE;
4991     + goto out;
4992     + }
4993     +
4994     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
4995     +
4996     + /*
4997     + * check if whiteout exists in the branch of new dentry, i.e. lookup
4998     + * .wh.foo first. If present, delete it
4999     + */
5000     + name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
5001     + if (unlikely(IS_ERR(name))) {
5002     + err = PTR_ERR(name);
5003     + goto out;
5004     + }
5005     +
5006     + whiteout_dentry = lookup_one_len(name, lower_new_dentry->d_parent,
5007     + new_dentry->d_name.len +
5008     + UNIONFS_WHLEN);
5009     + if (IS_ERR(whiteout_dentry)) {
5010     + err = PTR_ERR(whiteout_dentry);
5011     + goto out;
5012     + }
5013     +
5014     + if (!whiteout_dentry->d_inode) {
5015     + dput(whiteout_dentry);
5016     + whiteout_dentry = NULL;
5017     + } else {
5018     + /* found a .wh.foo entry, unlink it and then call vfs_link() */
5019     + lower_dir_dentry = lock_parent_wh(whiteout_dentry);
5020     + err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
5021     + if (!err) {
5022     + /* see Documentation/filesystems/unionfs/issues.txt */
5023     + lockdep_off();
5024     + err = vfs_unlink(lower_dir_dentry->d_inode,
5025     + whiteout_dentry);
5026     + lockdep_on();
5027     + }
5028     +
5029     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5030     + dir->i_nlink = unionfs_get_nlinks(dir);
5031     + unlock_dir(lower_dir_dentry);
5032     + lower_dir_dentry = NULL;
5033     + dput(whiteout_dentry);
5034     + if (err)
5035     + goto out;
5036     + }
5037     +
5038     + if (dbstart(old_dentry) != dbstart(new_dentry)) {
5039     + lower_new_dentry = create_parents(dir, new_dentry,
5040     + new_dentry->d_name.name,
5041     + dbstart(old_dentry));
5042     + err = PTR_ERR(lower_new_dentry);
5043     + if (IS_COPYUP_ERR(err))
5044     + goto docopyup;
5045     + if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5046     + goto out;
5047     + }
5048     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
5049     + lower_old_dentry = unionfs_lower_dentry(old_dentry);
5050     +
5051     + BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5052     + lower_dir_dentry = lock_parent(lower_new_dentry);
5053     + err = is_robranch(old_dentry);
5054     + if (!err) {
5055     + /* see Documentation/filesystems/unionfs/issues.txt */
5056     + lockdep_off();
5057     + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5058     + lower_new_dentry);
5059     + lockdep_on();
5060     + }
5061     + unlock_dir(lower_dir_dentry);
5062     +
5063     +docopyup:
5064     + if (IS_COPYUP_ERR(err)) {
5065     + int old_bstart = dbstart(old_dentry);
5066     + int bindex;
5067     +
5068     + for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
5069     + err = copyup_dentry(old_dentry->d_parent->d_inode,
5070     + old_dentry, old_bstart,
5071     + bindex, old_dentry->d_name.name,
5072     + old_dentry->d_name.len, NULL,
5073     + i_size_read(old_dentry->d_inode));
5074     + if (!err) {
5075     + lower_new_dentry =
5076     + create_parents(dir, new_dentry,
5077     + new_dentry->d_name.name,
5078     + bindex);
5079     + lower_old_dentry =
5080     + unionfs_lower_dentry(old_dentry);
5081     + lower_dir_dentry =
5082     + lock_parent(lower_new_dentry);
5083     + /*
5084     + * see
5085     + * Documentation/filesystems/unionfs/issues.txt
5086     + */
5087     + lockdep_off();
5088     + /* do vfs_link */
5089     + err = vfs_link(lower_old_dentry,
5090     + lower_dir_dentry->d_inode,
5091     + lower_new_dentry);
5092     + lockdep_on();
5093     + unlock_dir(lower_dir_dentry);
5094     + goto check_link;
5095     + }
5096     + }
5097     + goto out;
5098     + }
5099     +
5100     +check_link:
5101     + if (err || !lower_new_dentry->d_inode)
5102     + goto out;
5103     +
5104     + /* Its a hard link, so use the same inode */
5105     + new_dentry->d_inode = igrab(old_dentry->d_inode);
5106     + d_instantiate(new_dentry, new_dentry->d_inode);
5107     + unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5108     + fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5109     +
5110     + /* propagate number of hard-links */
5111     + old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5112     + /* new dentry's ctime may have changed due to hard-link counts */
5113     + unionfs_copy_attr_times(new_dentry->d_inode);
5114     +
5115     +out:
5116     + if (!new_dentry->d_inode)
5117     + d_drop(new_dentry);
5118     +
5119     + kfree(name);
5120     + if (!err)
5121     + unionfs_postcopyup_setmnt(new_dentry);
5122     +
5123     + unionfs_check_inode(dir);
5124     + unionfs_check_dentry(new_dentry);
5125     + unionfs_check_dentry(old_dentry);
5126     +
5127     + unionfs_unlock_dentry(new_dentry);
5128     + unionfs_unlock_dentry(old_dentry);
5129     + unionfs_read_unlock(old_dentry->d_sb);
5130     +
5131     + return err;
5132     +}
5133     +
5134     +static int unionfs_symlink(struct inode *parent, struct dentry *dentry,
5135     + const char *symname)
5136     +{
5137     + int err = 0; /* 0 or a negative errno; this is the return value */
5138     + struct dentry *lower_dentry = NULL;
5139     + struct dentry *wh_dentry = NULL; /* never assigned in this function */
5140     + struct dentry *lower_parent_dentry = NULL;
5141     + char *name = NULL; /* never assigned in this function */
5142     + int valid = 0;
5143     + umode_t mode;
5144     +
5145     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5146     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5147     +
5148     + if (unlikely(dentry->d_inode &&
5149     + !__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5150     + err = -ESTALE;
5151     + goto out;
5152     + }
5153     +
5154     + /*
5155     + * Meant to catch a positive dentry that could not be revalidated.
5156     + * NOTE(review): 'valid' is never set, so any positive dentry trips this.
5157     + */
5158     + BUG_ON(!valid && dentry->d_inode);
5159     +
5160     + lower_dentry = find_writeable_branch(parent, dentry);
5161     + if (IS_ERR(lower_dentry)) {
5162     + err = PTR_ERR(lower_dentry);
5163     + goto out;
5164     + }
5165     +
5166     + lower_parent_dentry = lock_parent(lower_dentry);
5167     + if (IS_ERR(lower_parent_dentry)) {
5168     + err = PTR_ERR(lower_parent_dentry);
5169     + goto out;
5170     + }
5171     +
5172     + mode = S_IALLUGO; /* mode handed to vfs_symlink() below */
5173     + err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry,
5174     + symname, mode);
5175     + if (!err) {
5176     + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5177     + if (!err) {
5178     + unionfs_copy_attr_times(parent);
5179     + fsstack_copy_inode_size(parent,
5180     + lower_parent_dentry->d_inode);
5181     + /* update no. of links on parent directory */
5182     + parent->i_nlink = unionfs_get_nlinks(parent);
5183     + }
5184     + }
5185     +
5186     + unlock_dir(lower_parent_dentry);
5187     +
5188     +out:
5189     + dput(wh_dentry);
5190     + kfree(name);
5191     +
5192     + if (!err)
5193     + unionfs_postcopyup_setmnt(dentry);
5194     +
5195     + unionfs_check_inode(parent);
5196     + if (!err) /* the dentry is only sanity-checked on success */
5197     + unionfs_check_dentry(dentry);
5198     + unionfs_unlock_dentry(dentry);
5199     + unionfs_read_unlock(dentry->d_sb);
5200     + return err;
5201     +}
5202     +
5203     +/* Make a directory in the first non-readonly branch at or left of dbstart. */
5204     +static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
5205     +{
5206     + int err = 0;
5207     + struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5208     + struct dentry *lower_parent_dentry = NULL;
5209     + int bindex = 0, bstart;
5210     + char *name = NULL;
5211     + int whiteout_unlinked = 0; /* set below but never read here */
5212     + struct sioq_args args;
5213     +
5214     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5215     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5216     +
5217     + if (unlikely(dentry->d_inode &&
5218     + !__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5219     + err = -ESTALE;
5220     + goto out;
5221     + }
5222     +
5223     + bstart = dbstart(dentry);
5224     + lower_dentry = unionfs_lower_dentry(dentry);
5225     +
5226     + /*
5227     + * check if whiteout exists in this branch, i.e. lookup .wh.foo
5228     + * first.
5229     + */
5230     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5231     + if (unlikely(IS_ERR(name))) {
5232     + err = PTR_ERR(name);
5233     + name = NULL; /* 'out' does kfree(name): never pass it an ERR_PTR */
5234     + goto out;
5235     + }
5236     +
5237     + whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5238     + dentry->d_name.len + UNIONFS_WHLEN);
5239     + if (IS_ERR(whiteout_dentry)) {
5240     + err = PTR_ERR(whiteout_dentry);
5241     + goto out;
5242     + }
5243     +
5244     + if (!whiteout_dentry->d_inode) {
5245     + dput(whiteout_dentry);
5246     + whiteout_dentry = NULL;
5247     + } else {
5248     + lower_parent_dentry = lock_parent_wh(whiteout_dentry);
5249     +
5250     + /* found a .wh.foo entry, remove it then do vfs_mkdir */
5251     + err = is_robranch_super(dentry->d_sb, bstart);
5252     + if (!err) {
5253     + args.unlink.parent = lower_parent_dentry->d_inode;
5254     + args.unlink.dentry = whiteout_dentry;
5255     + run_sioq(__unionfs_unlink, &args);
5256     + err = args.err;
5257     + }
5258     + dput(whiteout_dentry);
5259     +
5260     + unlock_dir(lower_parent_dentry);
5261     +
5262     + if (err) {
5263     + /* give up unless this is an error that a copyup can cure */
5264     + if (!IS_COPYUP_ERR(err))
5265     + goto out;
5266     + bstart--; /* start the search one branch to the left */
5267     + } else {
5268     + whiteout_unlinked = 1;
5269     + }
5270     + }
5271     +
5272     + for (bindex = bstart; bindex >= 0; bindex--) {
5273     + int i;
5274     + int bend = dbend(dentry);
5275     +
5276     + if (is_robranch_super(dentry->d_sb, bindex))
5277     + continue;
5278     +
5279     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5280     + if (!lower_dentry) {
5281     + lower_dentry = create_parents(parent, dentry,
5282     + dentry->d_name.name,
5283     + bindex);
5284     + if (!lower_dentry || IS_ERR(lower_dentry)) {
5285     + printk(KERN_ERR "unionfs: lower dentry "
5286     + " NULL for bindex = %d\n", bindex);
5287     + continue;
5288     + }
5289     + }
5290     +
5291     + lower_parent_dentry = lock_parent(lower_dentry);
5292     + if (IS_ERR(lower_parent_dentry)) {
5293     + err = PTR_ERR(lower_parent_dentry);
5294     + goto out;
5295     + }
5296     +
5297     + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5298     + mode);
5299     +
5300     + unlock_dir(lower_parent_dentry);
5301     +
5302     + /* did the mkdir succeed? */
5303     + if (err)
5304     + break;
5305     +
5306     + for (i = bindex + 1; i <= bend; i++) { /* bend is inclusive */
5307     + if (unionfs_lower_dentry_idx(dentry, i)) {
5308     + dput(unionfs_lower_dentry_idx(dentry, i));
5309     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
5310     + }
5311     + }
5312     + set_dbend(dentry, bindex);
5313     +
5314     + /*
5315     + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5316     + * err.
5317     + */
5318     + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5319     + if (!err) {
5320     + unionfs_copy_attr_times(parent);
5321     + fsstack_copy_inode_size(parent,
5322     + lower_parent_dentry->d_inode);
5323     +
5324     + /* update number of links on parent directory */
5325     + parent->i_nlink = unionfs_get_nlinks(parent);
5326     + }
5327     +
5328     + err = make_dir_opaque(dentry, dbstart(dentry));
5329     + if (err) {
5330     + printk(KERN_ERR "unionfs: mkdir: error creating "
5331     + ".wh.__dir_opaque: %d\n", err);
5332     + goto out;
5333     + }
5334     +
5335     + /* we are done! */
5336     + break;
5337     + }
5338     +
5339     +out:
5340     + if (!dentry->d_inode)
5341     + d_drop(dentry);
5342     +
5343     + kfree(name);
5344     +
5345     + if (!err) {
5346     + unionfs_copy_attr_times(dentry->d_inode);
5347     + unionfs_postcopyup_setmnt(dentry);
5348     + }
5349     + unionfs_check_inode(parent);
5350     + unionfs_check_dentry(dentry);
5351     + unionfs_unlock_dentry(dentry);
5352     + unionfs_read_unlock(dentry->d_sb);
5353     +
5354     + return err;
5355     +}
5356     +
5357     +static int unionfs_mknod(struct inode *parent, struct dentry *dentry, int mode,
5358     + dev_t dev)
5359     +{
5360     + int err = 0; /* 0 or a negative errno; this is the return value */
5361     + struct dentry *lower_dentry = NULL;
5362     + struct dentry *wh_dentry = NULL; /* never assigned in this function */
5363     + struct dentry *lower_parent_dentry = NULL;
5364     + char *name = NULL; /* never assigned in this function */
5365     + int valid = 0;
5366     +
5367     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5368     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5369     +
5370     + if (unlikely(dentry->d_inode &&
5371     + !__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5372     + err = -ESTALE;
5373     + goto out;
5374     + }
5375     +
5376     + /*
5377     + * Meant to catch a positive dentry that could not be revalidated.
5378     + * NOTE(review): 'valid' is never set, so any positive dentry trips this.
5379     + */
5380     + BUG_ON(!valid && dentry->d_inode);
5381     +
5382     + lower_dentry = find_writeable_branch(parent, dentry);
5383     + if (IS_ERR(lower_dentry)) {
5384     + err = PTR_ERR(lower_dentry);
5385     + goto out;
5386     + }
5387     +
5388     + lower_parent_dentry = lock_parent(lower_dentry);
5389     + if (IS_ERR(lower_parent_dentry)) {
5390     + err = PTR_ERR(lower_parent_dentry);
5391     + goto out;
5392     + }
5393     +
5394     + err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5395     + if (!err) {
5396     + err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5397     + if (!err) {
5398     + unionfs_copy_attr_times(parent);
5399     + fsstack_copy_inode_size(parent,
5400     + lower_parent_dentry->d_inode);
5401     + /* update no. of links on parent directory */
5402     + parent->i_nlink = unionfs_get_nlinks(parent);
5403     + }
5404     + }
5405     +
5406     + unlock_dir(lower_parent_dentry);
5407     +
5408     +out:
5409     + dput(wh_dentry);
5410     + kfree(name);
5411     +
5412     + if (!err)
5413     + unionfs_postcopyup_setmnt(dentry);
5414     +
5415     + unionfs_check_inode(parent);
5416     + if (!err) /* the dentry is only sanity-checked on success */
5417     + unionfs_check_dentry(dentry);
5418     + unionfs_unlock_dentry(dentry);
5419     + unionfs_read_unlock(dentry->d_sb);
5420     + return err;
5421     +}
5422     +
5423     +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5424     + int bufsiz)
5425     +{
5426     + int err; /* result of the lower ->readlink, or a negative errno */
5427     + struct dentry *lower_dentry;
5428     +
5429     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5430     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5431     +
5432     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5433     + err = -ESTALE;
5434     + goto out;
5435     + }
5436     +
5437     + lower_dentry = unionfs_lower_dentry(dentry);
5438     +
5439     + if (!lower_dentry->d_inode->i_op ||
5440     + !lower_dentry->d_inode->i_op->readlink) {
5441     + err = -EINVAL; /* the lower inode has no ->readlink method */
5442     + goto out;
5443     + }
5444     +
5445     + err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5446     + buf, bufsiz); /* delegate to the lower inode's method */
5447     + if (err > 0) /* copy atime from the lower inode on a positive return */
5448     + fsstack_copy_attr_atime(dentry->d_inode,
5449     + lower_dentry->d_inode);
5450     +
5451     +out:
5452     + unionfs_check_dentry(dentry);
5453     + unionfs_unlock_dentry(dentry);
5454     + unionfs_read_unlock(dentry->d_sb);
5455     +
5456     + return err;
5457     +}
5458     +
5459     +/*
5460     + * unionfs_follow_link reads the link target into a kmalloc'ed, page-sized
5461     + * buffer through our ->readlink method and publishes it with
5462     + * nd_set_link(); it returns ERR_PTR(0) on success or ERR_PTR(-errno).
5463     + * Our unionfs_readlink *does* lock and revalidate our dentry. Therefore,
5464     + * to prevent a deadlock, we neither lock nor revalidate the dentry here.
5465     + * That is safe because unionfs_follow_link does not do anything (prior to
5466     + * calling ->readlink) which could become inconsistent due to branch
5467     + * management. We also don't need to lock our super because this function
5468     + * isn't affected by branch-management.
5469     + */
5470     +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5471     +{
5472     + char *buf;
5473     + int len = PAGE_SIZE - 1, err; /* keep one byte for the trailing NUL */
5474     + mm_segment_t old_fs;
5475     +
5476     + /* This is freed by the put_link method assuming a successful call. */
5477     + buf = kmalloc(len + 1, GFP_KERNEL);
5478     + if (unlikely(!buf)) {
5479     + err = -ENOMEM;
5480     + goto out;
5481     + }
5482     +
5483     + /* read at most len bytes of the symlink; buf is a kernel address */
5484     + old_fs = get_fs();
5485     + set_fs(KERNEL_DS);
5486     + err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
5487     + set_fs(old_fs);
5488     + if (err < 0) {
5489     + kfree(buf);
5490     + buf = NULL;
5491     + goto out;
5492     + }
5493     + buf[err] = 0; /* err <= len, so this stays inside the allocation */
5494     + nd_set_link(nd, buf);
5495     + err = 0;
5496     +
5497     +out:
5498     + unionfs_check_dentry(dentry);
5499     + unionfs_check_nd(nd);
5500     + return ERR_PTR(err);
5501     +}
5502     +
5503     +/* Free the link buffer stored in nd. FIXME: We may not have to lock here */
5504     +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5505     + void *cookie)
5506     +{
5507     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5508     +
5509     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5510     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, nd, false)))
5511     + printk(KERN_ERR /* only logged; the buffer is still freed below */
5512     + "unionfs: put_link failed to revalidate dentry\n");
5513     +
5514     + unionfs_check_dentry(dentry);
5515     + unionfs_check_nd(nd);
5516     + kfree(nd_get_link(nd)); /* the pointer follow_link gave to nd_set_link */
5517     + unionfs_unlock_dentry(dentry);
5518     + unionfs_read_unlock(dentry->d_sb);
5519     +}
5520     +
5521     +/*
5522     + * Check 'mask' against the lower inodes of 'inode', from ibstart to ibend;
5523     + * returns 0 or a negative errno. We deliberately don't grab the superblock
5524     + * read-lock here, which prevents a deadlock with the branch-management
5525     + * "add branch" code (which grabbed the write lock). That is safe because,
5526     + * even with branch management taking place, there is no chance that
5527     + * unionfs_permission, or anything it calls, will use stale branch info.
5528     + */
5529     +static int unionfs_permission(struct inode *inode, int mask,
5530     + struct nameidata *nd)
5531     +{
5532     + struct inode *lower_inode = NULL;
5533     + int err = 0;
5534     + int bindex, bstart, bend;
5535     + const int is_file = !S_ISDIR(inode->i_mode); /* any non-directory */
5536     + const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
5537     +
5538     + if (nd)
5539     + unionfs_lock_dentry(nd->dentry, UNIONFS_DMUTEX_CHILD);
5540     +
5541     + if (!UNIONFS_I(inode)->lower_inodes) {
5542     + if (is_file) /* dirs can be unlinked but chdir'ed to */
5543     + err = -ESTALE; /* force revalidate */
5544     + goto out;
5545     + }
5546     + bstart = ibstart(inode);
5547     + bend = ibend(inode);
5548     + if (unlikely(bstart < 0 || bend < 0)) {
5549     + /*
5550     + * With branch-management, we can get a stale inode here.
5551     + * If so, we return ESTALE back to link_path_walk, which
5552     + * would discard the dcache entry and re-lookup the
5553     + * dentry+inode. This should be equivalent to issuing
5554     + * __unionfs_d_revalidate_chain on nd.dentry here.
5555     + */
5556     + if (is_file) /* dirs can be unlinked but chdir'ed to */
5557     + err = -ESTALE; /* force revalidate */
5558     + goto out;
5559     + }
5560     +
5561     + for (bindex = bstart; bindex <= bend; bindex++) {
5562     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
5563     + if (!lower_inode)
5564     + continue;
5565     +
5566     + /*
5567     + * When checking a directory, skip lower inodes that are not
5568     + * directories (the D-F-D case of underlying files and
5569     + * directories): only lower directories take part in the check.
5570     + */
5571     + if (!is_file && !S_ISDIR(lower_inode->i_mode))
5572     + continue;
5573     +
5574     + /*
5575     + * We check basic permissions, but we ignore any conditions
5576     + * such as readonly file systems or branches marked as
5577     + * readonly, because those conditions should lead to a
5578     + * copyup taking place later on.
5579     + */
5580     + err = permission(lower_inode, mask, nd);
5581     + if (err && bindex > 0) { /* never forgiven on branch 0 */
5582     + umode_t mode = lower_inode->i_mode;
5583     + if (is_robranch_super(inode->i_sb, bindex) &&
5584     + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5585     + err = 0;
5586     + if (IS_COPYUP_ERR(err))
5587     + err = 0;
5588     + }
5589     +
5590     + /*
5591     + * The permissions are an intersection of the overall directory
5592     + * permissions, so we fail if one fails.
5593     + */
5594     + if (err)
5595     + goto out;
5596     +
5597     + /* for files and write-only checks, only the leftmost inode matters */
5598     + if (is_file || write_mask) {
5599     + if (is_file && write_mask) {
5600     + err = get_write_access(lower_inode);
5601     + if (!err) /* probe only: give it back at once */
5602     + put_write_access(lower_inode);
5603     + }
5604     + break;
5605     + }
5606     + }
5607     + /* sync times which may have changed (asynchronously) in the lower inodes */
5608     + unionfs_copy_attr_times(inode);
5609     +
5610     +out:
5611     + unionfs_check_inode(inode);
5612     + unionfs_check_nd(nd);
5613     + if (nd)
5614     + unionfs_unlock_dentry(nd->dentry);
5615     + return err;
5616     +}
5617     +
5618     +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5619     +{
5620     + int err = 0; /* 0 or a negative errno; this is the return value */
5621     + struct dentry *lower_dentry;
5622     + struct inode *inode;
5623     + struct inode *lower_inode;
5624     + int bstart, bend, bindex;
5625     + loff_t size; /* copyup size first, later the current upper i_size */
5626     +
5627     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5628     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5629     +
5630     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5631     + err = -ESTALE;
5632     + goto out;
5633     + }
5634     +
5635     + bstart = dbstart(dentry);
5636     + bend = dbend(dentry); /* assigned but not used further below */
5637     + inode = dentry->d_inode;
5638     +
5639     + /*
5640     + * A mode change that comes with ATTR_KILL_SUID/ATTR_KILL_SGID is for
5641     + * clearing setuid/setgid. Drop ATTR_MODE so the lower filesystem can
5642     + * reinterpret it in its own way. */
5643     + if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
5644     + ia->ia_valid &= ~ATTR_MODE;
5645     +
5646     + lower_dentry = unionfs_lower_dentry(dentry);
5647     + BUG_ON(!lower_dentry); /* should never happen after above revalidate */
5648     +
5649     + /* copyup if the file is on a read only branch */
5650     + if (is_robranch_super(dentry->d_sb, bstart)
5651     + || IS_RDONLY(lower_dentry->d_inode)) {
5652     + /* check if we have a branch to copy up to */
5653     + if (bstart <= 0) {
5654     + err = -EACCES;
5655     + goto out;
5656     + }
5657     +
5658     + if (ia->ia_valid & ATTR_SIZE)
5659     + size = ia->ia_size; /* copy up only the requested size */
5660     + else
5661     + size = i_size_read(inode);
5662     + /* copyup to next available branch */
5663     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
5664     + err = copyup_dentry(dentry->d_parent->d_inode,
5665     + dentry, bstart, bindex,
5666     + dentry->d_name.name,
5667     + dentry->d_name.len,
5668     + NULL, size);
5669     + if (!err)
5670     + break;
5671     + }
5672     + if (err) /* no branch to the left accepted the copyup */
5673     + goto out;
5674     + /* get updated lower_dentry after copyup */
5675     + lower_dentry = unionfs_lower_dentry(dentry);
5676     + }
5677     +
5678     + lower_inode = unionfs_lower_inode(inode);
5679     +
5680     + /*
5681     + * If shrinking, first truncate upper level to cancel writing dirty
5682     + * pages beyond the new eof; and also if its maxbytes is more
5683     + * limiting (fail with -EFBIG before making any change to the lower
5684     + * level). There is no need to vmtruncate the upper level
5685     + * afterwards in the other cases: we fsstack_copy_inode_size from
5686     + * the lower level.
5687     + */
5688     + if (ia->ia_valid & ATTR_SIZE) {
5689     + size = i_size_read(inode);
5690     + if (ia->ia_size < size || (ia->ia_size > size &&
5691     + inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
5692     + err = vmtruncate(inode, ia->ia_size);
5693     + if (err)
5694     + goto out;
5695     + }
5696     + }
5697     +
5698     + /* notify the (possibly copied-up) lower inode */
5699     + err = notify_change(lower_dentry, ia);
5700     + if (err)
5701     + goto out;
5702     +
5703     + /* get attributes from the first lower inode */
5704     + unionfs_copy_attr_all(inode, lower_inode);
5705     + /*
5706     + * unionfs_copy_attr_all will copy the lower times to our inode if
5707     + * the lower ones are newer (useful for cache coherency). However,
5708     + * ->setattr is the only place in which we may have to copy the
5709     + * lower inode times absolutely, to support utimes(2).
5710     + */
5711     + if (ia->ia_valid & ATTR_MTIME_SET)
5712     + inode->i_mtime = lower_inode->i_mtime;
5713     + if (ia->ia_valid & ATTR_CTIME)
5714     + inode->i_ctime = lower_inode->i_ctime;
5715     + if (ia->ia_valid & ATTR_ATIME_SET)
5716     + inode->i_atime = lower_inode->i_atime;
5717     + fsstack_copy_inode_size(inode, lower_inode);
5718     + /* if setattr succeeded, then parent dir may have changed */
5719     + unionfs_copy_attr_times(dentry->d_parent->d_inode);
5720     +out:
5721     + if (!err) /* the dentry is only sanity-checked on success */
5722     + unionfs_check_dentry(dentry);
5723     + unionfs_unlock_dentry(dentry);
5724     + unionfs_read_unlock(dentry->d_sb);
5725     +
5726     + return err;
5727     +}
5728     +
5729     +struct inode_operations unionfs_symlink_iops = { /* inode ops, symlink set */
5730     + .follow_link = unionfs_follow_link, /* allocates the link buffer */
5731     + .put_link = unionfs_put_link, /* frees that buffer again */
5732     + .readlink = unionfs_readlink,
5733     + .setattr = unionfs_setattr,
5734     + .permission = unionfs_permission,
5735     +};
5736     +
5737     +struct inode_operations unionfs_dir_iops = { /* inode ops, directory set */
5738     + .lookup = unionfs_lookup,
5739     + .create = unionfs_create,
5740     + .mknod = unionfs_mknod,
5741     + .symlink = unionfs_symlink,
5742     + .link = unionfs_link,
5743     + .unlink = unionfs_unlink,
5744     + .mkdir = unionfs_mkdir,
5745     + .rmdir = unionfs_rmdir,
5746     + .rename = unionfs_rename,
5747     + .setattr = unionfs_setattr,
5748     + .permission = unionfs_permission,
5749     +#ifdef CONFIG_UNION_FS_XATTR
5750     + .getxattr = unionfs_getxattr,
5751     + .setxattr = unionfs_setxattr,
5752     + .listxattr = unionfs_listxattr,
5753     + .removexattr = unionfs_removexattr,
5754     +#endif /* CONFIG_UNION_FS_XATTR */
5755     +};
5756     +
5757     +struct inode_operations unionfs_main_iops = { /* inode ops, generic set */
5758     + .setattr = unionfs_setattr,
5759     + .permission = unionfs_permission,
5760     +#ifdef CONFIG_UNION_FS_XATTR
5761     + .getxattr = unionfs_getxattr,
5762     + .setxattr = unionfs_setxattr,
5763     + .listxattr = unionfs_listxattr,
5764     + .removexattr = unionfs_removexattr,
5765     +#endif /* CONFIG_UNION_FS_XATTR */
5766     +};
5767     diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5768     new file mode 100644
5769     index 0000000..b9ee072
5770     --- /dev/null
5771     +++ b/fs/unionfs/lookup.c
5772     @@ -0,0 +1,652 @@
5773     +/*
5774     + * Copyright (c) 2003-2007 Erez Zadok
5775     + * Copyright (c) 2003-2006 Charles P. Wright
5776     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5777     + * Copyright (c) 2005-2006 Junjiro Okajima
5778     + * Copyright (c) 2005 Arun M. Krishnakumar
5779     + * Copyright (c) 2004-2006 David P. Quigley
5780     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5781     + * Copyright (c) 2003 Puja Gupta
5782     + * Copyright (c) 2003 Harikesavan Krishnan
5783     + * Copyright (c) 2003-2007 Stony Brook University
5784     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
5785     + *
5786     + * This program is free software; you can redistribute it and/or modify
5787     + * it under the terms of the GNU General Public License version 2 as
5788     + * published by the Free Software Foundation.
5789     + */
5790     +
5791     +#include "union.h"
5792     +
5793     +static int realloc_dentry_private_data(struct dentry *dentry);
5794     +
5795     +/* 1 unless the name starts with the whiteout prefix or opaque-dir name */
5796     +static int is_validname(const char *name)
5797     +{
5798     + const size_t opq_len = sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1;
5799     +
5800     + if (strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN) == 0)
5801     + return 0;
5802     + /* anything beginning with the opaque-directory name is rejected too */
5803     + return strncmp(name, UNIONFS_DIR_OPAQUE_NAME, opq_len) != 0;
5804     +}
5805     +
5806     +/* Lookup utility: 1 if branch bindex of dentry is opaque, 0 if not, or -errno */
5807     +static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
5808     +{
5809     + int err = 0;
5810     + struct dentry *lower_dentry;
5811     + struct dentry *wh_lower_dentry;
5812     + struct inode *lower_inode;
5813     + struct sioq_args args;
5814     +
5815     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5816     + lower_inode = lower_dentry->d_inode;
5817     +
5818     + BUG_ON(!S_ISDIR(lower_inode->i_mode));
5819     +
5820     + mutex_lock(&lower_inode->i_mutex); /* held across either lookup path */
5821     +
5822     + if (!permission(lower_inode, MAY_EXEC, NULL)) { /* 0: look up directly */
5823     + wh_lower_dentry =
5824     + lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
5825     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
5826     + } else { /* MAY_EXEC was refused: hand the lookup to run_sioq() */
5827     + args.is_opaque.dentry = lower_dentry;
5828     + run_sioq(__is_opaque_dir, &args);
5829     + wh_lower_dentry = args.ret;
5830     + }
5831     +
5832     + mutex_unlock(&lower_inode->i_mutex);
5833     +
5834     + if (IS_ERR(wh_lower_dentry)) {
5835     + err = PTR_ERR(wh_lower_dentry);
5836     + goto out;
5837     + }
5838     +
5839     + /* This is an opaque dir iff wh_lower_dentry is positive */
5840     + err = !!wh_lower_dentry->d_inode;
5841     +
5842     + dput(wh_lower_dentry);
5843     +out:
5844     + return err;
5845     +}
5846     +
5847     +/*
5848     + * Main (and complex) driver function for Unionfs's lookup
5849     + *
5850     + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
5851     + * PTR if d_splice returned a different dentry.
5852     + *
5853     + * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
5854     + * inode info must be locked. If lookupmode is INTERPOSE_LOOKUP (i.e., a
5855     + * newly looked-up dentry), then unionfs_lookup_backend will return a locked
5856     + * dentry's info, which the caller must unlock.
5857     + */
5858     +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
5859     + struct nameidata *nd, int lookupmode)
5860     +{
5861     + int err = 0;
5862     + struct dentry *lower_dentry = NULL;
5863     + struct dentry *wh_lower_dentry = NULL;
5864     + struct dentry *lower_dir_dentry = NULL;
5865     + struct dentry *parent_dentry = NULL;
5866     + struct dentry *d_interposed = NULL;
5867     + int bindex, bstart = -1, bend, bopaque;
5868     + int dentry_count = 0; /* Number of positive dentries. */
5869     + int first_dentry_offset = -1; /* -1 is uninitialized */
5870     + struct dentry *first_dentry = NULL;
5871     + struct dentry *first_lower_dentry = NULL;
5872     + struct vfsmount *first_lower_mnt = NULL;
5873     + int opaque;
5874     + char *whname = NULL;
5875     + const char *name;
5876     + int namelen;
5877     +
5878     + /*
5879     + * We should already have a lock on this dentry in the case of a
5880     + * partial lookup, or a revalidation. Otherwise it is returned from
5881     + * new_dentry_private_data already locked.
5882     + */
5883     + if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
5884     + lookupmode == INTERPOSE_REVAL_NEG)
5885     + verify_locked(dentry);
5886     + else /* this could only be INTERPOSE_LOOKUP */
5887     + BUG_ON(UNIONFS_D(dentry) != NULL);
5888     +
5889     + switch (lookupmode) {
5890     + case INTERPOSE_PARTIAL:
5891     + break;
5892     + case INTERPOSE_LOOKUP:
5893     + err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
5894     + if (unlikely(err))
5895     + goto out;
5896     + break;
5897     + default:
5898     + /* default: can only be INTERPOSE_REVAL/REVAL_NEG */
5899     + err = realloc_dentry_private_data(dentry);
5900     + if (unlikely(err))
5901     + goto out;
5902     + break;
5903     + }
5904     +
5905     + /* must initialize dentry operations */
5906     + dentry->d_op = &unionfs_dops;
5907     +
5908     + parent_dentry = dget_parent(dentry);
5909     + /* We never partial lookup the root directory. */
5910     + if (parent_dentry == dentry) {
5911     + dput(parent_dentry);
5912     + parent_dentry = NULL;
5913     + goto out;
5914     + }
5915     +
5916     + name = dentry->d_name.name;
5917     + namelen = dentry->d_name.len;
5918     +
5919     + /* No dentries should get created for possible whiteout names. */
5920     + if (!is_validname(name)) {
5921     + err = -EPERM;
5922     + goto out_free;
5923     + }
5924     +
5925     + /* Now start the actual lookup procedure. */
5926     + bstart = dbstart(parent_dentry);
5927     + bend = dbend(parent_dentry);
5928     + bopaque = dbopaque(parent_dentry);
5929     + BUG_ON(bstart < 0);
5930     +
5931     + /*
5932     + * It would be ideal if we could convert partial lookups to only have
5933     + * to do this work when they really need to. It could probably improve
5934     + * performance quite a bit, and maybe simplify the rest of the code.
5935     + */
5936     + if (lookupmode == INTERPOSE_PARTIAL) {
5937     + bstart++;
5938     + if ((bopaque != -1) && (bopaque < bend))
5939     + bend = bopaque;
5940     + }
5941     +
5942     + for (bindex = bstart; bindex <= bend; bindex++) {
5943     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5944     + if (lookupmode == INTERPOSE_PARTIAL && lower_dentry)
5945     + continue;
5946     + BUG_ON(lower_dentry != NULL);
5947     +
5948     + lower_dir_dentry =
5949     + unionfs_lower_dentry_idx(parent_dentry, bindex);
5950     +
5951     + /* if the parent lower dentry does not exist skip this */
5952     + if (!(lower_dir_dentry && lower_dir_dentry->d_inode))
5953     + continue;
5954     +
5955     + /* also skip it if the parent isn't a directory. */
5956     + if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
5957     + continue;
5958     +
5959     + /* Reuse the whiteout name because its value doesn't change. */
5960     + if (!whname) {
5961     + whname = alloc_whname(name, namelen);
5962     + if (unlikely(IS_ERR(whname))) {
5963     + err = PTR_ERR(whname);
5964     + goto out_free;
5965     + }
5966     + }
5967     +
5968     + /* check if whiteout exists in this branch: lookup .wh.foo */
5969     + wh_lower_dentry = lookup_one_len(whname, lower_dir_dentry,
5970     + namelen + UNIONFS_WHLEN);
5971     + if (IS_ERR(wh_lower_dentry)) {
5972     + dput(first_lower_dentry);
5973     + unionfs_mntput(first_dentry, first_dentry_offset);
5974     + err = PTR_ERR(wh_lower_dentry);
5975     + goto out_free;
5976     + }
5977     +
5978     + if (wh_lower_dentry->d_inode) {
5979     + /* We found a whiteout so let's give up. */
5980     + if (S_ISREG(wh_lower_dentry->d_inode->i_mode)) {
5981     + set_dbend(dentry, bindex);
5982     + set_dbopaque(dentry, bindex);
5983     + dput(wh_lower_dentry);
5984     + break;
5985     + }
5986     + err = -EIO;
5987     + printk(KERN_ERR "unionfs: EIO: invalid whiteout "
5988     + "entry type %d\n",
5989     + wh_lower_dentry->d_inode->i_mode);
5990     + dput(wh_lower_dentry);
5991     + dput(first_lower_dentry);
5992     + unionfs_mntput(first_dentry, first_dentry_offset);
5993     + goto out_free;
5994     + }
5995     +
5996     + dput(wh_lower_dentry);
5997     + wh_lower_dentry = NULL;
5998     +
5999     + /* Now do regular lookup; lookup foo */
6000     + BUG_ON(!lower_dir_dentry);
6001     + lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen);
6002     + if (IS_ERR(lower_dentry)) {
6003     + dput(first_lower_dentry);
6004     + unionfs_mntput(first_dentry, first_dentry_offset);
6005     + err = PTR_ERR(lower_dentry);
6006     + goto out_free;
6007     + }
6008     +
6009     + /*
6010     + * Store the first negative dentry specially, because if they
6011     + * are all negative we need this for future creates.
6012     + */
6013     + if (!lower_dentry->d_inode) {
6014     + if (!first_lower_dentry && (dbstart(dentry) == -1)) {
6015     + first_lower_dentry = lower_dentry;
6016     + /*
6017     + * FIXME: following line needs to be changed
6018     + * to allow mount-point crossing
6019     + */
6020     + first_dentry = parent_dentry;
6021     + first_lower_mnt =
6022     + unionfs_mntget(parent_dentry, bindex);
6023     + first_dentry_offset = bindex;
6024     + } else {
6025     + dput(lower_dentry);
6026     + }
6027     +
6028     + continue;
6029     + }
6030     +
6031     + /* number of positive dentries */
6032     + dentry_count++;
6033     +
6034     + /* store underlying dentry */
6035     + if (dbstart(dentry) == -1)
6036     + set_dbstart(dentry, bindex);
6037     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6038     + /*
6039     + * FIXME: the following line needs to get fixed to allow
6040     + * mount-point crossing
6041     + */
6042     + unionfs_set_lower_mnt_idx(dentry, bindex,
6043     + unionfs_mntget(parent_dentry,
6044     + bindex));
6045     + set_dbend(dentry, bindex);
6046     +
6047     + /* update parent directory's atime with the bindex */
6048     + fsstack_copy_attr_atime(parent_dentry->d_inode,
6049     + lower_dir_dentry->d_inode);
6050     +
6051     + /* We terminate file lookups here. */
6052     + if (!S_ISDIR(lower_dentry->d_inode->i_mode)) {
6053     + if (lookupmode == INTERPOSE_PARTIAL)
6054     + continue;
6055     + if (dentry_count == 1)
6056     + goto out_positive;
6057     + /* This can only happen with mixed D-*-F-* */
6058     + BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
6059     + d_inode->i_mode));
6060     + continue;
6061     + }
6062     +
6063     + opaque = is_opaque_dir(dentry, bindex);
6064     + if (opaque < 0) {
6065     + dput(first_lower_dentry);
6066     + unionfs_mntput(first_dentry, first_dentry_offset);
6067     + err = opaque;
6068     + goto out_free;
6069     + } else if (opaque) {
6070     + set_dbend(dentry, bindex);
6071     + set_dbopaque(dentry, bindex);
6072     + break;
6073     + }
6074     + }
6075     +
6076     + if (dentry_count)
6077     + goto out_positive;
6078     + else
6079     + goto out_negative;
6080     +
6081     +out_negative:
6082     + if (lookupmode == INTERPOSE_PARTIAL)
6083     + goto out;
6084     +
6085     + /* If we've only got negative dentries, then use the leftmost one. */
6086     + if (lookupmode == INTERPOSE_REVAL) {
6087     + if (dentry->d_inode)
6088     + UNIONFS_I(dentry->d_inode)->stale = 1;
6089     + goto out;
6090     + }
6091     + if (!lower_dir_dentry) {
6092     + err = -ENOENT;
6093     + goto out;
6094     + }
6095     + /* This should only happen if we found a whiteout. */
6096     + if (first_dentry_offset == -1) {
6097     + first_lower_dentry = lookup_one_len(name, lower_dir_dentry,
6098     + namelen);
6099     + first_dentry_offset = bindex;
6100     + if (IS_ERR(first_lower_dentry)) {
6101     + err = PTR_ERR(first_lower_dentry);
6102     + goto out;
6103     + }
6104     +
6105     + /*
6106     + * FIXME: the following line needs to be changed to allow
6107     + * mount-point crossing
6108     + */
6109     + first_dentry = dentry;
6110     + first_lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6111     + bindex);
6112     + }
6113     + unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
6114     + first_lower_dentry);
6115     + unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
6116     + first_lower_mnt);
6117     + set_dbstart(dentry, first_dentry_offset);
6118     + set_dbend(dentry, first_dentry_offset);
6119     +
6120     + if (lookupmode == INTERPOSE_REVAL_NEG)
6121     + BUG_ON(dentry->d_inode != NULL);
6122     + else
6123     + d_add(dentry, NULL);
6124     + goto out;
6125     +
6126     +/* This part of the code is for positive dentries. */
6127     +out_positive:
6128     + BUG_ON(dentry_count <= 0);
6129     +
6130     + /*
6131     + * If we're holding onto the first negative dentry & corresponding
6132     + * vfsmount - throw it out.
6133     + */
6134     + dput(first_lower_dentry);
6135     + unionfs_mntput(first_dentry, first_dentry_offset);
6136     +
6137     + /* Partial lookups need to re-interpose, or throw away older negs. */
6138     + if (lookupmode == INTERPOSE_PARTIAL) {
6139     + if (dentry->d_inode) {
6140     + unionfs_reinterpose(dentry);
6141     + goto out;
6142     + }
6143     +
6144     + /*
6145     + * This somehow turned positive, so it is as if we had a
6146     + * negative revalidation.
6147     + */
6148     + lookupmode = INTERPOSE_REVAL_NEG;
6149     +
6150     + update_bstart(dentry);
6151     + bstart = dbstart(dentry);
6152     + bend = dbend(dentry);
6153     + }
6154     +
6155     + /*
6156     + * Interpose can return a dentry if d_splice returned a different
6157     + * dentry.
6158     + */
6159     + d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6160     + if (IS_ERR(d_interposed))
6161     + err = PTR_ERR(d_interposed);
6162     + else if (d_interposed)
6163     + dentry = d_interposed;
6164     +
6165     + if (err)
6166     + goto out_drop;
6167     +
6168     + goto out;
6169     +
6170     +out_drop:
6171     + d_drop(dentry);
6172     +
6173     +out_free:
6174     + /* should dput all the underlying dentries on error condition */
6175     + bstart = dbstart(dentry);
6176     + if (bstart >= 0) {
6177     + bend = dbend(dentry);
6178     + for (bindex = bstart; bindex <= bend; bindex++) {
6179     + dput(unionfs_lower_dentry_idx(dentry, bindex));
6180     + unionfs_mntput(dentry, bindex);
6181     + }
6182     + }
6183     + kfree(UNIONFS_D(dentry)->lower_paths);
6184     + UNIONFS_D(dentry)->lower_paths = NULL;
6185     + set_dbstart(dentry, -1);
6186     + set_dbend(dentry, -1);
6187     +
6188     +out:
6189     + if (!err && UNIONFS_D(dentry)) {
6190     + BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
6191     + BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
6192     + if (dbstart(dentry) < 0 &&
6193     + dentry->d_inode && bstart >= 0 &&
6194     + (!UNIONFS_I(dentry->d_inode) ||
6195     + !UNIONFS_I(dentry->d_inode)->lower_inodes)) {
6196     + unionfs_mntput(dentry->d_sb->s_root, bstart);
6197     + dput(first_lower_dentry);
6198     + UNIONFS_I(dentry->d_inode)->stale = 1;
6199     + }
6200     + }
6201     + kfree(whname);
6202     + dput(parent_dentry);
6203     + if (err && (lookupmode == INTERPOSE_LOOKUP))
6204     + unionfs_unlock_dentry(dentry);
6205     + if (!err && d_interposed)
6206     + return d_interposed;
6207     + if (dentry->d_inode && UNIONFS_I(dentry->d_inode)->stale &&
6208     + first_dentry_offset >= 0)
6209     + unionfs_mntput(dentry->d_sb->s_root, first_dentry_offset);
6210     + return ERR_PTR(err);
6211     +}
6212     +
6213     +/*
6214     + * Utility that fills in a unionfs dentry by running the lookup backend in
6215     + * INTERPOSE_PARTIAL mode. The caller must already hold the dentry lock
6216     + * (see unionfs_lock_dentry).
6217     + *
6218     + * Returns 0 when the backend yields NULL, or its -ERRNO when it fails.
6219     + */
6220     +int unionfs_partial_lookup(struct dentry *dentry)
6221     +{
6222     + struct nameidata nd = { .flags = 0 };
6223     + struct dentry *found;
6224     +
6225     + found = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
6226     + if (!found)
6227     + return 0;
6228     + if (IS_ERR(found))
6229     + return PTR_ERR(found);
6230     +
6231     + /*
6232     + * need to change the interface: a non-NULL result can only be the
6233     + * dentry we passed in, and is still reported as -ENOSYS.
6234     + */
6235     + BUG_ON(found != dentry);
6236     +
6237     + return -ENOSYS;
6238     +}
6239     +
6240     +/* Slab cache so that dentry private data comes in properly sized objects. */
6241     +static struct kmem_cache *unionfs_dentry_cachep;
6242     +int unionfs_init_dentry_cache(void)
6243     +{
6244     + unionfs_dentry_cachep = kmem_cache_create("unionfs_dentry",
6245     + sizeof(struct unionfs_dentry_info),
6246     + 0, SLAB_RECLAIM_ACCOUNT, NULL);
6247     + if (!unionfs_dentry_cachep)
6248     + return -ENOMEM; /* cache creation failed */
6249     + return 0;
6250     +}
6251     +
6252     +void unionfs_destroy_dentry_cache(void)
6253     +{
6254     + if (unionfs_dentry_cachep) /* NULL if the cache was never created */
6255     + kmem_cache_destroy(unionfs_dentry_cachep);
6256     +}
6257     +
6258     +void free_dentry_private_data(struct dentry *dentry)
6259     +{
6260     + if (dentry && dentry->d_fsdata) {
6261     + kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6262     + dentry->d_fsdata = NULL; /* so a second call is a no-op */
6263     + }
6264     +}
6265     +
6266     +/* (Re)size and zero the lower_paths array and reset the branch indices. */
6267     +static inline int __realloc_dentry_private_data(struct dentry *dentry)
6268     +{
6269     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6270     + void *paths;
6271     + int nbytes;
6272     +
6273     + BUG_ON(!info);
6274     +
6275     + /* one struct path slot per sbmax(); old array is kept on failure */
6276     + nbytes = sizeof(struct path) * sbmax(dentry->d_sb);
6277     + paths = krealloc(info->lower_paths, nbytes, GFP_ATOMIC);
6278     + if (unlikely(!paths))
6279     + return -ENOMEM;
6280     + info->lower_paths = memset(paths, 0, nbytes);
6281     +
6282     + /* no branch range yet: reset the indices and resync with the sb */
6283     + info->bstart = -1;
6284     + info->bend = -1;
6285     + info->bopaque = -1;
6286     + info->bcount = sbmax(dentry->d_sb);
6287     + atomic_set(&info->generation,
6288     + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6289     +
6290     + return 0;
6291     +}
6292     +
6293     +/* UNIONFS_D(dentry)->lock must be locked; returns 0 or -ENOMEM */
6294     +static int realloc_dentry_private_data(struct dentry *dentry)
6295     +{
6296     + if (!__realloc_dentry_private_data(dentry))
6297     + return 0;
6298     + /* failed: release the old lower_paths array and the private data */
6299     + kfree(UNIONFS_D(dentry)->lower_paths);
6300     + free_dentry_private_data(dentry);
6301     + return -ENOMEM; /* NOTE(review): frees the info holding the lock - confirm callers */
6302     +}
6303     +
6304     +/* allocate new dentry private data; returns 0 with info->lock held */
6305     +int new_dentry_private_data(struct dentry *dentry, int subclass)
6306     +{
6307     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6308     +
6309     + BUG_ON(info);
6310     +
6311     + info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6312     + if (unlikely(!info))
6313     + return -ENOMEM;
6314     + /* the new info is handed back locked, using lock subclass @subclass */
6315     + mutex_init(&info->lock);
6316     + mutex_lock_nested(&info->lock, subclass);
6317     +
6318     + info->lower_paths = NULL;
6319     +
6320     + dentry->d_fsdata = info;
6321     +
6322     + if (!__realloc_dentry_private_data(dentry))
6323     + return 0;
6324     + /* could not set up lower_paths: unlock and undo the allocation */
6325     + mutex_unlock(&info->lock);
6326     + free_dentry_private_data(dentry);
6327     + return -ENOMEM;
6328     +}
6329     +
6330     +/*
6331     + * Advance bstart to the first branch whose lower dentry is positive,
6332     + * releasing and clearing every negative lower dentry met on the way.
6333     + */
6334     +void update_bstart(struct dentry *dentry)
6335     +{
6336     + int idx = dbstart(dentry);
6337     + int last = dbend(dentry);
6338     + struct dentry *lower;
6339     +
6340     + for (; idx <= last; idx++) {
6341     + lower = unionfs_lower_dentry_idx(dentry, idx);
6342     + if (!lower)
6343     + continue;
6344     + if (!lower->d_inode) {
6345     + dput(lower);
6346     + unionfs_set_lower_dentry_idx(dentry, idx, NULL);
6347     + continue;
6348     + }
6349     + set_dbstart(dentry, idx);
6350     + break;
6351     + }
6352     +}
6353     +
6354     +
6355     +/*
6356     + * Initialize a nameidata structure (the intent part) we can pass to a lower
6357     + * file system. @nd is zeroed first; @flags must be 0 or exactly one of
6358     + * LOOKUP_CREATE, LOOKUP_OPEN or LOOKUP_ACCESS (anything else hits BUG()).
6359     + * Returns 0, or -ENOMEM if ALLOC_LOWER_ND_FILE is defined and the intent
6360     + * file cannot be allocated; callers release it via release_lower_nd().
6361     + *
6362     + * XXX: this code, and the callers of this code, should be redone using
6363     + * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6364     + * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6365     + * function and a method that other file systems can call.
6366     + */
6367     +int init_lower_nd(struct nameidata *nd, unsigned int flags)
6368     +{
6369     + int err = 0;
6370     +#ifdef ALLOC_LOWER_ND_FILE
6371     + /*
6372     + * XXX: one day we may need to have the lower return an open file
6373     + * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6374     + * very well be needed for nfs4.
6375     + */
6376     + struct file *file;
6377     +#endif /* ALLOC_LOWER_ND_FILE */
6378     +
6379     + memset(nd, 0, sizeof(struct nameidata));
6380     + if (!flags) /* no intent requested: the zeroed nd is all we need */
6381     + return err;
6382     +
6383     + switch (flags) {
6384     + case LOOKUP_CREATE:
6385     + nd->intent.open.flags |= O_CREAT;
6386     + /* fall through: shared code for create/open cases */
6387     + case LOOKUP_OPEN:
6388     + nd->flags = flags;
6389     + nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6390     +#ifdef ALLOC_LOWER_ND_FILE
6391     + file = kzalloc(sizeof(struct file), GFP_KERNEL);
6392     + if (unlikely(!file)) {
6393     + err = -ENOMEM;
6394     + break; /* exit switch statement and thus return */
6395     + }
6396     + nd->intent.open.file = file;
6397     +#endif /* ALLOC_LOWER_ND_FILE */
6398     + break;
6399     + case LOOKUP_ACCESS:
6400     + nd->flags = flags;
6401     + break;
6402     + default:
6403     + /*
6404     + * We should never get here, for now.
6405     + * We can add new cases here later on.
6406     + */
6407     + pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6408     + BUG();
6409     + break;
6410     + }
6411     +
6412     + return err;
6413     +}
6414     +
6415     +void release_lower_nd(struct nameidata *nd, int err)
6416     +{
6417     + if (!nd->intent.open.file) /* no intent file attached */
6418     + return;
6419     + else if (!err) /* the open intent is only released when @err is 0 */
6420     + release_open_intent(nd);
6421     +#ifdef ALLOC_LOWER_ND_FILE
6422     + kfree(nd->intent.open.file); /* pairs with kzalloc in init_lower_nd */
6423     +#endif /* ALLOC_LOWER_ND_FILE */
6424     +}
6425     diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6426     new file mode 100644
6427     index 0000000..23c18f7
6428     --- /dev/null
6429     +++ b/fs/unionfs/main.c
6430     @@ -0,0 +1,794 @@
6431     +/*
6432     + * Copyright (c) 2003-2007 Erez Zadok
6433     + * Copyright (c) 2003-2006 Charles P. Wright
6434     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6435     + * Copyright (c) 2005-2006 Junjiro Okajima
6436     + * Copyright (c) 2005 Arun M. Krishnakumar
6437     + * Copyright (c) 2004-2006 David P. Quigley
6438     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6439     + * Copyright (c) 2003 Puja Gupta
6440     + * Copyright (c) 2003 Harikesavan Krishnan
6441     + * Copyright (c) 2003-2007 Stony Brook University
6442     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
6443     + *
6444     + * This program is free software; you can redistribute it and/or modify
6445     + * it under the terms of the GNU General Public License version 2 as
6446     + * published by the Free Software Foundation.
6447     + */
6448     +
6449     +#include "union.h"
6450     +#include <linux/module.h>
6451     +#include <linux/moduleparam.h>
6452     +
6453     +static void unionfs_fill_inode(struct dentry *dentry,
6454     + struct inode *inode)
6455     +{
6456     + struct inode *lower_inode;
6457     + struct dentry *lower_dentry;
6458     + int bindex, bstart, bend;
6459     + /* Fill @inode's lower inodes, ops and attributes from @dentry. */
6460     + bstart = dbstart(dentry);
6461     + bend = dbend(dentry);
6462     +
6463     + for (bindex = bstart; bindex <= bend; bindex++) {
6464     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6465     + if (!lower_dentry) {
6466     + unionfs_set_lower_inode_idx(inode, bindex, NULL);
6467     + continue;
6468     + }
6469     +
6470     + /* Negative lower dentry: its inode slot is left untouched. */
6471     + if (!lower_dentry->d_inode)
6472     + continue;
6473     +
6474     + unionfs_set_lower_inode_idx(inode, bindex,
6475     + igrab(lower_dentry->d_inode));
6476     + }
6477     +
6478     + ibstart(inode) = dbstart(dentry);
6479     + ibend(inode) = dbend(dentry);
6480     +
6481     + /* Use attributes from the first branch. */
6482     + lower_inode = unionfs_lower_inode(inode);
6483     +
6484     + /* Symlinks & directories get their own inode ops; others keep i_op */
6485     + if (S_ISLNK(lower_inode->i_mode))
6486     + inode->i_op = &unionfs_symlink_iops;
6487     + else if (S_ISDIR(lower_inode->i_mode))
6488     + inode->i_op = &unionfs_dir_iops;
6489     +
6490     + /* Use different set of file ops for directories */
6491     + if (S_ISDIR(lower_inode->i_mode))
6492     + inode->i_fop = &unionfs_dir_fops;
6493     +
6494     + /* properly initialize special inodes (block, char, fifo, socket) */
6495     + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6496     + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6497     + init_special_inode(inode, lower_inode->i_mode,
6498     + lower_inode->i_rdev);
6499     +
6500     + /* all well, copy inode attributes and size from the lower inode */
6501     + unionfs_copy_attr_all(inode, lower_inode);
6502     + fsstack_copy_inode_size(inode, lower_inode);
6503     +}
6504     +
6505     +/*
6506     + * Connect a unionfs dentry/inode with several lower ones: the classic
6507     + * stackable file system "vnode interposition". @dentry must be locked.
6508     + * @sb: unionfs's super_block; @flag: one of the INTERPOSE_* modes.
6509     + * Returns NULL, the dentry d_splice_alias() gave us, or ERR_PTR(-errno).
6510     + */
6511     +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6512     + int flag)
6513     +{
6514     + int err = 0;
6515     + struct inode *inode;
6516     + int is_negative_dentry = 1;
6517     + int bindex, bstart, bend;
6518     + int need_fill_inode = 1;
6519     + struct dentry *spliced = NULL;
6520     +
6521     + verify_locked(dentry);
6522     +
6523     + bstart = dbstart(dentry);
6524     + bend = dbend(dentry);
6525     +
6526     + /* Make sure that we didn't get a negative dentry. */
6527     + for (bindex = bstart; bindex <= bend; bindex++) {
6528     + if (unionfs_lower_dentry_idx(dentry, bindex) &&
6529     + unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
6530     + is_negative_dentry = 0;
6531     + break;
6532     + }
6533     + }
6534     + BUG_ON(is_negative_dentry);
6535     +
6536     + /*
6537     + * We allocate our new inode below, by calling iget.
6538     + * iget will call our read_inode which will initialize some
6539     + * of the new inode's fields
6540     + */
6541     +
6542     + /*
6543     + * On revalidate we reuse our own inode and just fix it up. The old
6544     + * lower_inodes pointer is overwritten (presumably freed by the caller).
6545     + */
6546     + if (flag == INTERPOSE_REVAL) {
6547     + inode = dentry->d_inode;
6548     + UNIONFS_I(inode)->bstart = -1;
6549     + UNIONFS_I(inode)->bend = -1;
6550     + atomic_set(&UNIONFS_I(inode)->generation,
6551     + atomic_read(&UNIONFS_SB(sb)->generation));
6552     +
6553     + UNIONFS_I(inode)->lower_inodes =
6554     + kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6555     + if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
6556     + err = -ENOMEM;
6557     + goto out;
6558     + }
6559     + } else {
6560     + /* get unique inode number for unionfs */
6561     + inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6562     + if (!inode) {
6563     + err = -EACCES;
6564     + goto out;
6565     + }
6566     + if (atomic_read(&inode->i_count) > 1)
6567     + goto skip; /* fill is deferred to the LOOKUP case below */
6568     + }
6569     +
6570     + need_fill_inode = 0;
6571     + unionfs_fill_inode(dentry, inode);
6572     +
6573     +skip:
6574     + /* only (our) lookup wants to do a d_add */
6575     + switch (flag) {
6576     + case INTERPOSE_DEFAULT:
6577     + case INTERPOSE_REVAL_NEG:
6578     + d_instantiate(dentry, inode);
6579     + break;
6580     + case INTERPOSE_LOOKUP:
6581     + spliced = d_splice_alias(inode, dentry);
6582     + if (spliced && spliced != dentry) {
6583     + /*
6584     + * d_splice can return a dentry if it was
6585     + * disconnected and had to be moved. We must ensure
6586     + * that the private data of the new dentry is
6587     + * correct and that the inode info was filled
6588     + * properly. Finally we must return this new
6589     + * dentry.
6590     + */
6591     + spliced->d_op = &unionfs_dops;
6592     + spliced->d_fsdata = dentry->d_fsdata;
6593     + dentry->d_fsdata = NULL;
6594     + dentry = spliced;
6595     + if (need_fill_inode) {
6596     + need_fill_inode = 0;
6597     + unionfs_fill_inode(dentry, inode);
6598     + }
6599     + goto out_spliced;
6600     + } else if (!spliced) {
6601     + if (need_fill_inode) {
6602     + need_fill_inode = 0;
6603     + unionfs_fill_inode(dentry, inode);
6604     + goto out_spliced;
6605     + }
6606     + }
6607     + break;
6608     + case INTERPOSE_REVAL:
6609     + /* Do nothing: the inode came from this dentry in the first place. */
6610     + break;
6611     + default:
6612     + printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
6613     + BUG();
6614     + }
6615     + goto out;
6616     +
6617     +out_spliced:
6618     + if (!err)
6619     + return spliced;
6620     +out:
6621     + return ERR_PTR(err);
6622     +}
6623     +
6624     +/*
6625     + * Like unionfs_interpose, but for a locked dentry that already has an
6626     + * inode: only the lower inode slots that are still empty get filled in.
6627     + */
6628     +void unionfs_reinterpose(struct dentry *dentry)
6629     +{
6630     + struct inode *inode; /* pre-allocated, taken from the dentry */
6631     + struct dentry *lower;
6632     + int idx, first, last;
6633     +
6634     + verify_locked(dentry);
6635     + inode = dentry->d_inode;
6636     +
6637     + first = dbstart(dentry);
6638     + last = dbend(dentry);
6639     + for (idx = first; idx <= last; idx++) {
6640     + lower = unionfs_lower_dentry_idx(dentry, idx);
6641     + /* nothing to add for missing, negative or already-set slots */
6642     + if (!lower || !lower->d_inode ||
6643     + unionfs_lower_inode_idx(inode, idx))
6644     + continue;
6645     + unionfs_set_lower_inode_idx(inode, idx,
6646     + igrab(lower->d_inode));
6647     + }
6648     +
6649     + /* the inode's branch range mirrors the dentry's */
6650     + ibstart(inode) = dbstart(dentry);
6651     + ibend(inode) = dbend(dentry);
6652     +}
6653     +
6654     +/*
6655     + * Sanity-check the branch we just looked up (nd). Returns 0 if usable, or:
6656     + * -EINVAL  it lives on unionfs (no stacking unionfs on top of unionfs)
6657     + * -ENOENT  it does not exist
6658     + * -ENOTDIR it is not a directory
6659     + */
6660     +int check_branch(struct nameidata *nd)
6661     +{
6662     + struct dentry *branch = nd->dentry;
6663     + struct inode *inode = branch->d_inode;
6664     +
6665     + /* XXX: remove in ODF code -- stacking unions allowed there */
6666     + if (strcmp(branch->d_sb->s_type->name, UNIONFS_NAME) == 0)
6667     + return -EINVAL;
6668     + if (!inode)
6669     + return -ENOENT;
6670     + return S_ISDIR(inode->i_mode) ? 0 : -ENOTDIR;
6671     +}
6672     +
6673     +/*
6674     + * Returns 1 if one dentry is the other or one of its ancestors, else 0.
6675     + */
6676     +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6677     +{
6678     + struct dentry *cur;
6679     +
6680     + /* climb from dent1 until we hit dent2 or the root (its own parent) */
6681     + for (cur = dent1; cur != dent2 && cur->d_parent != cur; )
6682     + cur = cur->d_parent;
6683     + if (cur == dent2)
6684     + return 1;
6685     +
6686     + /* no luck: climb from dent2 looking for dent1 instead */
6687     + for (cur = dent2; cur != dent1 && cur->d_parent != cur; )
6688     + cur = cur->d_parent;
6689     + return (cur == dent1);
6690     +}
6691     +
6692     +/*
6693     + * Translate a branch mode string into permission bits stored in @perms:
6694     + * "rw", or no mode at all (NULL), gives MAY_READ | MAY_WRITE and "ro"
6695     + * gives MAY_READ. Returns 0, or -EINVAL for any other string.
6696     + */
6697     +int parse_branch_mode(const char *name, int *perms)
6698     +{
6699     + if (name && strcmp(name, "ro") == 0) {
6700     + *perms = MAY_READ;
6701     + return 0;
6702     + }
6703     + if (name && strcmp(name, "rw") != 0)
6704     + return -EINVAL; /* unknown mode: @perms is left untouched */
6705     +
6706     + *perms = MAY_READ | MAY_WRITE;
6707     + return 0;
6708     +}
6709     +
6710     +/*
6711     + * parse the dirs= mount argument ("b1:b2=rw:b3=ro"), filling in the lower
6712     + * paths of @lower_root_info; returns 0 or -errno with everything released.
6713     + * We don't need to lock the superblock private data's rwsem, as we get
6714     + * called only by unionfs_read_super - it is still a long time before anyone
6715     + * can even get a reference to us.
6716     + */
6717     +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6718     + *lower_root_info, char *options)
6719     +{
6720     + struct nameidata nd;
6721     + char *name;
6722     + int err = 0;
6723     + int branches = 1;
6724     + int bindex = 0;
6725     + int i = 0;
6726     + int j = 0;
6727     + struct dentry *dent1;
6728     + struct dentry *dent2;
6729     +
6730     + if (options[0] == '\0') {
6731     + printk(KERN_ERR "unionfs: no branches specified\n");
6732     + err = -EINVAL;
6733     + goto out;
6734     + }
6735     +
6736     + /*
6737     + * Each colon means we have a separator, this is really just a rough
6738     + * guess, since strsep will handle empty fields for us.
6739     + */
6740     + for (i = 0; options[i]; i++)
6741     + if (options[i] == ':')
6742     + branches++;
6743     +
6744     + /* allocate space for underlying pointers to lower dentry */
6745     + UNIONFS_SB(sb)->data =
6746     + kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6747     + if (unlikely(!UNIONFS_SB(sb)->data)) {
6748     + err = -ENOMEM;
6749     + goto out;
6750     + }
6751     +
6752     + lower_root_info->lower_paths =
6753     + kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6754     + if (unlikely(!lower_root_info->lower_paths)) {
6755     + err = -ENOMEM;
6756     + goto out;
6757     + }
6758     +
6759     + /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6760     + branches = 0;
6761     + while ((name = strsep(&options, ":")) != NULL) {
6762     + int perms;
6763     + char *mode;
6764     +
6765     + /* name is never NULL here: the loop condition guarantees it */
6766     + mode = strchr(name, '=');
6767     + if (!*name) { /* bad use of ':' (extra colons) */
6768     + err = -EINVAL;
6769     + goto out;
6770     + }
6771     +
6772     + branches++;
6773     +
6774     + /* strip off '=' if any */
6775     + if (mode)
6776     + *mode++ = '\0';
6777     +
6778     + err = parse_branch_mode(mode, &perms);
6779     + if (err) {
6780     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6781     + "branch %d\n", mode, bindex);
6782     + goto out;
6783     + }
6784     + /* ensure that leftmost branch is writeable */
6785     + if (!bindex && !(perms & MAY_WRITE)) {
6786     + printk(KERN_ERR "unionfs: leftmost branch cannot be "
6787     + "read-only (use \"-o ro\" to create a "
6788     + "read-only union)\n");
6789     + err = -EINVAL;
6790     + goto out;
6791     + }
6792     +
6793     + err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6794     + if (err) {
6795     + printk(KERN_ERR "unionfs: error accessing "
6796     + "lower directory '%s' (error %d)\n",
6797     + name, err);
6798     + goto out;
6799     + }
6800     +
6801     + err = check_branch(&nd);
6802     + if (err) {
6803     + printk(KERN_ERR "unionfs: lower directory "
6804     + "'%s' is not a valid branch\n", name);
6805     + path_release(&nd);
6806     + goto out;
6807     + }
6808     +
6809     + lower_root_info->lower_paths[bindex].dentry = nd.dentry;
6810     + lower_root_info->lower_paths[bindex].mnt = nd.mnt;
6811     +
6812     + set_branchperms(sb, bindex, perms);
6813     + set_branch_count(sb, bindex, 0);
6814     + new_branch_id(sb, bindex);
6815     +
6816     + if (lower_root_info->bstart < 0)
6817     + lower_root_info->bstart = bindex;
6818     + lower_root_info->bend = bindex;
6819     + bindex++;
6820     + }
6821     +
6822     + if (branches == 0) {
6823     + printk(KERN_ERR "unionfs: no branches specified\n");
6824     + err = -EINVAL;
6825     + goto out;
6826     + }
6827     +
6828     + BUG_ON(branches != (lower_root_info->bend + 1));
6829     +
6830     + /*
6831     + * Ensure that no overlaps exist in the branches.
6832     + *
6833     + * This test is required because the Linux kernel has no support
6834     + * currently for ensuring coherency between stackable layers and
6835     + * branches. If we were to allow overlapping branches, it would be
6836     + * possible, for example, to delete a file via one branch, which
6837     + * would not be reflected in another branch. Such incoherency could
6838     + * lead to inconsistencies and even kernel oopses. Rather than
6839     + * implement hacks to work around some of these cache-coherency
6840     + * problems, we prevent branch overlapping, for now. A complete
6841     + * solution will involve proper kernel/VFS support for cache
6842     + * coherency, at which time we could safely remove this
6843     + * branch-overlapping test.
6844     + */
6845     + for (i = 0; i < branches; i++) {
6846     + dent1 = lower_root_info->lower_paths[i].dentry;
6847     + for (j = i + 1; j < branches; j++) {
6848     + dent2 = lower_root_info->lower_paths[j].dentry;
6849     + if (is_branch_overlap(dent1, dent2)) {
6850     + printk(KERN_ERR "unionfs: branches %d and "
6851     + "%d overlap\n", i, j);
6852     + err = -EINVAL;
6853     + goto out;
6854     + }
6855     + }
6856     + }
6857     +
6858     +out:
6859     + if (err) {
6860     + /* lower_paths may still be NULL if we bailed out before allocating */
6861     + for (i = 0; lower_root_info->lower_paths && i < branches; i++)
6862     + if (lower_root_info->lower_paths[i].dentry) {
6863     + dput(lower_root_info->lower_paths[i].dentry);
6864     + /* initialize: can't use unionfs_mntput here */
6865     + mntput(lower_root_info->lower_paths[i].mnt);
6866     + }
6867     + kfree(lower_root_info->lower_paths);
6868     + kfree(UNIONFS_SB(sb)->data);
6869     +
6870     + /*
6871     + * MUST clear the pointers to prevent potential double free if
6872     + * the caller dies later on
6873     + */
6874     + lower_root_info->lower_paths = NULL;
6875     + UNIONFS_SB(sb)->data = NULL;
6876     + }
6877     + return err;
6878     +}
6879     +
6880     +/*
6881     + * Parse mount options. See the manual page for usage instructions.
6882     + *
6883     + * Returns the dentry object of the lower-level (lower) directory;
6884     + * We want to mount our stackable file system on top of that lower directory.
6885     + */
6886     +static struct unionfs_dentry_info *unionfs_parse_options(
6887     + struct super_block *sb,
6888     + char *options)
6889     +{
6890     + struct unionfs_dentry_info *lower_root_info;
6891     + char *optname;
6892     + int err = 0;
6893     + int bindex;
6894     + int dirsfound = 0;
6895     +
6896     + /* allocate private data area */
6897     + err = -ENOMEM;
6898     + lower_root_info =
6899     + kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6900     + if (unlikely(!lower_root_info))
6901     + goto out_error;
6902     + lower_root_info->bstart = -1;
6903     + lower_root_info->bend = -1;
6904     + lower_root_info->bopaque = -1;
6905     +
6906     + while ((optname = strsep(&options, ",")) != NULL) {
6907     + char *optarg;
6908     + char *endptr;
6909     + int intval;
6910     +
6911     + if (!optname || !*optname)
6912     + continue;
6913     +
6914     + optarg = strchr(optname, '=');
6915     + if (optarg)
6916     + *optarg++ = '\0';
6917     +
6918     + /*
6919     + * All of our options take an argument now. Insert ones that
6920     + * don't, above this check.
6921     + */
6922     + if (!optarg) {
6923     + printk(KERN_ERR "unionfs: %s requires an argument\n",
6924     + optname);
6925     + err = -EINVAL;
6926     + goto out_error;
6927     + }
6928     +
6929     + if (!strcmp("dirs", optname)) {
6930     + if (++dirsfound > 1) {
6931     + printk(KERN_ERR
6932     + "unionfs: multiple dirs specified\n");
6933     + err = -EINVAL;
6934     + goto out_error;
6935     + }
6936     + err = parse_dirs_option(sb, lower_root_info, optarg);
6937     + if (err)
6938     + goto out_error;
6939     + continue;
6940     + }
6941     +
6942     + /* All of these options require an integer argument. */
6943     + intval = simple_strtoul(optarg, &endptr, 0);
6944     + if (*endptr) {
6945     + printk(KERN_ERR
6946     + "unionfs: invalid %s option '%s'\n",
6947     + optname, optarg);
6948     + err = -EINVAL;
6949     + goto out_error;
6950     + }
6951     +
6952     + err = -EINVAL;
6953     + printk(KERN_ERR
6954     + "unionfs: unrecognized option '%s'\n", optname);
6955     + goto out_error;
6956     + }
6957     + if (dirsfound != 1) {
6958     + printk(KERN_ERR "unionfs: dirs option required\n");
6959     + err = -EINVAL;
6960     + goto out_error;
6961     + }
6962     + goto out;
6963     +
6964     +out_error:
6965     + if (lower_root_info && lower_root_info->lower_paths) {
6966     + for (bindex = lower_root_info->bstart;
6967     + bindex >= 0 && bindex <= lower_root_info->bend;
6968     + bindex++) {
6969     + struct dentry *d;
6970     + struct vfsmount *m;
6971     +
6972     + d = lower_root_info->lower_paths[bindex].dentry;
6973     + m = lower_root_info->lower_paths[bindex].mnt;
6974     +
6975     + dput(d);
6976     + /* initializing: can't use unionfs_mntput here */
6977     + mntput(m);
6978     + }
6979     + }
6980     +
6981     + kfree(lower_root_info->lower_paths);
6982     + kfree(lower_root_info);
6983     +
6984     + kfree(UNIONFS_SB(sb)->data);
6985     + UNIONFS_SB(sb)->data = NULL;
6986     +
6987     + lower_root_info = ERR_PTR(err);
6988     +out:
6989     + return lower_root_info;
6990     +}
6991     +
6992     +/*
6993     + * our custom d_alloc_root work-alike
6994     + *
6995     + * we can't use d_alloc_root if we want to use our own interpose function
6996     + * unchanged, so we simply call our own "fake" d_alloc_root
6997     + */
6998     +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
6999     +{
7000     + struct dentry *ret = NULL;
7001     +
7002     + if (sb) {
7003     + static const struct qstr name = {
7004     + .name = "/",
7005     + .len = 1
7006     + };
7007     +
7008     + ret = d_alloc(NULL, &name);
7009     + if (likely(ret)) {
7010     + ret->d_op = &unionfs_dops;
7011     + ret->d_sb = sb;
7012     + ret->d_parent = ret;
7013     + }
7014     + }
7015     + return ret;
7016     +}
7017     +
7018     +/*
7019     + * There is no need to lock the unionfs_super_info's rwsem as there is no
7020     + * way anyone can have a reference to the superblock at this point in time.
7021     + */
7022     +static int unionfs_read_super(struct super_block *sb, void *raw_data,
7023     + int silent)
7024     +{
7025     + int err = 0;
7026     + struct unionfs_dentry_info *lower_root_info = NULL;
7027     + int bindex, bstart, bend;
7028     +
7029     + if (!raw_data) {
7030     + printk(KERN_ERR
7031     + "unionfs: read_super: missing data argument\n");
7032     + err = -EINVAL;
7033     + goto out;
7034     + }
7035     +
7036     + /* Allocate superblock private data */
7037     + sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
7038     + if (unlikely(!UNIONFS_SB(sb))) {
7039     + printk(KERN_CRIT "unionfs: read_super: out of memory\n");
7040     + err = -ENOMEM;
7041     + goto out;
7042     + }
7043     +
7044     + UNIONFS_SB(sb)->bend = -1;
7045     + atomic_set(&UNIONFS_SB(sb)->generation, 1);
7046     + init_rwsem(&UNIONFS_SB(sb)->rwsem);
7047     + UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7048     +
7049     + lower_root_info = unionfs_parse_options(sb, raw_data);
7050     + if (IS_ERR(lower_root_info)) {
7051     + printk(KERN_ERR
7052     + "unionfs: read_super: error while parsing options "
7053     + "(err = %ld)\n", PTR_ERR(lower_root_info));
7054     + err = PTR_ERR(lower_root_info);
7055     + lower_root_info = NULL;
7056     + goto out_free;
7057     + }
7058     + if (lower_root_info->bstart == -1) {
7059     + err = -ENOENT;
7060     + goto out_free;
7061     + }
7062     +
7063     + /* set the lower superblock field of upper superblock */
7064     + bstart = lower_root_info->bstart;
7065     + BUG_ON(bstart != 0);
7066     + sbend(sb) = bend = lower_root_info->bend;
7067     + for (bindex = bstart; bindex <= bend; bindex++) {
7068     + struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7069     + unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7070     + }
7071     +
7072     + /* max Bytes is the maximum bytes from highest priority branch */
7073     + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7074     +
7075     + /*
7076     + * Our c/m/atime granularity is 1 ns because we may stack on file
7077     + * systems whose granularity is as good. This is important for our
7078     + * time-based cache coherency.
7079     + */
7080     + sb->s_time_gran = 1;
7081     +
7082     + sb->s_op = &unionfs_sops;
7083     +
7084     + /* See comment next to the definition of unionfs_d_alloc_root */
7085     + sb->s_root = unionfs_d_alloc_root(sb);
7086     + if (unlikely(!sb->s_root)) {
7087     + err = -ENOMEM;
7088     + goto out_dput;
7089     + }
7090     +
7091     + /* link the upper and lower dentries */
7092     + sb->s_root->d_fsdata = NULL;
7093     + err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
7094     + if (unlikely(err))
7095     + goto out_freedpd;
7096     +
7097     + /* Set the lower dentries for s_root */
7098     + for (bindex = bstart; bindex <= bend; bindex++) {
7099     + struct dentry *d;
7100     + struct vfsmount *m;
7101     +
7102     + d = lower_root_info->lower_paths[bindex].dentry;
7103     + m = lower_root_info->lower_paths[bindex].mnt;
7104     +
7105     + unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7106     + unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7107     + }
7108     + set_dbstart(sb->s_root, bstart);
7109     + set_dbend(sb->s_root, bend);
7110     +
7111     + /* Set the generation number to one, since this is for the mount. */
7112     + atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7113     +
7114     + /*
7115     + * Call interpose to create the upper level inode. Only
7116     + * INTERPOSE_LOOKUP can return a value other than 0 on err.
7117     + */
7118     + err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7119     + unionfs_unlock_dentry(sb->s_root);
7120     + if (!err)
7121     + goto out;
7122     + /* else fall through */
7123     +
7124     +out_freedpd:
7125     + if (UNIONFS_D(sb->s_root)) {
7126     + kfree(UNIONFS_D(sb->s_root)->lower_paths);
7127     + free_dentry_private_data(sb->s_root);
7128     + }
7129     + dput(sb->s_root);
7130     +
7131     +out_dput:
7132     + if (lower_root_info && !IS_ERR(lower_root_info)) {
7133     + for (bindex = lower_root_info->bstart;
7134     + bindex <= lower_root_info->bend; bindex++) {
7135     + struct dentry *d;
7136     + struct vfsmount *m;
7137     +
7138     + d = lower_root_info->lower_paths[bindex].dentry;
7139     + m = lower_root_info->lower_paths[bindex].mnt;
7140     +
7141     + dput(d);
7142     + /* initializing: can't use unionfs_mntput here */
7143     + mntput(m);
7144     + }
7145     + kfree(lower_root_info->lower_paths);
7146     + kfree(lower_root_info);
7147     + lower_root_info = NULL;
7148     + }
7149     +
7150     +out_free:
7151     + kfree(UNIONFS_SB(sb)->data);
7152     + kfree(UNIONFS_SB(sb));
7153     + sb->s_fs_info = NULL;
7154     +
7155     +out:
7156     + if (lower_root_info && !IS_ERR(lower_root_info)) {
7157     + kfree(lower_root_info->lower_paths);
7158     + kfree(lower_root_info);
7159     + }
7160     + return err;
7161     +}
7162     +
7163     +static int unionfs_get_sb(struct file_system_type *fs_type,
7164     + int flags, const char *dev_name,
7165     + void *raw_data, struct vfsmount *mnt)
7166     +{
7167     + return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7168     +}
7169     +
7170     +static struct file_system_type unionfs_fs_type = {
7171     + .owner = THIS_MODULE,
7172     + .name = UNIONFS_NAME,
7173     + .get_sb = unionfs_get_sb,
7174     + .kill_sb = generic_shutdown_super,
7175     + .fs_flags = FS_REVAL_DOT,
7176     +};
7177     +
7178     +static int __init init_unionfs_fs(void)
7179     +{
7180     + int err;
7181     +
7182     + pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7183     +
7184     + err = unionfs_init_filldir_cache();
7185     + if (unlikely(err))
7186     + goto out;
7187     + err = unionfs_init_inode_cache();
7188     + if (unlikely(err))
7189     + goto out;
7190     + err = unionfs_init_dentry_cache();
7191     + if (unlikely(err))
7192     + goto out;
7193     + err = init_sioq();
7194     + if (unlikely(err))
7195     + goto out;
7196     + err = register_filesystem(&unionfs_fs_type);
7197     +out:
7198     + if (unlikely(err)) {
7199     + stop_sioq();
7200     + unionfs_destroy_filldir_cache();
7201     + unionfs_destroy_inode_cache();
7202     + unionfs_destroy_dentry_cache();
7203     + }
7204     + return err;
7205     +}
7206     +
7207     +static void __exit exit_unionfs_fs(void)
7208     +{
7209     + stop_sioq();
7210     + unionfs_destroy_filldir_cache();
7211     + unionfs_destroy_inode_cache();
7212     + unionfs_destroy_dentry_cache();
7213     + unregister_filesystem(&unionfs_fs_type);
7214     + pr_info("Completed unionfs module unload\n");
7215     +}
7216     +
7217     +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7218     + " (http://www.fsl.cs.sunysb.edu)");
7219     +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7220     + " (http://unionfs.filesystems.org)");
7221     +MODULE_LICENSE("GPL");
7222     +
7223     +module_init(init_unionfs_fs);
7224     +module_exit(exit_unionfs_fs);
7225     diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7226     new file mode 100644
7227     index 0000000..ad770ac
7228     --- /dev/null
7229     +++ b/fs/unionfs/mmap.c
7230     @@ -0,0 +1,343 @@
7231     +/*
7232     + * Copyright (c) 2003-2007 Erez Zadok
7233     + * Copyright (c) 2003-2006 Charles P. Wright
7234     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7235     + * Copyright (c) 2005-2006 Junjiro Okajima
7236     + * Copyright (c) 2006 Shaya Potter
7237     + * Copyright (c) 2005 Arun M. Krishnakumar
7238     + * Copyright (c) 2004-2006 David P. Quigley
7239     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7240     + * Copyright (c) 2003 Puja Gupta
7241     + * Copyright (c) 2003 Harikesavan Krishnan
7242     + * Copyright (c) 2003-2007 Stony Brook University
7243     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7244     + *
7245     + * This program is free software; you can redistribute it and/or modify
7246     + * it under the terms of the GNU General Public License version 2 as
7247     + * published by the Free Software Foundation.
7248     + */
7249     +
7250     +#include "union.h"
7251     +
7252     +static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
7253     +{
7254     + int err = -EIO;
7255     + struct inode *inode;
7256     + struct inode *lower_inode;
7257     + struct page *lower_page;
7258     + struct address_space *lower_mapping; /* lower inode mapping */
7259     + gfp_t mask;
7260     +
7261     + BUG_ON(!PageUptodate(page));
7262     + inode = page->mapping->host;
7263     + /* if no lower inode, nothing to do */
7264     + if (!inode || !UNIONFS_I(inode) || UNIONFS_I(inode)->lower_inodes) {
7265     + err = 0;
7266     + goto out;
7267     + }
7268     + lower_inode = unionfs_lower_inode(inode);
7269     + lower_mapping = lower_inode->i_mapping;
7270     +
7271     + /*
7272     + * find lower page (returns a locked page)
7273     + *
7274     + * We turn off __GFP_FS while we look for or create a new lower
7275     + * page. This prevents a recursion into the file system code, which
7276     + * under memory pressure conditions could lead to a deadlock. This
7277     + * is similar to how the loop driver behaves (see loop_set_fd in
7278     + * drivers/block/loop.c). If we can't find the lower page, we
7279     + * redirty our page and return "success" so that the VM will call us
7280     + * again in the (hopefully near) future.
7281     + */
7282     + mask = mapping_gfp_mask(lower_mapping) & ~(__GFP_FS);
7283     + lower_page = find_or_create_page(lower_mapping, page->index, mask);
7284     + if (!lower_page) {
7285     + err = 0;
7286     + set_page_dirty(page);
7287     + goto out;
7288     + }
7289     +
7290     + /* copy page data from our upper page to the lower page */
7291     + copy_highpage(lower_page, page);
7292     + flush_dcache_page(lower_page);
7293     + SetPageUptodate(lower_page);
7294     + set_page_dirty(lower_page);
7295     +
7296     + /*
7297     + * Call lower writepage (expects locked page). However, if we are
7298     + * called with wbc->for_reclaim, then the VFS/VM just wants to
7299     + * reclaim our page. Therefore, we don't need to call the lower
7300     + * ->writepage: just copy our data to the lower page (already done
7301     + * above), then mark the lower page dirty and unlock it, and return
7302     + * success.
7303     + */
7304     + if (wbc->for_reclaim) {
7305     + unlock_page(lower_page);
7306     + goto out_release;
7307     + }
7308     +
7309     + BUG_ON(!lower_mapping->a_ops->writepage);
7310     + wait_on_page_writeback(lower_page); /* prevent multiple writers */
7311     + clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
7312     + err = lower_mapping->a_ops->writepage(lower_page, wbc);
7313     + if (err < 0)
7314     + goto out_release;
7315     +
7316     + /*
7317     + * Lower file systems such as ramfs and tmpfs, may return
7318     + * AOP_WRITEPAGE_ACTIVATE so that the VM won't try to (pointlessly)
7319     + * write the page again for a while. But those lower file systems
7320     + * also set the page dirty bit back again. Since we successfully
7321     + * copied our page data to the lower page, then the VM will come
7322     + * back to the lower page (directly) and try to flush it. So we can
7323     + * save the VM the hassle of coming back to our page and trying to
7324     + * flush too. Therefore, we don't re-dirty our own page, and we
7325     + * never return AOP_WRITEPAGE_ACTIVATE back to the VM (we consider
7326     + * this a success).
7327     + *
7328     + * We also unlock the lower page if the lower ->writepage returned
7329     + * AOP_WRITEPAGE_ACTIVATE. (This "anomalous" behaviour may be
7330     + * addressed in future shmem/VM code.)
7331     + */
7332     + if (err == AOP_WRITEPAGE_ACTIVATE) {
7333     + err = 0;
7334     + unlock_page(lower_page);
7335     + }
7336     +
7337     + /* all is well */
7338     +
7339     + /* lower mtimes have changed: update ours */
7340     + unionfs_copy_attr_times(inode);
7341     +
7342     +out_release:
7343     + /* b/c find_or_create_page increased refcnt */
7344     + page_cache_release(lower_page);
7345     +out:
7346     + /*
7347     + * We unlock our page unconditionally, because we never return
7348     + * AOP_WRITEPAGE_ACTIVATE.
7349     + */
7350     + unlock_page(page);
7351     + return err;
7352     +}
7353     +
7354     +static int unionfs_writepages(struct address_space *mapping,
7355     + struct writeback_control *wbc)
7356     +{
7357     + int err = 0;
7358     + struct inode *lower_inode;
7359     + struct inode *inode;
7360     +
7361     + inode = mapping->host;
7362     + if (ibstart(inode) < 0 && ibend(inode) < 0)
7363     + goto out;
7364     + lower_inode = unionfs_lower_inode(inode);
7365     + if (!lower_inode)
7366     + goto out;
7367     +
7368     + err = generic_writepages(mapping, wbc);
7369     + if (!err)
7370     + unionfs_copy_attr_times(inode);
7371     +out:
7372     + return err;
7373     +}
7374     +
7375     +/* Readpage expects a locked page, and must unlock it */
7376     +static int unionfs_readpage(struct file *file, struct page *page)
7377     +{
7378     + int err;
7379     + struct file *lower_file;
7380     + struct inode *inode;
7381     + mm_segment_t old_fs;
7382     + char *page_data = NULL;
7383     + mode_t orig_mode;
7384     +
7385     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
7386     + err = unionfs_file_revalidate(file, false);
7387     + if (unlikely(err))
7388     + goto out;
7389     + unionfs_check_file(file);
7390     +
7391     + if (!UNIONFS_F(file)) {
7392     + err = -ENOENT;
7393     + goto out;
7394     + }
7395     +
7396     + lower_file = unionfs_lower_file(file);
7397     + /* FIXME: is this assertion right here? */
7398     + BUG_ON(lower_file == NULL);
7399     +
7400     + inode = file->f_path.dentry->d_inode;
7401     +
7402     + page_data = (char *) kmap(page);
7403     + /*
7404     + * Use vfs_read because some lower file systems don't have a
7405     + * readpage method, and some file systems (esp. distributed ones)
7406     + * don't like their pages to be accessed directly. Using vfs_read
7407     + * may be a little slower, but a lot safer, as the VFS does a lot of
7408     + * the necessary magic for us.
7409     + */
7410     + lower_file->f_pos = page_offset(page);
7411     + old_fs = get_fs();
7412     + set_fs(KERNEL_DS);
7413     + /*
7414     + * generic_file_splice_write may call us on a file not opened for
7415     + * reading, so temporarily allow reading.
7416     + */
7417     + orig_mode = lower_file->f_mode;
7418     + lower_file->f_mode |= FMODE_READ;
7419     + err = vfs_read(lower_file, page_data, PAGE_CACHE_SIZE,
7420     + &lower_file->f_pos);
7421     + lower_file->f_mode = orig_mode;
7422     + set_fs(old_fs);
7423     + if (err >= 0 && err < PAGE_CACHE_SIZE)
7424     + memset(page_data + err, 0, PAGE_CACHE_SIZE - err);
7425     + kunmap(page);
7426     +
7427     + if (err < 0)
7428     + goto out;
7429     + err = 0;
7430     +
7431     + /* if vfs_read succeeded above, sync up our times */
7432     + unionfs_copy_attr_times(inode);
7433     +
7434     + flush_dcache_page(page);
7435     +
7436     + /*
7437     + * we have to unlock our page, b/c we _might_ have gotten a locked
7438     + * page. but we no longer have to wakeup on our page here, b/c
7439     + * UnlockPage does it
7440     + */
7441     +out:
7442     + if (err == 0)
7443     + SetPageUptodate(page);
7444     + else
7445     + ClearPageUptodate(page);
7446     +
7447     + unlock_page(page);
7448     + unionfs_check_file(file);
7449     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7450     +
7451     + return err;
7452     +}
7453     +
7454     +static int unionfs_prepare_write(struct file *file, struct page *page,
7455     + unsigned from, unsigned to)
7456     +{
7457     + int err;
7458     +
7459     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
7460     + /*
7461     + * This is the only place where we unconditionally copy the lower
7462     + * attribute times before calling unionfs_file_revalidate. The
7463     + * reason is that our ->write calls do_sync_write which in turn will
7464     + * call our ->prepare_write and then ->commit_write. Before our
7465     + * ->write is called, the lower mtimes are in sync, but by the time
7466     + * the VFS calls our ->commit_write, the lower mtimes have changed.
7467     + * Therefore, the only reasonable time for us to sync up from the
7468     + * changed lower mtimes, and avoid an invariant violation warning,
7469     + * is here, in ->prepare_write.
7470     + */
7471     + unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7472     + err = unionfs_file_revalidate(file, true);
7473     + unionfs_check_file(file);
7474     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7475     +
7476     + return err;
7477     +}
7478     +
7479     +static int unionfs_commit_write(struct file *file, struct page *page,
7480     + unsigned from, unsigned to)
7481     +{
7482     + int err = -ENOMEM;
7483     + struct inode *inode, *lower_inode;
7484     + struct file *lower_file = NULL;
7485     + unsigned bytes = to - from;
7486     + char *page_data = NULL;
7487     + mm_segment_t old_fs;
7488     +
7489     + BUG_ON(file == NULL);
7490     +
7491     + unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
7492     + err = unionfs_file_revalidate(file, true);
7493     + if (unlikely(err))
7494     + goto out;
7495     + unionfs_check_file(file);
7496     +
7497     + inode = page->mapping->host;
7498     +
7499     + if (UNIONFS_F(file) != NULL)
7500     + lower_file = unionfs_lower_file(file);
7501     +
7502     + /* FIXME: is this assertion right here? */
7503     + BUG_ON(lower_file == NULL);
7504     +
7505     + page_data = (char *)kmap(page);
7506     + lower_file->f_pos = page_offset(page) + from;
7507     +
7508     + /*
7509     + * We use vfs_write instead of copying page data and the
7510     + * prepare_write/commit_write combo because file system's like
7511     + * GFS/OCFS2 don't like things touching those directly,
7512     + * calling the underlying write op, while a little bit slower, will
7513     + * call all the FS specific code as well
7514     + */
7515     + old_fs = get_fs();
7516     + set_fs(KERNEL_DS);
7517     + err = vfs_write(lower_file, page_data + from, bytes,
7518     + &lower_file->f_pos);
7519     + set_fs(old_fs);
7520     +
7521     + kunmap(page);
7522     +
7523     + if (err < 0)
7524     + goto out;
7525     +
7526     + /* if vfs_write succeeded above, sync up our times/sizes */
7527     + lower_inode = lower_file->f_path.dentry->d_inode;
7528     + if (!lower_inode)
7529     + lower_inode = unionfs_lower_inode(inode);
7530     + BUG_ON(!lower_inode);
7531     + fsstack_copy_inode_size(inode, lower_inode);
7532     + unionfs_copy_attr_times(inode);
7533     + mark_inode_dirty_sync(inode);
7534     +
7535     +out:
7536     + if (err < 0)
7537     + ClearPageUptodate(page);
7538     +
7539     + unionfs_check_file(file);
7540     + unionfs_read_unlock(file->f_path.dentry->d_sb);
7541     + return err; /* assume all is ok */
7542     +}
7543     +
7544     +/*
7545     + * Although unionfs isn't a block-based file system, it may stack on one.
7546     + * ->bmap is needed, for example, to swapon(2) files.
7547     + */
7548     +sector_t unionfs_bmap(struct address_space *mapping, sector_t block)
7549     +{
7550     + int err = -EINVAL;
7551     + struct inode *inode, *lower_inode;
7552     + sector_t (*bmap)(struct address_space *, sector_t);
7553     +
7554     + inode = (struct inode *)mapping->host;
7555     + lower_inode = unionfs_lower_inode(inode);
7556     + if (!lower_inode)
7557     + goto out;
7558     + bmap = lower_inode->i_mapping->a_ops->bmap;
7559     + if (bmap)
7560     + err = bmap(lower_inode->i_mapping, block);
7561     +out:
7562     + return err;
7563     +}
7564     +
7565     +
7566     +struct address_space_operations unionfs_aops = {
7567     + .writepage = unionfs_writepage,
7568     + .writepages = unionfs_writepages,
7569     + .readpage = unionfs_readpage,
7570     + .prepare_write = unionfs_prepare_write,
7571     + .commit_write = unionfs_commit_write,
7572     + .bmap = unionfs_bmap,
7573     +};
7574     diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7575     new file mode 100644
7576     index 0000000..7ba1e1a
7577     --- /dev/null
7578     +++ b/fs/unionfs/rdstate.c
7579     @@ -0,0 +1,285 @@
7580     +/*
7581     + * Copyright (c) 2003-2007 Erez Zadok
7582     + * Copyright (c) 2003-2006 Charles P. Wright
7583     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7584     + * Copyright (c) 2005-2006 Junjiro Okajima
7585     + * Copyright (c) 2005 Arun M. Krishnakumar
7586     + * Copyright (c) 2004-2006 David P. Quigley
7587     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7588     + * Copyright (c) 2003 Puja Gupta
7589     + * Copyright (c) 2003 Harikesavan Krishnan
7590     + * Copyright (c) 2003-2007 Stony Brook University
7591     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7592     + *
7593     + * This program is free software; you can redistribute it and/or modify
7594     + * it under the terms of the GNU General Public License version 2 as
7595     + * published by the Free Software Foundation.
7596     + */
7597     +
7598     +#include "union.h"
7599     +
7600     +/* This file contains the routines for maintaining readdir state. */
7601     +
7602     +/*
7603     + * There are two structures here, rdstate which is a hash table
7604     + * of the second structure which is a filldir_node.
7605     + */
7606     +
7607     +/*
7608     + * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7609     + * of them and they shouldn't waste memory. If the node has a small name
7610     + * (as defined by the dentry structure), then we use an inline name to
7611     + * preserve kmalloc space.
7612     + */
7613     +static struct kmem_cache *unionfs_filldir_cachep;
7614     +
7615     +int unionfs_init_filldir_cache(void)
7616     +{
7617     + unionfs_filldir_cachep =
7618     + kmem_cache_create("unionfs_filldir",
7619     + sizeof(struct filldir_node), 0,
7620     + SLAB_RECLAIM_ACCOUNT, NULL);
7621     +
7622     + return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7623     +}
7624     +
7625     +void unionfs_destroy_filldir_cache(void)
7626     +{
7627     + if (unionfs_filldir_cachep)
7628     + kmem_cache_destroy(unionfs_filldir_cachep);
7629     +}
7630     +
7631     +/*
7632     + * This is a tuning parameter that tells us roughly how big to make the
7633     + * hash table in directory entries per page. This isn't perfect, but
7634     + * at least we get a hash table size that shouldn't be too overloaded.
7635     + * The following averages are based on my home directory.
7636     + * 14.44693 Overall
7637     + * 12.29 Single Page Directories
7638     + * 117.93 Multi-page directories
7639     + */
7640     +#define DENTPAGE 4096
7641     +#define DENTPERONEPAGE 12
7642     +#define DENTPERPAGE 118
7643     +#define MINHASHSIZE 1
7644     +static int guesstimate_hash_size(struct inode *inode)
7645     +{
7646     + struct inode *lower_inode;
7647     + int bindex;
7648     + int hashsize = MINHASHSIZE;
7649     +
7650     + if (UNIONFS_I(inode)->hashsize > 0)
7651     + return UNIONFS_I(inode)->hashsize;
7652     +
7653     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7654     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
7655     + if (!lower_inode)
7656     + continue;
7657     +
7658     + if (i_size_read(lower_inode) == DENTPAGE)
7659     + hashsize += DENTPERONEPAGE;
7660     + else
7661     + hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7662     + DENTPERPAGE;
7663     + }
7664     +
7665     + return hashsize;
7666     +}
7667     +
7668     +int init_rdstate(struct file *file)
7669     +{
7670     + BUG_ON(sizeof(loff_t) !=
7671     + (sizeof(unsigned int) + sizeof(unsigned int)));
7672     + BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7673     +
7674     + UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7675     + fbstart(file));
7676     +
7677     + return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7678     +}
7679     +
7680     +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7681     +{
7682     + struct unionfs_dir_state *rdstate = NULL;
7683     + struct list_head *pos;
7684     +
7685     + spin_lock(&UNIONFS_I(inode)->rdlock);
7686     + list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7687     + struct unionfs_dir_state *r =
7688     + list_entry(pos, struct unionfs_dir_state, cache);
7689     + if (fpos == rdstate2offset(r)) {
7690     + UNIONFS_I(inode)->rdcount--;
7691     + list_del(&r->cache);
7692     + rdstate = r;
7693     + break;
7694     + }
7695     + }
7696     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7697     + return rdstate;
7698     +}
7699     +/* Allocate and initialise a readdir state for @inode at branch @bindex, stamped with the inode's next cookie; returns NULL if kmalloc() fails. */
7700     +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7701     +{
7702     + int i = 0;
7703     + int hashsize;
7704     + unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7705     + struct unionfs_dir_state *rdstate;
7706     + /* size the allocation: the struct plus one list_head per guessed hash bucket */
7707     + hashsize = guesstimate_hash_size(inode);
7708     + mallocsize += hashsize * sizeof(struct list_head);
7709     + mallocsize = __roundup_pow_of_two(mallocsize);
7710     +
7711     + /* This should give us about 500 entries anyway. */
7712     + if (mallocsize > PAGE_SIZE)
7713     + mallocsize = PAGE_SIZE;
7714     + /* recompute the bucket count from what the final allocation size can hold */
7715     + hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7716     + sizeof(struct list_head);
7717     +
7718     + rdstate = kmalloc(mallocsize, GFP_KERNEL);
7719     + if (unlikely(!rdstate))
7720     + return NULL;
7721     + /* advance the inode's cookie under rdlock, restarting at 1 once it reaches MAXRDCOOKIE - 1 */
7722     + spin_lock(&UNIONFS_I(inode)->rdlock);
7723     + if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7724     + UNIONFS_I(inode)->cookie = 1;
7725     + else
7726     + UNIONFS_I(inode)->cookie++;
7727     +
7728     + rdstate->cookie = UNIONFS_I(inode)->cookie;
7729     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7730     + rdstate->offset = 1;
7731     + rdstate->access = jiffies;
7732     + rdstate->bindex = bindex;
7733     + rdstate->dirpos = 0;
7734     + rdstate->hashentries = 0;
7735     + rdstate->size = hashsize;
7736     + for (i = 0; i < rdstate->size; i++) /* every bucket starts out as an empty list */
7737     + INIT_LIST_HEAD(&rdstate->list[i]);
7738     +
7739     + return rdstate;
7740     +}
7741     +/* Release @node: kfree() its name only when namelen >= DNAME_INLINE_LEN_MIN (the case where add_filldir_node() kmalloc'ed it), then return the node to unionfs_filldir_cachep. */
7742     +static void free_filldir_node(struct filldir_node *node)
7743     +{
7744     + if (node->namelen >= DNAME_INLINE_LEN_MIN)
7745     + kfree(node->name);
7746     + kmem_cache_free(unionfs_filldir_cachep, node);
7747     +}
7748     +/* Free every filldir_node hanging off @state's hash buckets, then free @state itself. */
7749     +void free_rdstate(struct unionfs_dir_state *state)
7750     +{
7751     + struct filldir_node *tmp;
7752     + int i;
7753     + /* visit each of the state->size hash buckets */
7754     + for (i = 0; i < state->size; i++) {
7755     + struct list_head *head = &(state->list[i]);
7756     + struct list_head *pos, *n;
7757     +
7758     + /* traverse the list and deallocate space */
7759     + list_for_each_safe(pos, n, head) { /* _safe: entries are unlinked while walking */
7760     + tmp = list_entry(pos, struct filldir_node, file_list);
7761     + list_del(&tmp->file_list);
7762     + free_filldir_node(tmp);
7763     + }
7764     + }
7765     + /* all nodes are gone; release the state (buckets are part of the same allocation made in alloc_rdstate()) */
7766     + kfree(state);
7767     +}
7768     +/* Look up @name (@namelen bytes) in @rdstate's hash table; returns the matching filldir_node or NULL. A hit in rdstate->bindex with !@is_whiteout is logged as a possible duplicate. */
7769     +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7770     + const char *name, int namelen,
7771     + int is_whiteout)
7772     +{
7773     + int index;
7774     + unsigned int hash;
7775     + struct list_head *head;
7776     + struct list_head *pos;
7777     + struct filldir_node *cursor = NULL;
7778     + int found = 0;
7779     + /* a non-positive name length is a caller bug */
7780     + BUG_ON(namelen <= 0);
7781     + /* pick the bucket from the name hash */
7782     + hash = full_name_hash(name, namelen);
7783     + index = hash % rdstate->size;
7784     +
7785     + head = &(rdstate->list[index]);
7786     + list_for_each(pos, head) {
7787     + cursor = list_entry(pos, struct filldir_node, file_list);
7788     + /* cheap length/hash comparison first, byte comparison last */
7789     + if (cursor->namelen == namelen && cursor->hash == hash &&
7790     + !strncmp(cursor->name, name, namelen)) {
7791     + /*
7792     + * a duplicate exists, and hence no need to create
7793     + * entry to the list
7794     + */
7795     + found = 1;
7796     +
7797     + /*
7798     + * if a duplicate is found in this branch, and is
7799     + * not due to the caller looking for an entry to
7800     + * whiteout, then the file system may be corrupted.
7801     + */
7802     + if (unlikely(!is_whiteout &&
7803     + cursor->bindex == rdstate->bindex))
7804     + printk(KERN_ERR "unionfs: filldir: possible "
7805     + "I/O error: a file is duplicated "
7806     + "in the same branch %d: %s\n",
7807     + rdstate->bindex, cursor->name);
7808     + break;
7809     + }
7810     + }
7811     + /* without a match, cursor still points at the last node visited; don't return it */
7812     + if (!found)
7813     + cursor = NULL;
7814     +
7815     + return cursor;
7816     +}
7817     +/* Insert a copy of @name (@namelen bytes), tagged with @bindex and @whiteout, into @rdstate's hash table. Returns 0 on success or -ENOMEM if either allocation fails. */
7818     +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7819     + int namelen, int bindex, int whiteout)
7820     +{
7821     + struct filldir_node *new;
7822     + unsigned int hash;
7823     + int index;
7824     + int err = 0;
7825     + struct list_head *head;
7826     + /* a non-positive name length is a caller bug */
7827     + BUG_ON(namelen <= 0);
7828     + /* pick the bucket from the name hash */
7829     + hash = full_name_hash(name, namelen);
7830     + index = hash % rdstate->size;
7831     + head = &(rdstate->list[index]);
7832     +
7833     + new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7834     + if (unlikely(!new)) {
7835     + err = -ENOMEM;
7836     + goto out;
7837     + }
7838     +
7839     + INIT_LIST_HEAD(&new->file_list);
7840     + new->namelen = namelen;
7841     + new->hash = hash;
7842     + new->bindex = bindex;
7843     + new->whiteout = whiteout;
7844     + /* short names live in the node's own iname buffer; longer ones get a separate allocation */
7845     + if (namelen < DNAME_INLINE_LEN_MIN) {
7846     + new->name = new->iname;
7847     + } else {
7848     + new->name = kmalloc(namelen + 1, GFP_KERNEL);
7849     + if (unlikely(!new->name)) {
7850     + kmem_cache_free(unionfs_filldir_cachep, new);
7851     + err = -ENOMEM; /* report the failure; the entry was not added */
7852     + goto out;
7853     + }
7854     + }
7855     + /* copy the name and NUL-terminate it */
7856     + memcpy(new->name, name, namelen);
7857     + new->name[namelen] = '\0';
7858     +
7859     + rdstate->hashentries++;
7860     +
7861     + list_add(&(new->file_list), head);
7862     +out:
7863     + return err;
7864     +}
7865     diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7866     new file mode 100644
7867     index 0000000..5ab13f9
7868     --- /dev/null
7869     +++ b/fs/unionfs/rename.c
7870     @@ -0,0 +1,545 @@
7871     +/*
7872     + * Copyright (c) 2003-2007 Erez Zadok
7873     + * Copyright (c) 2003-2006 Charles P. Wright
7874     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7875     + * Copyright (c) 2005-2006 Junjiro Okajima
7876     + * Copyright (c) 2005 Arun M. Krishnakumar
7877     + * Copyright (c) 2004-2006 David P. Quigley
7878     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7879     + * Copyright (c) 2003 Puja Gupta
7880     + * Copyright (c) 2003 Harikesavan Krishnan
7881     + * Copyright (c) 2003-2007 Stony Brook University
7882     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7883     + *
7884     + * This program is free software; you can redistribute it and/or modify
7885     + * it under the terms of the GNU General Public License version 2 as
7886     + * published by the Free Software Foundation.
7887     + */
7888     +
7889     +#include "union.h"
7890     +/* Rename the lower objects of @old_dentry to @new_dentry in branch @bindex; if @wh_old is non-NULL, also look up the source's whiteout dentry into *@wh_old (caller dput()s it). Returns 0 or a negative errno. */
7891     +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7892     + struct inode *new_dir, struct dentry *new_dentry,
7893     + int bindex, struct dentry **wh_old)
7894     +{
7895     + int err = 0;
7896     + struct dentry *lower_old_dentry;
7897     + struct dentry *lower_new_dentry;
7898     + struct dentry *lower_old_dir_dentry;
7899     + struct dentry *lower_new_dir_dentry;
7900     + struct dentry *lower_wh_dentry;
7901     + struct dentry *lower_wh_dir_dentry;
7902     + struct dentry *trap;
7903     + char *wh_name = NULL;
7904     +
7905     + lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7906     + lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7907     + /* no lower dentry for the target in this branch yet: have create_parents() supply one */
7908     + if (!lower_new_dentry) {
7909     + lower_new_dentry =
7910     + create_parents(new_dentry->d_parent->d_inode,
7911     + new_dentry, new_dentry->d_name.name,
7912     + bindex);
7913     + if (IS_ERR(lower_new_dentry)) {
7914     + err = PTR_ERR(lower_new_dentry);
7915     + if (IS_COPYUP_ERR(err)) /* copyup-class errors are returned without logging */
7916     + goto out;
7917     + printk(KERN_ERR "unionfs: error creating directory "
7918     + "tree for rename, bindex=%d err=%d\n",
7919     + bindex, err);
7920     + goto out;
7921     + }
7922     + }
7923     + /* build the whiteout name of the target so a stale whiteout can be removed first */
7924     + wh_name = alloc_whname(new_dentry->d_name.name,
7925     + new_dentry->d_name.len);
7926     + if (unlikely(IS_ERR(wh_name))) {
7927     + err = PTR_ERR(wh_name);
7928     + wh_name = NULL; /* out: kfree()s wh_name; never hand it an ERR_PTR */
7929     + goto out;
7930     + }
7931     + lower_wh_dentry = lookup_one_len(wh_name, lower_new_dentry->d_parent,
7932     + new_dentry->d_name.len +
7933     + UNIONFS_WHLEN);
7934     + if (IS_ERR(lower_wh_dentry)) {
7935     + err = PTR_ERR(lower_wh_dentry);
7936     + goto out;
7937     + }
7938     +
7939     + if (lower_wh_dentry->d_inode) {
7940     + /* get rid of the whiteout that is existing */
7941     + if (lower_new_dentry->d_inode) {
7942     + printk(KERN_ERR "unionfs: both a whiteout and a "
7943     + "dentry exist when doing a rename!\n");
7944     + err = -EIO;
7945     +
7946     + dput(lower_wh_dentry);
7947     + goto out;
7948     + }
7949     + /* unlink the whiteout under its parent's lock, unless the branch is read-only */
7950     + lower_wh_dir_dentry = lock_parent_wh(lower_wh_dentry);
7951     + err = is_robranch_super(old_dentry->d_sb, bindex);
7952     + if (!err)
7953     + err = vfs_unlink(lower_wh_dir_dentry->d_inode,
7954     + lower_wh_dentry);
7955     +
7956     + dput(lower_wh_dentry);
7957     + unlock_dir(lower_wh_dir_dentry);
7958     + if (err)
7959     + goto out;
7960     + } else {
7961     + dput(lower_wh_dentry);
7962     + }
7963     + /* checked again here because the test above only runs when a whiteout existed */
7964     + err = is_robranch_super(old_dentry->d_sb, bindex);
7965     + if (err)
7966     + goto out;
7967     + /* references taken here are dropped at out_dput */
7968     + dget(lower_old_dentry);
7969     + dget(lower_new_dentry);
7970     + lower_old_dir_dentry = dget_parent(lower_old_dentry);
7971     + lower_new_dir_dentry = dget_parent(lower_new_dentry);
7972     +
7973     + /*
7974     + * ready to whiteout for old_dentry. caller will create the actual
7975     + * whiteout, and must dput(*wh_old)
7976     + */
7977     + if (wh_old) {
7978     + char *whname;
7979     + whname = alloc_whname(old_dentry->d_name.name,
7980     + old_dentry->d_name.len);
7981     + err = PTR_ERR(whname); /* only meaningful if IS_ERR(); overwritten below otherwise */
7982     + if (unlikely(IS_ERR(whname)))
7983     + goto out_dput;
7984     + *wh_old = lookup_one_len(whname, lower_old_dir_dentry,
7985     + old_dentry->d_name.len +
7986     + UNIONFS_WHLEN);
7987     + kfree(whname);
7988     + err = PTR_ERR(*wh_old); /* likewise: replaced by the rename result on success */
7989     + if (IS_ERR(*wh_old)) {
7990     + *wh_old = NULL;
7991     + goto out_dput;
7992     + }
7993     + }
7994     +
7995     + /* see Documentation/filesystems/unionfs/issues.txt */
7996     + lockdep_off();
7997     + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7998     + /* source should not be ancestor of target */
7999     + if (trap == lower_old_dentry) {
8000     + err = -EINVAL;
8001     + goto out_err_unlock;
8002     + }
8003     + /* target should not be ancestor of source */
8004     + if (trap == lower_new_dentry) {
8005     + err = -ENOTEMPTY;
8006     + goto out_err_unlock;
8007     + }
8008     + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
8009     + lower_new_dir_dentry->d_inode, lower_new_dentry);
8010     +out_err_unlock:
8011     + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
8012     + lockdep_on();
8013     +
8014     +out_dput:
8015     + dput(lower_old_dir_dentry);
8016     + dput(lower_new_dir_dentry);
8017     + dput(lower_old_dentry);
8018     + dput(lower_new_dentry);
8019     +
8020     +out:
8021     + if (!err) {
8022     + /* Fixup the new_dentry. */
8023     + if (bindex < dbstart(new_dentry))
8024     + set_dbstart(new_dentry, bindex);
8025     + else if (bindex > dbend(new_dentry))
8026     + set_dbend(new_dentry, bindex);
8027     + }
8028     + /* wh_name is NULL or a live allocation on every path that reaches here */
8029     + kfree(wh_name);
8030     +
8031     + return err;
8032     +}
8033     +
8034     +/*
8035     + * Main rename code; documented in Documentation/filesystems/unionfs/rename.txt.
8036     + * Calls __unionfs_rename() above to perform some of the work. Returns 0 or a
8037     + * negative errno (-EIO if the source whiteout or a revert cannot be completed).
8038     + */
8039     +static int do_unionfs_rename(struct inode *old_dir,
8040     + struct dentry *old_dentry,
8041     + struct inode *new_dir,
8042     + struct dentry *new_dentry)
8043     +{
8044     + int err = 0;
8045     + int bindex, bwh_old;
8046     + int old_bstart, old_bend;
8047     + int new_bstart, new_bend;
8048     + int do_copyup = -1;
8049     + struct dentry *parent_dentry;
8050     + int local_err = 0;
8051     + int eio = 0;
8052     + int revert = 0;
8053     + struct dentry *wh_old = NULL;
8054     + /* bwh_old is the branch in which the source whiteout will be created */
8055     + old_bstart = dbstart(old_dentry);
8056     + bwh_old = old_bstart;
8057     + old_bend = dbend(old_dentry);
8058     + parent_dentry = old_dentry->d_parent;
8059     +
8060     + new_bstart = dbstart(new_dentry);
8061     + new_bend = dbend(new_dentry);
8062     +
8063     + /* Rename source to destination. */
8064     + err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
8065     + old_bstart, &wh_old);
8066     + if (err) {
8067     + if (!IS_COPYUP_ERR(err))
8068     + goto out;
8069     + do_copyup = old_bstart - 1;
8070     + } else {
8071     + revert = 1; /* a lower rename happened, so later failures must undo it */
8072     + }
8073     +
8074     + /*
8075     + * Unlink all instances of destination that exist to the left of
8076     + * bstart of source. On error, revert back, goto out.
8077     + */
8078     + for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
8079     + struct dentry *unlink_dentry;
8080     + struct dentry *unlink_dir_dentry;
8081     +
8082     + unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
8083     + if (!unlink_dentry)
8084     + continue;
8085     +
8086     + unlink_dir_dentry = lock_parent(unlink_dentry);
8087     + err = is_robranch_super(old_dir->i_sb, bindex);
8088     + if (!err)
8089     + err = vfs_unlink(unlink_dir_dentry->d_inode,
8090     + unlink_dentry);
8091     +
8092     + fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
8093     + unlink_dir_dentry->d_inode);
8094     + /* propagate number of hard-links */
8095     + new_dentry->d_parent->d_inode->i_nlink =
8096     + unionfs_get_nlinks(new_dentry->d_parent->d_inode);
8097     +
8098     + unlock_dir(unlink_dir_dentry);
8099     + if (!err) {
8100     + if (bindex != new_bstart) {
8101     + dput(unlink_dentry);
8102     + unionfs_set_lower_dentry_idx(new_dentry,
8103     + bindex, NULL);
8104     + }
8105     + } else if (IS_COPYUP_ERR(err)) {
8106     + do_copyup = bindex - 1;
8107     + } else if (revert) {
8108     + /* wh_old is dropped at the revert label */
8109     + goto revert;
8110     + }
8111     + }
8112     +
8113     + if (do_copyup != -1) {
8114     + for (bindex = do_copyup; bindex >= 0; bindex--) {
8115     + /*
8116     + * copyup the file into some left directory, so that
8117     + * you can rename it
8118     + */
8119     + err = copyup_dentry(old_dentry->d_parent->d_inode,
8120     + old_dentry, old_bstart, bindex,
8121     + old_dentry->d_name.name,
8122     + old_dentry->d_name.len, NULL,
8123     + i_size_read(old_dentry->d_inode));
8124     + /* if copyup failed, try next branch to the left */
8125     + if (err)
8126     + continue;
8127     + dput(wh_old);
8128     + wh_old = NULL; /* __unionfs_rename() may fail before storing a new dentry */
8129     + bwh_old = bindex;
8130     + err = __unionfs_rename(old_dir, old_dentry, new_dir,
8131     + new_dentry, bindex, &wh_old);
8132     + break;
8133     + }
8134     + }
8135     +
8136     + /* make it opaque */
8137     + if (S_ISDIR(old_dentry->d_inode->i_mode)) {
8138     + err = make_dir_opaque(old_dentry, dbstart(old_dentry));
8139     + if (err)
8140     + goto revert;
8141     + }
8142     +
8143     + /*
8144     + * Create whiteout for source, only if:
8145     + * (1) There is more than one underlying instance of source.
8146     + * (2) We did a copy_up
8147     + */
8148     + if ((old_bstart != old_bend) || (do_copyup != -1)) {
8149     + struct dentry *lower_parent;
8150     + struct nameidata nd;
8151     + if (!wh_old || wh_old->d_inode || bwh_old < 0) {
8152     + printk(KERN_ERR "unionfs: rename error "
8153     + "(wh_old=%p/%p bwh_old=%d)\n", wh_old,
8154     + (wh_old ? wh_old->d_inode : NULL), bwh_old);
8155     + err = -EIO;
8156     + goto out;
8157     + }
8158     + err = init_lower_nd(&nd, LOOKUP_CREATE);
8159     + if (unlikely(err < 0))
8160     + goto out;
8161     + lower_parent = lock_parent_wh(wh_old);
8162     + local_err = vfs_create(lower_parent->d_inode, wh_old, S_IRUGO,
8163     + &nd);
8164     + unlock_dir(lower_parent);
8165     + if (!local_err) {
8166     + set_dbopaque(old_dentry, bwh_old);
8167     + } else {
8168     + /*
8169     + * we can't fix anything now, so we cop-out and use
8170     + * -EIO.
8171     + */
8172     + printk(KERN_ERR "unionfs: can't create a whiteout for "
8173     + "the source in rename!\n");
8174     + err = -EIO;
8175     + }
8176     + release_lower_nd(&nd, local_err);
8177     + }
8178     +
8179     +out:
8180     + dput(wh_old);
8181     + return err;
8182     +
8183     +revert:
8184     + dput(wh_old); /* Do revert here; this path never reaches out:, so drop wh_old now. */
8185     + local_err = unionfs_refresh_lower_dentry(new_dentry, old_bstart);
8186     + if (local_err) {
8187     + printk(KERN_ERR "unionfs: revert failed in rename: "
8188     + "the new refresh failed\n");
8189     + eio = -EIO;
8190     + }
8191     +
8192     + local_err = unionfs_refresh_lower_dentry(old_dentry, old_bstart);
8193     + if (local_err) {
8194     + printk(KERN_ERR "unionfs: revert failed in rename: "
8195     + "the old refresh failed\n");
8196     + eio = -EIO;
8197     + goto revert_out;
8198     + }
8199     + /* the renamed object must still exist under the new name ... */
8200     + if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
8201     + !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
8202     + printk(KERN_ERR "unionfs: revert failed in rename: "
8203     + "the object disappeared from under us!\n");
8204     + eio = -EIO;
8205     + goto revert_out;
8206     + }
8207     + /* ... and nothing may have appeared under the old name */
8208     + if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
8209     + unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
8210     + printk(KERN_ERR "unionfs: revert failed in rename: "
8211     + "the object was created underneath us!\n");
8212     + eio = -EIO;
8213     + goto revert_out;
8214     + }
8215     + /* rename back: new -> old in the source's original branch, no whiteout lookup (NULL) */
8216     + local_err = __unionfs_rename(new_dir, new_dentry,
8217     + old_dir, old_dentry, old_bstart, NULL);
8218     +
8219     + /* If we can't fix it, then we cop-out with -EIO. */
8220     + if (local_err) {
8221     + printk(KERN_ERR "unionfs: revert failed in rename!\n");
8222     + eio = -EIO;
8223     + }
8224     +
8225     + local_err = unionfs_refresh_lower_dentry(new_dentry, bindex);
8226     + if (local_err)
8227     + eio = -EIO;
8228     + local_err = unionfs_refresh_lower_dentry(old_dentry, bindex);
8229     + if (local_err)
8230     + eio = -EIO;
8231     +
8232     +revert_out:
8233     + if (eio) /* a failed revert overrides the error that triggered it */
8234     + err = eio;
8235     + return err;
8236     +}
8237     +/* Scan the parent's branches from dbstart to dbend for an existing whiteout of @dentry's name; returns that dentry (caller must dput() it) or an ERR_PTR when none is found. */
8238     +static struct dentry *lookup_whiteout(struct dentry *dentry)
8239     +{
8240     + char *whname;
8241     + int bindex = -1, bstart = -1, bend = -1;
8242     + struct dentry *parent, *lower_parent, *wh_dentry;
8243     +
8244     + whname = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8245     + if (unlikely(IS_ERR(whname)))
8246     + return (void *)whname; /* hand the ERR_PTR back as the dentry result */
8247     +
8248     + parent = dget_parent(dentry);
8249     + unionfs_lock_dentry(parent, UNIONFS_DMUTEX_WHITEOUT);
8250     + bstart = dbstart(parent);
8251     + bend = dbend(parent);
8252     + wh_dentry = ERR_PTR(-ENOENT);
8253     + for (bindex = bstart; bindex <= bend; bindex++) {
8254     + lower_parent = unionfs_lower_dentry_idx(parent, bindex);
8255     + if (!lower_parent)
8256     + continue;
8257     + wh_dentry = lookup_one_len(whname, lower_parent,
8258     + dentry->d_name.len + UNIONFS_WHLEN);
8259     + if (IS_ERR(wh_dentry)) /* if this was the last branch, this error is what gets returned */
8260     + continue;
8261     + if (wh_dentry->d_inode) /* positive dentry: the whiteout exists in this branch */
8262     + break;
8263     + dput(wh_dentry);
8264     + wh_dentry = ERR_PTR(-ENOENT);
8265     + }
8266     + unionfs_unlock_dentry(parent);
8267     + dput(parent);
8268     + kfree(whname);
8269     + return wh_dentry;
8270     +}
8271     +
8272     +/*
8273     + * We can't copyup a directory, because it may involve huge numbers of
8274     + * children, etc. Doing that in the kernel would be bad, so instead we
8275     + * return EXDEV to the user-space utility that caused this, and let the
8276     + * user-space recurse and ask us to copy up each file separately.
8277     + */
8278     +static int may_rename_dir(struct dentry *dentry)
8279     +{
8280     + int err, bstart;
8281     + /* a non-empty directory is refused with -EXDEV when is_robranch() is true for it */
8282     + err = check_empty(dentry, NULL);
8283     + if (err == -ENOTEMPTY) {
8284     + if (is_robranch(dentry))
8285     + return -EXDEV;
8286     + } else if (err) {
8287     + return err;
8288     + }
8289     + /* allowed outright when the directory ends at bstart or is opaque at bstart */
8290     + bstart = dbstart(dentry);
8291     + if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
8292     + return 0;
8293     + /* temporarily move dbstart one branch right, presumably so check_empty() skips the leftmost branch */
8294     + set_dbstart(dentry, bstart + 1);
8295     + err = check_empty(dentry, NULL);
8296     + set_dbstart(dentry, bstart);
8297     + if (err == -ENOTEMPTY)
8298     + err = -EXDEV;
8299     + return err;
8300     +}
8301     +/* Rename entry point: validates both dentries, clears the way at the target, then calls do_unionfs_rename(). Returns 0 or a negative errno; on error new_dentry is d_drop()ped. */
8302     +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8303     + struct inode *new_dir, struct dentry *new_dentry)
8304     +{
8305     + int err = 0;
8306     + struct dentry *wh_dentry;
8307     + /* locks taken here are released in reverse order at out: */
8308     + unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
8309     + unionfs_double_lock_dentry(old_dentry, new_dentry);
8310     + /* a dentry that fails revalidation yields -ESTALE; the target is only checked when positive and not deleted */
8311     + if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) {
8312     + err = -ESTALE;
8313     + goto out;
8314     + }
8315     + if (unlikely(!d_deleted(new_dentry) && new_dentry->d_inode &&
8316     + !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) {
8317     + err = -ESTALE;
8318     + goto out;
8319     + }
8320     + /* directories must first pass may_rename_dir(); other objects get a partial lookup */
8321     + if (!S_ISDIR(old_dentry->d_inode->i_mode))
8322     + err = unionfs_partial_lookup(old_dentry);
8323     + else
8324     + err = may_rename_dir(old_dentry);
8325     +
8326     + if (err)
8327     + goto out;
8328     +
8329     + err = unionfs_partial_lookup(new_dentry);
8330     + if (err)
8331     + goto out;
8332     +
8333     + /*
8334     + * if new_dentry is already lower because of whiteout,
8335     + * simply override it even if the whited-out dir is not empty.
8336     + */
8337     + wh_dentry = lookup_whiteout(new_dentry);
8338     + if (!IS_ERR(wh_dentry)) {
8339     + dput(wh_dentry);
8340     + } else if (new_dentry->d_inode) {
8341     + if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8342     + S_ISDIR(new_dentry->d_inode->i_mode)) {
8343     + err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8344     + -ENOTDIR : -EISDIR;
8345     + goto out;
8346     + }
8347     + /* both are directories from here on: the target has to pass check_empty() */
8348     + if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8349     + struct unionfs_dir_state *namelist = NULL;
8350     + /* check if this unionfs directory is empty or not */
8351     + err = check_empty(new_dentry, &namelist);
8352     + if (err)
8353     + goto out;
8354     + /* delete_whiteouts() is only attempted when is_robranch() is false for the target */
8355     + if (!is_robranch(new_dentry))
8356     + err = delete_whiteouts(new_dentry,
8357     + dbstart(new_dentry),
8358     + namelist);
8359     +
8360     + free_rdstate(namelist);
8361     +
8362     + if (err)
8363     + goto out;
8364     + }
8365     + }
8366     +
8367     + err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8368     + if (err)
8369     + goto out;
8370     +
8371     + /*
8372     + * force re-lookup since the dir on ro branch is not renamed, and
8373     + * lower dentries still indicate the un-renamed ones.
8374     + */
8375     + if (S_ISDIR(old_dentry->d_inode->i_mode))
8376     + atomic_dec(&UNIONFS_D(old_dentry)->generation);
8377     + else
8378     + unionfs_postcopyup_release(old_dentry);
8379     + if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8380     + unionfs_postcopyup_release(new_dentry);
8381     + unionfs_postcopyup_setmnt(new_dentry);
8382     + if (!unionfs_lower_inode(new_dentry->d_inode)) {
8383     + /*
8384     + * If we get here, it means that no copyup was
8385     + * needed, and that a file by the old name already
8386     + * existing on the destination branch; that file got
8387     + * renamed earlier in this function, so all we need
8388     + * to do here is set the lower inode.
8389     + */
8390     + struct inode *inode;
8391     + inode = unionfs_lower_inode(old_dentry->d_inode);
8392     + igrab(inode);
8393     + unionfs_set_lower_inode_idx(new_dentry->d_inode,
8394     + dbstart(new_dentry),
8395     + inode);
8396     + }
8397     + }
8398     + /* if all of this renaming succeeded, update our times */
8399     + unionfs_copy_attr_times(old_dir);
8400     + unionfs_copy_attr_times(new_dir);
8401     + unionfs_copy_attr_times(old_dentry->d_inode);
8402     + unionfs_copy_attr_times(new_dentry->d_inode);
8403     + unionfs_check_inode(old_dir);
8404     + unionfs_check_inode(new_dir);
8405     + unionfs_check_dentry(old_dentry);
8406     + unionfs_check_dentry(new_dentry);
8407     +
8408     +out:
8409     + if (err) /* clear the new_dentry stuff created */
8410     + d_drop(new_dentry);
8411     + unionfs_unlock_dentry(new_dentry);
8412     + unionfs_unlock_dentry(old_dentry);
8413     + unionfs_read_unlock(old_dentry->d_sb);
8414     + return err;
8415     +}
8416     diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8417     new file mode 100644
8418     index 0000000..2a8c88e
8419     --- /dev/null
8420     +++ b/fs/unionfs/sioq.c
8421     @@ -0,0 +1,119 @@
8422     +/*
8423     + * Copyright (c) 2006-2007 Erez Zadok
8424     + * Copyright (c) 2006 Charles P. Wright
8425     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8426     + * Copyright (c) 2006 Junjiro Okajima
8427     + * Copyright (c) 2006 David P. Quigley
8428     + * Copyright (c) 2006-2007 Stony Brook University
8429     + * Copyright (c) 2006-2007 The Research Foundation of SUNY
8430     + *
8431     + * This program is free software; you can redistribute it and/or modify
8432     + * it under the terms of the GNU General Public License version 2 as
8433     + * published by the Free Software Foundation.
8434     + */
8435     +
8436     +#include "union.h"
8437     +
8438     +/*
8439     + * Super-user IO work Queue - sometimes we need to perform actions which
8440     + * would fail due to the unix permissions on the parent directory (e.g.,
8441     + * rmdir a directory which appears empty, but in reality contains
8442     + * whiteouts).
8443     + */
8444     +
8445     +static struct workqueue_struct *superio_workqueue;
8446     +/* Create the "unionfs_siod" workqueue used by run_sioq(). Returns 0 on success or a negative errno, leaving superio_workqueue NULL on failure. */
8447     +int __init init_sioq(void)
8448     +{
8449     + int err;
8450     +
8451     + superio_workqueue = create_workqueue("unionfs_siod");
8452     + if (superio_workqueue && !IS_ERR(superio_workqueue))
8453     + return 0;
8454     + /* create_workqueue() reports failure as NULL; an ERR_PTR is still honoured */
8455     + err = superio_workqueue ? PTR_ERR(superio_workqueue) : -ENOMEM;
8456     + printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8457     + superio_workqueue = NULL;
8458     + return err;
8459     +}
8460     +/* Destroy the superio workqueue if it exists (init_sioq() leaves the pointer NULL when creation failed). */
8461     +void stop_sioq(void)
8462     +{
8463     + if (superio_workqueue)
8464     + destroy_workqueue(superio_workqueue);
8465     +}
8466     +/* Queue @func on the superio workqueue with @args as its work item and sleep until args->comp is completed; results are left in @args. */
8467     +void run_sioq(work_func_t func, struct sioq_args *args)
8468     +{
8469     + INIT_WORK(&args->work, func);
8470     + /* comp is what wait_for_completion() below sleeps on */
8471     + init_completion(&args->comp);
8472     + while (!queue_work(superio_workqueue, &args->work)) { /* retry until the item is accepted */
8473     + /* TODO: do accounting if needed */
8474     + schedule();
8475     + }
8476     + wait_for_completion(&args->comp);
8477     +}
8478     +/* Work handler: call vfs_create() with the arguments in args->create, store the result in args->err and complete args->comp. */
8479     +void __unionfs_create(struct work_struct *work)
8480     +{
8481     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8482     + struct create_args *c = &args->create;
8483     +
8484     + args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
8485     + complete(&args->comp);
8486     +}
8487     +/* Work handler: call vfs_mkdir() with the arguments in args->mkdir, store the result in args->err and complete args->comp. */
8488     +void __unionfs_mkdir(struct work_struct *work)
8489     +{
8490     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8491     + struct mkdir_args *m = &args->mkdir;
8492     +
8493     + args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
8494     + complete(&args->comp);
8495     +}
8496     +/* Work handler: call vfs_mknod() with the arguments in args->mknod, store the result in args->err and complete args->comp. */
8497     +void __unionfs_mknod(struct work_struct *work)
8498     +{
8499     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8500     + struct mknod_args *m = &args->mknod;
8501     +
8502     + args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
8503     + complete(&args->comp);
8504     +}
8505     +/* Work handler: call vfs_symlink() with the arguments in args->symlink, store the result in args->err and complete args->comp. */
8506     +void __unionfs_symlink(struct work_struct *work)
8507     +{
8508     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8509     + struct symlink_args *s = &args->symlink;
8510     +
8511     + args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode);
8512     + complete(&args->comp);
8513     +}
8514     +/* Work handler: call vfs_unlink() with the arguments in args->unlink, store the result in args->err and complete args->comp. */
8515     +void __unionfs_unlink(struct work_struct *work)
8516     +{
8517     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8518     + struct unlink_args *u = &args->unlink;
8519     +
8520     + args->err = vfs_unlink(u->parent, u->dentry);
8521     + complete(&args->comp);
8522     +}
8523     +/* Work handler: call do_delete_whiteouts() with the arguments in args->deletewh, store the result in args->err and complete args->comp. */
8524     +void __delete_whiteouts(struct work_struct *work)
8525     +{
8526     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8527     + struct deletewh_args *d = &args->deletewh;
8528     +
8529     + args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist);
8530     + complete(&args->comp);
8531     +}
8532     +/* Work handler: look up UNIONFS_DIR_OPAQUE under args->is_opaque.dentry, store the lookup_one_len() result in args->ret (not args->err) and complete args->comp. */
8533     +void __is_opaque_dir(struct work_struct *work)
8534     +{
8535     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8536     +
8537     + args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry,
8538     + sizeof(UNIONFS_DIR_OPAQUE) - 1); /* length without the trailing NUL */
8539     + complete(&args->comp);
8540     +}
8541     diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8542     new file mode 100644
8543     index 0000000..afb71ee
8544     --- /dev/null
8545     +++ b/fs/unionfs/sioq.h
8546     @@ -0,0 +1,92 @@
8547     +/*
8548     + * Copyright (c) 2006-2007 Erez Zadok
8549     + * Copyright (c) 2006 Charles P. Wright
8550     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8551     + * Copyright (c) 2006 Junjiro Okajima
8552     + * Copyright (c) 2006 David P. Quigley
8553     + * Copyright (c) 2006-2007 Stony Brook University
8554     + * Copyright (c) 2006-2007 The Research Foundation of SUNY
8555     + *
8556     + * This program is free software; you can redistribute it and/or modify
8557     + * it under the terms of the GNU General Public License version 2 as
8558     + * published by the Free Software Foundation.
8559     + */
8560     +
8561     +#ifndef _SIOQ_H
8562     +#define _SIOQ_H
8563     +/* Per-operation argument bundles; one of them travels inside struct sioq_args to the matching work handler in sioq.c. */
8564     +struct deletewh_args { /* forwarded to do_delete_whiteouts() by __delete_whiteouts() */
8565     + struct unionfs_dir_state *namelist;
8566     + struct dentry *dentry;
8567     + int bindex;
8568     +};
8569     +/* for __is_opaque_dir(): the dentry under which UNIONFS_DIR_OPAQUE is looked up */
8570     +struct is_opaque_args {
8571     + struct dentry *dentry;
8572     +};
8573     +/* arguments of vfs_create(), used by __unionfs_create() */
8574     +struct create_args {
8575     + struct inode *parent;
8576     + struct dentry *dentry;
8577     + umode_t mode;
8578     + struct nameidata *nd;
8579     +};
8580     +/* arguments of vfs_mkdir(), used by __unionfs_mkdir() */
8581     +struct mkdir_args {
8582     + struct inode *parent;
8583     + struct dentry *dentry;
8584     + umode_t mode;
8585     +};
8586     +/* arguments of vfs_mknod(), used by __unionfs_mknod() */
8587     +struct mknod_args {
8588     + struct inode *parent;
8589     + struct dentry *dentry;
8590     + umode_t mode;
8591     + dev_t dev;
8592     +};
8593     +/* arguments of vfs_symlink(), used by __unionfs_symlink() */
8594     +struct symlink_args {
8595     + struct inode *parent;
8596     + struct dentry *dentry;
8597     + char *symbuf;
8598     + umode_t mode;
8599     +};
8600     +/* arguments of vfs_unlink(), used by __unionfs_unlink() */
8601     +struct unlink_args {
8602     + struct inode *parent;
8603     + struct dentry *dentry;
8604     +};
8605     +
8606     +/* Work item handed to run_sioq(): comp signals that the handler finished, err/ret carry its result. */
8607     +struct sioq_args {
8608     + struct completion comp;
8609     + struct work_struct work;
8610     + int err;
8611     + void *ret;
8612     + /* anonymous union: each handler reads only the member for its own operation */
8613     + union {
8614     + struct deletewh_args deletewh;
8615     + struct is_opaque_args is_opaque;
8616     + struct create_args create;
8617     + struct mkdir_args mkdir;
8618     + struct mknod_args mknod;
8619     + struct symlink_args symlink;
8620     + struct unlink_args unlink;
8621     + };
8622     +};
8623     +
8624     +/* Extern declarations for the SIOQ setup, teardown and dispatch functions */
8625     +extern int __init init_sioq(void);
8626     +extern void stop_sioq(void);
8627     +extern void run_sioq(work_func_t func, struct sioq_args *args);
8628     +
8629     +/* Extern declarations for our privilege escalation helpers (work handlers passed to run_sioq()) */
8630     +extern void __unionfs_create(struct work_struct *work);
8631     +extern void __unionfs_mkdir(struct work_struct *work);
8632     +extern void __unionfs_mknod(struct work_struct *work);
8633     +extern void __unionfs_symlink(struct work_struct *work);
8634     +extern void __unionfs_unlink(struct work_struct *work);
8635     +extern void __delete_whiteouts(struct work_struct *work);
8636     +extern void __is_opaque_dir(struct work_struct *work);
8637     +
8638     +#endif /* not _SIOQ_H */
8639     diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8640     new file mode 100644
8641     index 0000000..0a0fce9
8642     --- /dev/null
8643     +++ b/fs/unionfs/subr.c
8644     @@ -0,0 +1,242 @@
8645     +/*
8646     + * Copyright (c) 2003-2007 Erez Zadok
8647     + * Copyright (c) 2003-2006 Charles P. Wright
8648     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8649     + * Copyright (c) 2005-2006 Junjiro Okajima
8650     + * Copyright (c) 2005 Arun M. Krishnakumar
8651     + * Copyright (c) 2004-2006 David P. Quigley
8652     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8653     + * Copyright (c) 2003 Puja Gupta
8654     + * Copyright (c) 2003 Harikesavan Krishnan
8655     + * Copyright (c) 2003-2007 Stony Brook University
8656     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8657     + *
8658     + * This program is free software; you can redistribute it and/or modify
8659     + * it under the terms of the GNU General Public License version 2 as
8660     + * published by the Free Software Foundation.
8661     + */
8662     +
8663     +#include "union.h"
8664     +
8665     +/*
8666     + * Create a whiteout for dentry's name, trying branch 'start' first. On a
8667     + * failed lookup or an IS_COPYUP_ERR() error, proceed to the branch to the
8668     + * left. dentry must be locked (verify_locked). Returns 0 or negative errno.
8669     + */
8670     +int create_whiteout(struct dentry *dentry, int start)
8671     +{
8672     + int bstart, bend, bindex;
8673     + struct dentry *lower_dir_dentry;
8674     + struct dentry *lower_dentry;
8675     + struct dentry *lower_wh_dentry;
8676     + struct nameidata nd;
8677     + char *name = NULL;
8678     + int err = -EINVAL;
8679     +
8680     + verify_locked(dentry);
8681     +
8682     + bstart = dbstart(dentry);
8683     + bend = dbend(dentry);
8684     +
8685     + /* create dentry's whiteout equivalent */
8686     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8687     + if (unlikely(IS_ERR(name)))
8688     + return PTR_ERR(name); /* "out" would kfree() the ERR_PTR */
8689     +
8690     + for (bindex = start; bindex >= 0; bindex--) {
8691     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8692     +
8693     + if (!lower_dentry) {
8694     + /*
8695     + * if lower dentry is not present, create the
8696     + * entire lower dentry directory structure and go
8697     + * ahead. Since we want to just create whiteout, we
8698     + * only want the parent dentry, and hence get rid of
8699     + * this dentry.
8700     + */
8701     + lower_dentry = create_parents(dentry->d_inode,
8702     + dentry,
8703     + dentry->d_name.name,
8704     + bindex);
8705     + if (!lower_dentry || IS_ERR(lower_dentry)) {
8706     + int ret = PTR_ERR(lower_dentry);
8707     + if (!IS_COPYUP_ERR(ret))
8708     + printk(KERN_ERR
8709     + "unionfs: create_parents for "
8710     + "whiteout failed: bindex=%d "
8711     + "err=%d\n", bindex, ret);
8712     + continue;
8713     + }
8714     + }
8715     +
8716     + lower_wh_dentry =
8717     + lookup_one_len(name, lower_dentry->d_parent,
8718     + dentry->d_name.len + UNIONFS_WHLEN);
8719     + if (IS_ERR(lower_wh_dentry))
8720     + continue;
8721     +
8722     + /*
8723     + * The whiteout already exists. This used to be impossible,
8724     + * but now is possible because of opaqueness.
8725     + */
8726     + if (lower_wh_dentry->d_inode) {
8727     + dput(lower_wh_dentry);
8728     + err = 0;
8729     + goto out;
8730     + }
8731     +
8732     + err = init_lower_nd(&nd, LOOKUP_CREATE);
8733     + if (unlikely(err < 0)) {
8734     + dput(lower_wh_dentry); /* drop the lookup reference */
8735     + goto out;
8736     + }
8737     + lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
8738     + err = is_robranch_super(dentry->d_sb, bindex);
8739     + if (!err)
8740     + err = vfs_create(lower_dir_dentry->d_inode,
8741     + lower_wh_dentry,
8742     + ~current->fs->umask & S_IRWXUGO,
8743     + &nd);
8744     + unlock_dir(lower_dir_dentry);
8745     + dput(lower_wh_dentry);
8746     + release_lower_nd(&nd, err);
8747     +
8748     + if (!err || !IS_COPYUP_ERR(err))
8749     + break;
8750     + }
8751     +
8752     + /* set dbopaque so that lookup will not proceed after this branch */
8753     + if (!err)
8754     + set_dbopaque(dentry, bindex);
8755     +
8756     +out:
8757     + kfree(name);
8758     + return err;
8759     +}
8760     +
8761     +/*
8762     + * Helper for rename's revert path: look dentry's name up again in branch
8763     + * bindex and swap in the result as the cached lower dentry and inode.
8764     + */
8765     +int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex)
8766     +{
8767     + struct dentry *lower_dentry;
8768     + struct dentry *lower_parent;
8769     + int err = 0;
8770     +
8771     + verify_locked(dentry);
8772     +
8773     + unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_CHILD);
8774     + lower_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
8775     + unionfs_unlock_dentry(dentry->d_parent);
8776     +
8777     + BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
8778     +
8779     + lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
8780     + dentry->d_name.len);
8781     + if (IS_ERR(lower_dentry)) {
8782     + err = PTR_ERR(lower_dentry);
8783     + goto out; /* cached lower objects are left untouched */
8784     + }
8785     +
8786     + dput(unionfs_lower_dentry_idx(dentry, bindex)); /* drop stale refs */
8787     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
8788     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
8789     +
8790     + if (!lower_dentry->d_inode) { /* negative: nothing to cache */
8791     + dput(lower_dentry);
8792     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
8793     + } else {
8794     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
8795     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
8796     + igrab(lower_dentry->d_inode));
8797     + }
8798     +
8799     +out:
8800     + return err;
8801     +}
8802     +
8803     +int make_dir_opaque(struct dentry *dentry, int bindex)
8804     +{
8805     + int err = 0;
8806     + struct dentry *lower_dentry, *diropq;
8807     + struct inode *lower_dir;
8808     + struct nameidata nd;
8809     + kernel_cap_t orig_cap;
8810     +
8811     + /*
8812     + * Mark the directory opaque in branch bindex by creating the
8813     + * UNIONFS_DIR_OPAQUE marker file inside its lower directory. Such
8814     + * markers (like regular whiteouts) should appear to users as if they
8815     + * don't exist, and are created regardless of directory search/create
8816     + * permissions: raise the DAC capabilities only for the duration of
8817     + * this creation. Note, this does not circumvent normal ->permission.
8818     + */
8819     + orig_cap = current->cap_effective;
8820     + cap_raise(current->cap_effective, CAP_DAC_READ_SEARCH);
8821     + cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
8822     +
8823     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8824     + lower_dir = lower_dentry->d_inode;
8825     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
8826     + !S_ISDIR(lower_dir->i_mode));
8827     +
8828     + mutex_lock(&lower_dir->i_mutex);
8829     + diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
8830     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
8831     + if (IS_ERR(diropq)) {
8832     + err = PTR_ERR(diropq);
8833     + goto out;
8834     + }
8835     +
8836     + err = init_lower_nd(&nd, LOOKUP_CREATE);
8837     + if (unlikely(err < 0))
8838     + goto out_dput; /* still drop the lookup reference */
8839     + if (!diropq->d_inode)
8840     + err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
8841     + if (!err)
8842     + set_dbopaque(dentry, bindex);
8843     + release_lower_nd(&nd, err);
8844     +out_dput:
8845     + dput(diropq);
8846     +
8847     +out:
8848     + mutex_unlock(&lower_dir->i_mutex);
8849     + current->cap_effective = orig_cap; /* restore caller's capabilities */
8850     + return err;
8851     +}
8852     +
8853     +/*
8854     + * Link count to report: 0 if unlinked, lower i_nlink for non-dirs, else 1
8855     + */
8856     +int unionfs_get_nlinks(const struct inode *inode)
8857     +{
8858     + /* don't bother to do all the work since we're unlinked */
8859     + if (inode->i_nlink == 0)
8860     + return 0;
8861     +
8862     + if (!S_ISDIR(inode->i_mode))
8863     + return unionfs_lower_inode(inode)->i_nlink;
8864     +
8865     + /*
8866     + * For directories, we return 1. The only place that could care
8867     + * about links is readdir, and there's d_type there so even that
8868     + * doesn't matter.
8869     + */
8870     + return 1;
8871     +}
8872     +
8873     +/* kmalloc UNIONFS_WHPFX followed by name; ERR_PTR(-ENOMEM) on failure */
8874     +char *alloc_whname(const char *name, int len)
8875     +{
8876     + char *buf;
8877     +
8878     + buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL); /* +1 for NUL */
8879     + if (unlikely(!buf))
8880     + return ERR_PTR(-ENOMEM);
8881     +
8882     + strcpy(buf, UNIONFS_WHPFX); /* assumes UNIONFS_WHLEN is its length */
8883     + strlcat(buf, name, len + UNIONFS_WHLEN + 1);
8884     +
8885     + return buf;
8886     +}
8887     diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8888     new file mode 100644
8889     index 0000000..986c980
8890     --- /dev/null
8891     +++ b/fs/unionfs/super.c
8892     @@ -0,0 +1,1025 @@
8893     +/*
8894     + * Copyright (c) 2003-2007 Erez Zadok
8895     + * Copyright (c) 2003-2006 Charles P. Wright
8896     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8897     + * Copyright (c) 2005-2006 Junjiro Okajima
8898     + * Copyright (c) 2005 Arun M. Krishnakumar
8899     + * Copyright (c) 2004-2006 David P. Quigley
8900     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8901     + * Copyright (c) 2003 Puja Gupta
8902     + * Copyright (c) 2003 Harikesavan Krishnan
8903     + * Copyright (c) 2003-2007 Stony Brook University
8904     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8905     + *
8906     + * This program is free software; you can redistribute it and/or modify
8907     + * it under the terms of the GNU General Public License version 2 as
8908     + * published by the Free Software Foundation.
8909     + */
8910     +
8911     +#include "union.h"
8912     +
8913     +/*
8914     + * The inode cache is used with alloc_inode for both our inode info and the
8915     + * vfs inode.
8916     + */
8917     +static struct kmem_cache *unionfs_inode_cachep;
8918     +
8919     +static void unionfs_read_inode(struct inode *inode)
8920     +{
8921     + int size; /* bytes needed for the lower_inodes array */
8922     + struct unionfs_inode_info *info = UNIONFS_I(inode);
8923     +
8924     + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
8925     + info->bstart = -1;
8926     + info->bend = -1;
8927     + atomic_set(&info->generation,
8928     + atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8929     + spin_lock_init(&info->rdlock);
8930     + info->rdcount = 1;
8931     + info->hashsize = -1;
8932     + INIT_LIST_HEAD(&info->readdircache);
8933     +
8934     + size = sbmax(inode->i_sb) * sizeof(struct inode *);
8935     + info->lower_inodes = kzalloc(size, GFP_KERNEL);
8936     + if (unlikely(!info->lower_inodes)) {
8937     + printk(KERN_CRIT "unionfs: no kernel memory when allocating "
8938     + "lower-pointer array!\n");
8939     + BUG(); /* void return: no way to report -ENOMEM */
8940     + }
8941     +
8942     + inode->i_version++;
8943     + inode->i_op = &unionfs_main_iops;
8944     + inode->i_fop = &unionfs_main_fops;
8945     +
8946     + inode->i_mapping->a_ops = &unionfs_aops;
8947     +
8948     + /*
8949     + * reset times so unionfs_copy_attr_all can keep our time invariants
8950     + * right (upper inode time being the max of all lower ones).
8951     + */
8952     + inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
8953     + inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
8954     + inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
8955     +
8956     +}
8957     +
8958     +/*
8959     + * We define delete_inode because there are two VFS paths that may
8960     + * destroy an inode: one of them calls clear_inode() before doing everything
8961     + * else that's needed, and the other is fine. This way we truncate the inode
8962     + * size (and its pages) and then clear our own inode, which will do an iput
8963     + * on our and the lower inode.
8964     + *
8965     + * No need to lock sb info's rwsem.
8966     + */
8967     +static void unionfs_delete_inode(struct inode *inode)
8968     +{
8969     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8970     + spin_lock(&inode->i_lock); /* presumably serializes i_size_write() */
8971     +#endif
8972     + i_size_write(inode, 0); /* every f/s seems to do that */
8973     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8974     + spin_unlock(&inode->i_lock);
8975     +#endif
8976     +
8977     + if (inode->i_data.nrpages) /* only if pages are still cached */
8978     + truncate_inode_pages(&inode->i_data, 0);
8979     +
8980     + clear_inode(inode);
8981     +}
8982     +
8983     +/*
8984     + * final actions when unmounting a file system: free our private sb data
8985     + *
8986     + * No need to lock rwsem.
8987     + */
8988     +static void unionfs_put_super(struct super_block *sb)
8989     +{
8990     + int bindex, bstart, bend;
8991     + struct unionfs_sb_info *spd;
8992     + int leaks = 0;
8993     +
8994     + spd = UNIONFS_SB(sb);
8995     + if (!spd) /* nothing was attached to this superblock */
8996     + return;
8997     +
8998     + bstart = sbstart(sb);
8999     + bend = sbend(sb);
9000     +
9001     + /* Make sure we have no leaks of branchget/branchput. */
9002     + for (bindex = bstart; bindex <= bend; bindex++)
9003     + if (unlikely(branch_count(sb, bindex) != 0)) {
9004     + printk(KERN_CRIT
9005     + "unionfs: branch %d has %d references left!\n",
9006     + bindex, branch_count(sb, bindex));
9007     + leaks = 1;
9008     + }
9009     + BUG_ON(leaks != 0); /* after every leaking branch was reported */
9010     +
9011     + kfree(spd->data);
9012     + kfree(spd);
9013     + sb->s_fs_info = NULL;
9014     +}
9015     +
9016     +/*
9017     + * Since people use this to answer the "How big of a file can I write?"
9018     + * question, we report the size of the highest priority branch as the size of
9019     + * the union.
9020     + */
9021     +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
9022     +{
9023     + int err = 0;
9024     + struct super_block *sb;
9025     + struct dentry *lower_dentry;
9026     +
9027     + sb = dentry->d_sb;
9028     +
9029     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9030     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
9031     +
9032     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
9033     + err = -ESTALE;
9034     + goto out;
9035     + }
9036     + unionfs_check_dentry(dentry);
9037     +
9038     + lower_dentry = unionfs_lower_dentry(sb->s_root);
9039     + err = vfs_statfs(lower_dentry, buf); /* NOTE(review): err unchecked */
9040     +
9041     + /* set return buf to our f/s to avoid confusing user-level utils */
9042     + buf->f_type = UNIONFS_SUPER_MAGIC;
9043     + /*
9044     + * Our maximum file name is shorter by a few bytes because every
9045     + * file name could potentially be whited-out.
9046     + *
9047     + * XXX: this restriction goes away with ODF.
9048     + */
9049     + buf->f_namelen -= UNIONFS_WHLEN;
9050     +
9051     + /*
9052     + * reset two fields to avoid confusing user-land.
9053     + * XXX: is this still necessary?
9054     + */
9055     + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
9056     + memset(&buf->f_spare, 0, sizeof(buf->f_spare));
9057     +
9058     +out:
9059     + unionfs_check_dentry(dentry);
9060     + unionfs_unlock_dentry(dentry);
9061     + unionfs_read_unlock(sb);
9062     + return err;
9063     +}
9064     +
9065     +/* remount "mode=BRANCH=PERMS": update branchperms of an existing branch */
9066     +static noinline int do_remount_mode_option(char *optarg, int cur_branches,
9067     + struct unionfs_data *new_data,
9068     + struct path *new_lower_paths)
9069     +{
9070     + int err = -EINVAL;
9071     + int perms, idx;
9072     + char *modename = strchr(optarg, '=');
9073     + struct nameidata nd;
9074     +
9075     + /* by now, optarg contains the branch name */
9076     + if (!*optarg) {
9077     + printk(KERN_ERR
9078     + "unionfs: no branch specified for mode change\n");
9079     + goto out;
9080     + }
9081     + if (!modename) {
9082     + printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
9083     + optarg);
9084     + goto out;
9085     + }
9086     + *modename++ = '\0'; /* split optarg in place: branch, then mode */
9087     + err = parse_branch_mode(modename, &perms);
9088     + if (err) {
9089     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
9090     + modename, optarg);
9091     + goto out;
9092     + }
9093     +
9094     + /*
9095     + * Find matching branch index. For now, this assumes that nothing
9096     + * has been mounted on top of this Unionfs stack. Once we have /odf
9097     + * and cache-coherency resolved, we'll address the branch-path
9098     + * uniqueness.
9099     + */
9100     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9101     + if (err) {
9102     + printk(KERN_ERR "unionfs: error accessing "
9103     + "lower directory \"%s\" (error %d)\n",
9104     + optarg, err);
9105     + goto out;
9106     + }
9107     + for (idx = 0; idx < cur_branches; idx++)
9108     + if (nd.mnt == new_lower_paths[idx].mnt &&
9109     + nd.dentry == new_lower_paths[idx].dentry)
9110     + break;
9111     + path_release(&nd); /* no longer needed */
9112     + if (idx == cur_branches) {
9113     + err = -ENOENT; /* err may have been reset above */
9114     + printk(KERN_ERR "unionfs: branch \"%s\" "
9115     + "not found\n", optarg);
9116     + goto out;
9117     + }
9118     + /* check/change mode for existing branch */
9119     + /* we don't warn if perms==branchperms */
9120     + new_data[idx].branchperms = perms;
9121     + err = 0;
9122     +out:
9123     + return err;
9124     +}
9125     +
9126     +/* remount "del=BRANCH": remove a branch with no open files from the arrays */
9127     +static noinline int do_remount_del_option(char *optarg, int cur_branches,
9128     + struct unionfs_data *new_data,
9129     + struct path *new_lower_paths)
9130     +{
9131     + int err = -EINVAL;
9132     + int idx;
9133     + struct nameidata nd;
9134     +
9135     + /* optarg contains the branch name to delete */
9136     +
9137     + /*
9138     + * Find matching branch index. For now, this assumes that nothing
9139     + * has been mounted on top of this Unionfs stack. Once we have /odf
9140     + * and cache-coherency resolved, we'll address the branch-path
9141     + * uniqueness.
9142     + */
9143     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9144     + if (err) {
9145     + printk(KERN_ERR "unionfs: error accessing "
9146     + "lower directory \"%s\" (error %d)\n",
9147     + optarg, err);
9148     + goto out;
9149     + }
9150     + for (idx = 0; idx < cur_branches; idx++)
9151     + if (nd.mnt == new_lower_paths[idx].mnt &&
9152     + nd.dentry == new_lower_paths[idx].dentry)
9153     + break;
9154     + path_release(&nd); /* no longer needed */
9155     + if (idx == cur_branches) {
9156     + printk(KERN_ERR "unionfs: branch \"%s\" "
9157     + "not found\n", optarg);
9158     + err = -ENOENT;
9159     + goto out;
9160     + }
9161     + /* check if there are any open files on the branch to be deleted */
9162     + if (atomic_read(&new_data[idx].open_files) > 0) {
9163     + err = -EBUSY;
9164     + goto out;
9165     + }
9166     +
9167     + /*
9168     + * Now we have to delete the branch. First, release any handles it
9169     + * has. Then, move the remaining array indexes past "idx" in
9170     + * new_data and new_lower_paths one to the left. cur_branches is
9171     + * passed by value: the caller decrements its own branch count.
9172     + */
9173     + pathput(&new_lower_paths[idx]);
9174     +
9175     + if (idx < cur_branches - 1) {
9176     + /* if idx==cur_branches-1, we delete last branch: easy */
9177     + memmove(&new_data[idx], &new_data[idx+1],
9178     + (cur_branches - 1 - idx) *
9179     + sizeof(struct unionfs_data));
9180     + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
9181     + (cur_branches - 1 - idx) * sizeof(struct path));
9182     + }
9183     +
9184     + err = 0;
9185     +out:
9186     + return err;
9187     +}
9188     +
9189     +/* remount "add=...": insert a new branch into new_data/new_lower_paths */
9190     +static noinline int do_remount_add_option(char *optarg, int cur_branches,
9191     + struct unionfs_data *new_data,
9192     + struct path *new_lower_paths,
9193     + int *high_branch_id)
9194     +{
9195     + int err = -EINVAL;
9196     + int perms;
9197     + int idx = 0; /* default: insert at beginning */
9198     + char *new_branch , *modename = NULL;
9199     + struct nameidata nd;
9200     +
9201     + /*
9202     + * optarg can be of several forms:
9203     + *
9204     + * /bar:/foo insert /foo before /bar
9205     + * /bar:/foo=ro insert /foo in ro mode before /bar
9206     + * /foo insert /foo in the beginning (prepend)
9207     + * :/foo insert /foo at the end (append)
9208     + */
9209     + if (*optarg == ':') { /* append? */
9210     + new_branch = optarg + 1; /* skip ':' */
9211     + idx = cur_branches;
9212     + goto found_insertion_point;
9213     + }
9214     + new_branch = strchr(optarg, ':');
9215     + if (!new_branch) { /* prepend? */
9216     + new_branch = optarg;
9217     + goto found_insertion_point;
9218     + }
9219     + *new_branch++ = '\0'; /* holds path+mode of new branch */
9220     +
9221     + /*
9222     + * Find matching branch index. For now, this assumes that nothing
9223     + * has been mounted on top of this Unionfs stack. Once we have /odf
9224     + * and cache-coherency resolved, we'll address the branch-path
9225     + * uniqueness.
9226     + */
9227     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9228     + if (err) {
9229     + printk(KERN_ERR "unionfs: error accessing "
9230     + "lower directory \"%s\" (error %d)\n",
9231     + optarg, err);
9232     + goto out;
9233     + }
9234     + for (idx = 0; idx < cur_branches; idx++)
9235     + if (nd.mnt == new_lower_paths[idx].mnt &&
9236     + nd.dentry == new_lower_paths[idx].dentry)
9237     + break;
9238     + path_release(&nd); /* no longer needed */
9239     + if (idx == cur_branches) {
9240     + printk(KERN_ERR "unionfs: branch \"%s\" "
9241     + "not found\n", optarg);
9242     + err = -ENOENT;
9243     + goto out;
9244     + }
9245     +
9246     + /*
9247     + * At this point idx will hold the index where the new branch should
9248     + * be inserted before.
9249     + */
9250     +found_insertion_point:
9251     + /* find the mode for the new branch */
9252     + if (new_branch)
9253     + modename = strchr(new_branch, '=');
9254     + if (modename)
9255     + *modename++ = '\0';
9256     + if (!new_branch || !*new_branch) {
9257     + printk(KERN_ERR "unionfs: null new branch\n");
9258     + err = -EINVAL;
9259     + goto out;
9260     + }
9261     + err = parse_branch_mode(modename, &perms);
9262     + if (err) {
9263     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
9264     + "branch \"%s\"\n", modename, new_branch);
9265     + goto out;
9266     + }
9267     + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
9268     + if (err) {
9269     + printk(KERN_ERR "unionfs: error accessing "
9270     + "lower directory \"%s\" (error %d)\n",
9271     + new_branch, err);
9272     + goto out;
9273     + }
9274     + /*
9275     + * It's probably safe to check_mode the new branch to insert. Note:
9276     + * we don't allow inserting branches which are unionfs's by
9277     + * themselves (check_branch returns EINVAL in that case). This is
9278     + * because this code base doesn't support stacking unionfs: the ODF
9279     + * code base supports that correctly.
9280     + */
9281     + err = check_branch(&nd);
9282     + if (err) {
9283     + printk(KERN_ERR "unionfs: lower directory "
9284     + "\"%s\" is not a valid branch\n", new_branch);
9285     + path_release(&nd);
9286     + goto out;
9287     + }
9288     +
9289     + /*
9290     + * Now we have to insert the new branch. But first, move the bits
9291     + * to make space for the new branch, if needed. cur_branches is
9292     + * passed by value: the caller increments its own branch count.
9293     + * We don't release nd here; it's kept until umount/remount.
9294     + */
9295     + if (idx < cur_branches) {
9296     + /* if idx==cur_branches, we append: easy */
9297     + memmove(&new_data[idx+1], &new_data[idx],
9298     + (cur_branches - idx) * sizeof(struct unionfs_data));
9299     + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
9300     + (cur_branches - idx) * sizeof(struct path));
9301     + }
9302     + new_lower_paths[idx].dentry = nd.dentry;
9303     + new_lower_paths[idx].mnt = nd.mnt;
9304     +
9305     + new_data[idx].sb = nd.dentry->d_sb;
9306     + atomic_set(&new_data[idx].open_files, 0);
9307     + new_data[idx].branchperms = perms;
9308     + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
9309     +
9310     + err = 0;
9311     +out:
9312     + return err;
9313     +}
9314     +
9315     +
9316     +/*
9317     + * Support branch management options on remount.
9318     + *
9319     + * See Documentation/filesystems/unionfs/ for details.
9320     + *
9321     + * @flags: numeric mount options
9322     + * @options: mount options string
9323     + *
9324     + * This function can rearrange a mounted union dynamically, adding and
9325     + * removing branches, including changing branch modes. Clearly this has to
9326     + * be done safely and atomically. Luckily, the VFS already calls this
9327     + * function with lock_super(sb) and lock_kernel() held, preventing
9328     + * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
9329     + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
9330     + * to purge dentries/inodes from our superblock, and also called
9331     + * fsync_super(sb) to purge any dirty pages. So we're good.
9332     + *
9333     + * XXX: however, our remount code may also need to invalidate mapped pages
9334     + * so as to force them to be re-gotten from the (newly reconfigured) lower
9335     + * branches. This has to wait for proper mmap and cache coherency support
9336     + * in the VFS.
9337     + *
9338     + */
9339     +static int unionfs_remount_fs(struct super_block *sb, int *flags,
9340     + char *options)
9341     +{
9342     + int err = 0;
9343     + int i;
9344     + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
9345     + char *optname;
9346     + int cur_branches = 0; /* no. of current branches */
9347     + int new_branches = 0; /* no. of branches actually left in the end */
9348     + int add_branches; /* est. no. of branches to add */
9349     + int del_branches; /* est. no. of branches to del */
9350     + int max_branches; /* max possible no. of branches */
9351     + struct unionfs_data *new_data = NULL, *tmp_data = NULL;
9352     + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
9353     + struct inode **new_lower_inodes = NULL;
9354     + int new_high_branch_id; /* new high branch ID */
9355     + int size; /* memory allocation size, temp var */
9356     + int old_ibstart, old_ibend;
9357     +
9358     + unionfs_write_lock(sb);
9359     +
9360     + /*
9361     + * The VFS will take care of "ro" and "rw" flags, and we can safely
9362     + * ignore MS_SILENT, but anything else left over is an error. So we
9363     + * need to check if any other flags may have been passed (none are
9364     + * allowed/supported as of now).
9365     + */
9366     + if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
9367     + printk(KERN_ERR
9368     + "unionfs: remount flags 0x%x unsupported\n", *flags);
9369     + err = -EINVAL;
9370     + goto out_error;
9371     + }
9372     +
9373     + /*
9374     + * If 'options' is NULL, it's probably because the user just changed
9375     + * the union to a "ro" or "rw" and the VFS took care of it. So
9376     + * nothing to do and we're done.
9377     + */
9378     + if (!options || options[0] == '\0')
9379     + goto out_error;
9380     +
9381     + /*
9382     + * Find out how many branches we will have in the end, counting
9383     + * "add" and "del" commands. Copy the "options" string because
9384     + * strsep modifies the string and we need it later.
9385     + */
9386     + tmp_to_free = kstrdup(options, GFP_KERNEL);
9387     + optionstmp = tmp_to_free;
9388     + if (unlikely(!optionstmp)) {
9389     + err = -ENOMEM;
9390     + goto out_free;
9391     + }
9392     + cur_branches = sbmax(sb); /* current no. branches */
9393     + new_branches = sbmax(sb);
9394     + del_branches = 0;
9395     + add_branches = 0;
9396     + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9397     + while ((optname = strsep(&optionstmp, ",")) != NULL) {
9398     + char *optarg;
9399     +
9400     + if (!optname || !*optname)
9401     + continue;
9402     +
9403     + optarg = strchr(optname, '=');
9404     + if (optarg)
9405     + *optarg++ = '\0';
9406     +
9407     + if (!strcmp("add", optname))
9408     + add_branches++;
9409     + else if (!strcmp("del", optname))
9410     + del_branches++;
9411     + }
9412     + kfree(tmp_to_free);
9413     + /* after all changes, will we have at least one branch left? */
9414     + if ((new_branches + add_branches - del_branches) < 1) {
9415     + printk(KERN_ERR
9416     + "unionfs: no branches left after remount\n");
9417     + err = -EINVAL;
9418     + goto out_free;
9419     + }
9420     +
9421     + /*
9422     + * Since we haven't actually parsed all the add/del options, nor
9423     + * have we checked them for errors, we don't know for sure how many
9424     + * branches we will have after all changes have taken place. In
9425     + * fact, the total number of branches left could be less than what
9426     + * we have now. So we need to allocate space for a temporary
9427     + * placeholder that is at least as large as the maximum number of
9428     + * branches we *could* have, which is the current number plus all
9429     + * the additions. Once we're done with these temp placeholders, we
9430     + * may have to re-allocate the final size, copy over from the temp,
9431     + * and then free the temps (done near the end of this function).
9432     + */
9433     + max_branches = cur_branches + add_branches;
9434     + /* allocate space for new pointers to lower dentry */
9435     + tmp_data = kcalloc(max_branches,
9436     + sizeof(struct unionfs_data), GFP_KERNEL);
9437     + if (unlikely(!tmp_data)) {
9438     + err = -ENOMEM;
9439     + goto out_free;
9440     + }
9441     + /* allocate space for new pointers to lower paths */
9442     + tmp_lower_paths = kcalloc(max_branches,
9443     + sizeof(struct path), GFP_KERNEL);
9444     + if (unlikely(!tmp_lower_paths)) {
9445     + err = -ENOMEM;
9446     + goto out_free;
9447     + }
9448     + /* copy current info into new placeholders, incrementing refcnts */
9449     + memcpy(tmp_data, UNIONFS_SB(sb)->data,
9450     + cur_branches * sizeof(struct unionfs_data));
9451     + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9452     + cur_branches * sizeof(struct path));
9453     + for (i = 0; i < cur_branches; i++)
9454     + pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9455     +
9456     + /*******************************************************************
9457     + * For each branch command, do path_lookup on the requested branch,
9458     + * and apply the change to a temp branch list. To handle errors, we
9459     + * already dup'ed the old arrays (above), and increased the refcnts
9460     + * on various f/s objects. So now we can do all the path_lookups
9461     + * and branch-management commands on the new arrays. If this fails
9462     + * midway, we free the tmp arrays and *put all objects. If we succeed,
9463     + * then we free old arrays and *put its objects, and then replace
9464     + * the arrays with the new tmp list (we may have to re-allocate the
9465     + * memory because the temp lists could have been larger than what we
9466     + * actually needed).
9467     + *******************************************************************/
9468     +
9469     + while ((optname = strsep(&options, ",")) != NULL) {
9470     + char *optarg;
9471     +
9472     + if (!optname || !*optname)
9473     + continue;
9474     + /*
9475     + * At this stage optname holds a comma-delimited option, but
9476     + * without the commas. Next, we need to break the string on
9477     + * the '=' symbol to separate CMD=ARG, where ARG itself can
9478     + * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
9479     + * KEY is "/foo", and VAL is "rw".
9480     + */
9481     + optarg = strchr(optname, '=');
9482     + if (optarg)
9483     + *optarg++ = '\0';
9484     + /* incgen remount option (instead of old ioctl) */
9485     + if (!strcmp("incgen", optname)) {
9486     + err = 0;
9487     + goto out_no_change;
9488     + }
9489     +
9490     + /*
9491     + * All of our options take an argument now. (Insert ones
9492     + * that don't above this check.) So at this stage optname
9493     + * contains the CMD part and optarg contains the ARG part.
9494     + */
9495     + if (!optarg || !*optarg) {
9496     + printk(KERN_ERR "unionfs: all remount options require "
9497     + "an argument (%s)\n", optname);
9498     + err = -EINVAL;
9499     + goto out_release;
9500     + }
9501     +
9502     + if (!strcmp("add", optname)) {
9503     + err = do_remount_add_option(optarg, new_branches,
9504     + tmp_data,
9505     + tmp_lower_paths,
9506     + &new_high_branch_id);
9507     + if (err)
9508     + goto out_release;
9509     + new_branches++;
9510     + if (new_branches > UNIONFS_MAX_BRANCHES) {
9511     + printk(KERN_ERR "unionfs: command exceeds "
9512     + "%d branches\n", UNIONFS_MAX_BRANCHES);
9513     + err = -E2BIG;
9514     + goto out_release;
9515     + }
9516     + continue;
9517     + }
9518     + if (!strcmp("del", optname)) {
9519     + err = do_remount_del_option(optarg, new_branches,
9520     + tmp_data,
9521     + tmp_lower_paths);
9522     + if (err)
9523     + goto out_release;
9524     + new_branches--;
9525     + continue;
9526     + }
9527     + if (!strcmp("mode", optname)) {
9528     + err = do_remount_mode_option(optarg, new_branches,
9529     + tmp_data,
9530     + tmp_lower_paths);
9531     + if (err)
9532     + goto out_release;
9533     + continue;
9534     + }
9535     +
9536     + /*
9537     + * When you use "mount -o remount,ro", mount(8) will
9538     + * reportedly pass the original dirs= string from
9539     + * /proc/mounts. So for now, we have to ignore dirs= and
9540     + * not consider it an error, unless we want to allow users
9541     + * to pass dirs= in remount. Note that to allow the VFS to
9542     + * actually process the ro/rw remount options, we have to
9543     + * return 0 from this function.
9544     + */
9545     + if (!strcmp("dirs", optname)) {
9546     + printk(KERN_WARNING
9547     + "unionfs: remount ignoring option \"%s\"\n",
9548     + optname);
9549     + continue;
9550     + }
9551     +
9552     + err = -EINVAL;
9553     + printk(KERN_ERR
9554     + "unionfs: unrecognized option \"%s\"\n", optname);
9555     + goto out_release;
9556     + }
9557     +
9558     +out_no_change:
9559     +
9560     + /******************************************************************
9561     + * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9562     + * see if we need to allocate a small-sized new vector, copy the
9563     + * vectors to their correct place, release the refcnt of the older
9564     + * ones, and return. Also handle invalidating any pages that will
9565     + * have to be re-read.
9566     + *******************************************************************/
9567     +
9568     + if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9569     + printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9570     + "(use \"remount,ro\" to create a read-only union)\n");
9571     + err = -EINVAL;
9572     + goto out_release;
9573     + }
9574     +
9575     + /* (re)allocate space for the new per-branch unionfs_data array */
9576     + size = new_branches * sizeof(struct unionfs_data);
9577     + new_data = krealloc(tmp_data, size, GFP_KERNEL);
9578     + if (unlikely(!new_data)) {
9579     + err = -ENOMEM;
9580     + goto out_release;
9581     + }
9582     +
9583     + /* allocate space for new pointers to lower paths */
9584     + size = new_branches * sizeof(struct path);
9585     + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9586     + if (unlikely(!new_lower_paths)) {
9587     + err = -ENOMEM;
9588     + goto out_release;
9589     + }
9590     +
9591     + /* allocate space for new pointers to lower inodes */
9592     + new_lower_inodes = kcalloc(new_branches,
9593     + sizeof(struct inode *), GFP_KERNEL);
9594     + if (unlikely(!new_lower_inodes)) {
9595     + err = -ENOMEM;
9596     + goto out_release;
9597     + }
9598     +
9599     + /*
9600     + * OK, just before we actually put the new set of branches in place,
9601     + * we need to ensure that our own f/s has no dirty objects left.
9602     + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9603     + * fsync_super(sb), taking care of dentries, inodes, and dirty
9604     + * pages. So all that's left is for us to invalidate any leftover
9605     + * (non-dirty) pages to ensure that they will be re-read from the
9606     + * new lower branches (and to support mmap).
9607     + */
9608     +
9609     + /*
9610     + * Once we finish the remounting successfully, our superblock
9611     + * generation number will have increased. This will be detected by
9612     + * our dentry-revalidation code upon subsequent f/s operations
9613     + * through unionfs. The revalidation code will rebuild the union of
9614     + * lower inodes for a given unionfs inode and invalidate any pages
9615     + * of such "stale" inodes (by calling our purge_inode_data
9616     + * function). This revalidation will happen lazily and
9617     + * incrementally, as users perform operations on cached inodes. We
9618     + * would like to encourage this revalidation to happen sooner if
9619     + * possible, so we try to invalidate as many other pages in our
9620     + * superblock as we can.
9621     + */
9622     + purge_sb_data(sb);
9623     +
9624     + /* copy new vectors into their correct place */
9625     + tmp_data = UNIONFS_SB(sb)->data;
9626     + UNIONFS_SB(sb)->data = new_data;
9627     + new_data = NULL; /* so don't free good pointers below */
9628     + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9629     + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9630     + new_lower_paths = NULL; /* so don't free good pointers below */
9631     +
9632     + /* update our unionfs_sb_info and root dentry index of last branch */
9633     + i = sbmax(sb); /* save no. of branches to release at end */
9634     + sbend(sb) = new_branches - 1;
9635     + set_dbend(sb->s_root, new_branches - 1);
9636     + old_ibstart = ibstart(sb->s_root->d_inode);
9637     + old_ibend = ibend(sb->s_root->d_inode);
9638     + ibend(sb->s_root->d_inode) = new_branches - 1;
9639     + UNIONFS_D(sb->s_root)->bcount = new_branches;
9640     + new_branches = i; /* no. of branches to release below */
9641     +
9642     + /*
9643     + * Update lower inodes: 3 steps
9644     + * 1. grab ref on all new lower inodes
9645     + */
9646     + for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9647     + struct dentry *lower_dentry =
9648     + unionfs_lower_dentry_idx(sb->s_root, i);
9649     + igrab(lower_dentry->d_inode);
9650     + new_lower_inodes[i] = lower_dentry->d_inode;
9651     + }
9652     + /* 2. release reference on all older lower inodes */
9653     + for (i = old_ibstart; i <= old_ibend; i++) {
9654     + iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
9655     + unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
9656     + }
9657     + kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
9658     + /* 3. update root dentry's inode to new lower_inodes array */
9659     + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9660     + new_lower_inodes = NULL;
9661     +
9662     + /* maxbytes may have changed */
9663     + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9664     + /* update high branch ID */
9665     + sbhbid(sb) = new_high_branch_id;
9666     +
9667     + /* update our sb->generation for revalidating objects */
9668     + i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9669     + atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9670     + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9671     + if (!(*flags & MS_SILENT))
9672     + pr_info("unionfs: new generation number %d\n", i);
9673     + /* finally, update the root dentry's times */
9674     + unionfs_copy_attr_times(sb->s_root->d_inode);
9675     + err = 0; /* reset to success */
9676     +
9677     + /*
9678     + * The code above falls through to the next label, and releases the
9679     + * refcnts of the older ones (stored in tmp_*): if we fell through
9680     + * here, it means success. However, if we jump directly to this
9681     + * label from any error above, then an error occurred after we
9682     + * grabbed various refcnts, and so we have to release the
9683     + * temporarily constructed structures.
9684     + */
9685     +out_release:
9686     + /* no need to cleanup/release anything in tmp_data */
9687     + if (tmp_lower_paths)
9688     + for (i = 0; i < new_branches; i++)
9689     + pathput(&tmp_lower_paths[i]);
9690     +out_free:
9691     + kfree(tmp_lower_paths);
9692     + kfree(tmp_data);
9693     + kfree(new_lower_paths);
9694     + kfree(new_data);
9695     + kfree(new_lower_inodes);
9696     +out_error:
9697     + unionfs_check_dentry(sb->s_root);
9698     + unionfs_write_unlock(sb);
9699     + return err;
9700     +}
9701     +
9702     +/*
9703     + * Invoked from iput() once the inode's reference count has dropped to
9704     + * zero and the inode is no longer hashed anywhere. Cleans up whatever
9705     + * needs cleaning before the inode is completely destroyed and put on
9706     + * the inode free list.
9707     + *
9708     + * No need to lock sb info's rwsem.
9709     + */
9710     +static void unionfs_clear_inode(struct inode *inode)
9711     +{
9712     + int idx, first, last;
9713     + struct inode *lower;
9714     + struct list_head *node, *tmp;
9715     + struct unionfs_dir_state *state;
9716     +
9717     + list_for_each_safe(node, tmp, &UNIONFS_I(inode)->readdircache) {
9718     + state = list_entry(node, struct unionfs_dir_state, cache);
9719     + list_del(&state->cache);
9720     + free_rdstate(state);
9721     + }
9722     +
9723     + /*
9724     + * Drop the reference on each lower inode, which was taken by our
9725     + * read_inode when this inode was created initially.
9726     + */
9727     + first = ibstart(inode);
9728     + last = ibend(inode);
9729     + if (first >= 0) {
9730     + for (idx = first; idx <= last; idx++) {
9731     + lower = unionfs_lower_inode_idx(inode, idx);
9732     + if (lower) {
9733     + unionfs_set_lower_inode_idx(inode, idx, NULL);
9734     + /* see Documentation/filesystems/unionfs/issues.txt */
9735     + lockdep_off();
9736     + iput(lower);
9737     + lockdep_on();
9738     + }
9739     + }
9740     + }
9741     +
9742     + kfree(UNIONFS_I(inode)->lower_inodes);
9743     + UNIONFS_I(inode)->lower_inodes = NULL;
9744     +}
9745     +
9746     +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9747     +{
9748     + struct unionfs_inode_info *info;
9749     +
9750     + info = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9751     + if (unlikely(!info))
9752     + return NULL;
9753     +
9754     + /* zero every field that precedes the embedded vfs_inode */
9755     + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9756     +
9757     + info->vfs_inode.i_version = 1;
9758     + return &info->vfs_inode;
9759     +}
9760     +
9761     +static void unionfs_destroy_inode(struct inode *inode)
9762     +{
9763     + kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9764     +}
9765     +
9766     +/* slab constructor for the unionfs inode cache */
9767     +static void init_once(struct kmem_cache *cachep, void *obj)
9768     +{
9769     + struct unionfs_inode_info *info = obj;
9770     +
9771     + inode_init_once(&info->vfs_inode);
9772     +}
9773     +
9774     +int unionfs_init_inode_cache(void)
9775     +{
9776     + /* create the cache of unionfs_inode_info objects; 0 or -ENOMEM */
9777     + unionfs_inode_cachep =
9778     + kmem_cache_create("unionfs_inode_cache",
9779     + sizeof(struct unionfs_inode_info), 0,
9780     + SLAB_RECLAIM_ACCOUNT, init_once);
9781     + if (unlikely(!unionfs_inode_cachep))
9782     + return -ENOMEM;
9783     +
9784     + return 0;
9785     +}
9786     +
9787     +/* destroy the unionfs inode cache; does nothing if it was never created */
9788     +void unionfs_destroy_inode_cache(void)
9789     +{
9790     + if (unionfs_inode_cachep)
9791     + kmem_cache_destroy(unionfs_inode_cachep);
9792     +}
9793     +
9794     +/*
9795     + * Called when we have a dirty inode. All we do here is throw out the
9796     + * parts of our readdir cache that are older than RDCACHE_JIFFIES.
9797     + *
9798     + * No need to grab sb info's rwsem. Always returns 0.
9799     + */
9800     +static int unionfs_write_inode(struct inode *inode, int sync)
9801     +{
9802     + struct list_head *pos, *n;
9803     + struct unionfs_dir_state *rdstate;
9804     +
9805     + spin_lock(&UNIONFS_I(inode)->rdlock);
9806     + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9807     + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9808     + /* LRU list: stop at first unexpired entry (wrap-safe) */
9809     + if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9810     + break;
9811     + UNIONFS_I(inode)->rdcount--;
9812     + list_del(&rdstate->cache);
9813     + free_rdstate(rdstate);
9814     + }
9815     + spin_unlock(&UNIONFS_I(inode)->rdlock);
9816     +
9817     + return 0;
9818     +}
9819     +
9820     +/*
9821     + * Only used in nfs: kills any pending RPC tasks, so that the code which
9822     + * follows can actually succeed and won't leave tasks that need handling.
9823     + */
9824     +static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
9825     +{
9826     + struct super_block *sb, *lower_sb;
9827     + struct vfsmount *lower_mnt;
9828     + int idx, last;
9829     +
9830     + /* not being MNT_FORCE'd: emulate old behavior, i.e. do nothing */
9831     + if (!(flags & MNT_FORCE))
9832     + return;
9833     +
9834     + sb = mnt->mnt_sb;
9835     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9836     +
9837     + /* call umount_begin on every branch whose lower sb provides one */
9838     + last = sbend(sb);
9839     + for (idx = sbstart(sb); idx <= last; idx++) {
9840     + lower_mnt = unionfs_lower_mnt_idx(sb->s_root, idx);
9841     + lower_sb = unionfs_lower_super_idx(sb, idx);
9842     +
9843     + if (!lower_mnt || !lower_sb || !lower_sb->s_op)
9844     + continue;
9845     + if (!lower_sb->s_op->umount_begin)
9846     + continue;
9847     +
9848     + /* pass the same flags down to the lower file system */
9849     + lower_sb->s_op->umount_begin(lower_mnt, flags);
9850     + }
9851     +
9852     + unionfs_read_unlock(sb);
9853     +}
9854     +
9855     +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9856     +{
9857     + struct super_block *sb = mnt->mnt_sb;
9858     + char *buf, *branch;
9859     + int idx, last;
9860     + int mode;
9861     + int err = 0;
9862     +
9863     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9864     +
9865     + unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9866     +
9867     + /* scratch page handed to d_path() for each branch's path */
9868     + buf = (char *) __get_free_page(GFP_KERNEL);
9869     + if (unlikely(!buf)) {
9870     + err = -ENOMEM;
9871     + goto out;
9872     + }
9873     +
9874     + last = sbend(sb);
9875     +
9876     + seq_printf(m, ",dirs=");
9877     + for (idx = sbstart(sb); idx <= last; idx++) {
9878     + branch = d_path(unionfs_lower_dentry_idx(sb->s_root, idx),
9879     + unionfs_lower_mnt_idx(sb->s_root, idx),
9880     + buf, PAGE_SIZE);
9881     + if (IS_ERR(branch)) {
9882     + err = PTR_ERR(branch);
9883     + goto out;
9884     + }
9885     +
9886     + mode = branchperms(sb, idx);
9887     +
9888     + /* one "path=rw" or "path=ro" per branch, ':' between them */
9889     + seq_printf(m, "%s=%s", branch,
9890     + mode & MAY_WRITE ? "rw" : "ro");
9891     + if (idx != last)
9892     + seq_printf(m, ":");
9893     + }
9894     +
9895     +out:
9896     + free_page((unsigned long) buf);
9897     +
9898     + unionfs_unlock_dentry(sb->s_root);
9899     +
9900     + unionfs_read_unlock(sb);
9901     +
9902     + return err;
9903     +}
9904     +
9905     +struct super_operations unionfs_sops = {
9906     + .alloc_inode = unionfs_alloc_inode,
9907     + .destroy_inode = unionfs_destroy_inode,
9908     + .read_inode = unionfs_read_inode,
9909     + .write_inode = unionfs_write_inode,
9910     + .delete_inode = unionfs_delete_inode,
9911     + .clear_inode = unionfs_clear_inode,
9912     + .put_super = unionfs_put_super,
9913     + .statfs = unionfs_statfs,
9914     + .remount_fs = unionfs_remount_fs,
9915     + .umount_begin = unionfs_umount_begin,
9916     + .show_options = unionfs_show_options,
9917     +};
9918     diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9919     new file mode 100644
9920     index 0000000..14577bc
9921     --- /dev/null
9922     +++ b/fs/unionfs/union.h
9923     @@ -0,0 +1,609 @@
9924     +/*
9925     + * Copyright (c) 2003-2007 Erez Zadok
9926     + * Copyright (c) 2003-2006 Charles P. Wright
9927     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9928     + * Copyright (c) 2005 Arun M. Krishnakumar
9929     + * Copyright (c) 2004-2006 David P. Quigley
9930     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9931     + * Copyright (c) 2003 Puja Gupta
9932     + * Copyright (c) 2003 Harikesavan Krishnan
9933     + * Copyright (c) 2003-2007 Stony Brook University
9934     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
9935     + *
9936     + * This program is free software; you can redistribute it and/or modify
9937     + * it under the terms of the GNU General Public License version 2 as
9938     + * published by the Free Software Foundation.
9939     + */
9940     +
9941     +#ifndef _UNION_H_
9942     +#define _UNION_H_
9943     +
9944     +#include <linux/dcache.h>
9945     +#include <linux/file.h>
9946     +#include <linux/list.h>
9947     +#include <linux/fs.h>
9948     +#include <linux/mm.h>
9949     +#include <linux/module.h>
9950     +#include <linux/mount.h>
9951     +#include <linux/namei.h>
9952     +#include <linux/page-flags.h>
9953     +#include <linux/pagemap.h>
9954     +#include <linux/poll.h>
9955     +#include <linux/security.h>
9956     +#include <linux/seq_file.h>
9957     +#include <linux/slab.h>
9958     +#include <linux/spinlock.h>
9959     +#include <linux/smp_lock.h>
9960     +#include <linux/statfs.h>
9961     +#include <linux/string.h>
9962     +#include <linux/vmalloc.h>
9963     +#include <linux/writeback.h>
9964     +#include <linux/buffer_head.h>
9965     +#include <linux/xattr.h>
9966     +#include <linux/fs_stack.h>
9967     +#include <linux/magic.h>
9968     +#include <linux/log2.h>
9969     +#include <linux/poison.h>
9970     +#include <linux/mman.h>
9971     +#include <linux/backing-dev.h>
9972     +
9973     +#include <asm/system.h>
9974     +
9975     +#include <linux/union_fs.h>
9976     +
9977     +/* the file system name */
9978     +#define UNIONFS_NAME "unionfs"
9979     +
9980     +/* unionfs root inode number */
9981     +#define UNIONFS_ROOT_INO 1
9982     +
9983     +/* number of times we try to get a unique temporary file name */
9984     +#define GET_TMPNAM_MAX_RETRY 5
9985     +
9986     +/* maximum number of branches we support, to avoid memory blowup */
9987     +#define UNIONFS_MAX_BRANCHES 128
9988     +
9989     +/* minimum time (seconds) required for time-based cache-coherency */
9990     +#define UNIONFS_MIN_CC_TIME 3
9991     +
9992     +/* Operations vectors defined in specific files. */
9993     +extern struct file_operations unionfs_main_fops;
9994     +extern struct file_operations unionfs_dir_fops;
9995     +extern struct inode_operations unionfs_main_iops;
9996     +extern struct inode_operations unionfs_dir_iops;
9997     +extern struct inode_operations unionfs_symlink_iops;
9998     +extern struct super_operations unionfs_sops;
9999     +extern struct dentry_operations unionfs_dops;
10000     +extern struct address_space_operations unionfs_aops;
10001     +
10002     +/* How long should an entry be allowed to persist */
10003     +#define RDCACHE_JIFFIES (5*HZ)
10004     +
10005     +/* compatibility with Real-Time patches */
10006     +#ifdef CONFIG_PREEMPT_RT
10007     +# define unionfs_rw_semaphore compat_rw_semaphore
10008     +#else /* not CONFIG_PREEMPT_RT */
10009     +# define unionfs_rw_semaphore rw_semaphore
10010     +#endif /* not CONFIG_PREEMPT_RT */
10011     +
10012     +/* file private data. */
10013     +struct unionfs_file_info {
10014     + int bstart;
10015     + int bend;
10016     + atomic_t generation;
10017     +
10018     + struct unionfs_dir_state *rdstate;
10019     + struct file **lower_files;
10020     + int *saved_branch_ids; /* IDs of branches when file was opened */
10021     +};
10022     +
10023     +/* unionfs inode data in memory */
10024     +struct unionfs_inode_info {
10025     + int bstart;
10026     + int bend;
10027     + atomic_t generation;
10028     + int stale;
10029     + /* Stuff for readdir over NFS. */
10030     + spinlock_t rdlock;
10031     + struct list_head readdircache;
10032     + int rdcount;
10033     + int hashsize;
10034     + int cookie;
10035     +
10036     + /* The lower inodes */
10037     + struct inode **lower_inodes;
10038     +
10039     + struct inode vfs_inode;
10040     +};
10041     +
10042     +/* unionfs dentry data in memory */
10043     +struct unionfs_dentry_info {
10044     + /*
10045     + * This mutex is used to lock the dentry as soon as we get into a
10046     + * unionfs function from the VFS. Our lock ordering is that children
10047     + * go before their parents.
10048     + */
10049     + struct mutex lock;
10050     + int bstart;
10051     + int bend;
10052     + int bopaque;
10053     + int bcount;
10054     + atomic_t generation;
10055     + struct path *lower_paths;
10056     +};
10057     +
10058     +/* These are the pointers to our various objects. */
10059     +struct unionfs_data {
10060     + struct super_block *sb;
10061     + atomic_t open_files; /* number of open files on branch */
10062     + int branchperms;
10063     + int branch_id; /* unique branch ID at re/mount time */
10064     +};
10065     +
10066     +/* unionfs super-block data in memory */
10067     +struct unionfs_sb_info {
10068     + int bend;
10069     +
10070     + atomic_t generation;
10071     +
10072     + /*
10073     + * This rwsem is used to make sure that a branch management
10074     + * operation...
10075     + * 1) will not begin before all currently in-flight operations
10076     + * complete.
10077     + * 2) any new operations do not execute until the currently
10078     + * running branch management operation completes.
10079     + *
10080     + * The write_lock_owner records the PID of the task which grabbed
10081     + * the rw_sem for writing. If the same task also tries to grab the
10082     + * read lock, we allow it. This prevents a self-deadlock when
10083     + * branch-management is used on a pivot_root'ed union, because we
10084     + * have to ->lookup paths which belong to the same union.
10085     + */
10086     + struct unionfs_rw_semaphore rwsem;
10087     + pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */
10088     + int high_branch_id; /* last unique branch ID given */
10089     + struct unionfs_data *data;
10090     +};
10091     +
10092     +/*
10093     + * structure for making the linked list of entries by readdir on left branch
10094     + * to compare with entries on right branch
10095     + */
10096     +struct filldir_node {
10097     + struct list_head file_list; /* list for directory entries */
10098     + char *name; /* name entry */
10099     + int hash; /* name hash */
10100     + int namelen; /* name len since name is not 0 terminated */
10101     +
10102     + /*
10103     + * we can check for duplicate whiteouts and files in the same branch
10104     + * in order to return -EIO.
10105     + */
10106     + int bindex;
10107     +
10108     + /* is this a whiteout entry? */
10109     + int whiteout;
10110     +
10111     + /* Inline name, so we don't need to separately kmalloc small ones */
10112     + char iname[DNAME_INLINE_LEN_MIN];
10113     +};
10114     +
10115     +/* Directory hash table. */
10116     +struct unionfs_dir_state {
10117     + unsigned int cookie; /* the cookie, based off of rdversion */
10118     + unsigned int offset; /* The entry we have returned. */
10119     + int bindex;
10120     + loff_t dirpos; /* offset within the lower level directory */
10121     + int size; /* How big is the hash table? */
10122     + int hashentries; /* How many entries have been inserted? */
10123     + unsigned long access;
10124     +
10125     + /* This cache list is used when the inode keeps us around. */
10126     + struct list_head cache;
10127     + struct list_head list[0];
10128     +};
10129     +
10130     +/* externs needed for fanout.h or sioq.h */
10131     +extern int unionfs_get_nlinks(const struct inode *inode);
10132     +
10133     +/* include miscellaneous macros */
10134     +#include "fanout.h"
10135     +#include "sioq.h"
10136     +
10137     +/* externs for cache creation/deletion routines */
10138     +extern void unionfs_destroy_filldir_cache(void);
10139     +extern int unionfs_init_filldir_cache(void);
10140     +extern int unionfs_init_inode_cache(void);
10141     +extern void unionfs_destroy_inode_cache(void);
10142     +extern int unionfs_init_dentry_cache(void);
10143     +extern void unionfs_destroy_dentry_cache(void);
10144     +
10145     +/* Initialize and free readdir-specific state. */
10146     +extern int init_rdstate(struct file *file);
10147     +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
10148     + int bindex);
10149     +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
10150     + loff_t fpos);
10151     +extern void free_rdstate(struct unionfs_dir_state *state);
10152     +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
10153     + const char *name, int namelen, int bindex,
10154     + int whiteout);
10155     +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
10156     + const char *name, int namelen,
10157     + int is_whiteout);
10158     +
10159     +extern struct dentry **alloc_new_dentries(int objs);
10160     +extern struct unionfs_data *alloc_new_data(int objs);
10161     +
10162     +/* We can only use 32-bits of offset for rdstate --- blech! */
10163     +#define DIREOF (0xfffff)
10164     +#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
10165     +#define MAXRDCOOKIE (0xfff)
10166     +/* Encode an rdstate's cookie and offset as a single off_t. */
10167     +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
10168     +{
10169     + unsigned int hi = (buf->cookie & MAXRDCOOKIE) << RDOFFBITS;
10170     + unsigned int lo = buf->offset & DIREOF;
10171     +
10172     + /* cookie sits above the RDOFFBITS low bits holding the offset */
10173     + return hi | lo;
10174     +}
10175     +
10176     +/* Macros for locking a super_block. */
10177     +enum unionfs_super_lock_class {
10178     + UNIONFS_SMUTEX_NORMAL,
10179     + UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */
10180     + UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */
10181     +};
10182     +static inline void unionfs_read_lock(struct super_block *sb, int subclass)
10183     +{
10184     + /* the task recorded as write-lock owner is let through unlocked */
10185     + if (!UNIONFS_SB(sb)->write_lock_owner ||
10186     + UNIONFS_SB(sb)->write_lock_owner != current->pid)
10187     + down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
10188     +}
10189     +static inline void unionfs_read_unlock(struct super_block *sb)
10190     +{
10191     + /* the write-lock owner never took the read lock: nothing to drop */
10192     + if (!UNIONFS_SB(sb)->write_lock_owner ||
10193     + UNIONFS_SB(sb)->write_lock_owner != current->pid)
10194     + up_read(&UNIONFS_SB(sb)->rwsem);
10195     +}
10196     +static inline void unionfs_write_lock(struct super_block *sb)
10197     +{
10198     + down_write(&UNIONFS_SB(sb)->rwsem);
10199     + UNIONFS_SB(sb)->write_lock_owner = current->pid;
10200     +}
10201     +static inline void unionfs_write_unlock(struct super_block *sb)
10202     +{
10203     + UNIONFS_SB(sb)->write_lock_owner = 0; /* clear while still locked */
10204     + up_write(&UNIONFS_SB(sb)->rwsem);
10205     +}
10206     +
10207     +static inline void unionfs_double_lock_dentry(struct dentry *d1,
10208     + struct dentry *d2)
10209     +{
10210     + struct dentry *lo, *hi;
10211     +
10212     + BUG_ON(d1 == d2);
10213     + /* lock in address order: lower address as PARENT, other as CHILD */
10214     + lo = (d1 < d2) ? d1 : d2;
10215     + hi = (d1 < d2) ? d2 : d1;
10216     + unionfs_lock_dentry(lo, UNIONFS_DMUTEX_PARENT);
10217     + unionfs_lock_dentry(hi, UNIONFS_DMUTEX_CHILD);
10218     +}
10219     +
10220     +extern int new_dentry_private_data(struct dentry *dentry, int subclass);
10221     +extern void free_dentry_private_data(struct dentry *dentry);
10222     +extern void update_bstart(struct dentry *dentry);
10223     +extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
10224     +extern void release_lower_nd(struct nameidata *nd, int err);
10225     +
10226     +/*
10227     + * EXTERNALS:
10228     + */
10229     +
10230     +/* replicates the directory structure up to given dentry in given branch */
10231     +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
10232     + const char *name, int bindex);
10233     +extern int make_dir_opaque(struct dentry *dir, int bindex);
10234     +
10235     +/* partial lookup */
10236     +extern int unionfs_partial_lookup(struct dentry *dentry);
10237     +
10238     +/*
10239     + * Pass a unionfs dentry and an index and it will try to create a whiteout
10240     + * in branch 'index'.
10241     + *
10242     + * On error, it will proceed to a branch to the left
10243     + */
10244     +extern int create_whiteout(struct dentry *dentry, int start);
10245     +/* copies a file from dbstart to newbindex branch */
10246     +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
10247     + int newbindex, loff_t size);
10248     +extern int copyup_named_file(struct inode *dir, struct file *file,
10249     + char *name, int bstart, int new_bindex,
10250     + loff_t len);
10251     +/* copies a dentry from dbstart to newbindex branch */
10252     +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
10253     + int bstart, int new_bindex, const char *name,
10254     + int namelen, struct file **copyup_file, loff_t len);
10255     +/* helper functions for post-copyup actions */
10256     +extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
10257     +extern void unionfs_postcopyup_release(struct dentry *dentry);
10258     +
10259     +extern int remove_whiteouts(struct dentry *dentry,
10260     + struct dentry *lower_dentry, int bindex);
10261     +
10262     +extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
10263     + struct unionfs_dir_state *namelist);
10264     +
10265     +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
10266     +extern int check_empty(struct dentry *dentry,
10267     + struct unionfs_dir_state **namelist);
10268     +/* Delete whiteouts from this directory in branch bindex. */
10269     +extern int delete_whiteouts(struct dentry *dentry, int bindex,
10270     + struct unionfs_dir_state *namelist);
10271     +
10272     +/* Re-lookup a lower dentry. */
10273     +extern int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex);
10274     +
10275     +extern void unionfs_reinterpose(struct dentry *this_dentry);
10276     +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
10277     +
10278     +/* Locking functions. */
10279     +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
10280     +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
10281     +
10282     +/* Common file operations. */
10283     +extern int unionfs_file_revalidate(struct file *file, bool willwrite);
10284     +extern int unionfs_open(struct inode *inode, struct file *file);
10285     +extern int unionfs_file_release(struct inode *inode, struct file *file);
10286     +extern int unionfs_flush(struct file *file, fl_owner_t id);
10287     +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
10288     + unsigned long arg);
10289     +extern int unionfs_fsync(struct file *file, struct dentry *dentry,
10290     + int datasync);
10291     +extern int unionfs_fasync(int fd, struct file *file, int flag);
10292     +
10293     +/* Inode operations */
10294     +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
10295     + struct inode *new_dir, struct dentry *new_dentry);
10296     +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
10297     +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
10298     +
10299     +extern bool __unionfs_d_revalidate_chain(struct dentry *dentry,
10300     + struct nameidata *nd, bool willwrite);
10301     +extern bool is_newer_lower(const struct dentry *dentry);
10302     +extern void purge_sb_data(struct super_block *sb);
10303     +
10304     +/* The values for unionfs_interpose's flag. */
10305     +#define INTERPOSE_DEFAULT 0
10306     +#define INTERPOSE_LOOKUP 1
10307     +#define INTERPOSE_REVAL 2
10308     +#define INTERPOSE_REVAL_NEG 3
10309     +#define INTERPOSE_PARTIAL 4
10310     +
10311     +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
10312     + struct super_block *sb, int flag);
10313     +
10314     +#ifdef CONFIG_UNION_FS_XATTR
10315     +/* Extended attribute functions. */
10316     +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
10317     +static inline void unionfs_xattr_kfree(const void *p)
10318     +{
10319     + kfree(p);
10320     +}
10321     +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
10322     + void *value, size_t size);
10323     +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
10324     +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
10325     + size_t size);
10326     +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10327     + const void *value, size_t size, int flags);
10328     +#endif /* CONFIG_UNION_FS_XATTR */
10329     +
10330     +/* The root directory is unhashed, but isn't deleted. */
10331     +static inline int d_deleted(struct dentry *d)
10332     +{
10333     + return d_unhashed(d) && (d != d->d_sb->s_root);
10334     +}
10335     +
10336     +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
10337     + struct nameidata *nd, int lookupmode);
10338     +
10339     +/* unionfs_permission, check if we should bypass error to facilitate copyup */
10340     +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10341     +
10342     +/* unionfs_open, check if we need to copyup the file */
10343     +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10344     +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10345     +
10346     +static inline int branchperms(const struct super_block *sb, int index)
10347     +{
10348     + BUG_ON(index < 0);
10349     + return UNIONFS_SB(sb)->data[index].branchperms;
10350     +}
10351     +
10352     +static inline int set_branchperms(struct super_block *sb, int index, int perms)
10353     +{
10354     + BUG_ON(index < 0);
10355     + UNIONFS_SB(sb)->data[index].branchperms = perms;
10356     + return perms;
10357     +}
10358     +
10359     +/* Is this file on a read-only branch? */
10360     +static inline int is_robranch_super(const struct super_block *sb, int index)
10361     +{
10362     + int ret;
10363     +
10364     + ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
10365     + return ret;
10366     +}
10367     +
10368     +/* Is this file on a read-only branch? */
10369     +static inline int is_robranch_idx(const struct dentry *dentry, int index)
10370     +{
10371     + struct super_block *lower_sb;
10372     +
10373     + BUG_ON(index < 0);
10374     +
10375     + if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
10376     + return -EROFS;
10377     +
10378     + lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
10379     + BUG_ON(lower_sb == NULL);
10380     + /*
10381     + * test sb flags directly, not IS_RDONLY(lower_inode) because the
10382     + * lower_dentry could be a negative.
10383     + */
10384     + if (lower_sb->s_flags & MS_RDONLY)
10385     + return -EROFS;
10386     +
10387     + return 0;
10388     +}
10389     +
10390     +static inline int is_robranch(const struct dentry *dentry)
10391     +{
10392     + int index;
10393     +
10394     + index = UNIONFS_D(dentry)->bstart;
10395     + BUG_ON(index < 0);
10396     +
10397     + return is_robranch_idx(dentry, index);
10398     +}
10399     +
10400     +/* What do we use for whiteouts. */
10401     +#define UNIONFS_WHPFX ".wh."
10402     +#define UNIONFS_WHLEN 4
10403     +/*
10404     + * If a directory contains this file, then it is opaque. We start with the
10405     + * .wh. flag so that it is blocked by lookup.
10406     + */
10407     +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10408     +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10409     +
10410     +/*
10411     + * EXTERNALS:
10412     + */
10413     +extern char *alloc_whname(const char *name, int len);
10414     +extern int check_branch(struct nameidata *nd);
10415     +extern int parse_branch_mode(const char *name, int *perms);
10416     +
10417     +/* locking helpers */
10418     +static inline struct dentry *lock_parent(struct dentry *dentry)
10419     +{
10420     + struct dentry *dir = dget_parent(dentry);
10421     + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
10422     + return dir;
10423     +}
10424     +static inline struct dentry *lock_parent_wh(struct dentry *dentry)
10425     +{
10426     + struct dentry *dir = dget_parent(dentry);
10427     +
10428     + mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
10429     + return dir;
10430     +}
10431     +
10432     +static inline void unlock_dir(struct dentry *dir)
10433     +{
10434     + mutex_unlock(&dir->d_inode->i_mutex);
10435     + dput(dir);
10436     +}
10437     +
10438     +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10439     + int bindex)
10440     +{
10441     + struct vfsmount *mnt;
10442     +
10443     + BUG_ON(!dentry || bindex < 0);
10444     +
10445     + mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10446     +#ifdef CONFIG_UNION_FS_DEBUG
10447     + if (!mnt)
10448     + pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10449     + mnt, bindex);
10450     +#endif /* CONFIG_UNION_FS_DEBUG */
10451     +
10452     + return mnt;
10453     +}
10454     +
10455     +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10456     +{
10457     + struct vfsmount *mnt;
10458     +
10459     + if (!dentry && bindex < 0)
10460     + return;
10461     + BUG_ON(!dentry || bindex < 0);
10462     +
10463     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10464     +#ifdef CONFIG_UNION_FS_DEBUG
10465     + /*
10466     + * Directories can have NULL lower objects in between start/end, but
10467     + * NOT if at the start/end range. We cannot verify that this dentry
10468     + * is a type=DIR, because it may already be a negative dentry. But
10469     + * if dbstart is greater than dbend, we know that this couldn't have
10470     + * been a regular file: it had to have been a directory.
10471     + */
10472     + if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10473     + pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10474     +#endif /* CONFIG_UNION_FS_DEBUG */
10475     + mntput(mnt);
10476     +}
10477     +
10478     +#ifdef CONFIG_UNION_FS_DEBUG
10479     +
10480     +/* useful for tracking code reachability */
10481     +#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__)
10482     +
10483     +#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10484     + __FILE__, __FUNCTION__, __LINE__)
10485     +#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10486     + __FILE__, __FUNCTION__, __LINE__)
10487     +#define unionfs_check_file(f) __unionfs_check_file((f), \
10488     + __FILE__, __FUNCTION__, __LINE__)
10489     +#define unionfs_check_nd(n) __unionfs_check_nd((n), \
10490     + __FILE__, __FUNCTION__, __LINE__)
10491     +#define show_branch_counts(sb) __show_branch_counts((sb), \
10492     + __FILE__, __FUNCTION__, __LINE__)
10493     +#define show_inode_times(i) __show_inode_times((i), \
10494     + __FILE__, __FUNCTION__, __LINE__)
10495     +#define show_dinode_times(d) __show_dinode_times((d), \
10496     + __FILE__, __FUNCTION__, __LINE__)
10497     +#define show_inode_counts(i) __show_inode_counts((i), \
10498     + __FILE__, __FUNCTION__, __LINE__)
10499     +
10500     +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10501     + const char *fxn, int line);
10502     +extern void __unionfs_check_dentry(const struct dentry *dentry,
10503     + const char *fname, const char *fxn,
10504     + int line);
10505     +extern void __unionfs_check_file(const struct file *file,
10506     + const char *fname, const char *fxn, int line);
10507     +extern void __unionfs_check_nd(const struct nameidata *nd,
10508     + const char *fname, const char *fxn, int line);
10509     +extern void __show_branch_counts(const struct super_block *sb,
10510     + const char *file, const char *fxn, int line);
10511     +extern void __show_inode_times(const struct inode *inode,
10512     + const char *file, const char *fxn, int line);
10513     +extern void __show_dinode_times(const struct dentry *dentry,
10514     + const char *file, const char *fxn, int line);
10515     +extern void __show_inode_counts(const struct inode *inode,
10516     + const char *file, const char *fxn, int line);
10517     +
10518     +#else /* not CONFIG_UNION_FS_DEBUG */
10519     +
10520     +/* we leave useful hooks for these check functions throughout the code */
10521     +#define unionfs_check_inode(i) do { } while (0)
10522     +#define unionfs_check_dentry(d) do { } while (0)
10523     +#define unionfs_check_file(f) do { } while (0)
10524     +#define unionfs_check_nd(n) do { } while (0)
10525     +#define show_branch_counts(sb) do { } while (0)
10526     +#define show_inode_times(i) do { } while (0)
10527     +#define show_dinode_times(d) do { } while (0)
10528     +#define show_inode_counts(i) do { } while (0)
10529     +
10530     +#endif /* not CONFIG_UNION_FS_DEBUG */
10531     +
10532     +#endif /* not _UNION_H_ */
10533     diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10534     new file mode 100644
10535     index 0000000..1e370a1
10536     --- /dev/null
10537     +++ b/fs/unionfs/unlink.c
10538     @@ -0,0 +1,251 @@
10539     +/*
10540     + * Copyright (c) 2003-2007 Erez Zadok
10541     + * Copyright (c) 2003-2006 Charles P. Wright
10542     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10543     + * Copyright (c) 2005-2006 Junjiro Okajima
10544     + * Copyright (c) 2005 Arun M. Krishnakumar
10545     + * Copyright (c) 2004-2006 David P. Quigley
10546     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10547     + * Copyright (c) 2003 Puja Gupta
10548     + * Copyright (c) 2003 Harikesavan Krishnan
10549     + * Copyright (c) 2003-2007 Stony Brook University
10550     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10551     + *
10552     + * This program is free software; you can redistribute it and/or modify
10553     + * it under the terms of the GNU General Public License version 2 as
10554     + * published by the Free Software Foundation.
10555     + */
10556     +
10557     +#include "union.h"
10558     +
10559     +/* unlink a file by creating a whiteout */
10560     +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
10561     +{
10562     + struct dentry *lower_dentry;
10563     + struct dentry *lower_dir_dentry;
10564     + int bindex;
10565     + int err = 0;
10566     +
10567     + err = unionfs_partial_lookup(dentry);
10568     + if (err)
10569     + goto out;
10570     +
10571     + bindex = dbstart(dentry);
10572     +
10573     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10574     + if (!lower_dentry)
10575     + goto out;
10576     +
10577     + lower_dir_dentry = lock_parent(lower_dentry);
10578     +
10579     + /* avoid destroying the lower inode if the file is in use */
10580     + dget(lower_dentry);
10581     + err = is_robranch_super(dentry->d_sb, bindex);
10582     + if (!err) {
10583     + /* see Documentation/filesystems/unionfs/issues.txt */
10584     + lockdep_off();
10585     + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
10586     + lockdep_on();
10587     + }
10588     + /* if vfs_unlink succeeded, update our inode's times */
10589     + if (!err)
10590     + unionfs_copy_attr_times(dentry->d_inode);
10591     + dput(lower_dentry);
10592     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10593     + unlock_dir(lower_dir_dentry);
10594     +
10595     + if (err && !IS_COPYUP_ERR(err))
10596     + goto out;
10597     +
10598     + /*
10599     + * We create whiteouts if (1) there was an error unlinking the main
10600     + * file; (2) there is a lower priority file with the same name
10601     + * (dbopaque); (3) the branch holding the file is not the last
10602     + * (rightmost) branch. The last rule is an optimization to avoid
10603     + * creating all those whiteouts if there's no chance they'd be
10604     + * masking any lower-priority branch, or when unionfs is used
10605     + * with only one branch (using only one branch, while odd, is still
10606     + * possible).
10607     + */
10608     + if (err) {
10609     + if (dbstart(dentry) == 0)
10610     + goto out;
10611     + err = create_whiteout(dentry, dbstart(dentry) - 1);
10612     + } else if (dbopaque(dentry) != -1) {
10613     + err = create_whiteout(dentry, dbopaque(dentry));
10614     + } else if (dbstart(dentry) < sbend(dentry->d_sb)) {
10615     + err = create_whiteout(dentry, dbstart(dentry));
10616     + }
10617     +
10618     +out:
10619     + if (!err)
10620     + inode_dec_link_count(dentry->d_inode);
10621     +
10622     + /* We don't want to leave negative leftover dentries for revalidate. */
10623     + if (!err && (dbopaque(dentry) != -1))
10624     + update_bstart(dentry);
10625     +
10626     + return err;
10627     +}
10628     +
10629     +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10630     +{
10631     + int err = 0;
10632     + struct inode *inode = dentry->d_inode;
10633     +
10634     + BUG_ON(S_ISDIR(inode->i_mode));
10635     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10636     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10637     +
10638     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10639     + err = -ESTALE;
10640     + goto out;
10641     + }
10642     + unionfs_check_dentry(dentry);
10643     +
10644     + err = unionfs_unlink_whiteout(dir, dentry);
10645     + /* call d_drop so the system "forgets" about us */
10646     + if (!err) {
10647     + unionfs_postcopyup_release(dentry);
10648     + if (inode->i_nlink == 0) {
10649     + /* drop lower inodes */
10650     + iput(unionfs_lower_inode(inode));
10651     + unionfs_set_lower_inode(inode, NULL);
10652     + ibstart(inode) = ibend(inode) = -1;
10653     + }
10654     + d_drop(dentry);
10655     + /*
10656     + * if unlink/whiteout succeeded, parent dir mtime has
10657     + * changed
10658     + */
10659     + unionfs_copy_attr_times(dir);
10660     + }
10661     +
10662     +out:
10663     + if (!err) {
10664     + unionfs_check_dentry(dentry);
10665     + unionfs_check_inode(dir);
10666     + }
10667     + unionfs_unlock_dentry(dentry);
10668     + unionfs_read_unlock(dentry->d_sb);
10669     + return err;
10670     +}
10671     +
10672     +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10673     + struct unionfs_dir_state *namelist)
10674     +{
10675     + int err;
10676     + struct dentry *lower_dentry;
10677     + struct dentry *lower_dir_dentry = NULL;
10678     +
10679     + /* Here we need to remove whiteout entries. */
10680     + err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10681     + if (err)
10682     + goto out;
10683     +
10684     + lower_dentry = unionfs_lower_dentry(dentry);
10685     +
10686     + lower_dir_dentry = lock_parent(lower_dentry);
10687     +
10688     + /* avoid destroying the lower inode if the file is in use */
10689     + dget(lower_dentry);
10690     + err = is_robranch(dentry);
10691     + if (!err) {
10692     + /* see Documentation/filesystems/unionfs/issues.txt */
10693     + lockdep_off();
10694     + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10695     + lockdep_on();
10696     + }
10697     + dput(lower_dentry);
10698     +
10699     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10700     + /* propagate number of hard-links */
10701     + dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10702     +
10703     +out:
10704     + if (lower_dir_dentry)
10705     + unlock_dir(lower_dir_dentry);
10706     + return err;
10707     +}
10708     +
10709     +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10710     +{
10711     + int err = 0;
10712     + struct unionfs_dir_state *namelist = NULL;
10713     + int dstart, dend;
10714     +
10715     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10716     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10717     +
10718     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10719     + err = -ESTALE;
10720     + goto out;
10721     + }
10722     + unionfs_check_dentry(dentry);
10723     +
10724     + /* check if this unionfs directory is empty or not */
10725     + err = check_empty(dentry, &namelist);
10726     + if (err)
10727     + goto out;
10728     +
10729     + err = unionfs_rmdir_first(dir, dentry, namelist);
10730     + dstart = dbstart(dentry);
10731     + dend = dbend(dentry);
10732     + /*
10733     + * We create a whiteout for the directory if there was an error to
10734     + * rmdir the first directory entry in the union. Otherwise, we
10735     + * create a whiteout only if there is a chance that a lower
10736     + * priority branch might also have the same named directory. IOW,
10737     + * if there is not another same-named directory at a lower priority
10738     + * branch, then we don't need to create a whiteout for it.
10739     + */
10740     + if (!err) {
10741     + if (dstart < dend)
10742     + err = create_whiteout(dentry, dstart);
10743     + } else {
10744     + int new_err;
10745     +
10746     + if (dstart == 0)
10747     + goto out;
10748     +
10749     + /* exit if the error returned was NOT -EROFS */
10750     + if (!IS_COPYUP_ERR(err))
10751     + goto out;
10752     +
10753     + new_err = create_whiteout(dentry, dstart - 1);
10754     + if (new_err != -EEXIST)
10755     + err = new_err;
10756     + }
10757     +
10758     +out:
10759     + /*
10760     + * Drop references to lower dentry/inode so storage space for them
10761     + * can be reclaimed. Then, call d_drop so the system "forgets"
10762     + * about us.
10763     + */
10764     + if (!err) {
10765     + struct inode *inode = dentry->d_inode;
10766     + BUG_ON(!inode);
10767     + iput(unionfs_lower_inode_idx(inode, dstart));
10768     + unionfs_set_lower_inode_idx(inode, dstart, NULL);
10769     + dput(unionfs_lower_dentry_idx(dentry, dstart));
10770     + unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10771     + /*
10772     + * If the last directory is unlinked, then mark ibstart/ibend
10773     + * as -1 (to maintain the invariant that if there are no
10774     + * lower objects, then branch index start and end are set to
10775     + * -1).
10776     + */
10777     + if (!unionfs_lower_inode_idx(inode, dstart) &&
10778     + !unionfs_lower_inode_idx(inode, dend))
10779     + ibstart(inode) = ibend(inode) = -1;
10780     + d_drop(dentry);
10781     + }
10782     +
10783     + if (namelist)
10784     + free_rdstate(namelist);
10785     +
10786     + unionfs_unlock_dentry(dentry);
10787     + unionfs_read_unlock(dentry->d_sb);
10788     + return err;
10789     +}
10790     diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
10791     new file mode 100644
10792     index 0000000..8001c65
10793     --- /dev/null
10794     +++ b/fs/unionfs/xattr.c
10795     @@ -0,0 +1,153 @@
10796     +/*
10797     + * Copyright (c) 2003-2007 Erez Zadok
10798     + * Copyright (c) 2003-2006 Charles P. Wright
10799     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10800     + * Copyright (c) 2005-2006 Junjiro Okajima
10801     + * Copyright (c) 2005 Arun M. Krishnakumar
10802     + * Copyright (c) 2004-2006 David P. Quigley
10803     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10804     + * Copyright (c) 2003 Puja Gupta
10805     + * Copyright (c) 2003 Harikesavan Krishnan
10806     + * Copyright (c) 2003-2007 Stony Brook University
10807     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10808     + *
10809     + * This program is free software; you can redistribute it and/or modify
10810     + * it under the terms of the GNU General Public License version 2 as
10811     + * published by the Free Software Foundation.
10812     + */
10813     +
10814     +#include "union.h"
10815     +
10816     +/* This is lifted from fs/xattr.c */
10817     +void *unionfs_xattr_alloc(size_t size, size_t limit)
10818     +{
10819     + void *ptr;
10820     +
10821     + if (size > limit)
10822     + return ERR_PTR(-E2BIG);
10823     +
10824     + if (!size) /* size request, no buffer is needed */
10825     + return NULL;
10826     +
10827     + ptr = kmalloc(size, GFP_KERNEL);
10828     + if (unlikely(!ptr))
10829     + return ERR_PTR(-ENOMEM);
10830     + return ptr;
10831     +}
10832     +
10833     +/*
10834     + * BKL held by caller.
10835     + * dentry->d_inode->i_mutex locked
10836     + */
10837     +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
10838     + size_t size)
10839     +{
10840     + struct dentry *lower_dentry = NULL;
10841     + int err = -EOPNOTSUPP;
10842     +
10843     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10844     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10845     +
10846     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10847     + err = -ESTALE;
10848     + goto out;
10849     + }
10850     +
10851     + lower_dentry = unionfs_lower_dentry(dentry);
10852     +
10853     + err = vfs_getxattr(lower_dentry, (char *) name, value, size);
10854     +
10855     +out:
10856     + unionfs_check_dentry(dentry);
10857     + unionfs_unlock_dentry(dentry);
10858     + unionfs_read_unlock(dentry->d_sb);
10859     + return err;
10860     +}
10861     +
10862     +/*
10863     + * BKL held by caller.
10864     + * dentry->d_inode->i_mutex locked
10865     + */
10866     +int unionfs_setxattr(struct dentry *dentry, const char *name,
10867     + const void *value, size_t size, int flags)
10868     +{
10869     + struct dentry *lower_dentry = NULL;
10870     + int err = -EOPNOTSUPP;
10871     +
10872     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10873     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10874     +
10875     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10876     + err = -ESTALE;
10877     + goto out;
10878     + }
10879     +
10880     + lower_dentry = unionfs_lower_dentry(dentry);
10881     +
10882     + err = vfs_setxattr(lower_dentry, (char *) name, (void *) value,
10883     + size, flags);
10884     +
10885     +out:
10886     + unionfs_check_dentry(dentry);
10887     + unionfs_unlock_dentry(dentry);
10888     + unionfs_read_unlock(dentry->d_sb);
10889     + return err;
10890     +}
10891     +
10892     +/*
10893     + * BKL held by caller.
10894     + * dentry->d_inode->i_mutex locked
10895     + */
10896     +int unionfs_removexattr(struct dentry *dentry, const char *name)
10897     +{
10898     + struct dentry *lower_dentry = NULL;
10899     + int err = -EOPNOTSUPP;
10900     +
10901     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10902     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10903     +
10904     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10905     + err = -ESTALE;
10906     + goto out;
10907     + }
10908     +
10909     + lower_dentry = unionfs_lower_dentry(dentry);
10910     +
10911     + err = vfs_removexattr(lower_dentry, (char *) name);
10912     +
10913     +out:
10914     + unionfs_check_dentry(dentry);
10915     + unionfs_unlock_dentry(dentry);
10916     + unionfs_read_unlock(dentry->d_sb);
10917     + return err;
10918     +}
10919     +
10920     +/*
10921     + * BKL held by caller.
10922     + * dentry->d_inode->i_mutex locked
10923     + */
10924     +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
10925     +{
10926     + struct dentry *lower_dentry = NULL;
10927     + int err = -EOPNOTSUPP;
10928     + char *encoded_list = NULL;
10929     +
10930     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10931     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10932     +
10933     + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10934     + err = -ESTALE;
10935     + goto out;
10936     + }
10937     +
10938     + lower_dentry = unionfs_lower_dentry(dentry);
10939     +
10940     + encoded_list = list;
10941     + err = vfs_listxattr(lower_dentry, encoded_list, size);
10942     +
10943     +out:
10944     + unionfs_check_dentry(dentry);
10945     + unionfs_unlock_dentry(dentry);
10946     + unionfs_read_unlock(dentry->d_sb);
10947     + return err;
10948     +}
10949     diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
10950     index bb516ce..6b52faf 100644
10951     --- a/include/linux/fs_stack.h
10952     +++ b/include/linux/fs_stack.h
10953     @@ -1,17 +1,28 @@
10954     +/*
10955     + * Copyright (c) 2006-2007 Erez Zadok
10956     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
10957     + * Copyright (c) 2006-2007 Stony Brook University
10958     + * Copyright (c) 2006-2007 The Research Foundation of SUNY
10959     + *
10960     + * This program is free software; you can redistribute it and/or modify
10961     + * it under the terms of the GNU General Public License version 2 as
10962     + * published by the Free Software Foundation.
10963     + */
10964     +
10965     #ifndef _LINUX_FS_STACK_H
10966     #define _LINUX_FS_STACK_H
10967    
10968     -/* This file defines generic functions used primarily by stackable
10969     +/*
10970     + * This file defines generic functions used primarily by stackable
10971     * filesystems; none of these functions require i_mutex to be held.
10972     */
10973    
10974     #include <linux/fs.h>
10975    
10976     /* externs for fs/stack.c */
10977     -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
10978     - int (*get_nlinks)(struct inode *));
10979     -
10980     -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
10981     +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
10982     +extern void fsstack_copy_inode_size(struct inode *dst,
10983     + const struct inode *src);
10984    
10985     /* inlines */
10986     static inline void fsstack_copy_attr_atime(struct inode *dest,
10987     diff --git a/include/linux/magic.h b/include/linux/magic.h
10988     index 1fa0c2c..67043ed 100644
10989     --- a/include/linux/magic.h
10990     +++ b/include/linux/magic.h
10991     @@ -35,6 +35,8 @@
10992     #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
10993     #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
10994    
10995     +#define UNIONFS_SUPER_MAGIC 0xf15f083d
10996     +
10997     #define SMB_SUPER_MAGIC 0x517B
10998     #define USBDEVICE_SUPER_MAGIC 0x9fa2
10999     #define CGROUP_SUPER_MAGIC 0x27e0eb
11000     diff --git a/include/linux/namei.h b/include/linux/namei.h
11001     index 4cb4f8d..63f16d9 100644
11002     --- a/include/linux/namei.h
11003     +++ b/include/linux/namei.h
11004     @@ -3,6 +3,7 @@
11005    
11006     #include <linux/dcache.h>
11007     #include <linux/linkage.h>
11008     +#include <linux/mount.h>
11009    
11010     struct vfsmount;
11011    
11012     @@ -100,4 +101,16 @@ static inline char *nd_get_link(struct nameidata *nd)
11013     return nd->saved_names[nd->depth];
11014     }
11015    
11016     +static inline void pathget(struct path *path)
11017     +{
11018     + mntget(path->mnt);
11019     + dget(path->dentry);
11020     +}
11021     +
11022     +static inline void pathput(struct path *path)
11023     +{
11024     + dput(path->dentry);
11025     + mntput(path->mnt);
11026     +}
11027     +
11028     #endif /* _LINUX_NAMEI_H */
11029     diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
11030     new file mode 100644
11031     index 0000000..a467de0
11032     --- /dev/null
11033     +++ b/include/linux/union_fs.h
11034     @@ -0,0 +1,22 @@
11035     +/*
11036     + * Copyright (c) 2003-2007 Erez Zadok
11037     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11038     + * Copyright (c) 2003-2007 Stony Brook University
11039     + * Copyright (c) 2003-2007 The Research Foundation of SUNY
11040     + *
11041     + * This program is free software; you can redistribute it and/or modify
11042     + * it under the terms of the GNU General Public License version 2 as
11043     + * published by the Free Software Foundation.
11044     + */
11045     +
11046     +#ifndef _LINUX_UNION_FS_H
11047     +#define _LINUX_UNION_FS_H
11048     +
11049     +/*
11050     + * DEFINITIONS FOR USER AND KERNEL CODE:
11051     + */
11052     +# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
11053     +# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
11054     +
11055     +#endif /* _LINUX_UNION_FS_H */
11056     +