Magellan Linux

Annotation of /trunk/kernel26-magellan/patches-2.6.35-r3/0153-2.6.35.4-unionfs-2.5.5.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1136 - (hide annotations) (download)
Sat Sep 18 11:01:49 2010 UTC (13 years, 8 months ago) by niro
File size: 335964 byte(s)
-2.6.35-magellan-r3; added patch to fix CVE-2010-3301
1 niro 1136 diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
2     index 4303614..5ade4a8 100644
3     --- a/Documentation/filesystems/00-INDEX
4     +++ b/Documentation/filesystems/00-INDEX
5     @@ -112,6 +112,8 @@ udf.txt
6     - info and mount options for the UDF filesystem.
7     ufs.txt
8     - info on the ufs filesystem.
9     +unionfs/
10     + - info on the unionfs filesystem
11     vfat.txt
12     - info on using the VFAT filesystem used in Windows NT and Windows 95
13     vfs.txt
14     diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
15     new file mode 100644
16     index 0000000..96fdf67
17     --- /dev/null
18     +++ b/Documentation/filesystems/unionfs/00-INDEX
19     @@ -0,0 +1,10 @@
20     +00-INDEX
21     + - this file.
22     +concepts.txt
23     + - A brief introduction of concepts.
24     +issues.txt
25     + - A summary of known issues with unionfs.
26     +rename.txt
27     + - Information regarding rename operations.
28     +usage.txt
29     + - Usage information and examples.
30     diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
31     new file mode 100644
32     index 0000000..b853788
33     --- /dev/null
34     +++ b/Documentation/filesystems/unionfs/concepts.txt
35     @@ -0,0 +1,287 @@
36     +Unionfs 2.x CONCEPTS:
37     +=====================
38     +
39     +This file describes the concepts needed by a namespace unification file
40     +system.
41     +
42     +
43     +Branch Priority:
44     +================
45     +
46     +Each branch is assigned a unique priority - starting from 0 (highest
47     +priority). No two branches can have the same priority.
48     +
49     +
50     +Branch Mode:
51     +============
52     +
53     +Each branch is assigned a mode - read-write or read-only. This allows
54     +directories on media mounted read-write to be used in a read-only manner.
55     +
56     +
57     +Whiteouts:
58     +==========
59     +
60     +A whiteout removes a file name from the namespace. Whiteouts are needed when
61     +one attempts to remove a file on a read-only branch.
62     +
63     +Suppose we have a two-branch union, where branch 0 is read-write and branch
64     +1 is read-only. And a file 'foo' on branch 1:
65     +
66     +./b0/
67     +./b1/
68     +./b1/foo
69     +
70     +The unified view would simply be:
71     +
72     +./union/
73     +./union/foo
74     +
75     +Since 'foo' is stored on a read-only branch, it cannot be removed. A
76     +whiteout is used to remove the name 'foo' from the unified namespace. Again,
77     +since branch 1 is read-only, the whiteout cannot be created there. So, we
78     +try on a higher priority (lower numerically) branch and create the whiteout
79     +there.
80     +
81     +./b0/
82     +./b0/.wh.foo
83     +./b1/
84     +./b1/foo
85     +
86     +Later, when Unionfs traverses branches (due to lookup or readdir), it
87     +eliminates 'foo' from the namespace (as well as the whiteout itself.)
88     +
89     +
90     +Opaque Directories:
91     +===================
92     +
93     +Assume we have a unionfs mount comprising two branches. Branch 0 is
94     +empty; branch 1 has the directory /a and file /a/f. Let's say we mount a
95     +union of branch 0 as read-write and branch 1 as read-only. Now, let's say
96     +we try to perform the following operation in the union:
97     +
98     + rm -fr a
99     +
100     +Because branch 1 is not writable, we cannot physically remove the file /a/f
101     +or the directory /a. So instead, we will create a whiteout in branch 0
102     +named /.wh.a, masking out the name "a" from branch 1. Next, let's say we
103     +try to create a directory named "a" as follows:
104     +
105     + mkdir a
106     +
107     +Because we have a whiteout for "a" already, Unionfs behaves as if "a"
108     +doesn't exist, and thus will delete the whiteout and replace it with an
109     +actual directory named "a".
110     +
111     +The problem now is that if you try to "ls" in the union, Unionfs will
112     +perform its normal directory name unification, for *all* directories named
113     +"a" in all branches. This will cause the file /a/f from branch 1 to
114     +re-appear in the union's namespace, which violates Unix semantics.
115     +
116     +To avoid this problem, we have a different form of whiteouts for
117     +directories, called "opaque directories" (same as BSD Union Mount does).
118     +Whenever we replace a whiteout with a directory, that directory is marked as
119     +opaque. In Unionfs 2.x, it means that we create a file named
120     +/a/.wh.__dir_opaque in branch 0, after having created directory /a there.
121     +When unionfs notices that a directory is opaque, it stops all namespace
122     +operations (including merging readdir contents) at that opaque directory.
123     +This prevents re-exposing names from masked out directories.
124     +
125     +
126     +Duplicate Elimination:
127     +======================
128     +
129     +It is possible for files on different branches to have the same name.
130     +Unionfs then has to select which instance of the file to show to the user.
131     +Given the fact that each branch has a priority associated with it, the
132     +simplest solution is to take the instance from the highest priority
133     +(numerically lowest value) and "hide" the others.
134     +
135     +
136     +Unlinking:
137     +==========
138     +
139     +Unlink operation on non-directory instances is optimized to remove the
140     +maximum possible objects in case multiple underlying branches have the same
141     +file name. The unlink operation will first try to delete file instances
142     +from highest priority branch and then move further to delete from remaining
143     +branches in order of their decreasing priority. Consider a case (F..D..F),
144     +where F is a file and D is a directory of the same name; here, some
145     +intermediate branch could have an empty directory instance with the same
146     +name, so this operation also tries to delete this directory instance and
147     +proceed further to delete from next possible lower priority branch. The
148     +unionfs unlink operation will smoothly delete the files with same name from
149     +all possible underlying branches. If some error occurs, it creates
150     +whiteout in highest priority branch that will hide file instance in rest of
151     +the branches. An error could occur either if an unlink operation in any of
152     +the underlying branch failed or if a branch has no write permission.
153     +
154     +This unlinking policy is known as "delete all" and it has the benefit of
155     +overall reducing the number of inodes used by duplicate files, and further
156     +reducing the total number of inodes consumed by whiteouts. The cost is of
157     +extra processing, but testing shows this extra processing is well worth the
158     +savings.
159     +
160     +
161     +Copyup:
162     +=======
163     +
164     +When a change is made to the contents of a file's data or meta-data, they
165     +have to be stored somewhere. The best way is to create a copy of the
166     +original file on a branch that is writable, and then redirect the write
167     +through to this copy. The copy must be made on a higher priority branch so
168     +that lookup and readdir return this newer "version" of the file rather than
169     +the original (see duplicate elimination).
170     +
171     +An entire unionfs mount can be read-only or read-write. If it's read-only,
172     +then none of the branches will be written to, even if some of the branches
173     +are physically writeable. If the unionfs mount is read-write, then the
174     +leftmost (highest priority) branch must be writeable (for copyup to take
175     +place); the remaining branches can be any mix of read-write and read-only.
176     +
177     +In a writeable mount, unionfs will create new files/dir in the leftmost
178     +branch. If one tries to modify a file in a read-only branch/media, unionfs
179     +will copyup the file to the leftmost branch and modify it there. If you try
180     +to modify a file from a writeable branch which is not the leftmost branch,
181     +then unionfs will modify it in that branch; this is useful if you, say,
182     +unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
183     +changes to specific package files to remain logically in the directory where
184     +they came from.
185     +
186     +Cache Coherency:
187     +================
188     +
189     +Unionfs users often want to be able to modify files and directories directly
190     +on the lower branches, and have those changes be visible at the Unionfs
191     +level. This means that data (e.g., pages) and meta-data (dentries, inodes,
192     +open files, etc.) have to be synchronized between the upper and lower
193     +layers. In other words, the newest changes from a layer below have to be
194     +propagated to the Unionfs layer above. If the two layers are not in sync, a
195     +cache incoherency ensues, which could lead to application failures and even
196     +oopses. The Linux kernel, however, has a rather limited set of mechanisms
197     +to ensure this inter-layer cache coherency---so Unionfs has to do most of
198     +the hard work on its own.
199     +
200     +Maintaining Invariants:
201     +
202     +The way Unionfs ensures cache coherency is as follows. At each entry point
203     +to a Unionfs file system method, we call a utility function to validate the
204     +primary objects of this method. Generally, we call unionfs_file_revalidate
205     +on open files, and __unionfs_d_revalidate_chain on dentries (which also
206     +validates inodes). These utility functions check to see whether the upper
207     +Unionfs object is in sync with any of the lower objects that it represents.
208     +The checks we perform include whether the Unionfs superblock has a newer
209     +generation number, or if any of the lower objects mtime's or ctime's are
210     +newer. (Note: generation numbers change when branch-management commands are
211     +issued, so in a way, maintaining cache coherency is also very important for
212     +branch-management.) If indeed we determine that any Unionfs object is no
213     +longer in sync with its lower counterparts, then we rebuild that object
214     +similarly to how we do so for branch-management.
215     +
216     +While rebuilding Unionfs's objects, we also purge any page mappings and
217     +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
218     +ensure that Unionfs will re-get the newer data from the lower branches. We
219     +perform this purging only if the Unionfs operation in question is a reading
220     +operation; if Unionfs is performing a data writing operation (e.g., ->write,
221     +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
222     +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
223     +considered more authoritative anyway, as they are newer and will overwrite
224     +any lower pages.
225     +
226     +Unionfs maintains the following important invariant regarding mtime's,
227     +ctime's, and atime's: the upper inode object's times are the max() of all of
228     +the lower ones. For non-directory objects, there's only one object below,
229     +so the mapping is simple; for directory objects, there could be multiple
230     +lower objects and we have to sync up with the newest one of all the lower
231     +ones. This invariant is important to maintain, especially for directories
232     +(besides, we need this to be POSIX compliant). A union could comprise
233     +multiple writable branches, each of which could change. If we don't reflect
234     +the newest possible mtime/ctime, some applications could fail. For example,
235     +NFSv2/v3 exports check for newer directory mtimes on the server to determine
236     +if the client-side attribute cache should be purged.
237     +
238     +To maintain these important invariants, of course, Unionfs carefully
239     +synchronizes upper and lower times in various places. For example, if we
240     +copy-up a file to a top-level branch, the parent directory where the file
241     +was copied up to will now have a new mtime: so after a successful copy-up,
242     +we sync up with the new top-level branch's parent directory mtime.
243     +
244     +Implementation:
245     +
246     +This cache-coherency implementation is efficient because it defers any
247     +synchronizing between the upper and lower layers until absolutely needed.
248     +Consider the example of a common situation where users perform a lot of lower
249     +changes, such as untarring a whole package. While these take place,
250     +typically the user doesn't access the files via Unionfs; only after the
251     +lower changes are done, does the user try to access the lower files. With
252     +our cache-coherency implementation, the entirety of the changes to the lower
253     +branches will not result in a single CPU cycle spent at the Unionfs level
254     +until the user invokes a system call that goes through Unionfs.
255     +
256     +We have considered two alternate cache-coherency designs. (1) Using the
257     +dentry/inode notify functionality to register interest in finding out about
258     +any lower changes. This is a somewhat limited and also a heavy-handed
259     +approach which could result in many notifications to the Unionfs layer upon
260     +each small change at the lower layer (imagine a file being modified multiple
261     +times in rapid succession). (2) Rewriting the VFS to support explicit
262     +callbacks from lower objects to upper objects. We began exploring such an
263     +implementation, but found it to be very complicated--it would have resulted
264     +in massive VFS/MM changes which are unlikely to be accepted by the LKML
265     +community. We therefore believe that our current cache-coherency design and
266     +implementation represent the best approach at this time.
267     +
268     +Limitations:
269     +
270     +Our implementation works in that as long as a user process will have caused
271     +Unionfs to be called, directly or indirectly, even to just do
272     +->d_revalidate; then we will have purged the current Unionfs data and the
273     +process will see the new data. For example, a process that continually
274     +re-reads the same file's data will see the NEW data as soon as the lower
275     +file had changed, upon the next read(2) syscall (even if the file is still
276     +open!) However, this doesn't work when the process re-reads the open file's
277     +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
278     +it). Once we respond to ->readpage(s), then the kernel maps the page into
279     +the process's address space and there doesn't appear to be a way to force
280     +the kernel to invalidate those pages/mappings, and force the process to
281     +re-issue ->readpage. If there's a way to invalidate active mappings and
282     +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
283     +the trick).
284     +
285     +Our current Unionfs code has to perform many file-revalidation calls. It
286     +would be really nice if the VFS would export an optional file system hook
287     +->file_revalidate (similarly to dentry->d_revalidate) that will be called
288     +before each VFS op that has a "struct file" in it.
289     +
290     +Certain file systems have micro-second granularity (or better) for inode
291     +times, and asynchronous actions could cause those times to change with some
292     +small delay. In such cases, Unionfs may see a changed inode time that only
293     +differs by a tiny fraction of a second: such a change may be a false
294     +positive indication that the lower object has changed, whereas if unionfs
295     +waits a little longer, that false indication will not be seen. (These false
296     +positives are harmless, because they would at most cause unionfs to
297     +re-validate an object that may need no revalidation, and print a debugging
298     +message that clutters the console/logs.) Therefore, to minimize the chances
299     +of these situations, we delay the detection of changed times by a small
300     +factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
301     +seconds, as does NFS). This means that we will detect the change, only a
302     +couple of seconds later, if indeed the time change persists in the lower
303     +file object. This delayed detection has an added performance benefit: we
304     +reduce the number of times that unionfs has to revalidate objects, in case
305     +there's a lot of concurrent activity on both the upper and lower objects,
306     +for the same file(s). Lastly, this delayed time attribute detection is
307     +similar to how NFS clients operate (e.g., acregmin).
308     +
309     +Finally, there is no way currently in Linux to prevent lower directories
310     +from being moved around (i.e., topology changes); there's no way to prevent
311     +modifications to directory sub-trees of whole file systems which are mounted
312     +read-write. It is therefore possible for in-flight operations in unionfs to
313     +take place, while a lower directory is being moved around. Therefore, if
314     +you try to, say, create a new file in a directory through unionfs, while the
315     +directory is being moved around directly, then the new file may get created
316     +in the new location where that directory was moved to. This is a somewhat
317     +similar behaviour in NFS: an NFS client could be creating a new file while
318     +the NFS server is moving the directory around; the file will get successfully
319     +created in the new location. (The one exception in unionfs is that if the
320     +branch is marked read-only by unionfs, then a copyup will take place.)
321     +
322     +For more information, see <http://unionfs.filesystems.org/>.
323     diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
324     new file mode 100644
325     index 0000000..f4b7e7e
326     --- /dev/null
327     +++ b/Documentation/filesystems/unionfs/issues.txt
328     @@ -0,0 +1,28 @@
329     +KNOWN Unionfs 2.x ISSUES:
330     +=========================
331     +
332     +1. Unionfs should not use lookup_one_len() on the underlying f/s as it
333     + confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
334     + lower file-system, this eliminates part of the problem. The remaining
335     + calls to lookup_one_len may need to be changed to pass an intent. We are
336     + currently introducing VFS changes to fs/namei.c's do_path_lookup() to
337     + allow proper file lookup and opening in stackable file systems.
338     +
339     +2. Lockdep (a debugging feature) isn't aware of stacking, and so it
340     + incorrectly complains about locking problems. The problem boils down to
341     + this: Lockdep considers all objects of a certain type to be in the same
342     + class, for example, all inodes. Lockdep doesn't like to see a lock held
343     + on two inodes within the same task, and warns that it could lead to a
344     + deadlock. However, stackable file systems do precisely that: they lock
345     + an upper object, and then a lower object, in a strict order to avoid
346     + locking problems; in addition, Unionfs, as a fan-out file system, may
347     + have to lock several lower inodes. We are currently looking into Lockdep
348     + to see how to make it aware of stackable file systems. For now, we
349     + temporarily disable lockdep when calling vfs methods on lower objects,
350     + but only for those places where lockdep complained. While this solution
351     + may seem unclean, it is not without precedent: other places in the kernel
352     + also do similar temporary disabling, of course after carefully having
353     + checked that it is the right thing to do. Anyway, if you get any warnings
354     + from Lockdep, please report them to the Unionfs maintainers.
355     +
356     +For more information, see <http://unionfs.filesystems.org/>.
357     diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
358     new file mode 100644
359     index 0000000..e20bb82
360     --- /dev/null
361     +++ b/Documentation/filesystems/unionfs/rename.txt
362     @@ -0,0 +1,31 @@
363     +Rename is a complex beast. The following table shows which rename(2) operations
364     +should succeed and which should fail.
365     +
366     +o: success
367     +E: error (either unionfs or vfs)
368     +X: EXDEV
369     +
370     +none = file does not exist
371     +file = file is a file
372     +dir = file is an empty directory
373     +child= file is a non-empty directory
374     +wh = file is a directory containing only whiteouts; this makes it logically
375     + empty
376     +
377     + none file dir child wh
378     +file o o E E E
379     +dir o E o E o
380     +child X E X E X
381     +wh o E o E o
382     +
383     +
384     +Renaming directories:
385     +=====================
386     +
387     +Whenever an empty (either physically or logically) directory is being renamed,
388     +the following sequence of events should take place:
389     +
390     +1) Remove whiteouts from both source and destination directory
391     +2) Rename source to destination
392     +3) Make destination opaque to prevent anything under it from showing up
393     +
394     diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
395     new file mode 100644
396     index 0000000..1adde69
397     --- /dev/null
398     +++ b/Documentation/filesystems/unionfs/usage.txt
399     @@ -0,0 +1,134 @@
400     +Unionfs is a stackable unification file system, which can appear to merge
401     +the contents of several directories (branches), while keeping their physical
402     +content separate. Unionfs is useful for unified source tree management,
403     +merged contents of split CD-ROM, merged separate software package
404     +directories, data grids, and more. Unionfs allows any mix of read-only and
405     +read-write branches, as well as insertion and deletion of branches anywhere
406     +in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
407     +duplicates, partial-error conditions, and more.
408     +
409     +GENERAL SYNTAX
410     +==============
411     +
412     +# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
413     +
414     +OPTIONS can be any legal combination of:
415     +
416     +- ro # mount file system read-only
417     +- rw # mount file system read-write
418     +- remount # remount the file system (see Branch Management below)
419     +- incgen # increment generation no. (see Cache Consistency below)
420     +
421     +BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
422     +option, or (2) a list of individual branch manipulation commands, combined
423     +with the "remount" option, and is further described in the "Branch
424     +Management" section below.
425     +
426     +The syntax for the "dirs=" mount option is:
427     +
428     + dirs=branch[=ro|=rw][:...]
429     +
430     +The "dirs=" option takes a colon-delimited list of directories to compose
431     +the union, with an optional branch mode for each of those directories.
432     +Directories that come earlier (specified first, on the left) in the list
433     +have a higher precedence than those which come later. Additionally,
434     +read-only or read-write permissions of the branch can be specified by
435     +appending =ro or =rw (default) to each directory. See the Copyup section in
436     +concepts.txt, for a description of Unionfs's behavior when mixing read-only
437     +and read-write branches and mounts.
438     +
439     +Syntax:
440     +
441     + dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
442     +
443     +Example:
444     +
445     + dirs=/writable_branch=rw:/read-only_branch=ro
446     +
447     +
448     +BRANCH MANAGEMENT
449     +=================
450     +
451     +Once you mount your union for the first time, using the "dirs=" option, you
452     +can then change the union's overall mode or reconfigure the branches, using
453     +the remount option, as follows.
454     +
455     +To downgrade a union from read-write to read-only:
456     +
457     +# mount -t unionfs -o remount,ro none MOUNTPOINT
458     +
459     +To upgrade a union from read-only to read-write:
460     +
461     +# mount -t unionfs -o remount,rw none MOUNTPOINT
462     +
463     +To delete a branch /foo, regardless where it is in the current union:
464     +
465     +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
466     +
467     +To insert (add) a branch /foo before /bar:
468     +
469     +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
470     +
471     +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
472     +
473     +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
474     +
475     +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
476     +new highest-priority branch), you can use the above syntax, or use a short
477     +hand version as follows:
478     +
479     +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
480     +
481     +To append a branch to the very end (new lowest-priority branch):
482     +
483     +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
484     +
485     +To append a branch to the very end (new lowest-priority branch), in
486     +read-only mode:
487     +
488     +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
489     +
490     +Finally, to change the mode of one existing branch, say /foo, from read-only
491     +to read-write, and change /bar from read-write to read-only:
492     +
493     +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
494     +
495     +Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
496     +then Unionfs won't have any writable place for copyups to take place.
497     +Moreover, the VFS can get confused when it tries to modify something in a
498     +file system mounted read-write, but isn't permitted to write to it.
499     +Instead, you should set the whole union as readonly, as described above.
500     +If, however, you must set the leftmost branch as readonly, perhaps so you
501     +can get a snapshot of it at a point in time, then you should insert a new
502     +writable top-level branch, and mark the one you want as readonly. This can
503     +be accomplished as follows, assuming that /foo is your current leftmost
504     +branch:
505     +
506     +# mount -t tmpfs -o size=NNN /new
507     +# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
508     +<do what you want safely in /foo>
509     +# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
510     +<check if there's anything in /new you want to preserve>
511     +# umount /new
512     +
513     +CACHE CONSISTENCY
514     +=================
515     +
516     +If you modify any file on any of the lower branches directly, while there is
517     +a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
518     +to purge its caches and re-get the objects. To do that, you have to
519     +increment the generation number of the superblock using the following
520     +command:
521     +
522     +# mount -t unionfs -o remount,incgen none MOUNTPOINT
523     +
524     +Note that the older way of incrementing the generation number using an
525     +ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general
526     +are not encouraged. Plus, an ioctl is a per-file concept, whereas the
527     +generation number is a per-file-system concept. Worse, such an ioctl
528     +requires an open file, which then has to be invalidated by the very nature
529     +of the generation number increase (read: the old generation increase ioctl
530     +was pretty racy).
531     +
532     +
533     +For more information, see <http://unionfs.filesystems.org/>.
534     diff --git a/MAINTAINERS b/MAINTAINERS
535     index 02f75fc..8c5efe7 100644
536     --- a/MAINTAINERS
537     +++ b/MAINTAINERS
538     @@ -5766,6 +5766,14 @@ F: Documentation/cdrom/
539     F: drivers/cdrom/cdrom.c
540     F: include/linux/cdrom.h
541    
542     +UNIONFS
543     +P: Erez Zadok
544     +M: ezk@cs.sunysb.edu
545     +L: unionfs@filesystems.org
546     +W: http://unionfs.filesystems.org/
547     +T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
548     +S: Maintained
549     +
550     UNSORTED BLOCK IMAGES (UBI)
551     M: Artem Bityutskiy <dedekind1@gmail.com>
552     W: http://www.linux-mtd.infradead.org/
553     diff --git a/fs/Kconfig b/fs/Kconfig
554     index 5f85b59..7b4501b 100644
555     --- a/fs/Kconfig
556     +++ b/fs/Kconfig
557     @@ -169,6 +169,7 @@ if MISC_FILESYSTEMS
558     source "fs/adfs/Kconfig"
559     source "fs/affs/Kconfig"
560     source "fs/ecryptfs/Kconfig"
561     +source "fs/unionfs/Kconfig"
562     source "fs/hfs/Kconfig"
563     source "fs/hfsplus/Kconfig"
564     source "fs/befs/Kconfig"
565     diff --git a/fs/Makefile b/fs/Makefile
566     index e6ec1d3..787332e 100644
567     --- a/fs/Makefile
568     +++ b/fs/Makefile
569     @@ -84,6 +84,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/
570     obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
571     obj-$(CONFIG_HFS_FS) += hfs/
572     obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
573     +obj-$(CONFIG_UNION_FS) += unionfs/
574     obj-$(CONFIG_VXFS_FS) += freevxfs/
575     obj-$(CONFIG_NFS_FS) += nfs/
576     obj-$(CONFIG_EXPORTFS) += exportfs/
577     diff --git a/fs/namei.c b/fs/namei.c
578     index 868d0cb..b5e09e1 100644
579     --- a/fs/namei.c
580     +++ b/fs/namei.c
581     @@ -386,6 +386,7 @@ void release_open_intent(struct nameidata *nd)
582     else
583     fput(nd->intent.open.file);
584     }
585     +EXPORT_SYMBOL_GPL(release_open_intent);
586    
587     static inline struct dentry *
588     do_revalidate(struct dentry *dentry, struct nameidata *nd)
589     diff --git a/fs/splice.c b/fs/splice.c
590     index efdbfec..1ff6bca 100644
591     --- a/fs/splice.c
592     +++ b/fs/splice.c
593     @@ -1104,8 +1104,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
594     /*
595     * Attempt to initiate a splice from pipe to file.
596     */
597     -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
598     - loff_t *ppos, size_t len, unsigned int flags)
599     +long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
600     + loff_t *ppos, size_t len, unsigned int flags)
601     {
602     ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
603     loff_t *, size_t, unsigned int);
604     @@ -1128,13 +1128,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
605    
606     return splice_write(pipe, out, ppos, len, flags);
607     }
608     +EXPORT_SYMBOL_GPL(vfs_splice_from);
609    
610     /*
611     * Attempt to initiate a splice from a file to a pipe.
612     */
613     -static long do_splice_to(struct file *in, loff_t *ppos,
614     - struct pipe_inode_info *pipe, size_t len,
615     - unsigned int flags)
616     +long vfs_splice_to(struct file *in, loff_t *ppos,
617     + struct pipe_inode_info *pipe, size_t len,
618     + unsigned int flags)
619     {
620     ssize_t (*splice_read)(struct file *, loff_t *,
621     struct pipe_inode_info *, size_t, unsigned int);
622     @@ -1154,6 +1155,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
623    
624     return splice_read(in, ppos, pipe, len, flags);
625     }
626     +EXPORT_SYMBOL_GPL(vfs_splice_to);
627    
628     /**
629     * splice_direct_to_actor - splices data directly between two non-pipes
630     @@ -1223,7 +1225,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
631     size_t read_len;
632     loff_t pos = sd->pos, prev_pos = pos;
633    
634     - ret = do_splice_to(in, &pos, pipe, len, flags);
635     + ret = vfs_splice_to(in, &pos, pipe, len, flags);
636     if (unlikely(ret <= 0))
637     goto out_release;
638    
639     @@ -1282,8 +1284,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
640     {
641     struct file *file = sd->u.file;
642    
643     - return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
644     - sd->flags);
645     + return vfs_splice_from(pipe, file, &file->f_pos, sd->total_len,
646     + sd->flags);
647     }
648    
649     /**
650     @@ -1380,7 +1382,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
651     } else
652     off = &out->f_pos;
653    
654     - ret = do_splice_from(ipipe, out, off, len, flags);
655     + ret = vfs_splice_from(ipipe, out, off, len, flags);
656    
657     if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
658     ret = -EFAULT;
659     @@ -1400,7 +1402,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
660     } else
661     off = &in->f_pos;
662    
663     - ret = do_splice_to(in, off, opipe, len, flags);
664     + ret = vfs_splice_to(in, off, opipe, len, flags);
665    
666     if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
667     ret = -EFAULT;
668     diff --git a/fs/stack.c b/fs/stack.c
669     index 4a6f7f4..7eeef12 100644
670     --- a/fs/stack.c
671     +++ b/fs/stack.c
672     @@ -1,8 +1,20 @@
673     +/*
674     + * Copyright (c) 2006-2009 Erez Zadok
675     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
676     + * Copyright (c) 2006-2009 Stony Brook University
677     + * Copyright (c) 2006-2009 The Research Foundation of SUNY
678     + *
679     + * This program is free software; you can redistribute it and/or modify
680     + * it under the terms of the GNU General Public License version 2 as
681     + * published by the Free Software Foundation.
682     + */
683     +
684     #include <linux/module.h>
685     #include <linux/fs.h>
686     #include <linux/fs_stack.h>
687    
688     -/* does _NOT_ require i_mutex to be held.
689     +/*
690     + * does _NOT_ require i_mutex to be held.
691     *
692     * This function cannot be inlined since i_size_{read,write} is rather
693     * heavy-weight on 32-bit systems
694     diff --git a/fs/unionfs/Kconfig b/fs/unionfs/Kconfig
695     new file mode 100644
696     index 0000000..f3c1ac4
697     --- /dev/null
698     +++ b/fs/unionfs/Kconfig
699     @@ -0,0 +1,24 @@
700     +config UNION_FS
701     + tristate "Union file system (EXPERIMENTAL)"
702     + depends on EXPERIMENTAL
703     + help
704     + Unionfs is a stackable unification file system, which appears to
705     + merge the contents of several directories (branches), while keeping
706     + their physical content separate.
707     +
708     + See <http://unionfs.filesystems.org> for details
709     +
710     +config UNION_FS_XATTR
711     + bool "Unionfs extended attributes"
712     + depends on UNION_FS
713     + help
714     + Extended attributes are name:value pairs associated with inodes by
715     + the kernel or by users (see the attr(5) manual page).
716     +
717     + If unsure, say N.
718     +
719     +config UNION_FS_DEBUG
720     + bool "Debug Unionfs"
721     + depends on UNION_FS
722     + help
723     + If you say Y here, you can turn on debugging output from Unionfs.
724     diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
725     new file mode 100644
726     index 0000000..c30b01c
727     --- /dev/null
728     +++ b/fs/unionfs/Makefile
729     @@ -0,0 +1,17 @@
730     +UNIONFS_VERSION="2.5.5 (for 2.6.35.1)"
731     +
732     +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
733     +
734     +obj-$(CONFIG_UNION_FS) += unionfs.o
735     +
736     +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
737     + rdstate.o copyup.o dirhelper.o rename.o unlink.o \
738     + lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o
739     +
740     +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
741     +
742     +unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
743     +
744     +ifeq ($(CONFIG_UNION_FS_DEBUG),y)
745     +EXTRA_CFLAGS += -DDEBUG
746     +endif
747     diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
748     new file mode 100644
749     index 0000000..740c4ad
750     --- /dev/null
751     +++ b/fs/unionfs/commonfops.c
752     @@ -0,0 +1,896 @@
753     +/*
754     + * Copyright (c) 2003-2010 Erez Zadok
755     + * Copyright (c) 2003-2006 Charles P. Wright
756     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
757     + * Copyright (c) 2005-2006 Junjiro Okajima
758     + * Copyright (c) 2005 Arun M. Krishnakumar
759     + * Copyright (c) 2004-2006 David P. Quigley
760     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
761     + * Copyright (c) 2003 Puja Gupta
762     + * Copyright (c) 2003 Harikesavan Krishnan
763     + * Copyright (c) 2003-2010 Stony Brook University
764     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
765     + *
766     + * This program is free software; you can redistribute it and/or modify
767     + * it under the terms of the GNU General Public License version 2 as
768     + * published by the Free Software Foundation.
769     + */
770     +
771     +#include "union.h"
772     +
773     +/*
774     + * 1) Copyup the file
775     + * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously
776     + * stolen from NFS's silly rename
777     + */
778     +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
779     + struct dentry *parent, int bstart, int bindex)
780     +{
781     + static unsigned int counter;
782     + const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
783     + const int countersize = sizeof(counter) * 2;
784     + const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
785     + char name[nlen + 1];
786     + int err;
787     + struct dentry *tmp_dentry = NULL;
788     + struct dentry *lower_dentry;
789     + struct dentry *lower_dir_dentry = NULL;
790     +
791     + lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
792     +
793     + sprintf(name, ".unionfs%*.*lx",
794     + i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
795     +
796     + /*
797     + * Loop, looking for an unused temp name to copyup to.
798     + *
799     + * It's somewhat silly that we look for a free temp name in the
800     + * source branch (bstart) instead of the dest branch (bindex), where
801     + * the final name will be created. We _will_ catch it if somehow
802     + * the name exists in the dest branch, but it'd be nice to catch it
803     + * sooner than later.
804     + */
805     +retry:
806     + tmp_dentry = NULL;
807     + do {
808     + char *suffix = name + nlen - countersize;
809     +
810     + dput(tmp_dentry);
811     + counter++;
812     + sprintf(suffix, "%*.*x", countersize, countersize, counter);
813     +
814     + pr_debug("unionfs: trying to rename %s to %s\n",
815     + dentry->d_name.name, name);
816     +
817     + tmp_dentry = lookup_lck_len(name, lower_dentry->d_parent,
818     + nlen);
819     + if (IS_ERR(tmp_dentry)) {
820     + err = PTR_ERR(tmp_dentry);
821     + goto out;
822     + }
823     + } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
824     + dput(tmp_dentry);
825     +
826     + err = copyup_named_file(parent->d_inode, file, name, bstart, bindex,
827     + i_size_read(file->f_path.dentry->d_inode));
828     + if (err) {
829     + if (unlikely(err == -EEXIST))
830     + goto retry;
831     + goto out;
832     + }
833     +
834     + /* bring it to the same state as an unlinked file */
835     + lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
836     + if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
837     + atomic_inc(&lower_dentry->d_inode->i_count);
838     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
839     + lower_dentry->d_inode);
840     + }
841     + lower_dir_dentry = lock_parent(lower_dentry);
842     + err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
843     + unlock_dir(lower_dir_dentry);
844     +
845     +out:
846     + if (!err)
847     + unionfs_check_dentry(dentry);
848     + return err;
849     +}
850     +
851     +/*
852     + * put all references held by upper struct file and free lower file pointer
853     + * array
854     + */
855     +static void cleanup_file(struct file *file)
856     +{
857     + int bindex, bstart, bend;
858     + struct file **lower_files;
859     + struct file *lower_file;
860     + struct super_block *sb = file->f_path.dentry->d_sb;
861     +
862     + lower_files = UNIONFS_F(file)->lower_files;
863     + bstart = fbstart(file);
864     + bend = fbend(file);
865     +
866     + for (bindex = bstart; bindex <= bend; bindex++) {
867     + int i; /* holds (possibly) updated branch index */
868     + int old_bid;
869     +
870     + lower_file = unionfs_lower_file_idx(file, bindex);
871     + if (!lower_file)
872     + continue;
873     +
874     + /*
875     + * Find new index of matching branch with an open
876     + * file, since branches could have been added or
877     + * deleted causing the one with open files to shift.
878     + */
879     + old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
880     + i = branch_id_to_idx(sb, old_bid);
881     + if (unlikely(i < 0)) {
882     + printk(KERN_ERR "unionfs: no superblock for "
883     + "file %p\n", file);
884     + continue;
885     + }
886     +
887     + /* decrement count of open files */
888     + branchput(sb, i);
889     + /*
890     + * fput will perform an mntput for us on the correct branch.
891     + * Although we're using the file's old branch configuration,
892     + * bindex, which is the old index, correctly points to the
893     + * right branch in the file's branch list. In other words,
894     + * we're going to mntput the correct branch even if branches
895     + * have been added/removed.
896     + */
897     + fput(lower_file);
898     + UNIONFS_F(file)->lower_files[bindex] = NULL;
899     + UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
900     + }
901     +
902     + UNIONFS_F(file)->lower_files = NULL;
903     + kfree(lower_files);
904     + kfree(UNIONFS_F(file)->saved_branch_ids);
905     + /* set to NULL because caller needs to know if to kfree on error */
906     + UNIONFS_F(file)->saved_branch_ids = NULL;
907     +}
908     +
909     +/* open all lower files for a given file */
910     +static int open_all_files(struct file *file)
911     +{
912     + int bindex, bstart, bend, err = 0;
913     + struct file *lower_file;
914     + struct dentry *lower_dentry;
915     + struct dentry *dentry = file->f_path.dentry;
916     + struct super_block *sb = dentry->d_sb;
917     +
918     + bstart = dbstart(dentry);
919     + bend = dbend(dentry);
920     +
921     + for (bindex = bstart; bindex <= bend; bindex++) {
922     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
923     + if (!lower_dentry)
924     + continue;
925     +
926     + dget(lower_dentry);
927     + unionfs_mntget(dentry, bindex);
928     + branchget(sb, bindex);
929     +
930     + lower_file =
931     + dentry_open(lower_dentry,
932     + unionfs_lower_mnt_idx(dentry, bindex),
933     + file->f_flags, current_cred());
934     + if (IS_ERR(lower_file)) {
935     + branchput(sb, bindex);
936     + err = PTR_ERR(lower_file);
937     + goto out;
938     + } else {
939     + unionfs_set_lower_file_idx(file, bindex, lower_file);
940     + }
941     + }
942     +out:
943     + return err;
944     +}
945     +
946     +/* open the highest priority file for a given upper file */
947     +static int open_highest_file(struct file *file, bool willwrite)
948     +{
949     + int bindex, bstart, bend, err = 0;
950     + struct file *lower_file;
951     + struct dentry *lower_dentry;
952     + struct dentry *dentry = file->f_path.dentry;
953     + struct dentry *parent = dget_parent(dentry);
954     + struct inode *parent_inode = parent->d_inode;
955     + struct super_block *sb = dentry->d_sb;
956     +
957     + bstart = dbstart(dentry);
958     + bend = dbend(dentry);
959     +
960     + lower_dentry = unionfs_lower_dentry(dentry);
961     + if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
962     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
963     + err = copyup_file(parent_inode, file, bstart, bindex,
964     + i_size_read(dentry->d_inode));
965     + if (!err)
966     + break;
967     + }
968     + atomic_set(&UNIONFS_F(file)->generation,
969     + atomic_read(&UNIONFS_I(dentry->d_inode)->
970     + generation));
971     + goto out;
972     + }
973     +
974     + dget(lower_dentry);
975     + unionfs_mntget(dentry, bstart);
976     + lower_file = dentry_open(lower_dentry,
977     + unionfs_lower_mnt_idx(dentry, bstart),
978     + file->f_flags, current_cred());
979     + if (IS_ERR(lower_file)) {
980     + err = PTR_ERR(lower_file);
981     + goto out;
982     + }
983     + branchget(sb, bstart);
984     + unionfs_set_lower_file(file, lower_file);
985     + /* Fix up the position. */
986     + lower_file->f_pos = file->f_pos;
987     +
988     + memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
989     +out:
990     + dput(parent);
991     + return err;
992     +}
993     +
994     +/* perform a delayed copyup of a read-write file on a read-only branch */
995     +static int do_delayed_copyup(struct file *file, struct dentry *parent)
996     +{
997     + int bindex, bstart, bend, err = 0;
998     + struct dentry *dentry = file->f_path.dentry;
999     + struct inode *parent_inode = parent->d_inode;
1000     +
1001     + bstart = fbstart(file);
1002     + bend = fbend(file);
1003     +
1004     + BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
1005     +
1006     + unionfs_check_file(file);
1007     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
1008     + if (!d_deleted(dentry))
1009     + err = copyup_file(parent_inode, file, bstart,
1010     + bindex,
1011     + i_size_read(dentry->d_inode));
1012     + else
1013     + err = copyup_deleted_file(file, dentry, parent,
1014     + bstart, bindex);
1015     + /* if succeeded, set lower open-file flags and break */
1016     + if (!err) {
1017     + struct file *lower_file;
1018     + lower_file = unionfs_lower_file_idx(file, bindex);
1019     + lower_file->f_flags = file->f_flags;
1020     + break;
1021     + }
1022     + }
1023     + if (err || (bstart <= fbstart(file)))
1024     + goto out;
1025     + bend = fbend(file);
1026     + for (bindex = bstart; bindex <= bend; bindex++) {
1027     + if (unionfs_lower_file_idx(file, bindex)) {
1028     + branchput(dentry->d_sb, bindex);
1029     + fput(unionfs_lower_file_idx(file, bindex));
1030     + unionfs_set_lower_file_idx(file, bindex, NULL);
1031     + }
1032     + }
1033     + path_put_lowers(dentry, bstart, bend, false);
1034     + iput_lowers(dentry->d_inode, bstart, bend, false);
1035     + /* for reg file, we only open it "once" */
1036     + fbend(file) = fbstart(file);
1037     + dbend(dentry) = dbstart(dentry);
1038     + ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1039     +
1040     +out:
1041     + unionfs_check_file(file);
1042     + return err;
1043     +}
1044     +
1045     +/*
1046     + * Helper function for unionfs_file_revalidate/locked.
1047     + * Expects dentry/parent to be locked already, and revalidated.
1048     + */
1049     +static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry,
1050     + struct dentry *parent,
1051     + struct super_block *sb, int sbgen,
1052     + int dgen, bool willwrite)
1053     +{
1054     + int fgen;
1055     + int bstart, bend, orig_brid;
1056     + int size;
1057     + int err = 0;
1058     +
1059     + fgen = atomic_read(&UNIONFS_F(file)->generation);
1060     +
1061     + /*
1062     + * There are two cases we are interested in. The first is if the
1063     + * generation is lower than the super-block. The second is if
1064     + * someone has copied up this file from underneath us, we also need
1065     + * to refresh things.
1066     + */
1067     + if (d_deleted(dentry) ||
1068     + (sbgen <= fgen &&
1069     + dbstart(dentry) == fbstart(file) &&
1070     + unionfs_lower_file(file)))
1071     + goto out_may_copyup;
1072     +
1073     + /* save orig branch ID */
1074     + orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1075     +
1076     + /* First we throw out the existing files. */
1077     + cleanup_file(file);
1078     +
1079     + /* Now we reopen the file(s) as in unionfs_open. */
1080     + bstart = fbstart(file) = dbstart(dentry);
1081     + bend = fbend(file) = dbend(dentry);
1082     +
1083     + size = sizeof(struct file *) * sbmax(sb);
1084     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1085     + if (unlikely(!UNIONFS_F(file)->lower_files)) {
1086     + err = -ENOMEM;
1087     + goto out;
1088     + }
1089     + size = sizeof(int) * sbmax(sb);
1090     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1091     + if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1092     + err = -ENOMEM;
1093     + goto out;
1094     + }
1095     +
1096     + if (S_ISDIR(dentry->d_inode->i_mode)) {
1097     + /* We need to open all the files. */
1098     + err = open_all_files(file);
1099     + if (err)
1100     + goto out;
1101     + } else {
1102     + int new_brid;
1103     + /* We only open the highest priority branch. */
1104     + err = open_highest_file(file, willwrite);
1105     + if (err)
1106     + goto out;
1107     + new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1108     + if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1109     + /*
1110     + * If we re-opened the file on a different branch
1111     + * than the original one, and this was due to a new
1112     + * branch inserted, then update the mnt counts of
1113     + * the old and new branches accordingly.
1114     + */
1115     + unionfs_mntget(dentry, bstart);
1116     + unionfs_mntput(sb->s_root,
1117     + branch_id_to_idx(sb, orig_brid));
1118     + }
1119     + /* regular files have only one open lower file */
1120     + fbend(file) = fbstart(file);
1121     + }
1122     + atomic_set(&UNIONFS_F(file)->generation,
1123     + atomic_read(&UNIONFS_I(dentry->d_inode)->generation));
1124     +
1125     +out_may_copyup:
1126     + /* Copyup on the first write to a file on a readonly branch. */
1127     + if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1128     + !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1129     + is_robranch(dentry)) {
1130     + pr_debug("unionfs: do delay copyup of \"%s\"\n",
1131     + dentry->d_name.name);
1132     + err = do_delayed_copyup(file, parent);
1133     + /* regular files have only one open lower file */
1134     + if (!err && !S_ISDIR(dentry->d_inode->i_mode))
1135     + fbend(file) = fbstart(file);
1136     + }
1137     +
1138     +out:
1139     + if (err) {
1140     + kfree(UNIONFS_F(file)->lower_files);
1141     + kfree(UNIONFS_F(file)->saved_branch_ids);
1142     + }
1143     + return err;
1144     +}
1145     +
1146     +/*
1147     + * Revalidate the struct file
1148     + * @file: file to revalidate
1149     + * @parent: parent dentry (locked by caller)
1150     + * @willwrite: true if caller may cause changes to the file; false otherwise.
1151     + * Caller must lock/unlock dentry's branch configuration.
1152     + */
1153     +int unionfs_file_revalidate(struct file *file, struct dentry *parent,
1154     + bool willwrite)
1155     +{
1156     + struct super_block *sb;
1157     + struct dentry *dentry;
1158     + int sbgen, dgen;
1159     + int err = 0;
1160     +
1161     + dentry = file->f_path.dentry;
1162     + sb = dentry->d_sb;
1163     + verify_locked(dentry);
1164     + verify_locked(parent);
1165     +
1166     + /*
1167     + * First revalidate the dentry inside struct file,
1168     + * but not unhashed dentries.
1169     + */
1170     + if (!d_deleted(dentry) &&
1171     + !__unionfs_d_revalidate(dentry, parent, willwrite)) {
1172     + err = -ESTALE;
1173     + goto out;
1174     + }
1175     +
1176     + sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1177     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1178     +
1179     + if (unlikely(sbgen > dgen)) { /* XXX: should never happen */
1180     + pr_debug("unionfs: failed to revalidate dentry (%s)\n",
1181     + dentry->d_name.name);
1182     + err = -ESTALE;
1183     + goto out;
1184     + }
1185     +
1186     + err = __unionfs_file_revalidate(file, dentry, parent, sb,
1187     + sbgen, dgen, willwrite);
1188     +out:
1189     + return err;
1190     +}
1191     +
1192     +/* unionfs_open helper function: open a directory */
1193     +static int __open_dir(struct inode *inode, struct file *file)
1194     +{
1195     + struct dentry *lower_dentry;
1196     + struct file *lower_file;
1197     + int bindex, bstart, bend;
1198     + struct vfsmount *mnt;
1199     +
1200     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1201     + bend = fbend(file) = dbend(file->f_path.dentry);
1202     +
1203     + for (bindex = bstart; bindex <= bend; bindex++) {
1204     + lower_dentry =
1205     + unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1206     + if (!lower_dentry)
1207     + continue;
1208     +
1209     + dget(lower_dentry);
1210     + unionfs_mntget(file->f_path.dentry, bindex);
1211     + mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex);
1212     + lower_file = dentry_open(lower_dentry, mnt, file->f_flags,
1213     + current_cred());
1214     + if (IS_ERR(lower_file))
1215     + return PTR_ERR(lower_file);
1216     +
1217     + unionfs_set_lower_file_idx(file, bindex, lower_file);
1218     +
1219     + /*
1220     + * The branchget goes after the open, because otherwise
1221     + * we would miss the reference on release.
1222     + */
1223     + branchget(inode->i_sb, bindex);
1224     + }
1225     +
1226     + return 0;
1227     +}
1228     +
1229     +/* unionfs_open helper function: open a file */
1230     +static int __open_file(struct inode *inode, struct file *file,
1231     + struct dentry *parent)
1232     +{
1233     + struct dentry *lower_dentry;
1234     + struct file *lower_file;
1235     + int lower_flags;
1236     + int bindex, bstart, bend;
1237     +
1238     + lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1239     + lower_flags = file->f_flags;
1240     +
1241     + bstart = fbstart(file) = dbstart(file->f_path.dentry);
1242     + bend = fbend(file) = dbend(file->f_path.dentry);
1243     +
1244     + /*
1245     + * check for the permission for lower file. If the error is
1246     + * COPYUP_ERR, copyup the file.
1247     + */
1248     + if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1249     + /*
1250     + * if the open will change the file, copy it up; otherwise
1251     + * defer it.
1252     + */
1253     + if (lower_flags & O_TRUNC) {
1254     + int size = 0;
1255     + int err = -EROFS;
1256     +
1257     + /* copyup the file */
1258     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
1259     + err = copyup_file(parent->d_inode, file,
1260     + bstart, bindex, size);
1261     + if (!err)
1262     + break;
1263     + }
1264     + return err;
1265     + } else {
1266     + /*
1267     + * turn off writeable flags, to force delayed copyup
1268     + * by caller.
1269     + */
1270     + lower_flags &= ~(OPEN_WRITE_FLAGS);
1271     + }
1272     + }
1273     +
1274     + dget(lower_dentry);
1275     +
1276     + /*
1277     + * dentry_open will decrement mnt refcnt if err.
1278     + * otherwise fput() will do an mntput() for us upon file close.
1279     + */
1280     + unionfs_mntget(file->f_path.dentry, bstart);
1281     + lower_file =
1282     + dentry_open(lower_dentry,
1283     + unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1284     + lower_flags, current_cred());
1285     + if (IS_ERR(lower_file))
1286     + return PTR_ERR(lower_file);
1287     +
1288     + unionfs_set_lower_file(file, lower_file);
1289     + branchget(inode->i_sb, bstart);
1290     +
1291     + return 0;
1292     +}
1293     +
1294     +int unionfs_open(struct inode *inode, struct file *file)
1295     +{
1296     + int err = 0;
1297     + struct file *lower_file = NULL;
1298     + struct dentry *dentry = file->f_path.dentry;
1299     + struct dentry *parent;
1300     + int bindex = 0, bstart = 0, bend = 0;
1301     + int size;
1302     + int valid = 0;
1303     +
1304     + unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1305     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1306     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1307     +
1308     + /* don't open unhashed/deleted files */
1309     + if (d_deleted(dentry)) {
1310     + err = -ENOENT;
1311     + goto out_nofree;
1312     + }
1313     +
1314     + /* XXX: should I change 'false' below to the 'willwrite' flag? */
1315     + valid = __unionfs_d_revalidate(dentry, parent, false);
1316     + if (unlikely(!valid)) {
1317     + err = -ESTALE;
1318     + goto out_nofree;
1319     + }
1320     +
1321     + file->private_data =
1322     + kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1323     + if (unlikely(!UNIONFS_F(file))) {
1324     + err = -ENOMEM;
1325     + goto out_nofree;
1326     + }
1327     + fbstart(file) = -1;
1328     + fbend(file) = -1;
1329     + atomic_set(&UNIONFS_F(file)->generation,
1330     + atomic_read(&UNIONFS_I(inode)->generation));
1331     +
1332     + size = sizeof(struct file *) * sbmax(inode->i_sb);
1333     + UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1334     + if (unlikely(!UNIONFS_F(file)->lower_files)) {
1335     + err = -ENOMEM;
1336     + goto out;
1337     + }
1338     + size = sizeof(int) * sbmax(inode->i_sb);
1339     + UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1340     + if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1341     + err = -ENOMEM;
1342     + goto out;
1343     + }
1344     +
1345     + bstart = fbstart(file) = dbstart(dentry);
1346     + bend = fbend(file) = dbend(dentry);
1347     +
1348     + /*
1349     + * open all directories and make the unionfs file struct point to
1350     + * these lower file structs
1351     + */
1352     + if (S_ISDIR(inode->i_mode))
1353     + err = __open_dir(inode, file); /* open a dir */
1354     + else
1355     + err = __open_file(inode, file, parent); /* open a file */
1356     +
1357     + /* freeing the allocated resources, and fput the opened files */
1358     + if (err) {
1359     + for (bindex = bstart; bindex <= bend; bindex++) {
1360     + lower_file = unionfs_lower_file_idx(file, bindex);
1361     + if (!lower_file)
1362     + continue;
1363     +
1364     + branchput(dentry->d_sb, bindex);
1365     + /* fput calls dput for lower_dentry */
1366     + fput(lower_file);
1367     + }
1368     + }
1369     +
1370     +out:
1371     + if (err) {
1372     + kfree(UNIONFS_F(file)->lower_files);
1373     + kfree(UNIONFS_F(file)->saved_branch_ids);
1374     + kfree(UNIONFS_F(file));
1375     + }
1376     +out_nofree:
1377     + if (!err) {
1378     + unionfs_postcopyup_setmnt(dentry);
1379     + unionfs_copy_attr_times(inode);
1380     + unionfs_check_file(file);
1381     + unionfs_check_inode(inode);
1382     + }
1383     + unionfs_unlock_dentry(dentry);
1384     + unionfs_unlock_parent(dentry, parent);
1385     + unionfs_read_unlock(inode->i_sb);
1386     + return err;
1387     +}
1388     +
1389     +/*
1390     + * release all lower object references & free the file info structure
1391     + *
1392     + * No need to grab sb info's rwsem.
1393     + */
1394     +int unionfs_file_release(struct inode *inode, struct file *file)
1395     +{
1396     + struct file *lower_file = NULL;
1397     + struct unionfs_file_info *fileinfo;
1398     + struct unionfs_inode_info *inodeinfo;
1399     + struct super_block *sb = inode->i_sb;
1400     + struct dentry *dentry = file->f_path.dentry;
1401     + struct dentry *parent;
1402     + int bindex, bstart, bend;
1403     + int fgen, err = 0;
1404     +
1405     + /*
1406     + * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
1407     + * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
1408     + * has been causing false positives in file system stacking layers.
1409     + * In particular, our ->mmap is called after sys_mmap2 already holds
1410     + * mmap_sem, then we lock our own mutexes; but earlier, it's
1411     + * possible for lockdep to have locked our mutexes first, and then
1412     + * we call a lower ->readdir which could call might_fault. The
1413     + * different ordering of the locks is what lockdep complains about
1414     + * -- unnecessarily. Therefore, we have no choice but to tell
1415     + * lockdep to temporarily turn off lockdep here. Note: the comments
1416     + * inside might_sleep also suggest that it would have been
1417     + * nicer to only annotate paths that need that might_lock_read.
1418     + */
1419     + lockdep_off();
1420     + unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1421     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1422     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1423     +
1424     + /*
1425     + * We try to revalidate, but the VFS ignores return values
1426     + * from file->release, so we must always try to succeed here,
1427     + * including to do the kfree and dput below. So if revalidation
1428     + * failed, all we can do is print some message and keep going.
1429     + */
1430     + err = unionfs_file_revalidate(file, parent,
1431     + UNIONFS_F(file)->wrote_to_file);
1432     + if (!err)
1433     + unionfs_check_file(file);
1434     + fileinfo = UNIONFS_F(file);
1435     + BUG_ON(file->f_path.dentry->d_inode != inode);
1436     + inodeinfo = UNIONFS_I(inode);
1437     +
1438     + /* fput all the lower files */
1439     + fgen = atomic_read(&fileinfo->generation);
1440     + bstart = fbstart(file);
1441     + bend = fbend(file);
1442     +
1443     + for (bindex = bstart; bindex <= bend; bindex++) {
1444     + lower_file = unionfs_lower_file_idx(file, bindex);
1445     +
1446     + if (lower_file) {
1447     + unionfs_set_lower_file_idx(file, bindex, NULL);
1448     + fput(lower_file);
1449     + branchput(sb, bindex);
1450     + }
1451     +
1452     + /* if there are no more refs to the dentry, dput it */
1453     + if (d_deleted(dentry)) {
1454     + dput(unionfs_lower_dentry_idx(dentry, bindex));
1455     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1456     + }
1457     + }
1458     +
1459     + kfree(fileinfo->lower_files);
1460     + kfree(fileinfo->saved_branch_ids);
1461     +
1462     + if (fileinfo->rdstate) {
1463     + fileinfo->rdstate->access = jiffies;
1464     + spin_lock(&inodeinfo->rdlock);
1465     + inodeinfo->rdcount++;
1466     + list_add_tail(&fileinfo->rdstate->cache,
1467     + &inodeinfo->readdircache);
1468     + mark_inode_dirty(inode);
1469     + spin_unlock(&inodeinfo->rdlock);
1470     + fileinfo->rdstate = NULL;
1471     + }
1472     + kfree(fileinfo);
1473     +
1474     + unionfs_unlock_dentry(dentry);
1475     + unionfs_unlock_parent(dentry, parent);
1476     + unionfs_read_unlock(sb);
1477     + lockdep_on();
1478     + return err;
1479     +}
1480     +
1481     +/* pass the ioctl to the lower file; -ENOTTY if it has no ioctl method */
1482     +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1483     +{
1484     + struct file *lower_file;
1485     + int err;
1486     +
1487     + lower_file = unionfs_lower_file(file);
1488     +
1489     + err = -ENOTTY;
1490     + if (!lower_file || !lower_file->f_op)
1491     + goto out;
1492     + if (lower_file->f_op->unlocked_ioctl) {
1493     + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1494     + } else if (lower_file->f_op->ioctl) {
1495     + lock_kernel(); /* the legacy ->ioctl method is called under lock_kernel() */
1496     + err = lower_file->f_op->ioctl(
1497     + lower_file->f_path.dentry->d_inode,
1498     + lower_file, cmd, arg);
1499     + unlock_kernel();
1500     + }
1501     +
1502     +out:
1503     + return err;
1504     +}
1505     +
1506     +/*
1507     + * Return to user-space the branch indices containing the file in question.
1508     + * Returns bend (as seen after the partial lookup) or a negative errno.
1509     + * We use an fd_set, so the number of branches we can report is limited
1510     + * to FD_SETSIZE, which is currently 1024 - plenty for most people
1511     + */
1512     +static int unionfs_ioctl_queryfile(struct file *file, struct dentry *parent,
1513     + unsigned int cmd, unsigned long arg)
1514     +{
1515     + int err = 0;
1516     + fd_set branchlist;
1517     + int bstart = 0, bend = 0, bindex = 0;
1518     + int orig_bstart, orig_bend;
1519     + struct dentry *dentry, *lower_dentry;
1520     + struct vfsmount *mnt;
1521     +
1522     + dentry = file->f_path.dentry;
1523     + orig_bstart = dbstart(dentry);
1524     + orig_bend = dbend(dentry);
1525     + err = unionfs_partial_lookup(dentry, parent);
1526     + if (err)
1527     + goto out;
1528     + bstart = dbstart(dentry);
1529     + bend = dbend(dentry);
1530     +
1531     + FD_ZERO(&branchlist);
1532     +
1533     + for (bindex = bstart; bindex <= bend; bindex++) {
1534     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1535     + if (!lower_dentry)
1536     + continue;
1537     + if (likely(lower_dentry->d_inode))
1538     + FD_SET(bindex, &branchlist);
1539     + /* purge lower objects that lie outside the original bstart..bend range */
1540     + if (bindex < orig_bstart || bindex > orig_bend) {
1541     + dput(lower_dentry);
1542     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1543     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1544     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1545     + NULL);
1546     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
1547     + if (!mnt)
1548     + continue;
1549     + unionfs_mntput(dentry, bindex);
1550     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1551     + }
1552     + }
1553     + /* restore the original start/end offsets of the dentry and its inode */
1554     + dbstart(dentry) = orig_bstart;
1555     + dbend(dentry) = orig_bend;
1556     + ibstart(dentry->d_inode) = orig_bstart;
1557     + ibend(dentry->d_inode) = orig_bend;
1558     +
1559     + err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1560     + if (unlikely(err))
1561     + err = -EFAULT;
1562     +
1563     +out:
1564     + return err < 0 ? err : bend;
1565     +}
1566     +
1567     +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1568     +{
1569     + long err;
1570     + struct dentry *dentry = file->f_path.dentry;
1571     + struct dentry *parent;
1572     +
1573     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1574     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1575     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1576     +
1577     + err = unionfs_file_revalidate(file, parent, true);
1578     + if (unlikely(err))
1579     + goto out;
1580     +
1581     + /* handle unionfs-specific commands here; pass everything else down */
1582     + switch (cmd) {
1583     + case UNIONFS_IOCTL_INCGEN:
1584     + /* Deprecated: only logs a notice and fails with -ENOSYS */
1585     + pr_info("unionfs: incgen ioctl deprecated; "
1586     + "use \"-o remount,incgen\"\n");
1587     + err = -ENOSYS;
1588     + break;
1589     +
1590     + case UNIONFS_IOCTL_QUERYFILE:
1591     + /* Return list of branches containing the given file */
1592     + err = unionfs_ioctl_queryfile(file, parent, cmd, arg);
1593     + break;
1594     +
1595     + default:
1596     + /* pass the ioctl down */
1597     + err = do_ioctl(file, cmd, arg);
1598     + break;
1599     + }
1600     +
1601     +out:
1602     + unionfs_check_file(file);
1603     + unionfs_unlock_dentry(dentry);
1604     + unionfs_unlock_parent(dentry, parent);
1605     + unionfs_read_unlock(dentry->d_sb);
1606     + return err;
1607     +}
1608     +
1609     +int unionfs_flush(struct file *file, fl_owner_t id)
1610     +{
1611     + int err = 0;
1612     + struct file *lower_file = NULL;
1613     + struct dentry *dentry = file->f_path.dentry;
1614     + struct dentry *parent;
1615     + int bindex, bstart, bend;
1616     +
1617     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1618     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1619     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1620     +
1621     + err = unionfs_file_revalidate(file, parent,
1622     + UNIONFS_F(file)->wrote_to_file);
1623     + if (unlikely(err))
1624     + goto out;
1625     + unionfs_check_file(file);
1626     +
1627     + bstart = fbstart(file);
1628     + bend = fbend(file);
1629     + for (bindex = bstart; bindex <= bend; bindex++) {
1630     + lower_file = unionfs_lower_file_idx(file, bindex);
1631     +
1632     + if (lower_file && lower_file->f_op &&
1633     + lower_file->f_op->flush) {
1634     + err = lower_file->f_op->flush(lower_file, id);
1635     + if (err)
1636     + goto out; /* stop at the first lower ->flush failure */
1637     + }
1638     +
1639     + }
1640     +
1641     +out:
1642     + if (!err)
1643     + unionfs_check_file(file);
1644     + unionfs_unlock_dentry(dentry);
1645     + unionfs_unlock_parent(dentry, parent);
1646     + unionfs_read_unlock(dentry->d_sb);
1647     + return err;
1648     +}
1649     diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1650     new file mode 100644
1651     index 0000000..bba3a75
1652     --- /dev/null
1653     +++ b/fs/unionfs/copyup.c
1654     @@ -0,0 +1,896 @@
1655     +/*
1656     + * Copyright (c) 2003-2010 Erez Zadok
1657     + * Copyright (c) 2003-2006 Charles P. Wright
1658     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1659     + * Copyright (c) 2005-2006 Junjiro Okajima
1660     + * Copyright (c) 2005 Arun M. Krishnakumar
1661     + * Copyright (c) 2004-2006 David P. Quigley
1662     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1663     + * Copyright (c) 2003 Puja Gupta
1664     + * Copyright (c) 2003 Harikesavan Krishnan
1665     + * Copyright (c) 2003-2010 Stony Brook University
1666     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
1667     + *
1668     + * This program is free software; you can redistribute it and/or modify
1669     + * it under the terms of the GNU General Public License version 2 as
1670     + * published by the Free Software Foundation.
1671     + */
1672     +
1673     +#include "union.h"
1674     +
1675     +/*
1676     + * For detailed explanation of copyup see:
1677     + * Documentation/filesystems/unionfs/concepts.txt
1678     + */
1679     +
1680     +#ifdef CONFIG_UNION_FS_XATTR
1681     +/* copyup all extended attrs for a given dentry; unsupported xattrs are not an error */
1682     +static int copyup_xattrs(struct dentry *old_lower_dentry,
1683     + struct dentry *new_lower_dentry)
1684     +{
1685     + int err = 0;
1686     + ssize_t list_size = -1;
1687     + char *name_list = NULL;
1688     + char *attr_value = NULL;
1689     + char *name_list_buf = NULL;
1690     +
1691     + /* query the actual size of the xattr list */
1692     + list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1693     + if (list_size <= 0) {
1694     + err = list_size;
1695     + goto out;
1696     + }
1697     +
1698     + /* allocate space for the actual list */
1699     + name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1700     + if (unlikely(!name_list || IS_ERR(name_list))) {
1701     + err = PTR_ERR(name_list);
1702     + goto out;
1703     + }
1704     +
1705     + name_list_buf = name_list; /* save for kfree at end */
1706     +
1707     + /* now get the actual xattr list of the source file */
1708     + list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1709     + if (list_size <= 0) {
1710     + err = list_size;
1711     + goto out;
1712     + }
1713     +
1714     + /* allocate space to hold each xattr's value */
1715     + attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1716     + if (unlikely(!attr_value || IS_ERR(attr_value))) {
1717     + err = PTR_ERR(attr_value); /* not name_list, which is valid here */
1718     + goto out;
1719     + }
1720     +
1721     + /* in a loop, get and set each xattr from src to dst file */
1722     + while (*name_list) {
1723     + ssize_t size;
1724     +
1725     + /* Lock here since vfs_getxattr doesn't lock for us */
1726     + mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1727     + size = vfs_getxattr(old_lower_dentry, name_list,
1728     + attr_value, XATTR_SIZE_MAX);
1729     + mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1730     + if (size < 0) {
1731     + err = size;
1732     + goto out;
1733     + }
1734     + if (size > XATTR_SIZE_MAX) {
1735     + err = -E2BIG;
1736     + goto out;
1737     + }
1738     + /* Don't lock here since vfs_setxattr does it for us. */
1739     + err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1740     + size, 0);
1741     + /*
1742     + * Selinux depends on "security.*" xattrs, so to maintain
1743     + * the security of copied-up files, if Selinux is active,
1744     + * then we must copy these xattrs as well. So we need to
1745     + * temporarily get FOWNER privileges.
1746     + * XXX: move entire copyup code to SIOQ.
1747     + */
1748     + if (err == -EPERM && !capable(CAP_FOWNER)) {
1749     + const struct cred *old_creds;
1750     + struct cred *new_creds;
1751     +
1752     + new_creds = prepare_creds();
1753     + if (unlikely(!new_creds)) {
1754     + err = -ENOMEM;
1755     + goto out;
1756     + }
1757     + cap_raise(new_creds->cap_effective, CAP_FOWNER);
1758     + old_creds = override_creds(new_creds);
1759     + err = vfs_setxattr(new_lower_dentry, name_list,
1760     + attr_value, size, 0);
1761     + revert_creds(old_creds);
1762     + }
1763     + if (err < 0)
1764     + goto out;
1765     + name_list += strlen(name_list) + 1;
1766     + }
1767     +out:
1768     + unionfs_xattr_kfree(name_list_buf);
1769     + unionfs_xattr_kfree(IS_ERR(attr_value) ? NULL : attr_value);
1770     + /* Ignore if xattr isn't supported */
1771     + if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1772     + err = 0;
1773     + return err;
1774     +}
1775     +#endif /* CONFIG_UNION_FS_XATTR */
1776     +
1777     +/*
1778     + * Copy the times, ownership and mode of old_lower_dentry's inode onto
1779     + * new_lower_dentry, holding the new inode's i_mutex for notify_change.
1780     + * Handle file systems which may not support certain options. For example
1781     + * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1782     + * errors, rather than propagating them up, which results in copyup errors
1783     + * and errors returned back to users.
1784     + */
1785     +static int copyup_permissions(struct super_block *sb,
1786     + struct dentry *old_lower_dentry,
1787     + struct dentry *new_lower_dentry)
1788     +{
1789     + struct inode *i = old_lower_dentry->d_inode;
1790     + struct iattr newattrs;
1791     + int err;
1792     +
1793     + newattrs.ia_atime = i->i_atime;
1794     + newattrs.ia_mtime = i->i_mtime;
1795     + newattrs.ia_ctime = i->i_ctime;
1796     + newattrs.ia_gid = i->i_gid;
1797     + newattrs.ia_uid = i->i_uid;
1798     + newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1799     + ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1800     + ATTR_GID | ATTR_UID;
1801     + mutex_lock(&new_lower_dentry->d_inode->i_mutex);
1802     + err = notify_change(new_lower_dentry, &newattrs);
1803     + if (err)
1804     + goto out;
1805     +
1806     + /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1807     + newattrs.ia_mode = i->i_mode;
1808     + newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1809     + err = notify_change(new_lower_dentry, &newattrs);
1810     + if (err == -EOPNOTSUPP &&
1811     + S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1812     + printk(KERN_WARNING
1813     + "unionfs: changing \"%s\" symlink mode unsupported\n",
1814     + new_lower_dentry->d_name.name);
1815     + err = 0;
1816     + }
1817     +
1818     +out:
1819     + mutex_unlock(&new_lower_dentry->d_inode->i_mutex);
1820     + return err;
1821     +}
1822     +
1823     +/*
1824     + * create the new directory/symlink/device node/regular file in the new
1825     + * branch, chosen by the type of the old lower inode
1826     + * each creation goes through run_sioq(), whose result is read from args.err
1827     + * if the object being copied up is a regular file, the file is only created,
1828     + * the contents have to be copied up separately
1829     + */
1830     +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1831     + struct dentry *new_lower_dentry,
1832     + struct dentry *new_lower_parent_dentry,
1833     + char *symbuf)
1834     +{
1835     + int err = 0;
1836     + umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1837     + struct sioq_args args;
1838     +
1839     + if (S_ISDIR(old_mode)) {
1840     + args.mkdir.parent = new_lower_parent_dentry->d_inode;
1841     + args.mkdir.dentry = new_lower_dentry;
1842     + args.mkdir.mode = old_mode;
1843     +
1844     + run_sioq(__unionfs_mkdir, &args);
1845     + err = args.err;
1846     + } else if (S_ISLNK(old_mode)) {
1847     + args.symlink.parent = new_lower_parent_dentry->d_inode;
1848     + args.symlink.dentry = new_lower_dentry;
1849     + args.symlink.symbuf = symbuf;
1850     +
1851     + run_sioq(__unionfs_symlink, &args);
1852     + err = args.err;
1853     + } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1854     + S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1855     + args.mknod.parent = new_lower_parent_dentry->d_inode;
1856     + args.mknod.dentry = new_lower_dentry;
1857     + args.mknod.mode = old_mode;
1858     + args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1859     +
1860     + run_sioq(__unionfs_mknod, &args);
1861     + err = args.err;
1862     + } else if (S_ISREG(old_mode)) {
1863     + struct nameidata nd;
1864     + err = init_lower_nd(&nd, LOOKUP_CREATE);
1865     + if (unlikely(err < 0))
1866     + goto out;
1867     + args.create.nd = &nd;
1868     + args.create.parent = new_lower_parent_dentry->d_inode;
1869     + args.create.dentry = new_lower_dentry;
1870     + args.create.mode = old_mode;
1871     +
1872     + run_sioq(__unionfs_create, &args);
1873     + err = args.err;
1874     + release_lower_nd(&nd, err);
1875     + } else {
1876     + printk(KERN_CRIT "unionfs: unknown inode type %d\n",
1877     + old_mode);
1878     + BUG();
1879     + }
1880     +
1881     +out:
1882     + return err;
1883     +}
1884     +
1885     +static int __copyup_reg_data(struct dentry *dentry,
1886     + struct dentry *new_lower_dentry, int new_bindex,
1887     + struct dentry *old_lower_dentry, int old_bindex,
1888     + struct file **copyup_file, loff_t len)
1889     +{
1890     + struct super_block *sb = dentry->d_sb;
1891     + struct file *input_file;
1892     + struct file *output_file;
1893     + struct vfsmount *output_mnt;
1894     + mm_segment_t old_fs;
1895     + char *buf = NULL;
1896     + ssize_t read_bytes, write_bytes;
1897     + loff_t size;
1898     + int err = 0;
1899     +
1900     + /* open old file */
1901     + unionfs_mntget(dentry, old_bindex);
1902     + branchget(sb, old_bindex);
1903     + /* dentry_open calls dput and mntput if it returns an error */
1904     + input_file = dentry_open(old_lower_dentry,
1905     + unionfs_lower_mnt_idx(dentry, old_bindex),
1906     + O_RDONLY | O_LARGEFILE, current_cred());
1907     + if (IS_ERR(input_file)) {
1908     + dput(old_lower_dentry);
1909     + err = PTR_ERR(input_file);
1910     + goto out;
1911     + }
1912     + if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
1913     + err = -EINVAL;
1914     + goto out_close_in;
1915     + }
1916     +
1917     + /* open new file */
1918     + dget(new_lower_dentry);
1919     + output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1920     + branchget(sb, new_bindex);
1921     + output_file = dentry_open(new_lower_dentry, output_mnt,
1922     + O_RDWR | O_LARGEFILE, current_cred());
1923     + if (IS_ERR(output_file)) {
1924     + err = PTR_ERR(output_file);
1925     + goto out_close_in2;
1926     + }
1927     + if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
1928     + err = -EINVAL;
1929     + goto out_close_out;
1930     + }
1931     +
1932     + /* allocate a one-page bounce buffer for the copy loop */
1933     + buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1934     + if (unlikely(!buf)) {
1935     + err = -ENOMEM;
1936     + goto out_close_out;
1937     + }
1938     +
1939     + input_file->f_pos = 0;
1940     + output_file->f_pos = 0;
1941     +
1942     + old_fs = get_fs();
1943     + set_fs(KERNEL_DS);
1944     +
1945     + size = len;
1946     + err = 0;
1947     + do {
1948     + if (len >= PAGE_SIZE)
1949     + size = PAGE_SIZE;
1950     + else if ((len < PAGE_SIZE) && (len > 0))
1951     + size = len;
1952     +
1953     + len -= PAGE_SIZE;
1954     +
1955     + read_bytes =
1956     + input_file->f_op->read(input_file,
1957     + (char __user *)buf, size,
1958     + &input_file->f_pos);
1959     + if (read_bytes <= 0) {
1960     + err = read_bytes;
1961     + break;
1962     + }
1963     +
1964     + /* see Documentation/filesystems/unionfs/issues.txt */
1965     + lockdep_off();
1966     + write_bytes =
1967     + output_file->f_op->write(output_file,
1968     + (char __user *)buf,
1969     + read_bytes,
1970     + &output_file->f_pos);
1971     + lockdep_on();
1972     + if ((write_bytes < 0) || (write_bytes < read_bytes)) {
1973     + err = write_bytes < 0 ? write_bytes : -EIO; /* short write is a failure */
1974     + break;
1975     + }
1976     + } while ((read_bytes > 0) && (len > 0));
1977     +
1978     + set_fs(old_fs);
1979     +
1980     + kfree(buf);
1981     +
1982     + if (!err)
1983     + err = output_file->f_op->fsync(output_file, 0);
1984     +
1985     + if (err)
1986     + goto out_close_out;
1987     +
1988     + if (copyup_file) {
1989     + *copyup_file = output_file;
1990     + goto out_close_in;
1991     + }
1992     +
1993     +out_close_out:
1994     + fput(output_file);
1995     +
1996     +out_close_in2:
1997     + branchput(sb, new_bindex);
1998     +
1999     +out_close_in:
2000     + fput(input_file);
2001     +
2002     +out:
2003     + branchput(sb, old_bindex);
2004     +
2005     + return err;
2006     +}
2007     +
2008     +/*
2009     + * dput the lower references for old and new dentry, clear the lower dentry
2010     + * pointer at new_bindex, and restore the dentry's previous bstart/bend
2011     + */
2012     +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
2013     + int old_bstart, int old_bend,
2014     + struct dentry *new_lower_dentry, int new_bindex)
2015     +{
2016     + /* get rid of the lower dentry and all its traces */
2017     + unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
2018     + dbstart(dentry) = old_bstart;
2019     + dbend(dentry) = old_bend;
2020     +
2021     + dput(new_lower_dentry);
2022     + dput(old_lower_dentry);
2023     +}
2024     +
2025     +/*
2026     + * Copy up a dentry to a file of specified name.
2027     + *
2028     + * @dir: used to pull the ->i_sb to access other branches
2029     + * @dentry: the non-negative dentry whose lower_inode we should copy
2030     + * @bstart: the branch of the lower_inode to copy from
2031     + * @new_bindex: the branch to create the new file in
2032     + * @name: the name of the file to create
2033     + * @namelen: length of @name
2034     + * @copyup_file: the "struct file" to return (optional)
2035     + * @len: how many bytes to copy-up?
2036     + */
2037     +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
2038     + int new_bindex, const char *name, int namelen,
2039     + struct file **copyup_file, loff_t len)
2040     +{
2041     + struct dentry *new_lower_dentry;
2042     + struct dentry *old_lower_dentry = NULL;
2043     + struct super_block *sb;
2044     + int err = 0;
2045     + int old_bindex;
2046     + int old_bstart;
2047     + int old_bend;
2048     + struct dentry *new_lower_parent_dentry = NULL;
2049     + mm_segment_t oldfs;
2050     + char *symbuf = NULL;
2051     +
2052     + verify_locked(dentry);
2053     +
2054     + old_bindex = bstart;
2055     + old_bstart = dbstart(dentry);
2056     + old_bend = dbend(dentry);
2057     +
2058     + BUG_ON(new_bindex < 0);
2059     + BUG_ON(new_bindex >= old_bindex);
2060     +
2061     + sb = dir->i_sb;
2062     +
2063     + err = is_robranch_super(sb, new_bindex);
2064     + if (err)
2065     + goto out;
2066     +
2067     + /* Create the directory structure above this dentry. */
2068     + new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
2069     + if (IS_ERR(new_lower_dentry)) {
2070     + err = PTR_ERR(new_lower_dentry);
2071     + goto out;
2072     + }
2073     +
2074     + old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
2075     + /* we conditionally dput this old_lower_dentry at end of function */
2076     + dget(old_lower_dentry);
2077     +
2078     + /* For symlinks, we must read the link before we lock the directory. */
2079     + if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
2080     +
2081     + symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
2082     + if (unlikely(!symbuf)) {
2083     + __clear(dentry, old_lower_dentry,
2084     + old_bstart, old_bend,
2085     + new_lower_dentry, new_bindex);
2086     + err = -ENOMEM;
2087     + goto out_free;
2088     + }
2089     +
2090     + oldfs = get_fs();
2091     + set_fs(KERNEL_DS);
2092     + err = old_lower_dentry->d_inode->i_op->readlink(
2093     + old_lower_dentry,
2094     + (char __user *)symbuf,
2095     + PATH_MAX);
2096     + set_fs(oldfs);
2097     + if (err < 0) {
2098     + __clear(dentry, old_lower_dentry,
2099     + old_bstart, old_bend,
2100     + new_lower_dentry, new_bindex);
2101     + goto out_free;
2102     + }
2103     + symbuf[err] = '\0';
2104     + }
2105     +
2106     + /* Now we lock the parent, and create the object in the new branch. */
2107     + new_lower_parent_dentry = lock_parent(new_lower_dentry);
2108     +
2109     + /* create the new inode */
2110     + err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2111     + new_lower_parent_dentry, symbuf);
2112     +
2113     + if (err) {
2114     + __clear(dentry, old_lower_dentry,
2115     + old_bstart, old_bend,
2116     + new_lower_dentry, new_bindex);
2117     + goto out_unlock;
2118     + }
2119     +
2120     + /* We actually copyup the file here. */
2121     + if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2122     + err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2123     + old_lower_dentry, old_bindex,
2124     + copyup_file, len);
2125     + if (err)
2126     + goto out_unlink;
2127     +
2128     + /* Set permissions. */
2129     + err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2130     + if (err)
2131     + goto out_unlink;
2132     +
2133     +#ifdef CONFIG_UNION_FS_XATTR
2134     + /* Selinux uses extended attributes for permissions. */
2135     + err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2136     + if (err)
2137     + goto out_unlink;
2138     +#endif /* CONFIG_UNION_FS_XATTR */
2139     +
2140     + /* do not allow files getting deleted to be re-interposed */
2141     + if (!d_deleted(dentry))
2142     + unionfs_reinterpose(dentry);
2143     +
2144     + goto out_unlock;
2145     +
2146     +out_unlink:
2147     + /*
2148     + * copyup failed, because we possibly ran out of space or
2149     + * quota, or something else happened so let's unlink; we don't
2150     + * really care about the return value of vfs_unlink
2151     + */
2152     + vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2153     +
2154     + if (copyup_file) {
2155     + /* need to close the file */
2156     +
2157     + fput(*copyup_file);
2158     + branchput(sb, new_bindex);
2159     + }
2160     +
2161     + /*
2162     + * TODO: should we reset the error to something like -EIO?
2163     + *
2164     + * If we don't reset, the user may get some nonsensical errors, but
2165     + * on the other hand, if we reset to EIO, we guarantee that the user
2166     + * will get a "confusing" error message.
2167     + */
2168     +
2169     +out_unlock:
2170     + unlock_dir(new_lower_parent_dentry);
2171     +
2172     +out_free:
2173     + /*
2174     + * If old_lower_dentry was not a file, then we need to dput it. If
2175     + * it was a file, then it was already dput indirectly by other
2176     + * functions we call above which operate on regular files.
2177     + */
2178     + if (old_lower_dentry && old_lower_dentry->d_inode &&
2179     + !S_ISREG(old_lower_dentry->d_inode->i_mode))
2180     + dput(old_lower_dentry);
2181     + kfree(symbuf);
2182     +
2183     + if (err) {
2184     + /*
2185     + * if directory creation succeeded, but inode copyup failed,
2186     + * then purge new dentries.
2187     + */
2188     + if (dbstart(dentry) < old_bstart &&
2189     + ibstart(dentry->d_inode) > dbstart(dentry))
2190     + __clear(dentry, NULL, old_bstart, old_bend,
2191     + unionfs_lower_dentry(dentry), dbstart(dentry));
2192     + goto out;
2193     + }
2194     + if (!S_ISDIR(dentry->d_inode->i_mode)) {
2195     + unionfs_postcopyup_release(dentry);
2196     + if (!unionfs_lower_inode(dentry->d_inode)) {
2197     + /*
2198     + * If we got here, then we copied up to an
2199     + * unlinked-open file, whose name is .unionfsXXXXX.
2200     + */
2201     + struct inode *inode = new_lower_dentry->d_inode;
2202     + atomic_inc(&inode->i_count);
2203     + unionfs_set_lower_inode_idx(dentry->d_inode,
2204     + ibstart(dentry->d_inode),
2205     + inode);
2206     + }
2207     + }
2208     + unionfs_postcopyup_setmnt(dentry);
2209     + /* sync inode times from copied-up inode to our inode */
2210     + unionfs_copy_attr_times(dentry->d_inode);
2211     + unionfs_check_inode(dir);
2212     + unionfs_check_dentry(dentry);
2213     +out:
2214     + return err;
2215     +}
2216     +
2217     +/*
2218     + * Copy up the file represented by 'file' from branch 'bstart' to branch
2219     + * 'new_bindex', naming the copy "name". On success, fbstart(file) and the
2220     + * lower file stored at new_bindex are updated to refer to the copy.
2221     + */
2222     +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2223     + int bstart, int new_bindex, loff_t len)
2224     +{
2225     + int err = 0;
2226     + struct file *output_file = NULL;
2227     +
2228     + err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2229     + name, strlen(name), &output_file, len);
2230     + if (!err) {
2231     + fbstart(file) = new_bindex;
2232     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2233     + }
2234     +
2235     + return err;
2236     +}
2237     +
2238     +/*
2239     + * Copy up 'file' from branch 'bstart' to branch 'new_bindex' under its own
2240     + * dentry name; on success fbstart(file) and its lower file are updated.
2241     + */
2242     +int copyup_file(struct inode *dir, struct file *file, int bstart,
2243     + int new_bindex, loff_t len)
2244     +{
2245     + int err = 0;
2246     + struct file *output_file = NULL;
2247     + struct dentry *dentry = file->f_path.dentry;
2248     +
2249     + err = copyup_dentry(dir, dentry, bstart, new_bindex,
2250     + dentry->d_name.name, dentry->d_name.len,
2251     + &output_file, len);
2252     + if (!err) {
2253     + fbstart(file) = new_bindex;
2254     + unionfs_set_lower_file_idx(file, new_bindex, output_file);
2255     + }
2256     +
2257     + return err;
2258     +}
2259     +
2260     +/* drop negative lower dentries (and their mnts) other than bindex; reset bstart/bend */
2261     +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2262     + int old_bstart, int old_bend)
2263     +{
2264     + int loop_start;
2265     + int loop_end;
2266     + int new_bstart = -1;
2267     + int new_bend = -1;
2268     + int i;
2269     +
2270     + loop_start = min(old_bstart, bindex);
2271     + loop_end = max(old_bend, bindex);
2272     +
2273     + /*
2274     + * This loop computes the new bstart and bend for the dentry by
2275     + * traversing from left to right. It also dputs all negative
2276     + * dentries except the one at bindex, which is always kept.
2277     + */
2278     + for (i = loop_start; i <= loop_end; i++) {
2279     + if (!unionfs_lower_dentry_idx(dentry, i))
2280     + continue;
2281     +
2282     + if (i == bindex) {
2283     + new_bend = i;
2284     + if (new_bstart < 0)
2285     + new_bstart = i;
2286     + continue;
2287     + }
2288     +
2289     + if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2290     + dput(unionfs_lower_dentry_idx(dentry, i));
2291     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
2292     +
2293     + unionfs_mntput(dentry, i);
2294     + unionfs_set_lower_mnt_idx(dentry, i, NULL);
2295     + } else {
2296     + if (new_bstart < 0)
2297     + new_bstart = i;
2298     + new_bend = i;
2299     + }
2300     + }
2301     +
2302     + if (new_bstart < 0)
2303     + new_bstart = bindex;
2304     + if (new_bend < 0)
2305     + new_bend = bindex;
2306     + dbstart(dentry) = new_bstart;
2307     + dbend(dentry) = new_bend;
2308     +
2309     +}
2310     +
2311     +/* set lower inode ptr (taken via igrab) and widen ibstart/ibend to cover bindex */
2312     +static void __set_inode(struct dentry *upper, struct dentry *lower,
2313     + int bindex)
2314     +{
2315     + unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2316     + igrab(lower->d_inode));
2317     + if (likely(ibstart(upper->d_inode) > bindex))
2318     + ibstart(upper->d_inode) = bindex;
2319     + if (likely(ibend(upper->d_inode) < bindex))
2320     + ibend(upper->d_inode) = bindex;
2321     +
2322     +}
2323     +
2324     +/* set lower dentry ptr and widen dbstart/dbend so that they cover bindex */
2325     +static void __set_dentry(struct dentry *upper, struct dentry *lower,
2326     + int bindex)
2327     +{
2328     + unionfs_set_lower_dentry_idx(upper, bindex, lower);
2329     + if (likely(dbstart(upper) > bindex))
2330     + dbstart(upper) = bindex;
2331     + if (likely(dbend(upper) < bindex))
2332     + dbend(upper) = bindex;
2333     +}
2334     +
2335     +/*
2336     + * Replicate the directory chain above @dentry in branch @bindex, then look
2337     + * up @name under the lower parent and install it as @dentry's lower dentry
2338     + * at @bindex.  @dentry must be locked (see verify_locked()).  Returns that
2339     + * lower dentry, or an ERR_PTR() on failure.
2340     + */
2341     +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2342     +			      const char *name, int bindex)
2343     +{
2344     +	int err;
2345     +	struct dentry *child_dentry, *parent_dentry;
2346     +	struct dentry *lower_parent_dentry = NULL;
2347     +	struct dentry *lower_dentry = NULL;
2348     +	const char *childname;
2349     +	unsigned int childnamelen;
2350     +	int nr_dentry;
2351     +	int count = 0;
2352     +	int old_bstart, old_bend;
2353     +	struct dentry **path = NULL;
2354     +
2355     +	verify_locked(dentry);
2356     +
2357     +	err = is_robranch_super(dir->i_sb, bindex);
2358     +	if (err) {
2359     +		lower_dentry = ERR_PTR(err);
2360     +		goto out;
2361     +	}
2362     +
2363     +	old_bstart = dbstart(dentry);
2364     +	old_bend = dbend(dentry);
2365     +
2366     +	lower_dentry = ERR_PTR(-ENOMEM);
2367     +
2368     +	/* There is no sense allocating any less than the minimum. */
2369     +	nr_dentry = 1;
2370     +	path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2371     +	if (unlikely(!path))
2372     +		goto out;
2373     +
2374     +	/* assume the negative dentry of unionfs as the parent dentry */
2375     +	parent_dentry = dentry;
2376     +
2377     +	/*
2378     +	 * This loop finds the first parent that exists in the given branch.
2379     +	 * We start building the directory structure from there. At the end
2380     +	 * of the loop, the following should hold:
2381     +	 * - child_dentry is the first nonexistent child
2382     +	 * - parent_dentry is the first existent parent
2383     +	 * - path[0] is the deepest child
2384     +	 * - path[count] is the first child to create
2385     +	 */
2386     +	do {
2387     +		child_dentry = parent_dentry;
2388     +
2389     +		/* find the parent directory dentry in unionfs */
2390     +		parent_dentry = dget_parent(child_dentry);
2391     +
2392     +		/* find out the lower_parent_dentry in the given branch */
2393     +		lower_parent_dentry =
2394     +			unionfs_lower_dentry_idx(parent_dentry, bindex);
2395     +
2396     +		/* grow path table */
2397     +		if (count == nr_dentry) {
2398     +			void *p;
2399     +
2400     +			nr_dentry *= 2;
2401     +			p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2402     +				     GFP_KERNEL);
2403     +			if (unlikely(!p)) {
2404     +				/* drop the refs not yet stored in path[] */
2405     +				dput(parent_dentry);
2406     +				dput(child_dentry);
2407     +				count--; /* out: must start at a valid slot */
2408     +				lower_dentry = ERR_PTR(-ENOMEM);
2409     +				goto out;
2410     +			}
2411     +			path = p;
2412     +		}
2413     +
2414     +		/* store the child dentry */
2415     +		path[count++] = child_dentry;
2416     +	} while (!lower_parent_dentry);
2417     +	count--;
2418     +
2419     +	/*
2420     +	 * This code goes between the begin/end labels and basically
2421     +	 * emulates a while(child_dentry != dentry), only cleaner and
2422     +	 * shorter than what would be a much longer while loop.
2423     +	 */
2424     +begin:
2425     +	/* get lower parent dir in the current branch */
2426     +	lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2427     +	dput(parent_dentry);
2428     +
2429     +	/* init the values to lookup */
2430     +	childname = child_dentry->d_name.name;
2431     +	childnamelen = child_dentry->d_name.len;
2432     +
2433     +	if (child_dentry != dentry) {
2434     +		/* lookup child in the underlying file system */
2435     +		lower_dentry = lookup_lck_len(childname, lower_parent_dentry,
2436     +					      childnamelen);
2437     +		if (IS_ERR(lower_dentry))
2438     +			goto out;
2439     +	} else {
2440     +		/*
2441     +		 * Is the name a whiteout of the child name ? lookup the
2442     +		 * whiteout child in the underlying file system
2443     +		 */
2444     +		lower_dentry = lookup_lck_len(name, lower_parent_dentry,
2445     +					      strlen(name));
2446     +		if (IS_ERR(lower_dentry))
2447     +			goto out;
2448     +
2449     +		/* Replace the current dentry (if any) with the new one */
2450     +		dput(unionfs_lower_dentry_idx(dentry, bindex));
2451     +		unionfs_set_lower_dentry_idx(dentry, bindex,
2452     +					     lower_dentry);
2453     +
2454     +		__cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2455     +		goto out;
2456     +	}
2457     +
2458     +	if (lower_dentry->d_inode) {
2459     +		/*
2460     +		 * since this already exists we dput to avoid
2461     +		 * multiple references on the same dentry
2462     +		 */
2463     +		dput(lower_dentry);
2464     +	} else {
2465     +		struct sioq_args args;
2466     +
2467     +		/* it's a negative dentry, create a new dir */
2468     +		lower_parent_dentry = lock_parent(lower_dentry);
2469     +
2470     +		args.mkdir.parent = lower_parent_dentry->d_inode;
2471     +		args.mkdir.dentry = lower_dentry;
2472     +		args.mkdir.mode = child_dentry->d_inode->i_mode;
2473     +
2474     +		run_sioq(__unionfs_mkdir, &args);
2475     +		err = args.err;
2476     +
2477     +		if (!err)
2478     +			err = copyup_permissions(dir->i_sb, child_dentry,
2479     +						 lower_dentry);
2480     +		unlock_dir(lower_parent_dentry);
2481     +		if (err) {
2482     +			dput(lower_dentry);
2483     +			lower_dentry = ERR_PTR(err);
2484     +			goto out;
2485     +		}
2486     +	}
2487     +
2488     +	__set_inode(child_dentry, lower_dentry, bindex);
2489     +	__set_dentry(child_dentry, lower_dentry, bindex);
2490     +	/*
2491     +	 * update times of this dentry, but also the parent, because if
2492     +	 * we changed, the parent may have changed too.
2493     +	 */
2494     +	fsstack_copy_attr_times(parent_dentry->d_inode,
2495     +				lower_parent_dentry->d_inode);
2496     +	unionfs_copy_attr_times(child_dentry->d_inode);
2497     +
2498     +	parent_dentry = child_dentry;
2499     +	child_dentry = path[--count];
2500     +	goto begin;
2501     +out:
2502     +	/* drop any leftover references taken by the do/while loop above */
2503     +	if (IS_ERR(lower_dentry))
2504     +		while (count)
2505     +			dput(path[count--]);
2506     +	kfree(path);
2507     +	return lower_dentry;
2508     +}
2509     +
2510     +/*
2511     + * Post-copyup helper: if dentry has no lower mnt at its start branch, find
2512     + * the nearest ancestor that has one and set dentry and every mnt-less
2513     + * ancestor below it to unionfs_mntget() of that ancestor.
2514     + */
2515     +void unionfs_postcopyup_setmnt(struct dentry *dentry)
2516     +{
2517     +	const int bindex = dbstart(dentry);
2518     +	struct dentry *donor, *cur;
2519     +
2520     +	if (unionfs_lower_mnt_idx(dentry, bindex))
2521     +		return;
2522     +	/* this walk should stop at the root dentry at the latest */
2523     +	donor = dentry->d_parent;
2524     +	while (!unionfs_lower_mnt_idx(donor, bindex))
2525     +		donor = donor->d_parent;
2526     +
2527     +	for (cur = dentry; !unionfs_lower_mnt_idx(cur, bindex);
2528     +	     cur = cur->d_parent)
2529     +		unionfs_set_lower_mnt_idx(cur, bindex,
2530     +					  unionfs_mntget(donor, bindex));
2531     +}
2532     +
2533     +/*
2534     + * Post-copyup: release all lowers of a non-directory past its start branch.
2535     + */
2536     +void unionfs_postcopyup_release(struct dentry *dentry)
2537     +{
2538     +	struct inode *inode = dentry->d_inode;
2539     +	int first, last;
2540     +
2541     +	BUG_ON(S_ISDIR(inode->i_mode));
2542     +	first = dbstart(dentry);
2543     +	last = dbend(dentry);
2544     +	path_put_lowers(dentry, first + 1, last, false);
2545     +	iput_lowers(inode, first + 1, last, false);
2546     +
2547     +	dbend(dentry) = first;
2548     +	ibstart(inode) = first;
2549     +	ibend(inode) = first;
2550     +}
2551     diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2552     new file mode 100644
2553     index 0000000..acc44bd
2554     --- /dev/null
2555     +++ b/fs/unionfs/debug.c
2556     @@ -0,0 +1,533 @@
2557     +/*
2558     + * Copyright (c) 2003-2010 Erez Zadok
2559     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2560     + * Copyright (c) 2003-2010 Stony Brook University
2561     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
2562     + *
2563     + * This program is free software; you can redistribute it and/or modify
2564     + * it under the terms of the GNU General Public License version 2 as
2565     + * published by the Free Software Foundation.
2566     + */
2567     +
2568     +#include "union.h"
2569     +
2570     +/*
2571     + * Helper debugging functions for maintainers (and for users to report
2572     + * useful information back to maintainers).
2573     + */
2574     +
2575     +/* log the call site once; expects a local int printed_caller in scope */
2576     +#define PRINT_CALLER(fname, fxn, line) \
2577     + do { \
2578     + if (!printed_caller) { \
2579     + pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \
2580     + printed_caller = 1; \
2581     + } \
2582     + } while (0)
2583     +
2584     +/*
2585     + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2586     + * the fan-out of various Unionfs objects. We check that no lower objects
2587     + * exist outside the start/end branch range; that all objects within are
2588     + * non-NULL (with some allowed exceptions); that for every lower file
2589     + * there's a lower dentry+inode; that the start/end ranges match for all
2590     + * corresponding lower objects; that open files/symlinks have only one lower
2591     + * object, but directories can have several; and more.
2592     + */
2593     +void __unionfs_check_inode(const struct inode *inode,
2594     + const char *fname, const char *fxn, int line)
2595     +{
2596     + int bindex;
2597     + int istart, iend;
2598     + struct inode *lower_inode;
2599     + struct super_block *sb;
2600     + int printed_caller = 0; /* set by PRINT_CALLER() after its first print */
2601     + void *poison_ptr; /* filled with POISON_INUSE bytes for the Ci6 check */
2602     +
2603     + /* read the inode's own branch range */
2604     + BUG_ON(!inode);
2605     + sb = inode->i_sb;
2606     + istart = ibstart(inode);
2607     + iend = ibend(inode);
2608     + /* don't check inode if no lower branches */
2609     + if (istart < 0 && iend < 0)
2610     + return;
2611     + if (unlikely(istart > iend)) {
2612     + PRINT_CALLER(fname, fxn, line);
2613     + pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2614     + inode, istart, iend);
2615     + }
2616     + if (unlikely((istart == -1 && iend != -1) ||
2617     + (istart != -1 && iend == -1))) {
2618     + PRINT_CALLER(fname, fxn, line);
2619     + pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2620     + inode, istart, iend);
2621     + }
2622     + if (!S_ISDIR(inode->i_mode)) {
2623     + if (unlikely(iend != istart)) {
2624     + PRINT_CALLER(fname, fxn, line);
2625     + pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2626     + inode, istart, iend);
2627     + }
2628     + }
2629     +
2630     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2631     + if (unlikely(!UNIONFS_I(inode))) {
2632     + PRINT_CALLER(fname, fxn, line);
2633     + pr_debug(" Ci3: no inode_info %p\n", inode);
2634     + return;
2635     + }
2636     + if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2637     + PRINT_CALLER(fname, fxn, line);
2638     + pr_debug(" Ci4: no lower_inodes %p\n", inode);
2639     + return;
2640     + }
2641     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2642     + if (lower_inode) {
2643     + memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2644     + if (unlikely(bindex < istart || bindex > iend)) {
2645     + PRINT_CALLER(fname, fxn, line);
2646     + pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2647     + "istart/end=%d:%d\n", inode,
2648     + lower_inode, bindex, istart, iend);
2649     + } else if (unlikely(lower_inode == poison_ptr)) {
2650     + /* freed inode! */
2651     + PRINT_CALLER(fname, fxn, line);
2652     + pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2653     + "istart/end=%d:%d\n", inode,
2654     + lower_inode, bindex, istart, iend);
2655     + }
2656     + continue;
2657     + }
2658     + /* if we get here, then lower_inode == NULL */
2659     + if (bindex < istart || bindex > iend)
2660     + continue;
2661     + /*
2662     + * directories can have NULL lower inodes in b/t start/end,
2663     + * but NOT if at the start/end range.
2664     + */
2665     + if (unlikely(S_ISDIR(inode->i_mode) &&
2666     + bindex > istart && bindex < iend))
2667     + continue;
2668     + PRINT_CALLER(fname, fxn, line);
2669     + pr_debug(" Ci7: inode/linode=%p:%p "
2670     + "bindex=%d istart/end=%d:%d\n",
2671     + inode, lower_inode, bindex, istart, iend);
2672     + }
2673     +}
2674     +
2675     +void __unionfs_check_dentry(const struct dentry *dentry,
2676     + const char *fname, const char *fxn, int line)
2677     +{
2678     + int bindex;
2679     + int dstart, dend, istart, iend;
2680     + struct dentry *lower_dentry;
2681     + struct inode *inode, *lower_inode;
2682     + struct super_block *sb;
2683     + struct vfsmount *lower_mnt;
2684     + int printed_caller = 0; /* set by PRINT_CALLER() after its first print */
2685     + void *poison_ptr; /* filled with POISON_INUSE bytes for the CI6 check */
2686     +
2687     + BUG_ON(!dentry);
2688     + sb = dentry->d_sb;
2689     + inode = dentry->d_inode;
2690     + dstart = dbstart(dentry);
2691     + dend = dbend(dentry);
2692     + /* don't check dentry/mnt if no lower branches */
2693     + if (dstart < 0 && dend < 0)
2694     + goto check_inode;
2695     + BUG_ON(dstart > dend);
2696     +
2697     + if (unlikely((dstart == -1 && dend != -1) ||
2698     + (dstart != -1 && dend == -1))) {
2699     + PRINT_CALLER(fname, fxn, line);
2700     + pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2701     + dentry, dstart, dend);
2702     + }
2703     + /*
2704     + * check for NULL dentries inside the start/end range, or
2705     + * non-NULL dentries outside the start/end range.
2706     + */
2707     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2708     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2709     + if (lower_dentry) {
2710     + if (unlikely(bindex < dstart || bindex > dend)) {
2711     + PRINT_CALLER(fname, fxn, line);
2712     + pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2713     + "bindex=%d dstart/end=%d:%d\n",
2714     + dentry, lower_dentry,
2715     + (lower_dentry ? lower_dentry->d_inode :
2716     + (void *) -1L),
2717     + bindex, dstart, dend);
2718     + }
2719     + } else { /* lower_dentry == NULL */
2720     + if (bindex < dstart || bindex > dend)
2721     + continue;
2722     + /*
2723     + * Directories can have NULL lower dentries in b/t
2724     + * start/end, but NOT if at the start/end range.
2725     + * Ignore this rule, however, if this is a NULL
2726     + * dentry or a deleted dentry.
2727     + */
2728     + if (unlikely(!d_deleted((struct dentry *) dentry) &&
2729     + inode &&
2730     + !(inode && S_ISDIR(inode->i_mode) &&
2731     + bindex > dstart && bindex < dend))) {
2732     + PRINT_CALLER(fname, fxn, line);
2733     + pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2734     + "bindex=%d dstart/end=%d:%d\n",
2735     + dentry, lower_dentry,
2736     + (lower_dentry ?
2737     + lower_dentry->d_inode :
2738     + (void *) -1L),
2739     + bindex, dstart, dend);
2740     + }
2741     + }
2742     + }
2743     +
2744     + /* check for vfsmounts same as for dentries */
2745     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2746     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2747     + if (lower_mnt) {
2748     + if (unlikely(bindex < dstart || bindex > dend)) {
2749     + PRINT_CALLER(fname, fxn, line);
2750     + pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2751     + "dstart/end=%d:%d\n", dentry,
2752     + lower_mnt, bindex, dstart, dend);
2753     + }
2754     + } else { /* lower_mnt == NULL */
2755     + if (bindex < dstart || bindex > dend)
2756     + continue;
2757     + /*
2758     + * Directories can have NULL lower mnts in b/t
2759     + * start/end, but NOT if at the start/end range.
2760     + * Ignore this rule, however, if this is a NULL
2761     + * dentry.
2762     + */
2763     + if (unlikely(inode &&
2764     + !(inode && S_ISDIR(inode->i_mode) &&
2765     + bindex > dstart && bindex < dend))) {
2766     + PRINT_CALLER(fname, fxn, line);
2767     + pr_debug(" CM1: dentry/lmnt=%p:%p "
2768     + "bindex=%d dstart/end=%d:%d\n",
2769     + dentry, lower_mnt, bindex,
2770     + dstart, dend);
2771     + }
2772     + }
2773     + }
2774     +
2775     +check_inode:
2776     + /* for inodes now */
2777     + if (!inode)
2778     + return;
2779     + istart = ibstart(inode);
2780     + iend = ibend(inode);
2781     + /* don't check inode if no lower branches */
2782     + if (istart < 0 && iend < 0)
2783     + return;
2784     + BUG_ON(istart > iend);
2785     + if (unlikely((istart == -1 && iend != -1) ||
2786     + (istart != -1 && iend == -1))) {
2787     + PRINT_CALLER(fname, fxn, line);
2788     + pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2789     + dentry, inode, istart, iend);
2790     + }
2791     + if (unlikely(istart != dstart)) {
2792     + PRINT_CALLER(fname, fxn, line);
2793     + pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2794     + dentry, inode, istart, dstart);
2795     + }
2796     + if (unlikely(iend != dend)) {
2797     + PRINT_CALLER(fname, fxn, line);
2798     + pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2799     + dentry, inode, iend, dend);
2800     + }
2801     +
2802     + if (!S_ISDIR(inode->i_mode)) {
2803     + if (unlikely(dend != dstart)) {
2804     + PRINT_CALLER(fname, fxn, line);
2805     + pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2806     + dentry, inode, dstart, dend);
2807     + }
2808     + if (unlikely(iend != istart)) {
2809     + PRINT_CALLER(fname, fxn, line);
2810     + pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2811     + dentry, inode, istart, iend);
2812     + }
2813     + }
2814     +
2815     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2816     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2817     + if (lower_inode) {
2818     + memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2819     + if (unlikely(bindex < istart || bindex > iend)) {
2820     + PRINT_CALLER(fname, fxn, line);
2821     + pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2822     + "istart/end=%d:%d\n", dentry,
2823     + lower_inode, bindex, istart, iend);
2824     + } else if (unlikely(lower_inode == poison_ptr)) {
2825     + /* freed inode! */
2826     + PRINT_CALLER(fname, fxn, line);
2827     + pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2828     + "istart/end=%d:%d\n", dentry,
2829     + lower_inode, bindex, istart, iend);
2830     + }
2831     + continue;
2832     + }
2833     + /* if we get here, then lower_inode == NULL */
2834     + if (bindex < istart || bindex > iend)
2835     + continue;
2836     + /*
2837     + * directories can have NULL lower inodes in b/t start/end,
2838     + * but NOT if at the start/end range.
2839     + */
2840     + if (unlikely(S_ISDIR(inode->i_mode) &&
2841     + bindex > istart && bindex < iend))
2842     + continue;
2843     + PRINT_CALLER(fname, fxn, line);
2844     + pr_debug(" CI7: dentry/linode=%p:%p "
2845     + "bindex=%d istart/end=%d:%d\n",
2846     + dentry, lower_inode, bindex, istart, iend);
2847     + }
2848     +
2849     + /*
2850     + * If it's a directory, then intermediate objects b/t start/end can
2851     + * be NULL. But, check that all three agree: lower dentry, mnt, and
2852     + * inode must be either all NULL or all non-NULL.
2853     + */
2854     + if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2855     + for (bindex = dstart+1; bindex < dend; bindex++) {
2856     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2857     + lower_dentry = unionfs_lower_dentry_idx(dentry,
2858     + bindex);
2859     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2860     + if (unlikely(!((lower_inode && lower_dentry &&
2861     + lower_mnt) ||
2862     + (!lower_inode &&
2863     + !lower_dentry && !lower_mnt)))) {
2864     + PRINT_CALLER(fname, fxn, line);
2865     + pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2866     + "bindex=%d dstart/end=%d:%d\n",
2867     + lower_mnt, lower_dentry, lower_inode,
2868     + bindex, dstart, dend);
2869     + }
2870     + }
2871     + /* check if lower inode is newer than upper one (it shouldn't) */
2872     + if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) {
2873     + PRINT_CALLER(fname, fxn, line);
2874     + for (bindex = ibstart(inode); bindex <= ibend(inode);
2875     + bindex++) {
2876     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
2877     + if (unlikely(!lower_inode))
2878     + continue;
2879     + pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2880     + "ctime/lctime=%lu.%lu/%lu.%lu\n",
2881     + bindex,
2882     + inode->i_mtime.tv_sec,
2883     + inode->i_mtime.tv_nsec,
2884     + lower_inode->i_mtime.tv_sec,
2885     + lower_inode->i_mtime.tv_nsec,
2886     + inode->i_ctime.tv_sec,
2887     + inode->i_ctime.tv_nsec,
2888     + lower_inode->i_ctime.tv_sec,
2889     + lower_inode->i_ctime.tv_nsec);
2890     + }
2891     + }
2892     +}
2893     +
2894     +void __unionfs_check_file(const struct file *file,
2895     + const char *fname, const char *fxn, int line)
2896     +{
2897     + int bindex;
2898     + int dstart, dend, fstart, fend;
2899     + struct dentry *dentry;
2900     + struct file *lower_file;
2901     + struct inode *inode;
2902     + struct super_block *sb;
2903     + int printed_caller = 0; /* set by PRINT_CALLER() after its first print */
2904     +
2905     + BUG_ON(!file);
2906     + dentry = file->f_path.dentry;
2907     + sb = dentry->d_sb;
2908     + dstart = dbstart(dentry);
2909     + dend = dbend(dentry);
2910     + BUG_ON(dstart > dend);
2911     + fstart = fbstart(file);
2912     + fend = fbend(file);
2913     + BUG_ON(fstart > fend);
2914     +
2915     + if (unlikely((fstart == -1 && fend != -1) ||
2916     + (fstart != -1 && fend == -1))) {
2917     + PRINT_CALLER(fname, fxn, line);
2918     + pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2919     + file, dentry, fstart, fend);
2920     + }
2921     + if (unlikely(fstart != dstart)) {
2922     + PRINT_CALLER(fname, fxn, line);
2923     + pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2924     + file, dentry, fstart, dstart);
2925     + }
2926     + if (unlikely(fend != dend)) {
2927     + PRINT_CALLER(fname, fxn, line);
2928     + pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2929     + file, dentry, fend, dend);
2930     + }
2931     + inode = dentry->d_inode;
2932     + if (!S_ISDIR(inode->i_mode)) {
2933     + if (unlikely(fend != fstart)) {
2934     + PRINT_CALLER(fname, fxn, line);
2935     + pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2936     + file, inode, fstart, fend);
2937     + }
2938     + if (unlikely(dend != dstart)) {
2939     + PRINT_CALLER(fname, fxn, line);
2940     + pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
2941     + file, dentry, dstart, dend);
2942     + }
2943     + }
2944     +
2945     + /*
2946     + * check for NULL lower files inside the start/end range, or
2947     + * non-NULL lower files outside the start/end range.
2948     + */
2949     + for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2950     + lower_file = unionfs_lower_file_idx(file, bindex);
2951     + if (lower_file) {
2952     + if (unlikely(bindex < fstart || bindex > fend)) {
2953     + PRINT_CALLER(fname, fxn, line);
2954     + pr_debug(" CF5: file/lower=%p:%p bindex=%d "
2955     + "fstart/end=%d:%d\n", file,
2956     + lower_file, bindex, fstart, fend);
2957     + }
2958     + } else { /* lower_file == NULL */
2959     + if (bindex >= fstart && bindex <= fend) {
2960     + /*
2961     + * directories can have NULL lower files in
2962     + * b/t start/end, but NOT if at the
2963     + * start/end range.
2964     + */
2965     + if (unlikely(!(S_ISDIR(inode->i_mode) &&
2966     + bindex > fstart &&
2967     + bindex < fend))) {
2968     + PRINT_CALLER(fname, fxn, line);
2969     + pr_debug(" CF6: file/lower=%p:%p "
2970     + "bindex=%d fstart/end=%d:%d\n",
2971     + file, lower_file, bindex,
2972     + fstart, fend);
2973     + }
2974     + }
2975     + }
2976     + }
2977     +
2978     + __unionfs_check_dentry(dentry, fname, fxn, line); /* then check the dentry */
2979     +}
2980     +
2981     +/* BUG() if an open-intent nameidata's file sits on a non-unionfs sb. */
2982     +void __unionfs_check_nd(const struct nameidata *nd,
2983     +			const char *fname, const char *fxn, int line)
2984     +{
2985     +	const struct dentry *fdentry;
2986     +	const char *fstype;
2987     +	int printed_caller = 0;
2988     +
2989     +	if (unlikely(!nd) || !(nd->flags & LOOKUP_OPEN))
2990     +		return;
2991     +	fdentry = nd->intent.open.file->f_path.dentry;
2992     +	if (!fdentry)
2993     +		return;
2994     +	fstype = fdentry->d_sb->s_type->name;
2995     +	if (likely(!strcmp(fstype, UNIONFS_NAME)))
2996     +		return;
2997     +	PRINT_CALLER(fname, fxn, line);
2998     +	pr_debug(" CND1: lower_file of type %s\n", fstype);
2999     +	BUG();
3000     +}
3001     +
3002     +/* Print each branch mnt's mnt_count (-99 if none); helps track mnt leaks. */
3003     +void __show_branch_counts(const struct super_block *sb,
3004     +			  const char *file, const char *fxn, int line)
3005     +{
3006     +	int bindex;
3007     +	int refs;
3008     +
3009     +	pr_debug("BC:");
3010     +	for (bindex = 0; bindex < sbmax(sb); bindex++) {
3011     +		struct vfsmount *mnt = NULL;
3012     +
3013     +		if (likely(sb->s_root))
3014     +			mnt = UNIONFS_D(sb->s_root)->lower_paths[bindex].mnt;
3015     +		refs = mnt ? atomic_read(&mnt->mnt_count) : -99;
3016     +		printk(KERN_CONT "%d:", refs);
3017     +	}
3018     +	printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
3019     +}
3020     +
3021     +/* Log upper vs. lower mtime/ctime for each lower inode in ibstart..ibend. */
3022     +void __show_inode_times(const struct inode *inode,
3023     +			const char *file, const char *fxn, int line)
3024     +{
3025     +	int bindex;
3026     +
3027     +	for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3028     +		const struct inode *lower =
3029     +			unionfs_lower_inode_idx(inode, bindex);
3030     +
3031     +		if (likely(lower)) {
3032     +			pr_debug("IT(%lu:%d): %s:%s:%d "
3033     +				 "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3034     +				 inode->i_ino, bindex,
3035     +				 file, fxn, line,
3036     +				 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3037     +				 lower->i_mtime.tv_sec, lower->i_mtime.tv_nsec,
3038     +				 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3039     +				 lower->i_ctime.tv_sec, lower->i_ctime.tv_nsec);
3040     +		}
3041     +	}
3042     +}
3043     +
3044     +/* Like __show_inode_times(), for dentry->d_inode, tagged with the name. */
3045     +void __show_dinode_times(const struct dentry *dentry,
3046     +			 const char *file, const char *fxn, int line)
3047     +{
3048     +	const struct inode *inode = dentry->d_inode;
3049     +	int bindex;
3050     +
3051     +	for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3052     +		const struct inode *lower =
3053     +			unionfs_lower_inode_idx(inode, bindex);
3054     +
3055     +		if (lower) {
3056     +			pr_debug("DT(%s:%lu:%d): %s:%s:%d "
3057     +				 "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3058     +				 dentry->d_name.name, inode->i_ino, bindex,
3059     +				 file, fxn, line,
3060     +				 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3061     +				 lower->i_mtime.tv_sec, lower->i_mtime.tv_nsec,
3062     +				 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3063     +				 lower->i_ctime.tv_sec, lower->i_ctime.tv_nsec);
3064     +		}
3065     +	}
3066     +}
3067     +
3068     +/* Log i_count of inode and of each of its lower inodes, branch by branch. */
3069     +void __show_inode_counts(const struct inode *inode,
3070     +			 const char *file, const char *fxn, int line)
3071     +{
3072     +	struct inode *lower;
3073     +	int bindex;
3074     +
3075     +	if (unlikely(!inode)) {
3076     +		pr_debug("SiC: Null inode\n");
3077     +		return;
3078     +	}
3079     +	for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb);
3080     +	     bindex++) {
3081     +		lower = unionfs_lower_inode_idx(inode, bindex);
3082     +		if (likely(lower)) {
3083     +			pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
3084     +				 inode->i_ino, bindex,
3085     +				 atomic_read(&inode->i_count),
3086     +				 atomic_read(&lower->i_count), file, fxn, line);
3087     +		}
3088     +	}
3089     +}
3090     diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
3091     new file mode 100644
3092     index 0000000..a0c3bba
3093     --- /dev/null
3094     +++ b/fs/unionfs/dentry.c
3095     @@ -0,0 +1,397 @@
3096     +/*
3097     + * Copyright (c) 2003-2010 Erez Zadok
3098     + * Copyright (c) 2003-2006 Charles P. Wright
3099     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3100     + * Copyright (c) 2005-2006 Junjiro Okajima
3101     + * Copyright (c) 2005 Arun M. Krishnakumar
3102     + * Copyright (c) 2004-2006 David P. Quigley
3103     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3104     + * Copyright (c) 2003 Puja Gupta
3105     + * Copyright (c) 2003 Harikesavan Krishnan
3106     + * Copyright (c) 2003-2010 Stony Brook University
3107     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
3108     + *
3109     + * This program is free software; you can redistribute it and/or modify
3110     + * it under the terms of the GNU General Public License version 2 as
3111     + * published by the Free Software Foundation.
3112     + */
3113     +
3114     +#include "union.h"
3115     +
3116     +bool is_negative_lower(const struct dentry *dentry)
3117     +{
3118     +	struct dentry *lower;
3119     +	int bindex;
3120     +
3121     +	BUG_ON(!dentry);
3122     +	/* cache coherency: check if file was deleted on lower branch */
3123     +	if (dbstart(dentry) < 0)
3124     +		return true;
3125     +	for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3126     +		lower = unionfs_lower_dentry_idx(dentry, bindex);
3127     +		/* skip missing, negative, unlinked or NFS-renamed lowers */
3128     +		if (!lower || !lower->d_inode || d_deleted(lower) ||
3129     +		    (lower->d_flags & DCACHE_NFSFS_RENAMED))
3130     +			continue;
3131     +		return false;
3132     +	}
3133     +	return true;
3134     +}
3135     +
3136     +static inline void __dput_lowers(struct dentry *dentry, int start, int end)
3137     +{
3138     +	struct dentry *lower;
3139     +	int bindex;
3140     +
3141     +	/* a negative start means there is nothing to release */
3142     +	for (bindex = start; start >= 0 && bindex <= end; bindex++) {
3143     +		lower = unionfs_lower_dentry_idx(dentry, bindex);
3144     +		if (lower) {
3145     +			/* clear the slot before dropping the reference */
3146     +			unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3147     +			dput(lower);
3148     +		}
3149     +	}
3150     +}
3151     +
3152     +/*
3153     + * Purge and invalidate as many data pages of a unionfs inode as possible.
3154     + * Called when the lower inode has changed, so that processes re-get the
3155     + * new data.
3156     + *
3157     + * truncate_inode_pages() is deliberately not used here: it could deadlock
3158     + * between some address_space ops and dentry revalidation, because the op
3159     + * runs with our own page locked and truncation blocks on locked pages.
3160     + */
3161     +static inline void purge_inode_data(struct inode *inode)
3162     +{
3163     +	struct address_space *mapping = inode->i_mapping;
3164     +
3165     +	/* remove all non-private mappings */
3166     +	unmap_mapping_range(mapping, 0, 0, 0);
3167     +	/* invalidate as many pages as possible */
3168     +	invalidate_mapping_pages(mapping, 0, -1);
3169     +}
3170     +
3171     +/*
3172     + * Revalidate a single file/symlink/special dentry. Assume that info nodes
3173     + * of the @dentry and its @parent are locked. Assume parent is valid,
3174     + * otherwise return false (and let's hope the VFS will try to re-lookup this
3175     + * dentry). Returns true if valid, false otherwise.
3176     + */
3177     +bool __unionfs_d_revalidate(struct dentry *dentry, struct dentry *parent,
3178     + bool willwrite)
3179     +{
3180     + bool valid = true; /* default is valid */
3181     + struct dentry *lower_dentry;
3182     + struct dentry *result;
3183     + int bindex, bstart, bend;
3184     + int sbgen, dgen, pdgen;
3185     + int positive = 0;
3186     + int interpose_flag;
3187     +
3188     + verify_locked(dentry);
3189     + verify_locked(parent);
3190     +
3191     + /* if the dentry is unhashed, do NOT revalidate */
3192     + if (d_deleted(dentry))
3193     + goto out;
3194     +
3195     + dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3196     +
3197     + if (is_newer_lower(dentry)) {
3198     + /* root dentry is always valid */
3199     + if (IS_ROOT(dentry)) {
3200     + unionfs_copy_attr_times(dentry->d_inode);
3201     + } else {
3202     + /*
3203     + * reset generation number to zero, guaranteed to be
3204     + * "old"
3205     + */
3206     + dgen = 0;
3207     + atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3208     + }
3209     + if (!willwrite)
3210     + purge_inode_data(dentry->d_inode);
3211     + }
3212     +
3213     + sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3214     +
3215     + BUG_ON(dbstart(dentry) == -1);
3216     + if (dentry->d_inode)
3217     + positive = 1;
3218     +
3219     + /* if our dentry is valid, then validate all lower ones */
3220     + if (sbgen == dgen)
3221     + goto validate_lowers;
3222     +
3223     + /* The root entry should always be valid */
3224     + BUG_ON(IS_ROOT(dentry));
3225     +
3226     + /* We can't work correctly if our parent isn't valid. */
3227     + pdgen = atomic_read(&UNIONFS_D(parent)->generation);
3228     +
3229     + /* Free the pointers for our inodes and this dentry. */
3230     + path_put_lowers_all(dentry, false);
3231     +
3232     + interpose_flag = INTERPOSE_REVAL_NEG;
3233     + if (positive) {
3234     + interpose_flag = INTERPOSE_REVAL;
3235     + iput_lowers_all(dentry->d_inode, true);
3236     + }
3237     +
3238     + if (realloc_dentry_private_data(dentry) != 0) {
3239     + valid = false;
3240     + goto out;
3241     + }
3242     +
3243     + result = unionfs_lookup_full(dentry, parent, interpose_flag);
3244     + if (result) {
3245     + if (IS_ERR(result)) {
3246     + valid = false;
3247     + goto out;
3248     + }
3249     + /*
3250     + * current unionfs_lookup_full() doesn't return
3251     + * a valid dentry
3252     + */
3253     + dput(dentry);
3254     + dentry = result;
3255     + }
3256     +
3257     + if (unlikely(positive && is_negative_lower(dentry))) {
3258     + /* call make_bad_inode here ? */
3259     + d_drop(dentry);
3260     + valid = false;
3261     + goto out;
3262     + }
3263     +
3264     + /*
3265     + * if we got here then we have revalidated our dentry and all lower
3266     + * ones, so we can return safely.
3267     + */
3268     + if (!valid) /* lower dentry revalidation failed */
3269     + goto out;
3270     +
3271     + /*
3272     + * If the parent's gen no. matches the superblock's gen no., then
3273     + * we can update our dentry's gen no. If they didn't match, then it
3274     + * was OK to revalidate this dentry with a stale parent, but we'll
3275     + * purposely not update our dentry's gen no. (so it can be redone);
3276     + * and, we'll mark our parent dentry as invalid so it'll force it
3277     + * (and our dentry) to be revalidated.
3278     + */
3279     + if (pdgen == sbgen)
3280     + atomic_set(&UNIONFS_D(dentry)->generation, sbgen);
3281     + goto out;
3282     +
3283     +validate_lowers:
3284     +
3285     + /* The revalidation must occur across all branches */
3286     + bstart = dbstart(dentry);
3287     + bend = dbend(dentry);
3288     + BUG_ON(bstart == -1);
3289     + for (bindex = bstart; bindex <= bend; bindex++) {
3290     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3291     + if (!lower_dentry || !lower_dentry->d_op
3292     + || !lower_dentry->d_op->d_revalidate)
3293     + continue;
3294     + /*
3295     + * Don't pass nameidata to lower file system, because we
3296     + * don't want an arbitrary lower file being opened or
3297     + * returned to us: it may be useless to us because of the
3298     + * fanout nature of unionfs (cf. file/directory open-file
3299     + * invariants). We will open lower files as and when needed
3300     + * later on.
3301     + */
3302     + if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL))
3303     + valid = false;
3304     + }
3305     +
3306     + if (!dentry->d_inode ||
3307     + ibstart(dentry->d_inode) < 0 ||
3308     + ibend(dentry->d_inode) < 0) {
3309     + valid = false;
3310     + goto out;
3311     + }
3312     +
3313     + if (valid) {
3314     + /*
3315     + * If we get here, and we copy the meta-data from the lower
3316     + * inode to our inode, then it is vital that we have already
3317     + * purged all unionfs-level file data. We do that earlier in
3318     + * this function (__unionfs_d_revalidate) by calling
3319     + * purge_inode_data.
3320     + */
3321     + unionfs_copy_attr_all(dentry->d_inode,
3322     + unionfs_lower_inode(dentry->d_inode));
3323     + fsstack_copy_inode_size(dentry->d_inode,
3324     + unionfs_lower_inode(dentry->d_inode));
3325     + }
3326     +
3327     +out:
3328     + return valid;
3329     +}
3330     +
3331     +/*
3332     + * Determine if the lower inode objects have changed from below the unionfs
3333     + * inode. Return true if changed, false otherwise.
3334     + *
3335     + * We check if the mtime or ctime have changed. However, the inode times
3336     + * can be changed by anyone without much protection, including
3337     + * asynchronously. This can sometimes cause unionfs to find that the lower
3338     + * file system doesn't change its inode times quick enough, resulting in a
3339     + * false positive indication (which is harmless, it just makes unionfs do
3340     + * extra work in re-validating the objects). To minimize the chances of
3341     + * these situations, we still consider such small time changes valid, but we
3342     + * don't print debugging messages unless the time changes are greater than
3343     + * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
3344     + * because significant changes are more likely due to users manually
3345     + * touching lower files.
3346     + */
3347     +bool is_newer_lower(const struct dentry *dentry)
3348     +{
3349     + int bindex;
3350     + struct inode *inode;
3351     + struct inode *lower_inode;
3352     +
3353     + /* ignore if we're called on semi-initialized dentries/inodes */
3354     + if (!dentry || !UNIONFS_D(dentry))
3355     + return false;
3356     + inode = dentry->d_inode;
3357     + if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3358     + ibstart(inode) < 0 || ibend(inode) < 0)
3359     + return false;
3360     +
3361     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3362     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
3363     + if (!lower_inode)
3364     + continue;
3365     +
3366     + /* check if mtime/ctime have changed */
3367     + if (unlikely(timespec_compare(&inode->i_mtime,
3368     + &lower_inode->i_mtime) < 0)) {
3369     + if ((lower_inode->i_mtime.tv_sec -
3370     + inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3371     + pr_info("unionfs: new lower inode mtime "
3372     + "(bindex=%d, name=%s)\n", bindex,
3373     + dentry->d_name.name);
3374     + show_dinode_times(dentry);
3375     + }
3376     + return true;
3377     + }
3378     + if (unlikely(timespec_compare(&inode->i_ctime,
3379     + &lower_inode->i_ctime) < 0)) {
3380     + if ((lower_inode->i_ctime.tv_sec -
3381     + inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3382     + pr_info("unionfs: new lower inode ctime "
3383     + "(bindex=%d, name=%s)\n", bindex,
3384     + dentry->d_name.name);
3385     + show_dinode_times(dentry);
3386     + }
3387     + return true;
3388     + }
3389     + }
3390     +
3391     + /*
3392     + * Last check: if this is a positive dentry, but somehow all lower
3393     + * dentries are negative or unhashed, then this dentry needs to be
3394     + * revalidated, because someone probably deleted the objects from
3395     + * the lower branches directly.
3396     + */
3397     + if (is_negative_lower(dentry))
3398     + return true;
3399     +
3400     + return false; /* default: lower is not newer */
3401     +}
3402     +
3403     +static int unionfs_d_revalidate(struct dentry *dentry,
3404     + struct nameidata *nd_unused)
3405     +{
3406     + bool valid = true;
3407     + int err = 1; /* 1 means valid for the VFS */
3408     + struct dentry *parent;
3409     +
3410     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3411     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3412     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3413     +
3414     + valid = __unionfs_d_revalidate(dentry, parent, false);
3415     + if (valid) {
3416     + unionfs_postcopyup_setmnt(dentry);
3417     + unionfs_check_dentry(dentry);
3418     + } else {
3419     + d_drop(dentry);
3420     + err = valid;
3421     + }
3422     + unionfs_unlock_dentry(dentry);
3423     + unionfs_unlock_parent(dentry, parent);
3424     + unionfs_read_unlock(dentry->d_sb);
3425     +
3426     + return err;
3427     +}
3428     +
3429     +static void unionfs_d_release(struct dentry *dentry)
3430     +{
3431     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3432     + if (unlikely(!UNIONFS_D(dentry)))
3433     + goto out; /* skip if no lower branches */
3434     + /* must lock our branch configuration here */
3435     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3436     +
3437     + unionfs_check_dentry(dentry);
3438     + /* this could be a negative dentry, so check first */
3439     + if (dbstart(dentry) < 0) {
3440     + unionfs_unlock_dentry(dentry);
3441     + goto out; /* due to a (normal) failed lookup */
3442     + }
3443     +
3444     + /* Release all the lower dentries */
3445     + path_put_lowers_all(dentry, true);
3446     +
3447     + unionfs_unlock_dentry(dentry);
3448     +
3449     +out:
3450     + free_dentry_private_data(dentry);
3451     + unionfs_read_unlock(dentry->d_sb);
3452     + return;
3453     +}
3454     +
3455     +/*
3456     + * Called when we're removing the last reference to our dentry. So we
3457     + * should drop all lower references too.
3458     + */
3459     +static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3460     +{
3461     + int rc;
3462     +
3463     + BUG_ON(!dentry);
3464     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3465     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3466     +
3467     + if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3468     + goto drop_lower_inodes;
3469     + path_put_lowers_all(dentry, false);
3470     +
3471     +drop_lower_inodes:
3472     + rc = atomic_read(&inode->i_count);
3473     + if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3474     + /* see Documentation/filesystems/unionfs/issues.txt */
3475     + lockdep_off();
3476     + iput(unionfs_lower_inode(inode));
3477     + lockdep_on();
3478     + unionfs_set_lower_inode(inode, NULL);
3479     + /* XXX: may need to set start/end to -1? */
3480     + }
3481     +
3482     + iput(inode);
3483     +
3484     + unionfs_unlock_dentry(dentry);
3485     + unionfs_read_unlock(dentry->d_sb);
3486     +}
3487     +
3488     +struct dentry_operations unionfs_dops = {
3489     + .d_revalidate = unionfs_d_revalidate,
3490     + .d_release = unionfs_d_release,
3491     + .d_iput = unionfs_d_iput,
3492     +};
3493     diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3494     new file mode 100644
3495     index 0000000..7da0ff0
3496     --- /dev/null
3497     +++ b/fs/unionfs/dirfops.c
3498     @@ -0,0 +1,302 @@
3499     +/*
3500     + * Copyright (c) 2003-2010 Erez Zadok
3501     + * Copyright (c) 2003-2006 Charles P. Wright
3502     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3503     + * Copyright (c) 2005-2006 Junjiro Okajima
3504     + * Copyright (c) 2005 Arun M. Krishnakumar
3505     + * Copyright (c) 2004-2006 David P. Quigley
3506     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3507     + * Copyright (c) 2003 Puja Gupta
3508     + * Copyright (c) 2003 Harikesavan Krishnan
3509     + * Copyright (c) 2003-2010 Stony Brook University
3510     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
3511     + *
3512     + * This program is free software; you can redistribute it and/or modify
3513     + * it under the terms of the GNU General Public License version 2 as
3514     + * published by the Free Software Foundation.
3515     + */
3516     +
3517     +#include "union.h"
3518     +
3519     +/* Make sure our rdstate is playing by the rules. */
3520     +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3521     +{
3522     + BUG_ON(rdstate->offset >= DIREOF);
3523     + BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3524     +}
3525     +
3526     +struct unionfs_getdents_callback {
3527     + struct unionfs_dir_state *rdstate;
3528     + void *dirent;
3529     + int entries_written;
3530     + int filldir_called;
3531     + int filldir_error;
3532     + filldir_t filldir;
3533     + struct super_block *sb;
3534     +};
3535     +
3536     +/* based on generic filldir in fs/readdir.c */
3537     +static int unionfs_filldir(void *dirent, const char *oname, int namelen,
3538     + loff_t offset, u64 ino, unsigned int d_type)
3539     +{
3540     + struct unionfs_getdents_callback *buf = dirent;
3541     + struct filldir_node *found = NULL;
3542     + int err = 0;
3543     + int is_whiteout;
3544     + char *name = (char *) oname;
3545     +
3546     + buf->filldir_called++;
3547     +
3548     + is_whiteout = is_whiteout_name(&name, &namelen);
3549     +
3550     + found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3551     +
3552     + if (found) {
3553     + /*
3554     + * If we had a non-whiteout entry in the dir cache, then mark
3555     + * it as a whiteout but leave it in the dir cache.
3556     + */
3557     + if (is_whiteout && !found->whiteout)
3558     + found->whiteout = is_whiteout;
3559     + goto out;
3560     + }
3561     +
3562     + /* if 'name' isn't a whiteout, filldir it. */
3563     + if (!is_whiteout) {
3564     + off_t pos = rdstate2offset(buf->rdstate);
3565     + u64 unionfs_ino = ino;
3566     +
3567     + err = buf->filldir(buf->dirent, name, namelen, pos,
3568     + unionfs_ino, d_type);
3569     + buf->rdstate->offset++;
3570     + verify_rdstate_offset(buf->rdstate);
3571     + }
3572     + /*
3573     + * If we did fill it, stuff it in our hash, otherwise return an
3574     + * error.
3575     + */
3576     + if (err) {
3577     + buf->filldir_error = err;
3578     + goto out;
3579     + }
3580     + buf->entries_written++;
3581     + err = add_filldir_node(buf->rdstate, name, namelen,
3582     + buf->rdstate->bindex, is_whiteout);
3583     + if (err)
3584     + buf->filldir_error = err;
3585     +
3586     +out:
3587     + return err;
3588     +}
3589     +
3590     +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3591     +{
3592     + int err = 0;
3593     + struct file *lower_file = NULL;
3594     + struct dentry *dentry = file->f_path.dentry;
3595     + struct dentry *parent;
3596     + struct inode *inode = NULL;
3597     + struct unionfs_getdents_callback buf;
3598     + struct unionfs_dir_state *uds;
3599     + int bend;
3600     + loff_t offset;
3601     +
3602     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3603     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3604     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3605     +
3606     + err = unionfs_file_revalidate(file, parent, false);
3607     + if (unlikely(err))
3608     + goto out;
3609     +
3610     + inode = dentry->d_inode;
3611     +
3612     + uds = UNIONFS_F(file)->rdstate;
3613     + if (!uds) {
3614     + if (file->f_pos == DIREOF) {
3615     + goto out;
3616     + } else if (file->f_pos > 0) {
3617     + uds = find_rdstate(inode, file->f_pos);
3618     + if (unlikely(!uds)) {
3619     + err = -ESTALE;
3620     + goto out;
3621     + }
3622     + UNIONFS_F(file)->rdstate = uds;
3623     + } else {
3624     + init_rdstate(file);
3625     + uds = UNIONFS_F(file)->rdstate;
3626     + }
3627     + }
3628     + bend = fbend(file);
3629     +
3630     + while (uds->bindex <= bend) {
3631     + lower_file = unionfs_lower_file_idx(file, uds->bindex);
3632     + if (!lower_file) {
3633     + uds->bindex++;
3634     + uds->dirpos = 0;
3635     + continue;
3636     + }
3637     +
3638     + /* prepare callback buffer */
3639     + buf.filldir_called = 0;
3640     + buf.filldir_error = 0;
3641     + buf.entries_written = 0;
3642     + buf.dirent = dirent;
3643     + buf.filldir = filldir;
3644     + buf.rdstate = uds;
3645     + buf.sb = inode->i_sb;
3646     +
3647     + /* Read starting from where we last left off. */
3648     + offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3649     + if (offset < 0) {
3650     + err = offset;
3651     + goto out;
3652     + }
3653     + err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3654     +
3655     + /* Save the position for when we continue. */
3656     + offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3657     + if (offset < 0) {
3658     + err = offset;
3659     + goto out;
3660     + }
3661     + uds->dirpos = offset;
3662     +
3663     + /* Copy the atime. */
3664     + fsstack_copy_attr_atime(inode,
3665     + lower_file->f_path.dentry->d_inode);
3666     +
3667     + if (err < 0)
3668     + goto out;
3669     +
3670     + if (buf.filldir_error)
3671     + break;
3672     +
3673     + if (!buf.entries_written) {
3674     + uds->bindex++;
3675     + uds->dirpos = 0;
3676     + }
3677     + }
3678     +
3679     + if (!buf.filldir_error && uds->bindex >= bend) {
3680     + /* Save the number of hash entries for next time. */
3681     + UNIONFS_I(inode)->hashsize = uds->hashentries;
3682     + free_rdstate(uds);
3683     + UNIONFS_F(file)->rdstate = NULL;
3684     + file->f_pos = DIREOF;
3685     + } else {
3686     + file->f_pos = rdstate2offset(uds);
3687     + }
3688     +
3689     +out:
3690     + if (!err)
3691     + unionfs_check_file(file);
3692     + unionfs_unlock_dentry(dentry);
3693     + unionfs_unlock_parent(dentry, parent);
3694     + unionfs_read_unlock(dentry->d_sb);
3695     + return err;
3696     +}
3697     +
3698     +/*
3699     + * This is not meant to be a generic repositioning function. If you do
3700     + * things that aren't supported, then we return EINVAL.
3701     + *
3702     + * What is allowed:
3703     + * (1) seeking to the same position that you are currently at
3704     + * This really has no effect, but returns where you are.
3705     + * (2) seeking to the beginning of the file
3706     + * This throws out all state, and lets you begin again.
3707     + */
3708     +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3709     +{
3710     + struct unionfs_dir_state *rdstate;
3711     + struct dentry *dentry = file->f_path.dentry;
3712     + struct dentry *parent;
3713     + loff_t err;
3714     +
3715     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3716     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3717     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3718     +
3719     + err = unionfs_file_revalidate(file, parent, false);
3720     + if (unlikely(err))
3721     + goto out;
3722     +
3723     + rdstate = UNIONFS_F(file)->rdstate;
3724     +
3725     + /*
3726     + * we let users seek to their current position, but not anywhere
3727     + * else.
3728     + */
3729     + if (!offset) {
3730     + switch (origin) {
3731     + case SEEK_SET:
3732     + if (rdstate) {
3733     + free_rdstate(rdstate);
3734     + UNIONFS_F(file)->rdstate = NULL;
3735     + }
3736     + init_rdstate(file);
3737     + err = 0;
3738     + break;
3739     + case SEEK_CUR:
3740     + err = file->f_pos;
3741     + break;
3742     + case SEEK_END:
3743     + /* Unsupported, because we would break everything. */
3744     + err = -EINVAL;
3745     + break;
3746     + }
3747     + } else {
3748     + switch (origin) {
3749     + case SEEK_SET:
3750     + if (rdstate) {
3751     + if (offset == rdstate2offset(rdstate))
3752     + err = offset;
3753     + else if (file->f_pos == DIREOF)
3754     + err = DIREOF;
3755     + else
3756     + err = -EINVAL;
3757     + } else {
3758     + struct inode *inode;
3759     + inode = dentry->d_inode;
3760     + rdstate = find_rdstate(inode, offset);
3761     + if (rdstate) {
3762     + UNIONFS_F(file)->rdstate = rdstate;
3763     + err = rdstate->offset;
3764     + } else {
3765     + err = -EINVAL;
3766     + }
3767     + }
3768     + break;
3769     + case SEEK_CUR:
3770     + case SEEK_END:
3771     + /* Unsupported, because we would break everything. */
3772     + err = -EINVAL;
3773     + break;
3774     + }
3775     + }
3776     +
3777     +out:
3778     + if (!err)
3779     + unionfs_check_file(file);
3780     + unionfs_unlock_dentry(dentry);
3781     + unionfs_unlock_parent(dentry, parent);
3782     + unionfs_read_unlock(dentry->d_sb);
3783     + return err;
3784     +}
3785     +
3786     +/*
3787     + * Trimmed directory operations; we shouldn't pass everything down since
3788     + * we don't want to operate on partial directories.
3789     + */
3790     +struct file_operations unionfs_dir_fops = {
3791     + .llseek = unionfs_dir_llseek,
3792     + .read = generic_read_dir,
3793     + .readdir = unionfs_readdir,
3794     + .unlocked_ioctl = unionfs_ioctl,
3795     + .open = unionfs_open,
3796     + .release = unionfs_file_release,
3797     + .flush = unionfs_flush,
3798     + .fsync = unionfs_fsync,
3799     + .fasync = unionfs_fasync,
3800     +};
3801     diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3802     new file mode 100644
3803     index 0000000..033343b
3804     --- /dev/null
3805     +++ b/fs/unionfs/dirhelper.c
3806     @@ -0,0 +1,158 @@
3807     +/*
3808     + * Copyright (c) 2003-2010 Erez Zadok
3809     + * Copyright (c) 2003-2006 Charles P. Wright
3810     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3811     + * Copyright (c) 2005-2006 Junjiro Okajima
3812     + * Copyright (c) 2005 Arun M. Krishnakumar
3813     + * Copyright (c) 2004-2006 David P. Quigley
3814     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3815     + * Copyright (c) 2003 Puja Gupta
3816     + * Copyright (c) 2003 Harikesavan Krishnan
3817     + * Copyright (c) 2003-2010 Stony Brook University
3818     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
3819     + *
3820     + * This program is free software; you can redistribute it and/or modify
3821     + * it under the terms of the GNU General Public License version 2 as
3822     + * published by the Free Software Foundation.
3823     + */
3824     +
3825     +#include "union.h"
3826     +
3827     +#define RD_NONE 0
3828     +#define RD_CHECK_EMPTY 1
3829     +/* The callback structure for check_empty. */
3830     +struct unionfs_rdutil_callback {
3831     + int err;
3832     + int filldir_called;
3833     + struct unionfs_dir_state *rdstate;
3834     + int mode;
3835     +};
3836     +
3837     +/* This filldir function makes sure only whiteouts exist within a directory. */
3838     +static int readdir_util_callback(void *dirent, const char *oname, int namelen,
3839     + loff_t offset, u64 ino, unsigned int d_type)
3840     +{
3841     + int err = 0;
3842     + struct unionfs_rdutil_callback *buf = dirent;
3843     + int is_whiteout;
3844     + struct filldir_node *found;
3845     + char *name = (char *) oname;
3846     +
3847     + buf->filldir_called = 1;
3848     +
3849     + if (name[0] == '.' && (namelen == 1 ||
3850     + (name[1] == '.' && namelen == 2)))
3851     + goto out;
3852     +
3853     + is_whiteout = is_whiteout_name(&name, &namelen);
3854     +
3855     + found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3856     + /* If it was found in the table there was a previous whiteout. */
3857     + if (found)
3858     + goto out;
3859     +
3860     + /*
3861     + * if it wasn't found and isn't a whiteout, the directory isn't
3862     + * empty.
3863     + */
3864     + err = -ENOTEMPTY;
3865     + if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout)
3866     + goto out;
3867     +
3868     + err = add_filldir_node(buf->rdstate, name, namelen,
3869     + buf->rdstate->bindex, is_whiteout);
3870     +
3871     +out:
3872     + buf->err = err;
3873     + return err;
3874     +}
3875     +
3876     +/* Is a directory logically empty? */
3877     +int check_empty(struct dentry *dentry, struct dentry *parent,
3878     + struct unionfs_dir_state **namelist)
3879     +{
3880     + int err = 0;
3881     + struct dentry *lower_dentry = NULL;
3882     + struct vfsmount *mnt;
3883     + struct super_block *sb;
3884     + struct file *lower_file;
3885     + struct unionfs_rdutil_callback *buf = NULL;
3886     + int bindex, bstart, bend, bopaque;
3887     +
3888     + sb = dentry->d_sb;
3889     +
3890     +
3891     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3892     +
3893     + err = unionfs_partial_lookup(dentry, parent);
3894     + if (err)
3895     + goto out;
3896     +
3897     + bstart = dbstart(dentry);
3898     + bend = dbend(dentry);
3899     + bopaque = dbopaque(dentry);
3900     + if (0 <= bopaque && bopaque < bend)
3901     + bend = bopaque;
3902     +
3903     + buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
3904     + if (unlikely(!buf)) {
3905     + err = -ENOMEM;
3906     + goto out;
3907     + }
3908     + buf->err = 0;
3909     + buf->mode = RD_CHECK_EMPTY;
3910     + buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
3911     + if (unlikely(!buf->rdstate)) {
3912     + err = -ENOMEM;
3913     + goto out;
3914     + }
3915     +
3916     + /* Process the lower directories with readdir_util_callback as filldir. */
3917     + for (bindex = bstart; bindex <= bend; bindex++) {
3918     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3919     + if (!lower_dentry)
3920     + continue;
3921     + if (!lower_dentry->d_inode)
3922     + continue;
3923     + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
3924     + continue;
3925     +
3926     + dget(lower_dentry);
3927     + mnt = unionfs_mntget(dentry, bindex);
3928     + branchget(sb, bindex);
3929     + lower_file = dentry_open(lower_dentry, mnt, O_RDONLY, current_cred());
3930     + if (IS_ERR(lower_file)) {
3931     + err = PTR_ERR(lower_file);
3932     + branchput(sb, bindex);
3933     + goto out;
3934     + }
3935     +
3936     + do {
3937     + buf->filldir_called = 0;
3938     + buf->rdstate->bindex = bindex;
3939     + err = vfs_readdir(lower_file,
3940     + readdir_util_callback, buf);
3941     + if (buf->err)
3942     + err = buf->err;
3943     + } while ((err >= 0) && buf->filldir_called);
3944     +
3945     + /* fput calls dput for lower_dentry */
3946     + fput(lower_file);
3947     + branchput(sb, bindex);
3948     +
3949     + if (err < 0)
3950     + goto out;
3951     + }
3952     +
3953     +out:
3954     + if (buf) {
3955     + if (namelist && !err)
3956     + *namelist = buf->rdstate;
3957     + else if (buf->rdstate)
3958     + free_rdstate(buf->rdstate);
3959     + kfree(buf);
3960     + }
3961     +
3962     +
3963     + return err;
3964     +}
3965     diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
3966     new file mode 100644
3967     index 0000000..5b77eac
3968     --- /dev/null
3969     +++ b/fs/unionfs/fanout.h
3970     @@ -0,0 +1,407 @@
3971     +/*
3972     + * Copyright (c) 2003-2010 Erez Zadok
3973     + * Copyright (c) 2003-2006 Charles P. Wright
3974     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3975     + * Copyright (c) 2005 Arun M. Krishnakumar
3976     + * Copyright (c) 2004-2006 David P. Quigley
3977     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3978     + * Copyright (c) 2003 Puja Gupta
3979     + * Copyright (c) 2003 Harikesavan Krishnan
3980     + * Copyright (c) 2003-2010 Stony Brook University
3981     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
3982     + *
3983     + * This program is free software; you can redistribute it and/or modify
3984     + * it under the terms of the GNU General Public License version 2 as
3985     + * published by the Free Software Foundation.
3986     + */
3987     +
3988     +#ifndef _FANOUT_H_
3989     +#define _FANOUT_H_
3990     +
3991     +/*
3992     + * Inode to private data
3993     + *
3994     + * Since we use containers and the struct inode is _inside_ the
3995     + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
3996     + * inode pointer), return a valid non-NULL pointer.
3997     + */
3998     +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
3999     +{
4000     + return container_of(inode, struct unionfs_inode_info, vfs_inode);
4001     +}
4002     +
4003     +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4004     +#define ibend(ino) (UNIONFS_I(ino)->bend)
4005     +
4006     +/* Dentry to private data */
4007     +#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata)
4008     +#define dbstart(dent) (UNIONFS_D(dent)->bstart)
4009     +#define dbend(dent) (UNIONFS_D(dent)->bend)
4010     +#define dbopaque(dent) (UNIONFS_D(dent)->bopaque)
4011     +
4012     +/* Superblock to private data */
4013     +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4014     +#define sbstart(sb) 0
4015     +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4016     +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4017     +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4018     +
4019     +/* File to private Data */
4020     +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4021     +#define fbstart(file) (UNIONFS_F(file)->bstart)
4022     +#define fbend(file) (UNIONFS_F(file)->bend)
4023     +
4024     +/* helpers to manipulate branch IDs stored in our superblock */
4025     +static inline int branch_id(struct super_block *sb, int index)
4026     +{
4027     + BUG_ON(!sb || index < 0);
4028     + return UNIONFS_SB(sb)->data[index].branch_id;
4029     +}
4030     +
4031     +static inline void set_branch_id(struct super_block *sb, int index, int val)
4032     +{
4033     + BUG_ON(!sb || index < 0);
4034     + UNIONFS_SB(sb)->data[index].branch_id = val;
4035     +}
4036     +
4037     +static inline void new_branch_id(struct super_block *sb, int index)
4038     +{
4039     + BUG_ON(!sb || index < 0);
4040     + set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4041     +}
4042     +
4043     +/*
4044     + * Find new index of matching branch with an existing superblock of a known
4045     + * (possibly old) id. This is needed because branches could have been
4046     + * added/deleted causing the branches of any open files to shift.
4047     + *
4048     + * @sb: the new superblock which may have new/different branch IDs
4049     + * @id: the old/existing id we're looking for
4050     + * Returns index of newly found branch (0 or greater), -1 otherwise.
4051     + */
4052     +static inline int branch_id_to_idx(struct super_block *sb, int id)
4053     +{
4054     + int i;
4055     + for (i = 0; i < sbmax(sb); i++) {
4056     + if (branch_id(sb, i) == id)
4057     + return i;
4058     + }
4059     + /* in the non-ODF code, this should really never happen */
4060     + printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4061     + return -1;
4062     +}
4063     +
4064     +/* File to lower file. */
4065     +static inline struct file *unionfs_lower_file(const struct file *f)
4066     +{
4067     + BUG_ON(!f);
4068     + return UNIONFS_F(f)->lower_files[fbstart(f)];
4069     +}
4070     +
4071     +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4072     + int index)
4073     +{
4074     + BUG_ON(!f || index < 0);
4075     + return UNIONFS_F(f)->lower_files[index];
4076     +}
4077     +
4078     +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4079     + struct file *val)
4080     +{
4081     + BUG_ON(!f || index < 0);
4082     + UNIONFS_F(f)->lower_files[index] = val;
4083     + /* save branch ID (may be redundant?) */
4084     + UNIONFS_F(f)->saved_branch_ids[index] =
4085     + branch_id((f)->f_path.dentry->d_sb, index);
4086     +}
4087     +
4088     +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4089     +{
4090     + BUG_ON(!f);
4091     + unionfs_set_lower_file_idx((f), fbstart(f), (val));
4092     +}
4093     +
4094     +/* Inode to lower inode. */
4095     +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4096     +{
4097     + BUG_ON(!i);
4098     + return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4099     +}
4100     +
4101     +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4102     + int index)
4103     +{
4104     + BUG_ON(!i || index < 0);
4105     + return UNIONFS_I(i)->lower_inodes[index];
4106     +}
4107     +
4108     +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4109     + struct inode *val)
4110     +{
4111     + BUG_ON(!i || index < 0);
4112     + UNIONFS_I(i)->lower_inodes[index] = val;
4113     +}
4114     +
4115     +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4116     +{
4117     + BUG_ON(!i);
4118     + UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4119     +}
4120     +
4121     +/* Superblock to lower superblock. */
4122     +static inline struct super_block *unionfs_lower_super(
4123     + const struct super_block *sb)
4124     +{
4125     + BUG_ON(!sb);
4126     + return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4127     +}
4128     +
4129     +static inline struct super_block *unionfs_lower_super_idx(
4130     + const struct super_block *sb,
4131     + int index)
4132     +{
4133     + BUG_ON(!sb || index < 0);
4134     + return UNIONFS_SB(sb)->data[index].sb;
4135     +}
4136     +
4137     +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4138     + int index,
4139     + struct super_block *val)
4140     +{
4141     + BUG_ON(!sb || index < 0);
4142     + UNIONFS_SB(sb)->data[index].sb = val;
4143     +}
4144     +
4145     +static inline void unionfs_set_lower_super(struct super_block *sb,
4146     + struct super_block *val)
4147     +{
4148     + BUG_ON(!sb);
4149     + UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4150     +}
4151     +
4152     +/* Branch count macros. */
4153     +static inline int branch_count(const struct super_block *sb, int index)
4154     +{
4155     + BUG_ON(!sb || index < 0);
4156     + return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4157     +}
4158     +
4159     +static inline void set_branch_count(struct super_block *sb, int index, int val)
4160     +{
4161     + BUG_ON(!sb || index < 0);
4162     + atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4163     +}
4164     +
4165     +static inline void branchget(struct super_block *sb, int index)
4166     +{
4167     + BUG_ON(!sb || index < 0);
4168     + atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4169     +}
4170     +
4171     +static inline void branchput(struct super_block *sb, int index)
4172     +{
4173     + BUG_ON(!sb || index < 0);
4174     + atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4175     +}
4176     +
4177     +/* Dentry macros */
4178     +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4179     + struct dentry *val)
4180     +{
4181     + BUG_ON(!dent || index < 0);
4182     + UNIONFS_D(dent)->lower_paths[index].dentry = val;
4183     +}
4184     +
4185     +static inline struct dentry *unionfs_lower_dentry_idx(
4186     + const struct dentry *dent,
4187     + int index)
4188     +{
4189     + BUG_ON(!dent || index < 0);
4190     + return UNIONFS_D(dent)->lower_paths[index].dentry;
4191     +}
4192     +
4193     +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4194     +{
4195     + BUG_ON(!dent);
4196     + return unionfs_lower_dentry_idx(dent, dbstart(dent));
4197     +}
4198     +
4199     +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4200     + struct vfsmount *mnt)
4201     +{
4202     + BUG_ON(!dent || index < 0);
4203     + UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4204     +}
4205     +
4206     +static inline struct vfsmount *unionfs_lower_mnt_idx(
4207     + const struct dentry *dent,
4208     + int index)
4209     +{
4210     + BUG_ON(!dent || index < 0);
4211     + return UNIONFS_D(dent)->lower_paths[index].mnt;
4212     +}
4213     +
4214     +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4215     +{
4216     + BUG_ON(!dent);
4217     + return unionfs_lower_mnt_idx(dent, dbstart(dent));
4218     +}
4219     +
4220     +/* Macros for locking a dentry. */
4221     +enum unionfs_dentry_lock_class {
4222     + UNIONFS_DMUTEX_NORMAL,
4223     + UNIONFS_DMUTEX_ROOT,
4224     + UNIONFS_DMUTEX_PARENT,
4225     + UNIONFS_DMUTEX_CHILD,
4226     + UNIONFS_DMUTEX_WHITEOUT,
4227     + UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
4228     + UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */
4229     +};
4230     +
4231     +static inline void unionfs_lock_dentry(struct dentry *d,
4232     + unsigned int subclass)
4233     +{
4234     + BUG_ON(!d);
4235     + mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);
4236     +}
4237     +
4238     +static inline void unionfs_unlock_dentry(struct dentry *d)
4239     +{
4240     + BUG_ON(!d);
4241     + mutex_unlock(&UNIONFS_D(d)->lock);
4242     +}
4243     +
4244     +static inline struct dentry *unionfs_lock_parent(struct dentry *d,
4245     + unsigned int subclass)
4246     +{
4247     + struct dentry *p;
4248     +
4249     + BUG_ON(!d);
4250     + p = dget_parent(d);
4251     + if (p != d)
4252     + mutex_lock_nested(&UNIONFS_D(p)->lock, subclass);
4253     + return p;
4254     +}
4255     +
4256     +static inline void unionfs_unlock_parent(struct dentry *d, struct dentry *p)
4257     +{
4258     + BUG_ON(!d);
4259     + BUG_ON(!p);
4260     + if (p != d) {
4261     + BUG_ON(!mutex_is_locked(&UNIONFS_D(p)->lock));
4262     + mutex_unlock(&UNIONFS_D(p)->lock);
4263     + }
4264     + dput(p);
4265     +}
4266     +
4267     +static inline void verify_locked(struct dentry *d)
4268     +{
4269     + BUG_ON(!d);
4270     + BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4271     +}
4272     +
4273     +/* macros to put lower objects */
4274     +
4275     +/*
4276     + * iput lower inodes of an unionfs inode, from bstart to bend. If
4277     + * @free_lower is true, then also kfree the memory used to hold the lower
4278     + * object pointers.
4279     + */
4280     +static inline void iput_lowers(struct inode *inode,
4281     + int bstart, int bend, bool free_lower)
4282     +{
4283     + struct inode *lower_inode;
4284     + int bindex;
4285     +
4286     + BUG_ON(!inode);
4287     + BUG_ON(!UNIONFS_I(inode));
4288     + BUG_ON(bstart < 0);
4289     +
4290     + for (bindex = bstart; bindex <= bend; bindex++) {
4291     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4292     + if (lower_inode) {
4293     + unionfs_set_lower_inode_idx(inode, bindex, NULL);
4294     + /* see Documentation/filesystems/unionfs/issues.txt */
4295     + lockdep_off();
4296     + iput(lower_inode);
4297     + lockdep_on();
4298     + }
4299     + }
4300     +
4301     + if (free_lower) {
4302     + kfree(UNIONFS_I(inode)->lower_inodes);
4303     + UNIONFS_I(inode)->lower_inodes = NULL;
4304     + }
4305     +}
4306     +
4307     +/* iput all lower inodes, and reset start/end branch indices to -1 */
4308     +static inline void iput_lowers_all(struct inode *inode, bool free_lower)
4309     +{
4310     + int bstart, bend;
4311     +
4312     + BUG_ON(!inode);
4313     + BUG_ON(!UNIONFS_I(inode));
4314     + bstart = ibstart(inode);
4315     + bend = ibend(inode);
4316     + BUG_ON(bstart < 0);
4317     +
4318     + iput_lowers(inode, bstart, bend, free_lower);
4319     + ibstart(inode) = ibend(inode) = -1;
4320     +}
4321     +
4322     +/*
4323     + * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from
4324     + * bstart to bend. If @free_lower is true, then also kfree the memory used
4325     + * to hold the lower object pointers.
4326     + *
4327     + * XXX: implement using path_put VFS macros
4328     + */
4329     +static inline void path_put_lowers(struct dentry *dentry,
4330     + int bstart, int bend, bool free_lower)
4331     +{
4332     + struct dentry *lower_dentry;
4333     + struct vfsmount *lower_mnt;
4334     + int bindex;
4335     +
4336     + BUG_ON(!dentry);
4337     + BUG_ON(!UNIONFS_D(dentry));
4338     + BUG_ON(bstart < 0);
4339     +
4340     + for (bindex = bstart; bindex <= bend; bindex++) {
4341     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4342     + if (lower_dentry) {
4343     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
4344     + dput(lower_dentry);
4345     + }
4346     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
4347     + if (lower_mnt) {
4348     + unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
4349     + mntput(lower_mnt);
4350     + }
4351     + }
4352     +
4353     + if (free_lower) {
4354     + kfree(UNIONFS_D(dentry)->lower_paths);
4355     + UNIONFS_D(dentry)->lower_paths = NULL;
4356     + }
4357     +}
4358     +
4359     +/*
4360     + * dput/mntput all lower dentries and vfsmounts, and reset start/end branch
4361     + * indices to -1.
4362     + */
4363     +static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower)
4364     +{
4365     + int bstart, bend;
4366     +
4367     + BUG_ON(!dentry);
4368     + BUG_ON(!UNIONFS_D(dentry));
4369     + bstart = dbstart(dentry);
4370     + bend = dbend(dentry);
4371     + BUG_ON(bstart < 0);
4372     +
4373     + path_put_lowers(dentry, bstart, bend, free_lower);
4374     + dbstart(dentry) = dbend(dentry) = -1;
4375     +}
4376     +
4377     +#endif /* not _FANOUT_H */
4378     diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4379     new file mode 100644
4380     index 0000000..5a8f4e0
4381     --- /dev/null
4382     +++ b/fs/unionfs/file.c
4383     @@ -0,0 +1,379 @@
4384     +/*
4385     + * Copyright (c) 2003-2010 Erez Zadok
4386     + * Copyright (c) 2003-2006 Charles P. Wright
4387     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4388     + * Copyright (c) 2005-2006 Junjiro Okajima
4389     + * Copyright (c) 2005 Arun M. Krishnakumar
4390     + * Copyright (c) 2004-2006 David P. Quigley
4391     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4392     + * Copyright (c) 2003 Puja Gupta
4393     + * Copyright (c) 2003 Harikesavan Krishnan
4394     + * Copyright (c) 2003-2010 Stony Brook University
4395     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
4396     + *
4397     + * This program is free software; you can redistribute it and/or modify
4398     + * it under the terms of the GNU General Public License version 2 as
4399     + * published by the Free Software Foundation.
4400     + */
4401     +
4402     +#include "union.h"
4403     +
4404     +static ssize_t unionfs_read(struct file *file, char __user *buf,
4405     + size_t count, loff_t *ppos)
4406     +{
4407     + ssize_t err; /* same width as vfs_read()'s result and our return type */
4408     + struct file *lower_file;
4409     + struct dentry *dentry = file->f_path.dentry;
4410     + struct dentry *parent;
4411     +
4412     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4413     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4414     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4415     +
4416     + err = unionfs_file_revalidate(file, parent, false);
4417     + if (unlikely(err))
4418     + goto out;
4419     +
4420     + lower_file = unionfs_lower_file(file);
4421     + err = vfs_read(lower_file, buf, count, ppos);
4422     + /* update our inode atime upon a successful lower read */
4423     + if (err >= 0) {
4424     + fsstack_copy_attr_atime(dentry->d_inode,
4425     + lower_file->f_path.dentry->d_inode);
4426     + unionfs_check_file(file);
4427     + }
4428     +
4429     +out:
4430     + unionfs_unlock_dentry(dentry);
4431     + unionfs_unlock_parent(dentry, parent);
4432     + unionfs_read_unlock(dentry->d_sb);
4433     + return err;
4434     +}
4435     +
4436     +static ssize_t unionfs_write(struct file *file, const char __user *buf,
4437     + size_t count, loff_t *ppos)
4438     +{
4439     + ssize_t err = 0; /* same width as vfs_write()'s result and our return type */
4440     + struct file *lower_file;
4441     + struct dentry *dentry = file->f_path.dentry;
4442     + struct dentry *parent;
4443     +
4444     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4445     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4446     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4447     +
4448     + err = unionfs_file_revalidate(file, parent, true);
4449     + if (unlikely(err))
4450     + goto out;
4451     +
4452     + lower_file = unionfs_lower_file(file);
4453     + err = vfs_write(lower_file, buf, count, ppos);
4454     + /* update our inode times+sizes upon a successful lower write */
4455     + if (err >= 0) {
4456     + fsstack_copy_inode_size(dentry->d_inode,
4457     + lower_file->f_path.dentry->d_inode);
4458     + fsstack_copy_attr_times(dentry->d_inode,
4459     + lower_file->f_path.dentry->d_inode);
4460     + UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */
4461     + unionfs_check_file(file);
4462     + }
4463     +
4464     +out:
4465     + unionfs_unlock_dentry(dentry);
4466     + unionfs_unlock_parent(dentry, parent);
4467     + unionfs_read_unlock(dentry->d_sb);
4468     + return err;
4469     +}
4470     +
4471     +static int unionfs_file_readdir(struct file *file, void *dirent,
4472     + filldir_t filldir)
4473     +{
4474     + return -ENOTDIR;
4475     +}
4476     +
4477     +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4478     +{
4479     + int err = 0;
4480     + bool willwrite;
4481     + struct file *lower_file;
4482     + struct dentry *dentry = file->f_path.dentry;
4483     + struct dentry *parent;
4484     + const struct vm_operations_struct *saved_vm_ops = NULL;
4485     +
4486     + /*
4487     + * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
4488     + * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
4489     + * has been causing false positives in file system stacking layers.
4490     + * In particular, our ->mmap is called after sys_mmap2 already holds
4491     + * mmap_sem, and then we lock our own mutexes; but earlier, it's
4492     + * possible for lockdep to have seen our mutexes taken first, followed
4493     + * by a call to a lower ->readdir which could call might_fault. The
4494     + * different ordering of the locks is what lockdep complains about
4495     + * -- unnecessarily. Therefore, we have no choice but to turn
4496     + * lockdep off temporarily here. Note: the comments
4497     + * inside might_sleep also suggest that it would have been
4498     + * nicer to only annotate paths that need that might_lock_read.
4499     + */
4500     + lockdep_off();
4501     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4502     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4503     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4504     +
4505     + /* This might be deferred to mmap's writepage */
4506     + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4507     + err = unionfs_file_revalidate(file, parent, willwrite);
4508     + if (unlikely(err))
4509     + goto out;
4510     + unionfs_check_file(file);
4511     +
4512     + /*
4513     + * File systems which do not implement ->writepage may use
4514     + * generic_file_readonly_mmap as their ->mmap op. If you call
4515     + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4516     + * But we cannot call the lower ->mmap op, so we can't tell that
4517     + * writeable mappings won't work. Therefore, our only choice is to
4518     + * check if the lower file system supports the ->writepage, and if
4519     + * not, return EINVAL (the same error that
4520     + * generic_file_readonly_mmap returns in that case).
4521     + */
4522     + lower_file = unionfs_lower_file(file);
4523     + if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4524     + err = -EINVAL;
4525     + printk(KERN_ERR "unionfs: branch %d file system does not "
4526     + "support writeable mmap\n", fbstart(file));
4527     + goto out;
4528     + }
4529     +
4530     + /*
4531     + * find and save lower vm_ops.
4532     + *
4533     + * XXX: the VFS should have a cleaner way of finding the lower vm_ops
4534     + */
4535     + if (!UNIONFS_F(file)->lower_vm_ops) {
4536     + err = lower_file->f_op->mmap(lower_file, vma);
4537     + if (err) {
4538     + printk(KERN_ERR "unionfs: lower mmap failed %d\n", err);
4539     + goto out;
4540     + }
4541     + saved_vm_ops = vma->vm_ops;
4542     + err = do_munmap(current->mm, vma->vm_start,
4543     + vma->vm_end - vma->vm_start);
4544     + if (err) {
4545     + printk(KERN_ERR "unionfs: do_munmap failed %d\n", err);
4546     + goto out;
4547     + }
4548     + }
4549     +
4550     + file->f_mapping->a_ops = &unionfs_dummy_aops;
4551     + err = generic_file_mmap(file, vma);
4552     + file->f_mapping->a_ops = &unionfs_aops;
4553     + if (err) {
4554     + printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err);
4555     + goto out;
4556     + }
4557     + vma->vm_ops = &unionfs_vm_ops;
4558     + if (!UNIONFS_F(file)->lower_vm_ops)
4559     + UNIONFS_F(file)->lower_vm_ops = saved_vm_ops;
4560     +
4561     +out:
4562     + if (!err) {
4563     + /* copyup could cause parent dir times to change */
4564     + unionfs_copy_attr_times(parent->d_inode);
4565     + unionfs_check_file(file);
4566     + }
4567     + unionfs_unlock_dentry(dentry);
4568     + unionfs_unlock_parent(dentry, parent);
4569     + unionfs_read_unlock(dentry->d_sb);
4570     + lockdep_on();
4571     + return err;
4572     +}
4573     +
4574     +int unionfs_fsync(struct file *file, int datasync)
4575     +{
4576     + int bindex, bstart, bend;
4577     + struct file *lower_file;
4578     + struct dentry *dentry = file->f_path.dentry;
4579     + struct dentry *lower_dentry;
4580     + struct dentry *parent;
4581     + struct inode *lower_inode, *inode;
4582     + int err = -EINVAL;
4583     +
4584     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4585     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4586     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4587     +
4588     + err = unionfs_file_revalidate(file, parent, true);
4589     + if (unlikely(err))
4590     + goto out;
4591     + unionfs_check_file(file);
4592     +
4593     + bstart = fbstart(file);
4594     + bend = fbend(file);
4595     + if (bstart < 0 || bend < 0)
4596     + goto out;
4597     +
4598     + inode = dentry->d_inode;
4599     + if (unlikely(!inode)) {
4600     + printk(KERN_ERR
4601     + "unionfs: null lower inode in unionfs_fsync\n");
4602     + goto out;
4603     + }
4604     + for (bindex = bstart; bindex <= bend; bindex++) {
4605     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4606     + if (!lower_inode || !lower_inode->i_fop->fsync)
4607     + continue;
4608     + lower_file = unionfs_lower_file_idx(file, bindex);
4609     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4610     + mutex_lock(&lower_inode->i_mutex);
4611     + err = lower_inode->i_fop->fsync(lower_file, datasync);
4612     + if (!err && bindex == bstart)
4613     + fsstack_copy_attr_times(inode, lower_inode);
4614     + mutex_unlock(&lower_inode->i_mutex);
4615     + if (err)
4616     + goto out;
4617     + }
4618     +
4619     +out:
4620     + if (!err)
4621     + unionfs_check_file(file);
4622     + unionfs_unlock_dentry(dentry);
4623     + unionfs_unlock_parent(dentry, parent);
4624     + unionfs_read_unlock(dentry->d_sb);
4625     + return err;
4626     +}
4627     +
4628     +int unionfs_fasync(int fd, struct file *file, int flag)
4629     +{
4630     + int bindex, bstart, bend;
4631     + struct file *lower_file;
4632     + struct dentry *dentry = file->f_path.dentry;
4633     + struct dentry *parent;
4634     + struct inode *lower_inode, *inode;
4635     + int err = 0;
4636     +
4637     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4638     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4639     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4640     +
4641     + err = unionfs_file_revalidate(file, parent, true);
4642     + if (unlikely(err))
4643     + goto out;
4644     + unionfs_check_file(file);
4645     +
4646     + bstart = fbstart(file);
4647     + bend = fbend(file);
4648     + if (bstart < 0 || bend < 0)
4649     + goto out;
4650     +
4651     + inode = dentry->d_inode;
4652     + if (unlikely(!inode)) {
4653     + printk(KERN_ERR
4654     + "unionfs: null lower inode in unionfs_fasync\n");
4655     + goto out;
4656     + }
4657     + for (bindex = bstart; bindex <= bend; bindex++) {
4658     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
4659     + if (!lower_inode || !lower_inode->i_fop->fasync)
4660     + continue;
4661     + lower_file = unionfs_lower_file_idx(file, bindex);
4662     + mutex_lock(&lower_inode->i_mutex);
4663     + err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4664     + if (!err && bindex == bstart)
4665     + fsstack_copy_attr_times(inode, lower_inode);
4666     + mutex_unlock(&lower_inode->i_mutex);
4667     + if (err)
4668     + goto out;
4669     + }
4670     +
4671     +out:
4672     + if (!err)
4673     + unionfs_check_file(file);
4674     + unionfs_unlock_dentry(dentry);
4675     + unionfs_unlock_parent(dentry, parent);
4676     + unionfs_read_unlock(dentry->d_sb);
4677     + return err;
4678     +}
4679     +
4680     +static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos,
4681     + struct pipe_inode_info *pipe, size_t len,
4682     + unsigned int flags)
4683     +{
4684     + ssize_t err;
4685     + struct file *lower_file;
4686     + struct dentry *dentry = file->f_path.dentry;
4687     + struct dentry *parent;
4688     +
4689     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4690     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4691     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4692     +
4693     + err = unionfs_file_revalidate(file, parent, false);
4694     + if (unlikely(err))
4695     + goto out;
4696     +
4697     + lower_file = unionfs_lower_file(file);
4698     + err = vfs_splice_to(lower_file, ppos, pipe, len, flags);
4699     + /* update our inode atime upon a successful lower splice-read */
4700     + if (err >= 0) {
4701     + fsstack_copy_attr_atime(dentry->d_inode,
4702     + lower_file->f_path.dentry->d_inode);
4703     + unionfs_check_file(file);
4704     + }
4705     +
4706     +out:
4707     + unionfs_unlock_dentry(dentry);
4708     + unionfs_unlock_parent(dentry, parent);
4709     + unionfs_read_unlock(dentry->d_sb);
4710     + return err;
4711     +}
4712     +
4713     +static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe,
4714     + struct file *file, loff_t *ppos,
4715     + size_t len, unsigned int flags)
4716     +{
4717     + ssize_t err = 0;
4718     + struct file *lower_file;
4719     + struct dentry *dentry = file->f_path.dentry;
4720     + struct dentry *parent;
4721     +
4722     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4723     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4724     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4725     +
4726     + err = unionfs_file_revalidate(file, parent, true);
4727     + if (unlikely(err))
4728     + goto out;
4729     +
4730     + lower_file = unionfs_lower_file(file);
4731     + err = vfs_splice_from(pipe, lower_file, ppos, len, flags);
4732     + /* update our inode times+sizes upon a successful lower write */
4733     + if (err >= 0) {
4734     + fsstack_copy_inode_size(dentry->d_inode,
4735     + lower_file->f_path.dentry->d_inode);
4736     + fsstack_copy_attr_times(dentry->d_inode,
4737     + lower_file->f_path.dentry->d_inode);
4738     + unionfs_check_file(file);
4739     + }
4740     +
4741     +out:
4742     + unionfs_unlock_dentry(dentry);
4743     + unionfs_unlock_parent(dentry, parent);
4744     + unionfs_read_unlock(dentry->d_sb);
4745     + return err;
4746     +}
4747     +
4748     +struct file_operations unionfs_main_fops = {
4749     + .llseek = generic_file_llseek,
4750     + .read = unionfs_read,
4751     + .write = unionfs_write,
4752     + .readdir = unionfs_file_readdir,
4753     + .unlocked_ioctl = unionfs_ioctl,
4754     + .mmap = unionfs_mmap,
4755     + .open = unionfs_open,
4756     + .flush = unionfs_flush,
4757     + .release = unionfs_file_release,
4758     + .fsync = unionfs_fsync,
4759     + .fasync = unionfs_fasync,
4760     + .splice_read = unionfs_splice_read,
4761     + .splice_write = unionfs_splice_write,
4762     +};
4763     diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4764     new file mode 100644
4765     index 0000000..062163a
4766     --- /dev/null
4767     +++ b/fs/unionfs/inode.c
4768     @@ -0,0 +1,1055 @@
4769     +/*
4770     + * Copyright (c) 2003-2010 Erez Zadok
4771     + * Copyright (c) 2003-2006 Charles P. Wright
4772     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4773     + * Copyright (c) 2005-2006 Junjiro Okajima
4774     + * Copyright (c) 2005 Arun M. Krishnakumar
4775     + * Copyright (c) 2004-2006 David P. Quigley
4776     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4777     + * Copyright (c) 2003 Puja Gupta
4778     + * Copyright (c) 2003 Harikesavan Krishnan
4779     + * Copyright (c) 2003-2010 Stony Brook University
4780     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
4781     + *
4782     + * This program is free software; you can redistribute it and/or modify
4783     + * it under the terms of the GNU General Public License version 2 as
4784     + * published by the Free Software Foundation.
4785     + */
4786     +
4787     +#include "union.h"
4788     +
4789     +/*
4790     + * Find a writeable branch to create new object in. Checks all writeable
4791     + * branches of the parent inode, from istart to iend order; if none are
4792     + * suitable, also tries branch 0 (which may require a copyup).
4793     + *
4794     + * Return a lower_dentry we can use to create object in, or ERR_PTR.
4795     + */
4796     +static struct dentry *find_writeable_branch(struct inode *parent,
4797     + struct dentry *dentry)
4798     +{
4799     + int err = -EINVAL;
4800     + int bindex, istart, iend;
4801     + struct dentry *lower_dentry = NULL;
4802     +
4803     + istart = ibstart(parent);
4804     + iend = ibend(parent);
4805     + if (istart < 0)
4806     + goto out;
4807     +
4808     +begin:
4809     + for (bindex = istart; bindex <= iend; bindex++) {
4810     + /* skip non-writeable branches */
4811     + err = is_robranch_super(dentry->d_sb, bindex);
4812     + if (err) {
4813     + err = -EROFS;
4814     + continue;
4815     + }
4816     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4817     + if (!lower_dentry)
4818     + continue;
4819     + /*
4820     + * check for whiteouts in writeable branch, and remove them
4821     + * if necessary.
4822     + */
4823     + err = check_unlink_whiteout(dentry, lower_dentry, bindex);
4824     + if (err > 0) /* ignore if whiteout found and removed */
4825     + err = 0;
4826     + if (err)
4827     + continue;
4828     + /* if get here, we can write to the branch */
4829     + break;
4830     + }
4831     + /*
4832     + * If istart wasn't already branch 0, and we got any error, then try
4833     + * branch 0 (which may require copyup)
4834     + */
4835     + if (err && istart > 0) {
4836     + istart = iend = 0;
4837     + goto begin;
4838     + }
4839     +
4840     + /*
4841     + * If we tried even branch 0, and still got an error, abort. But if
4842     + * the error was an EROFS, then we should try to copyup.
4843     + */
4844     + if (err && err != -EROFS)
4845     + goto out;
4846     +
4847     + /*
4848     + * If we get here, then check if copyup needed. If lower_dentry is
4849     + * NULL, create the entire dentry directory structure in branch 0.
4850     + */
4851     + if (!lower_dentry) {
4852     + bindex = 0;
4853     + lower_dentry = create_parents(parent, dentry,
4854     + dentry->d_name.name, bindex);
4855     + if (IS_ERR(lower_dentry)) {
4856     + err = PTR_ERR(lower_dentry);
4857     + goto out;
4858     + }
4859     + }
4860     + err = 0; /* all's well */
4861     +out:
4862     + if (err)
4863     + return ERR_PTR(err);
4864     + return lower_dentry;
4865     +}
4866     +
4867     +static int unionfs_create(struct inode *dir, struct dentry *dentry,
4868     + int mode, struct nameidata *nd_unused)
4869     +{
4870     + int err = 0;
4871     + struct dentry *lower_dentry = NULL;
4872     + struct dentry *lower_parent_dentry = NULL;
4873     + struct dentry *parent;
4874     + int valid = 0;
4875     + struct nameidata lower_nd;
4876     +
4877     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4878     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4879     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4880     +
4881     + valid = __unionfs_d_revalidate(dentry, parent, false);
4882     + if (unlikely(!valid)) {
4883     + err = -ESTALE; /* same as what real_lookup does */
4884     + goto out;
4885     + }
4886     +
4887     + lower_dentry = find_writeable_branch(dir, dentry);
4888     + if (IS_ERR(lower_dentry)) {
4889     + err = PTR_ERR(lower_dentry);
4890     + goto out;
4891     + }
4892     +
4893     + lower_parent_dentry = lock_parent(lower_dentry);
4894     + if (IS_ERR(lower_parent_dentry)) {
4895     + err = PTR_ERR(lower_parent_dentry);
4896     + goto out; /* ERR_PTR must never reach unlock_dir() at out_unlock */
4897     + }
4898     +
4899     + err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
4900     + if (unlikely(err < 0))
4901     + goto out_unlock;
4902     + err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
4903     + &lower_nd);
4904     + release_lower_nd(&lower_nd, err);
4905     +
4906     + if (!err) {
4907     + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
4908     + if (!err) {
4909     + unionfs_copy_attr_times(dir);
4910     + fsstack_copy_inode_size(dir,
4911     + lower_parent_dentry->d_inode);
4912     + /* update no. of links on parent directory */
4913     + dir->i_nlink = unionfs_get_nlinks(dir);
4914     + }
4915     + }
4916     +
4917     +out_unlock:
4918     + unlock_dir(lower_parent_dentry);
4919     +out:
4920     + if (!err) {
4921     + unionfs_postcopyup_setmnt(dentry);
4922     + unionfs_check_inode(dir);
4923     + unionfs_check_dentry(dentry);
4924     + }
4925     + unionfs_unlock_dentry(dentry);
4926     + unionfs_unlock_parent(dentry, parent);
4927     + unionfs_read_unlock(dentry->d_sb);
4928     + return err;
4929     +}
4930     +
4931     +/*
4932     + * ->lookup: fill in @dentry via unionfs_lookup_full().  This is the only
4933     + * dentry-taking op that must NOT revalidate first: by definition we don't
4934     + * have a valid dentry yet.  Returns lookup_full's result or ERR_PTR(err).
4935     + */
4936     +static struct dentry *unionfs_lookup(struct inode *dir,
4937     + struct dentry *dentry,
4938     + struct nameidata *nd_unused)
4939     +{
4940     + struct dentry *ret, *parent;
4941     + int err = 0;
4942     +
4943     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4944     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4945     +
4946     + /*
4947     + * As long as we lock/dget the parent, we can skip validating the
4948     + * parent now; we may have to rebuild this dentry on the next
4949     + * ->d_revalidate, however.
4950     + */
4951     +
4952     + /* allocate dentry private data. We free it in ->d_release */
4953     + err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
4954     + if (unlikely(err)) {
4955     + ret = ERR_PTR(err);
4956     + goto out; /* skips the unlock_dentry below */
4957     + }
4958     +
4959     + ret = unionfs_lookup_full(dentry, parent, INTERPOSE_LOOKUP);
4960     +
4961     + if (!IS_ERR(ret)) {
4962     + if (ret)
4963     + dentry = ret; /* a non-NULL result replaces @dentry from here on */
4964     + /* lookup_full can return multiple positive dentries */
4965     + if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
4966     + BUG_ON(dbstart(dentry) < 0);
4967     + unionfs_postcopyup_release(dentry);
4968     + }
4969     + unionfs_copy_attr_times(dentry->d_inode);
4970     + }
4971     +
4972     + unionfs_check_inode(dir);
4973     + if (!IS_ERR(ret))
4974     + unionfs_check_dentry(dentry);
4975     + unionfs_check_dentry(parent);
4976     + unionfs_unlock_dentry(dentry); /* locked in new_dentry_private_data */
4977     +
4978     +out:
4979     + unionfs_unlock_parent(dentry, parent);
4980     + unionfs_read_unlock(dentry->d_sb);
4981     +
4982     + return ret;
4983     +}
4984     +/* ->link: make @new_dentry a hard link to @old_dentry's inode (same branch) */
4985     +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
4986     + struct dentry *new_dentry)
4987     +{
4988     + int err = 0;
4989     + struct dentry *lower_old_dentry = NULL;
4990     + struct dentry *lower_new_dentry = NULL;
4991     + struct dentry *lower_dir_dentry = NULL;
4992     + struct dentry *old_parent, *new_parent;
4993     + char *name = NULL; /* never assigned; only kfree()d at out: */
4994     + bool valid;
4995     +
4996     + unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4997     + old_parent = dget_parent(old_dentry);
4998     + new_parent = dget_parent(new_dentry);
4999     + unionfs_double_lock_parents(old_parent, new_parent);
5000     + unionfs_double_lock_dentry(old_dentry, new_dentry);
5001     +
5002     + valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
5003     + if (unlikely(!valid)) {
5004     + err = -ESTALE;
5005     + goto out;
5006     + }
5007     + if (new_dentry->d_inode) { /* only a positive target is revalidated */
5008     + valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
5009     + if (unlikely(!valid)) {
5010     + err = -ESTALE;
5011     + goto out;
5012     + }
5013     + }
5014     +
5015     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
5016     +
5017     + /* check for a whiteout in new dentry branch, and delete it */
5018     + err = check_unlink_whiteout(new_dentry, lower_new_dentry,
5019     + dbstart(new_dentry));
5020     + if (err > 0) { /* whiteout found and removed successfully */
5021     + lower_dir_dentry = dget_parent(lower_new_dentry);
5022     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5023     + dput(lower_dir_dentry);
5024     + dir->i_nlink = unionfs_get_nlinks(dir);
5025     + err = 0;
5026     + }
5027     + if (err)
5028     + goto out;
5029     +
5030     + /* check if parent hierarchy is needed, then link in same branch */
5031     + if (dbstart(old_dentry) != dbstart(new_dentry)) {
5032     + lower_new_dentry = create_parents(dir, new_dentry,
5033     + new_dentry->d_name.name,
5034     + dbstart(old_dentry));
5035     + err = PTR_ERR(lower_new_dentry);
5036     + if (IS_COPYUP_ERR(err))
5037     + goto docopyup;
5038     + if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5039     + goto out; /* NOTE(review): err is 0 here if the result was NULL */
5040     + }
5041     + lower_new_dentry = unionfs_lower_dentry(new_dentry);
5042     + lower_old_dentry = unionfs_lower_dentry(old_dentry);
5043     +
5044     + BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5045     + lower_dir_dentry = lock_parent(lower_new_dentry);
5046     + err = is_robranch(old_dentry);
5047     + if (!err) {
5048     + /* see Documentation/filesystems/unionfs/issues.txt */
5049     + lockdep_off();
5050     + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5051     + lower_new_dentry);
5052     + lockdep_on();
5053     + }
5054     + unlock_dir(lower_dir_dentry);
5055     +
5056     +docopyup:
5057     + if (IS_COPYUP_ERR(err)) {
5058     + int old_bstart = dbstart(old_dentry);
5059     + int bindex;
5060     +
5061     + for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
5062     + err = copyup_dentry(old_parent->d_inode,
5063     + old_dentry, old_bstart,
5064     + bindex, old_dentry->d_name.name,
5065     + old_dentry->d_name.len, NULL,
5066     + i_size_read(old_dentry->d_inode));
5067     + if (err)
5068     + continue; /* try the next branch to the left */
5069     + lower_new_dentry =
5070     + create_parents(dir, new_dentry,
5071     + new_dentry->d_name.name,
5072     + bindex); /* NOTE(review): result is not error-checked */
5073     + lower_old_dentry = unionfs_lower_dentry(old_dentry);
5074     + lower_dir_dentry = lock_parent(lower_new_dentry);
5075     + /* see Documentation/filesystems/unionfs/issues.txt */
5076     + lockdep_off();
5077     + /* do vfs_link */
5078     + err = vfs_link(lower_old_dentry,
5079     + lower_dir_dentry->d_inode,
5080     + lower_new_dentry);
5081     + lockdep_on();
5082     + unlock_dir(lower_dir_dentry);
5083     + goto check_link; /* first successful copyup ends the loop */
5084     + }
5085     + goto out; /* every copyup attempt failed; err holds the last error */
5086     + }
5087     +
5088     +check_link:
5089     + if (err || !lower_new_dentry->d_inode)
5090     + goto out;
5091     +
5092     + /* It's a hard link, so use the same inode */
5093     + new_dentry->d_inode = igrab(old_dentry->d_inode);
5094     + d_add(new_dentry, new_dentry->d_inode);
5095     + unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5096     + fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5097     +
5098     + /* propagate number of hard-links */
5099     + old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5100     + /* new dentry's ctime may have changed due to hard-link counts */
5101     + unionfs_copy_attr_times(new_dentry->d_inode);
5102     +
5103     +out:
5104     + if (!new_dentry->d_inode)
5105     + d_drop(new_dentry);
5106     +
5107     + kfree(name);
5108     + if (!err)
5109     + unionfs_postcopyup_setmnt(new_dentry);
5110     +
5111     + unionfs_check_inode(dir);
5112     + unionfs_check_dentry(new_dentry);
5113     + unionfs_check_dentry(old_dentry);
5114     +
5115     + unionfs_double_unlock_dentry(old_dentry, new_dentry);
5116     + unionfs_double_unlock_parents(old_parent, new_parent);
5117     + dput(new_parent);
5118     + dput(old_parent);
5119     + unionfs_read_unlock(old_dentry->d_sb);
5120     +
5121     + return err;
5122     +}
5123     +/* ->symlink: create symlink @symname for @dentry in a writeable lower branch */
5124     +static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5125     + const char *symname)
5126     +{
5127     + int err = 0;
5128     + struct dentry *lower_dentry = NULL;
5129     + struct dentry *wh_dentry = NULL; /* never assigned below */
5130     + struct dentry *lower_parent_dentry = NULL;
5131     + struct dentry *parent;
5132     + char *name = NULL; /* never assigned below */
5133     + int valid = 0;
5134     + umode_t mode;
5135     +
5136     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5137     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5138     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5139     +
5140     + valid = __unionfs_d_revalidate(dentry, parent, false);
5141     + if (unlikely(!valid)) {
5142     + err = -ESTALE;
5143     + goto out;
5144     + }
5145     +
5146     + /*
5147     + * It's only a bug if this dentry was not negative and couldn't be
5148     + * revalidated (shouldn't happen; !valid already bailed out above).
5149     + */
5150     + BUG_ON(!valid && dentry->d_inode);
5151     +
5152     + lower_dentry = find_writeable_branch(dir, dentry);
5153     + if (IS_ERR(lower_dentry)) {
5154     + err = PTR_ERR(lower_dentry);
5155     + goto out;
5156     + }
5157     +
5158     + lower_parent_dentry = lock_parent(lower_dentry);
5159     + if (IS_ERR(lower_parent_dentry)) {
5160     + err = PTR_ERR(lower_parent_dentry);
5161     + goto out_unlock; /* NOTE(review): unlock_dir() then gets an ERR_PTR */
5162     + }
5163     +
5164     + mode = S_IALLUGO; /* NOTE(review): mode is never read afterwards */
5165     + err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
5166     + if (!err) {
5167     + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5168     + if (!err) {
5169     + unionfs_copy_attr_times(dir);
5170     + fsstack_copy_inode_size(dir,
5171     + lower_parent_dentry->d_inode);
5172     + /* update no. of links on parent directory */
5173     + dir->i_nlink = unionfs_get_nlinks(dir);
5174     + }
5175     + }
5176     +
5177     +out_unlock:
5178     + unlock_dir(lower_parent_dentry);
5179     +out:
5180     + dput(wh_dentry); /* still NULL here */
5181     + kfree(name); /* still NULL here */
5182     +
5183     + if (!err) {
5184     + unionfs_postcopyup_setmnt(dentry);
5185     + unionfs_check_inode(dir);
5186     + unionfs_check_dentry(dentry);
5187     + }
5188     + unionfs_unlock_dentry(dentry);
5189     + unionfs_unlock_parent(dentry, parent);
5190     + unionfs_read_unlock(dentry->d_sb);
5191     + return err;
5192     +}
5193     +/* ->mkdir: mkdir @dentry in the first writeable branch at or left of bstart */
5194     +static int unionfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
5195     +{
5196     + int err = 0;
5197     + struct dentry *lower_dentry = NULL;
5198     + struct dentry *lower_parent_dentry = NULL;
5199     + struct dentry *parent;
5200     + int bindex = 0, bstart;
5201     + char *name = NULL; /* never assigned; only kfree()d at out: */
5202     + int valid;
5203     +
5204     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5205     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5206     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5207     +
5208     + valid = __unionfs_d_revalidate(dentry, parent, false);
5209     + if (unlikely(!valid)) {
5210     + err = -ESTALE; /* same as what real_lookup does */
5211     + goto out;
5212     + }
5213     +
5214     + bstart = dbstart(dentry);
5215     +
5216     + lower_dentry = unionfs_lower_dentry(dentry);
5217     +
5218     + /* check for a whiteout in new dentry branch, and delete it */
5219     + err = check_unlink_whiteout(dentry, lower_dentry, bstart);
5220     + if (err > 0) /* whiteout found and removed successfully */
5221     + err = 0;
5222     + if (err) {
5223     + /* exit if the error returned was NOT -EROFS */
5224     + if (!IS_COPYUP_ERR(err))
5225     + goto out;
5226     + bstart--; /* copyup-type error: start one branch to the left */
5227     + }
5228     +
5229     + /* check if copyup's needed, and mkdir */
5230     + for (bindex = bstart; bindex >= 0; bindex--) {
5231     + int i;
5232     + int bend = dbend(dentry);
5233     +
5234     + if (is_robranch_super(dentry->d_sb, bindex))
5235     + continue;
5236     +
5237     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5238     + if (!lower_dentry) {
5239     + lower_dentry = create_parents(dir, dentry,
5240     + dentry->d_name.name,
5241     + bindex);
5242     + if (!lower_dentry || IS_ERR(lower_dentry)) {
5243     + printk(KERN_ERR "unionfs: lower dentry "
5244     + " NULL for bindex = %d\n", bindex);
5245     + continue;
5246     + }
5247     + }
5248     +
5249     + lower_parent_dentry = lock_parent(lower_dentry);
5250     +
5251     + if (IS_ERR(lower_parent_dentry)) {
5252     + err = PTR_ERR(lower_parent_dentry);
5253     + goto out;
5254     + }
5255     +
5256     + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5257     + mode);
5258     +
5259     + unlock_dir(lower_parent_dentry);
5260     +
5261     + /* did the mkdir succeed? */
5262     + if (err)
5263     + break;
5264     +
5265     + for (i = bindex + 1; i <= bend; i++) {
5266     + /* XXX: use path_put_lowers? */
5267     + if (unionfs_lower_dentry_idx(dentry, i)) {
5268     + dput(unionfs_lower_dentry_idx(dentry, i));
5269     + unionfs_set_lower_dentry_idx(dentry, i, NULL);
5270     + }
5271     + }
5272     + dbend(dentry) = bindex;
5273     +
5274     + /*
5275     + * Only INTERPOSE_LOOKUP can return a value other than 0 on
5276     + * err.
5277     + */
5278     + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5279     + if (err) /* don't let make_dir_opaque() below mask this error */
5280     + goto out;
5281     +
5282     + unionfs_copy_attr_times(dir);
5283     + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode);
5284     + /* update number of links on parent directory */
5285     + dir->i_nlink = unionfs_get_nlinks(dir);
5286     +
5287     + /* mark the new directory opaque in its branch */
5288     + err = make_dir_opaque(dentry, dbstart(dentry));
5289     + if (err) {
5290     + printk(KERN_ERR "unionfs: mkdir: error creating "
5291     + ".wh.__dir_opaque: %d\n", err);
5292     + goto out;
5293     + }
5294     +
5295     + /* we are done! */
5296     + break;
5297     + }
5298     +
5299     +out:
5300     + if (!dentry->d_inode)
5301     + d_drop(dentry);
5302     +
5303     + kfree(name);
5304     +
5305     + if (!err) {
5306     + unionfs_copy_attr_times(dentry->d_inode);
5307     + unionfs_postcopyup_setmnt(dentry);
5308     + }
5309     + unionfs_check_inode(dir);
5310     + unionfs_check_dentry(dentry);
5311     + unionfs_unlock_dentry(dentry);
5312     + unionfs_unlock_parent(dentry, parent);
5313     + unionfs_read_unlock(dentry->d_sb);
5314     +
5315     + return err;
5316     +}
5317     +/* ->mknod: vfs_mknod() @dentry (@mode, @dev) in a writeable lower branch */
5318     +static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5319     + dev_t dev)
5320     +{
5321     + int err = 0;
5322     + struct dentry *lower_dentry = NULL;
5323     + struct dentry *wh_dentry = NULL; /* never assigned below */
5324     + struct dentry *lower_parent_dentry = NULL;
5325     + struct dentry *parent;
5326     + char *name = NULL; /* never assigned below */
5327     + int valid = 0;
5328     +
5329     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5330     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5331     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5332     +
5333     + valid = __unionfs_d_revalidate(dentry, parent, false);
5334     + if (unlikely(!valid)) {
5335     + err = -ESTALE;
5336     + goto out;
5337     + }
5338     +
5339     + /*
5340     + * It's only a bug if this dentry was not negative and couldn't be
5341     + * revalidated (shouldn't happen; !valid already bailed out above).
5342     + */
5343     + BUG_ON(!valid && dentry->d_inode);
5344     +
5345     + lower_dentry = find_writeable_branch(dir, dentry);
5346     + if (IS_ERR(lower_dentry)) {
5347     + err = PTR_ERR(lower_dentry);
5348     + goto out;
5349     + }
5350     +
5351     + lower_parent_dentry = lock_parent(lower_dentry);
5352     + if (IS_ERR(lower_parent_dentry)) {
5353     + err = PTR_ERR(lower_parent_dentry);
5354     + goto out_unlock; /* NOTE(review): unlock_dir() then gets an ERR_PTR */
5355     + }
5356     +
5357     + err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5358     + if (!err) {
5359     + err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5360     + if (!err) {
5361     + unionfs_copy_attr_times(dir);
5362     + fsstack_copy_inode_size(dir,
5363     + lower_parent_dentry->d_inode);
5364     + /* update no. of links on parent directory */
5365     + dir->i_nlink = unionfs_get_nlinks(dir);
5366     + }
5367     + }
5368     +
5369     +out_unlock:
5370     + unlock_dir(lower_parent_dentry);
5371     +out:
5372     + dput(wh_dentry); /* still NULL here */
5373     + kfree(name); /* still NULL here */
5374     +
5375     + if (!err) {
5376     + unionfs_postcopyup_setmnt(dentry);
5377     + unionfs_check_inode(dir);
5378     + unionfs_check_dentry(dentry);
5379     + }
5380     + unionfs_unlock_dentry(dentry);
5381     + unionfs_unlock_parent(dentry, parent);
5382     + unionfs_read_unlock(dentry->d_sb);
5383     + return err;
5384     +}
5385     +
5386     +/* lower ->readlink into @buf; requires sb, dentry, and parent already locked */
5387     +static int __unionfs_readlink(struct dentry *dentry, char __user *buf,
5388     + int bufsiz)
5389     +{
5390     + int err;
5391     + struct dentry *lower_dentry;
5392     +
5393     + lower_dentry = unionfs_lower_dentry(dentry);
5394     +
5395     + if (!lower_dentry->d_inode->i_op ||
5396     + !lower_dentry->d_inode->i_op->readlink) {
5397     + err = -EINVAL; /* lower inode has no ->readlink method */
5398     + goto out;
5399     + }
5400     +
5401     + err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5402     + buf, bufsiz);
5403     + if (err >= 0) /* success: copy the lower atime up to our inode */
5404     + fsstack_copy_attr_atime(dentry->d_inode,
5405     + lower_dentry->d_inode);
5406     +
5407     +out:
5408     + return err; /* lower ->readlink's return value, or -EINVAL */
5409     +}
5410     +/* ->readlink: lock, revalidate @dentry, then read the lower symlink into @buf */
5411     +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5412     + int bufsiz)
5413     +{
5414     + int err;
5415     + struct dentry *parent;
5416     +
5417     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5418     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5419     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5420     +
5421     + if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5422     + err = -ESTALE;
5423     + goto out;
5424     + }
5425     +
5426     + err = __unionfs_readlink(dentry, buf, bufsiz);
5427     +
5428     +out:
5429     + unionfs_check_dentry(dentry);
5430     + unionfs_unlock_dentry(dentry);
5431     + unionfs_unlock_parent(dentry, parent);
5432     + unionfs_read_unlock(dentry->d_sb);
5433     +
5434     + return err; /* -ESTALE, or whatever __unionfs_readlink returned */
5435     +}
5436     +/* ->follow_link: read the lower symlink into a kmalloc'ed buffer set on @nd */
5437     +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5438     +{
5439     + char *buf;
5440     + int len = PAGE_SIZE, err;
5441     + mm_segment_t old_fs;
5442     + struct dentry *parent;
5443     +
5444     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5445     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5446     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5447     +
5448     + /* This is freed by the put_link method assuming a successful call. */
5449     + buf = kmalloc(len, GFP_KERNEL);
5450     + if (unlikely(!buf)) {
5451     + err = -ENOMEM;
5452     + goto out;
5453     + }
5454     +
5455     + /* read the symlink, and then we will follow it */
5456     + old_fs = get_fs();
5457     + set_fs(KERNEL_DS); /* buf is a kernel buffer passed as char __user * */
5458     + err = __unionfs_readlink(dentry, buf, len - 1); /* keep room for NUL */
5459     + set_fs(old_fs);
5460     + if (err < 0) {
5461     + kfree(buf);
5462     + buf = NULL;
5463     + goto out;
5464     + }
5465     + buf[err] = 0; /* err <= len - 1, so this stays inside the allocation */
5466     + nd_set_link(nd, buf);
5467     + err = 0;
5468     +
5469     +out:
5470     + if (err >= 0) {
5471     + unionfs_check_nd(nd);
5472     + unionfs_check_dentry(dentry);
5473     + }
5474     +
5475     + unionfs_unlock_dentry(dentry);
5476     + unionfs_unlock_parent(dentry, parent);
5477     + unionfs_read_unlock(dentry->d_sb);
5478     +
5479     + return ERR_PTR(err); /* ERR_PTR(0) is NULL on success */
5480     +}
5481     +
5482     +/* ->put_link: kfree the link buffer stored on @nd; this @nd *IS* still used */
5483     +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5484     + void *cookie)
5485     +{
5486     + struct dentry *parent;
5487     +
5488     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5489     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5490     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5491     +
5492     + if (unlikely(!__unionfs_d_revalidate(dentry, parent, false)))
5493     + printk(KERN_ERR /* only logged: the buffer is freed regardless */
5494     + "unionfs: put_link failed to revalidate dentry\n");
5495     +
5496     + unionfs_check_dentry(dentry);
5497     + unionfs_check_nd(nd);
5498     + kfree(nd_get_link(nd)); /* @cookie is not used */
5499     + unionfs_unlock_dentry(dentry);
5500     + unionfs_unlock_parent(dentry, parent);
5501     + unionfs_read_unlock(dentry->d_sb);
5502     +}
5503     +
5504     +/*
5505     + * This is a variant of fs/namei.c:permission() or inode_permission() which
5506     + * skips over EROFS tests (because we perform copyup on EROFS). Returns 0/-errno.
5507     + */
5508     +static int __inode_permission(struct inode *inode, int mask)
5509     +{
5510     + int retval;
5511     +
5512     + /* nobody gets write access to an immutable file */
5513     + if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
5514     + return -EACCES;
5515     +
5516     + /* Ordinary permission routines do not understand MAY_APPEND. */
5517     + if (inode->i_op && inode->i_op->permission) {
5518     + retval = inode->i_op->permission(inode, mask);
5519     + if (!retval) {
5520     + /*
5521     + * Exec permission on a regular file is denied if none
5522     + * of the execute bits are set.
5523     + *
5524     + * This check should be done by the ->permission()
5525     + * method.
5526     + */
5527     + if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
5528     + !(inode->i_mode & S_IXUGO))
5529     + return -EACCES;
5530     + }
5531     + } else { /* no ->permission method: fall back to the generic check */
5532     + retval = generic_permission(inode, mask, NULL);
5533     + }
5534     + if (retval)
5535     + return retval;
5536     +
5537     + return security_inode_permission(inode, /* security hook has the last word */
5538     + mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
5539     +}
5540     +
5541     +/*
5542     + * Don't grab the superblock read-lock in unionfs_permission, which prevents
5543     + * a deadlock with the branch-management "add branch" code (which grabbed
5544     + * the write lock). It is safe to not grab the read lock here, because even
5545     + * with branch management taking place, there is no chance that
5546     + * unionfs_permission, or anything it calls, will use stale branch
5547     + * information. Returns 0 if @mask is allowed on @inode, else -errno.
5548     + */
5549     +static int unionfs_permission(struct inode *inode, int mask)
5550     +{
5551     + struct inode *lower_inode = NULL;
5552     + int err = 0;
5553     + int bindex, bstart, bend;
5554     + const int is_file = !S_ISDIR(inode->i_mode);
5555     + const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
5556     + struct inode *inode_grabbed = igrab(inode);
5557     + struct dentry *dentry = d_find_alias(inode);
5558     +
5559     + if (dentry)
5560     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5561     +
5562     + if (!UNIONFS_I(inode)->lower_inodes) {
5563     + if (is_file) /* dirs can be unlinked but chdir'ed to */
5564     + err = -ESTALE; /* force revalidate */
5565     + goto out;
5566     + }
5567     + bstart = ibstart(inode);
5568     + bend = ibend(inode);
5569     + if (unlikely(bstart < 0 || bend < 0)) {
5570     + /*
5571     + * With branch-management, we can get a stale inode here.
5572     + * If so, we return ESTALE back to link_path_walk, which
5573     + * would discard the dcache entry and re-lookup the
5574     + * dentry+inode. This should be equivalent to issuing
5575     + * __unionfs_d_revalidate_chain on nd.dentry here.
5576     + */
5577     + if (is_file) /* dirs can be unlinked but chdir'ed to */
5578     + err = -ESTALE; /* force revalidate */
5579     + goto out;
5580     + }
5581     +
5582     + for (bindex = bstart; bindex <= bend; bindex++) {
5583     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
5584     + if (!lower_inode)
5585     + continue;
5586     +
5587     + /*
5588     + * check the condition for D-F-D underlying files/directories,
5589     + * we don't have to check for files, if we are checking for
5590     + * directories.
5591     + */
5592     + if (!is_file && !S_ISDIR(lower_inode->i_mode))
5593     + continue;
5594     +
5595     + /*
5596     + * We check basic permissions, but ignore conditions such as
5597     + * readonly file systems or branches marked readonly, because
5598     + * those should lead to a copyup later on. However, if user
5599     + * never had access to the file (-EACCES/-EPERM, both negative
5600     + * errnos), then no copyup could ever take place.
5601     + */
5602     + err = __inode_permission(lower_inode, mask);
5603     + if (err && err != -EACCES && err != -EPERM && bindex > 0) {
5604     + umode_t mode = lower_inode->i_mode;
5605     + if ((is_robranch_super(inode->i_sb, bindex) ||
5606     + __is_rdonly(lower_inode)) &&
5607     + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5608     + err = 0;
5609     + if (IS_COPYUP_ERR(err))
5610     + err = 0;
5611     + }
5612     +
5613     + /*
5614     + * NFS HACK: NFSv2/3 return EACCES on readonly-exported,
5615     + * locally readonly-mounted file systems, instead of EROFS
5616     + * like other file systems do. So we have no choice here
5617     + * but to intercept this and ignore it for NFS branches
5618     + * marked readonly. Specifically, we avoid using NFS's own
5619     + * "broken" ->permission method, and rely on
5620     + * generic_permission() to do basic checking for us.
5621     + */
5622     + if (err && err == -EACCES &&
5623     + is_robranch_super(inode->i_sb, bindex) &&
5624     + lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC)
5625     + err = generic_permission(lower_inode, mask, NULL);
5626     +
5627     + /*
5628     + * The permissions are an intersection of the overall directory
5629     + * permissions, so we fail if one fails.
5630     + */
5631     + if (err)
5632     + goto out;
5633     +
5634     + /* only the leftmost file matters. */
5635     + if (is_file || write_mask) {
5636     + if (is_file && write_mask) {
5637     + err = get_write_access(lower_inode);
5638     + if (!err) /* probe only: drop the write access again */
5639     + put_write_access(lower_inode);
5640     + }
5641     + break;
5642     + }
5643     + }
5644     + /* sync times which may have changed (asynchronously) below */
5645     + unionfs_copy_attr_times(inode);
5646     +
5647     +out:
5648     + unionfs_check_inode(inode);
5649     + if (dentry) {
5650     + unionfs_unlock_dentry(dentry);
5651     + dput(dentry); /* drop the reference taken by d_find_alias() */
5652     + }
5653     + iput(inode_grabbed); /* pairs with igrab() at the top */
5654     + return err;
5655     +}
5656     +/* ->setattr: copy up off a readonly branch if needed, then change the lower */
5657     +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5658     +{
5659     + int err = 0;
5660     + struct dentry *lower_dentry;
5661     + struct dentry *parent;
5662     + struct inode *inode;
5663     + struct inode *lower_inode;
5664     + int bstart, bend, bindex;
5665     + loff_t size;
5666     +
5667     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5668     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5669     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5670     +
5671     + if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5672     + err = -ESTALE;
5673     + goto out;
5674     + }
5675     +
5676     + bstart = dbstart(dentry);
5677     + bend = dbend(dentry); /* NOTE(review): bend is not read afterwards */
5678     + inode = dentry->d_inode;
5679     +
5680     + /*
5681     + * mode change is for clearing setuid/setgid. Allow lower filesystem
5682     + * to reinterpret it in its own way.
5683     + */
5684     + if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
5685     + ia->ia_valid &= ~ATTR_MODE;
5686     +
5687     + lower_dentry = unionfs_lower_dentry(dentry);
5688     + if (!lower_dentry) { /* should never happen after above revalidate */
5689     + err = -EINVAL;
5690     + goto out;
5691     + }
5692     + lower_inode = unionfs_lower_inode(inode);
5693     +
5694     + /* check if user has permission to change lower inode */
5695     + err = inode_change_ok(lower_inode, ia);
5696     + if (err)
5697     + goto out;
5698     +
5699     + /* copyup if the file is on a read only branch */
5700     + if (is_robranch_super(dentry->d_sb, bstart)
5701     + || __is_rdonly(lower_inode)) {
5702     + /* check if we have a branch to copy up to */
5703     + if (bstart <= 0) {
5704     + err = -EACCES;
5705     + goto out;
5706     + }
5707     +
5708     + if (ia->ia_valid & ATTR_SIZE)
5709     + size = ia->ia_size; /* copy up at the requested new size */
5710     + else
5711     + size = i_size_read(inode);
5712     + /* copyup to next available branch */
5713     + for (bindex = bstart - 1; bindex >= 0; bindex--) {
5714     + err = copyup_dentry(parent->d_inode,
5715     + dentry, bstart, bindex,
5716     + dentry->d_name.name,
5717     + dentry->d_name.len,
5718     + NULL, size);
5719     + if (!err)
5720     + break;
5721     + }
5722     + if (err) /* no branch accepted the copyup */
5723     + goto out;
5724     + /* get updated lower_dentry/inode after copyup */
5725     + lower_dentry = unionfs_lower_dentry(dentry);
5726     + lower_inode = unionfs_lower_inode(inode);
5727     + }
5728     +
5729     + /*
5730     + * If shrinking, first truncate upper level to cancel writing dirty
5731     + * pages beyond the new eof; and also if its maxbytes is more
5732     + * limiting (fail with -EFBIG before making any change to the lower
5733     + * level). There is no need to vmtruncate the upper level
5734     + * afterwards in the other cases: we fsstack_copy_inode_size from
5735     + * the lower level.
5736     + */
5737     + if (ia->ia_valid & ATTR_SIZE) {
5738     + size = i_size_read(inode);
5739     + if (ia->ia_size < size || (ia->ia_size > size &&
5740     + inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
5741     + err = vmtruncate(inode, ia->ia_size);
5742     + if (err)
5743     + goto out;
5744     + }
5745     + }
5746     +
5747     + /* notify the (possibly copied-up) lower inode */
5748     + /*
5749     + * Note: we use lower_dentry->d_inode, because lower_inode may be
5750     + * unlinked (no inode->i_sb and i_ino==0). This happens if someone
5751     + * tries to open(), unlink(), then ftruncate() a file.
5752     + */
5753     + mutex_lock(&lower_dentry->d_inode->i_mutex);
5754     + err = notify_change(lower_dentry, ia);
5755     + mutex_unlock(&lower_dentry->d_inode->i_mutex);
5756     + if (err)
5757     + goto out;
5758     +
5759     + /* get attributes from the first lower inode */
5760     + if (ibstart(inode) >= 0)
5761     + unionfs_copy_attr_all(inode, lower_inode);
5762     + /*
5763     + * unionfs_copy_attr_all will copy the lower times to our inode if
5764     + * the lower ones are newer (useful for cache coherency). However,
5765     + * ->setattr is the only place in which we may have to copy the
5766     + * lower inode times absolutely, to support utimes(2).
5767     + */
5768     + if (ia->ia_valid & ATTR_MTIME_SET)
5769     + inode->i_mtime = lower_inode->i_mtime;
5770     + if (ia->ia_valid & ATTR_CTIME)
5771     + inode->i_ctime = lower_inode->i_ctime;
5772     + if (ia->ia_valid & ATTR_ATIME_SET)
5773     + inode->i_atime = lower_inode->i_atime;
5774     + fsstack_copy_inode_size(inode, lower_inode);
5775     +
5776     +out:
5777     + if (!err)
5778     + unionfs_check_dentry(dentry);
5779     + unionfs_unlock_dentry(dentry);
5780     + unionfs_unlock_parent(dentry, parent);
5781     + unionfs_read_unlock(dentry->d_sb);
5782     +
5783     + return err;
5784     +}
5785     +/* inode ops table carrying the symlink methods (readlink/follow/put_link) */
5786     +struct inode_operations unionfs_symlink_iops = {
5787     + .readlink = unionfs_readlink,
5788     + .permission = unionfs_permission,
5789     + .follow_link = unionfs_follow_link,
5790     + .setattr = unionfs_setattr,
5791     + .put_link = unionfs_put_link,
5792     +};
5793     +/* inode ops table carrying the namespace methods (create, lookup, link, ...) */
5794     +struct inode_operations unionfs_dir_iops = {
5795     + .create = unionfs_create,
5796     + .lookup = unionfs_lookup,
5797     + .link = unionfs_link,
5798     + .unlink = unionfs_unlink,
5799     + .symlink = unionfs_symlink,
5800     + .mkdir = unionfs_mkdir,
5801     + .rmdir = unionfs_rmdir,
5802     + .mknod = unionfs_mknod,
5803     + .rename = unionfs_rename,
5804     + .permission = unionfs_permission,
5805     + .setattr = unionfs_setattr,
5806     +#ifdef CONFIG_UNION_FS_XATTR
5807     + .setxattr = unionfs_setxattr,
5808     + .getxattr = unionfs_getxattr,
5809     + .removexattr = unionfs_removexattr,
5810     + .listxattr = unionfs_listxattr,
5811     +#endif /* CONFIG_UNION_FS_XATTR */
5812     +};
5813     +/* inode ops table with only permission/setattr (plus xattr ops if enabled) */
5814     +struct inode_operations unionfs_main_iops = {
5815     + .permission = unionfs_permission,
5816     + .setattr = unionfs_setattr,
5817     +#ifdef CONFIG_UNION_FS_XATTR
5818     + .setxattr = unionfs_setxattr,
5819     + .getxattr = unionfs_getxattr,
5820     + .removexattr = unionfs_removexattr,
5821     + .listxattr = unionfs_listxattr,
5822     +#endif /* CONFIG_UNION_FS_XATTR */
5823     +};
5824     diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5825     new file mode 100644
5826     index 0000000..b63c17e
5827     --- /dev/null
5828     +++ b/fs/unionfs/lookup.c
5829     @@ -0,0 +1,569 @@
5830     +/*
5831     + * Copyright (c) 2003-2010 Erez Zadok
5832     + * Copyright (c) 2003-2006 Charles P. Wright
5833     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5834     + * Copyright (c) 2005-2006 Junjiro Okajima
5835     + * Copyright (c) 2005 Arun M. Krishnakumar
5836     + * Copyright (c) 2004-2006 David P. Quigley
5837     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5838     + * Copyright (c) 2003 Puja Gupta
5839     + * Copyright (c) 2003 Harikesavan Krishnan
5840     + * Copyright (c) 2003-2010 Stony Brook University
5841     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
5842     + *
5843     + * This program is free software; you can redistribute it and/or modify
5844     + * it under the terms of the GNU General Public License version 2 as
5845     + * published by the Free Software Foundation.
5846     + */
5847     +
5848     +#include "union.h"
5849     +
5850     +/*
5851     + * Lookup one path component @name relative to a <base,mnt> path pair.
5852     + * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
5853     + * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
5854     + * other lower mounts properly. If @new_mnt is non-null, will fill in the
5855     + * new mnt there. Caller is responsible to dput/mntput/path_put returned
5856     + * @dentry and @new_mnt. Returns ERR_PTR for errors other than ENOENT.
5857     + */
5858     +struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
5859     + const char *name, struct vfsmount **new_mnt)
5860     +{
5861     + struct dentry *dentry = NULL;
5862     + struct nameidata lower_nd;
5863     + int err;
5864     +
5865     + /* we use flags=0 to get basic lookup */
5866     + err = vfs_path_lookup(base, mnt, name, 0, &lower_nd);
5867     +
5868     + switch (err) {
5869     + case 0: /* no error */
5870     + dentry = lower_nd.path.dentry;
5871     + if (new_mnt)
5872     + *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */
5873     + else
5874     + mntput(lower_nd.path.mnt); /* nobody to hand the ref to */
5875     + break;
5876     + case -ENOENT:
5877     + /*
5878     + * Not an error: return a negative dentry (ala lookup_one_len).
5879     + * The failed walk no longer holds lower_nd.path, so take the
5880     + * reference on the @mnt we were given rather than on that.
5881     + */
5882     + dentry = lookup_lck_len(name, base, strlen(name));
5883     + if (new_mnt)
5884     + *new_mnt = mntget(mnt);
5885     + break;
5886     + default: /* all other real errors */
5887     + dentry = ERR_PTR(err);
5888     + break;
5889     + }
5890     +
5891     + return dentry;
5892     +}
5893     +
5894     +/*
5895     + * This is a utility function that fills in a unionfs dentry by running
5896     + * unionfs_lookup_full() in INTERPOSE_PARTIAL mode.
5897     + *
5898     + * Caller must lock this dentry with unionfs_lock_dentry.
5899     + *
5900     + * Returns: 0 (ok) if the full lookup returned NULL, or -ERRNO if it
5901     + * returned an error pointer. Any other return value must be @dentry
5902     + * itself (we BUG otherwise), and is reported as -ENOSYS.
5903     + *
5904     + * XXX: get rid of _partial_lookup and make callers call _lookup_full
5905     + * directly
5906     + */
5907     +int unionfs_partial_lookup(struct dentry *dentry, struct dentry *parent)
5908     +{
5909     + struct dentry *found;
5910     +
5911     + found = unionfs_lookup_full(dentry, parent, INTERPOSE_PARTIAL);
5912     + if (found == NULL)
5913     + return 0;
5914     + if (IS_ERR(found))
5915     + return PTR_ERR(found);
5916     +
5917     + /* XXX: need to change the interface */
5918     + BUG_ON(found != dentry);
5919     + return -ENOSYS;
5920     +}
5921     +
5922     +/* Slab cache for struct unionfs_dentry_info, the per-dentry private data. */
5923     +static struct kmem_cache *unionfs_dentry_cachep;
5924     +int unionfs_init_dentry_cache(void) /* returns 0, or -ENOMEM on failure */
5925     +{
5926     + unionfs_dentry_cachep =
5927     + kmem_cache_create("unionfs_dentry",
5928     + sizeof(struct unionfs_dentry_info),
5929     + 0, SLAB_RECLAIM_ACCOUNT, NULL);
5930     +
5931     + return (unionfs_dentry_cachep ? 0 : -ENOMEM);
5932     +}
5933     +/* Destroy the dentry info slab cache, if it was ever created. */
5934     +void unionfs_destroy_dentry_cache(void)
5935     +{
5936     + if (unionfs_dentry_cachep)
5937     + kmem_cache_destroy(unionfs_dentry_cachep);
5938     +}
5939     +/* Free @dentry's private data (lower_paths, then the info); NULL-safe. */
5940     +void free_dentry_private_data(struct dentry *dentry)
5941     +{
5942     + if (!dentry || !dentry->d_fsdata)
5943     + return;
5944     + kfree(UNIONFS_D(dentry)->lower_paths);
5945     + UNIONFS_D(dentry)->lower_paths = NULL;
5946     + kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
5947     + dentry->d_fsdata = NULL;
5948     +}
5949     +/* (Re)size lower_paths to sbmax() entries and reset the info to empty. */
5950     +static inline int __realloc_dentry_private_data(struct dentry *dentry)
5951     +{
5952     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
5953     + void *p;
5954     + int size;
5955     +
5956     + BUG_ON(!info);
5957     +
5958     + size = sizeof(struct path) * sbmax(dentry->d_sb);
5959     + p = krealloc(info->lower_paths, size, GFP_ATOMIC);
5960     + if (unlikely(!p))
5961     + return -ENOMEM; /* info->lower_paths is left untouched */
5962     +
5963     + info->lower_paths = p;
5964     +
5965     + info->bstart = -1;
5966     + info->bend = -1;
5967     + info->bopaque = -1;
5968     + info->bcount = sbmax(dentry->d_sb);
5969     + atomic_set(&info->generation,
5970     + atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
5971     +
5972     + memset(info->lower_paths, 0, size); /* old entries are wiped, not put */
5973     +
5974     + return 0;
5975     +}
5976     +
5977     +/* UNIONFS_D(dentry)->lock must be locked; returns 0 or -ENOMEM */
5978     +int realloc_dentry_private_data(struct dentry *dentry)
5979     +{
5980     + if (!__realloc_dentry_private_data(dentry))
5981     + return 0;
5982     +
5983     + /* lower_paths is freed (exactly once) by free_dentry_private_data() */
5984     + free_dentry_private_data(dentry);
5985     + return -ENOMEM;
5986     +}
5987     +
5988     +/* allocate new dentry private data; on success info->lock is left held */
5989     +int new_dentry_private_data(struct dentry *dentry, int subclass)
5990     +{
5991     + struct unionfs_dentry_info *info = UNIONFS_D(dentry);
5992     +
5993     + BUG_ON(info); /* dentry must not have private data yet */
5994     +
5995     + info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
5996     + if (unlikely(!info))
5997     + return -ENOMEM;
5998     +
5999     + mutex_init(&info->lock);
6000     + mutex_lock_nested(&info->lock, subclass);
6001     +
6002     + info->lower_paths = NULL; /* krealloc() below then allocates afresh */
6003     +
6004     + dentry->d_fsdata = info;
6005     +
6006     + if (!__realloc_dentry_private_data(dentry))
6007     + return 0;
6008     +
6009     + mutex_unlock(&info->lock);
6010     + free_dentry_private_data(dentry);
6011     + return -ENOMEM;
6012     +}
6013     +
6014     +/*
6015     + * scan through the lower dentry objects, and set bstart to the first
6016     + * positive one; negative lower dentries before it are dput and cleared
6017     + */
6018     +void update_bstart(struct dentry *dentry)
6019     +{
6020     + int bindex;
6021     + int bstart = dbstart(dentry);
6022     + int bend = dbend(dentry);
6023     + struct dentry *lower_dentry;
6024     +
6025     + for (bindex = bstart; bindex <= bend; bindex++) {
6026     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6027     + if (!lower_dentry)
6028     + continue;
6029     + if (lower_dentry->d_inode) {
6030     + dbstart(dentry) = bindex;
6031     + break;
6032     + }
6033     + dput(lower_dentry);
6034     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6035     + }
6036     +}
6037     +
6038     +
6039     +/*
6040     + * Initialize a nameidata structure (the intent part) we can pass to a lower
6041     + * file system. Returns 0 on success or -error (only -ENOMEM possible).
6042     + * @flags must be 0, LOOKUP_OPEN or LOOKUP_CREATE (anything else BUGs). If
6043     + * ALLOC_LOWER_ND_FILE is defined, open intents also get an allocated struct
6044     + * file; the caller, when done with this nd, must call release_lower_nd.
6045     + *
6046     + * XXX: this code, and the callers of this code, should be redone using
6047     + * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6048     + * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6049     + * function and a method that other file systems can call.
6050     + */
6051     +int init_lower_nd(struct nameidata *nd, unsigned int flags)
6052     +{
6053     + int err = 0;
6054     +#ifdef ALLOC_LOWER_ND_FILE
6055     + /*
6056     + * XXX: one day we may need to have the lower return an open file
6057     + * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6058     + * very well be needed for nfs4.
6059     + */
6060     + struct file *file;
6061     +#endif /* ALLOC_LOWER_ND_FILE */
6062     +
6063     + memset(nd, 0, sizeof(struct nameidata));
6064     + if (!flags)
6065     + return err;
6066     +
6067     + switch (flags) {
6068     + case LOOKUP_CREATE:
6069     + nd->intent.open.flags |= O_CREAT;
6070     + /* fall through: shared code for create/open cases */
6071     + case LOOKUP_OPEN:
6072     + nd->flags = flags;
6073     + nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6074     +#ifdef ALLOC_LOWER_ND_FILE
6075     + file = kzalloc(sizeof(struct file), GFP_KERNEL);
6076     + if (unlikely(!file)) {
6077     + err = -ENOMEM;
6078     + break; /* exit switch statement and thus return */
6079     + }
6080     + nd->intent.open.file = file;
6081     +#endif /* ALLOC_LOWER_ND_FILE */
6082     + break;
6083     + default:
6084     + /*
6085     + * We should never get here, for now.
6086     + * We can add new cases here later on.
6087     + */
6088     + pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6089     + BUG();
6090     + break;
6091     + }
6092     +
6093     + return err;
6094     +}
6095     +/* Release @nd's intent file, if any; @err is the lower operation's result. */
6096     +void release_lower_nd(struct nameidata *nd, int err)
6097     +{
6098     + if (!nd->intent.open.file)
6099     + return;
6100     + else if (!err)
6101     + release_open_intent(nd);
6102     +#ifdef ALLOC_LOWER_ND_FILE
6103     + kfree(nd->intent.open.file);
6104     +#endif /* ALLOC_LOWER_ND_FILE */
6105     +}
6106     +
6107     +/*
6108     + * Main (and complex) driver function for Unionfs's lookup
6109     + *
6110     + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
6111     + * PTR if d_splice returned a different dentry.
6112     + *
6113     + * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
6114     + * inode info must be locked. If lookupmode is INTERPOSE_LOOKUP (i.e., a
6115     + * newly looked-up dentry), then unionfs_lookup_full will return a locked
6116     + * dentry's info, which the caller must unlock.
6117     + */
6118     +struct dentry *unionfs_lookup_full(struct dentry *dentry,
6119     + struct dentry *parent, int lookupmode)
6120     +{
6121     + int err = 0;
6122     + struct dentry *lower_dentry = NULL;
6123     + struct vfsmount *lower_mnt;
6124     + struct vfsmount *lower_dir_mnt;
6125     + struct dentry *wh_lower_dentry = NULL;
6126     + struct dentry *lower_dir_dentry = NULL;
6127     + struct dentry *d_interposed = NULL;
6128     + int bindex, bstart, bend, bopaque;
6129     + int opaque, num_positive = 0;
6130     + const char *name;
6131     + int namelen;
6132     + int pos_start, pos_end;
6133     +
6134     + /*
6135     + * We should already have a lock on this dentry in the case of a
6136     + * partial lookup, or a revalidation. Otherwise it is returned from
6137     + * new_dentry_private_data already locked.
6138     + */
6139     + verify_locked(dentry);
6140     + verify_locked(parent);
6141     +
6142     + /* must initialize dentry operations */
6143     + dentry->d_op = &unionfs_dops;
6144     +
6145     + /* We never partial lookup the root directory. */
6146     + if (IS_ROOT(dentry))
6147     + goto out;
6148     +
6149     + name = dentry->d_name.name;
6150     + namelen = dentry->d_name.len;
6151     +
6152     + /* No dentries should get created for possible whiteout names. */
6153     + if (!is_validname(name)) {
6154     + err = -EPERM;
6155     + goto out_free;
6156     + }
6157     +
6158     + /* Now start the actual lookup procedure. */
6159     + bstart = dbstart(parent);
6160     + bend = dbend(parent);
6161     + bopaque = dbopaque(parent);
6162     + BUG_ON(bstart < 0);
6163     +
6164     + /* adjust bend to bopaque if needed */
6165     + if ((bopaque >= 0) && (bopaque < bend))
6166     + bend = bopaque;
6167     +
6168     + /* lookup all possible dentries */
6169     + for (bindex = bstart; bindex <= bend; bindex++) {
6170     +
6171     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6172     + lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
6173     +
6174     + /* skip if we already have a lower dentry (positive or negative) */
6175     + if (lower_dentry) {
6176     + if (dbstart(dentry) < 0)
6177     + dbstart(dentry) = bindex;
6178     + if (bindex > dbend(dentry))
6179     + dbend(dentry) = bindex;
6180     + if (lower_dentry->d_inode)
6181     + num_positive++;
6182     + continue;
6183     + }
6184     +
6185     + lower_dir_dentry =
6186     + unionfs_lower_dentry_idx(parent, bindex);
6187     + /* if the lower dentry's parent does not exist, skip this */
6188     + if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6189     + continue;
6190     +
6191     + /* also skip it if the parent isn't a directory. */
6192     + if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6193     + continue; /* XXX: should be BUG_ON */
6194     +
6195     + /* check for whiteouts: stop lookup if found */
6196     + wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
6197     + if (IS_ERR(wh_lower_dentry)) {
6198     + err = PTR_ERR(wh_lower_dentry);
6199     + goto out_free;
6200     + }
6201     + if (wh_lower_dentry->d_inode) {
6202     + dbend(dentry) = dbopaque(dentry) = bindex;
6203     + if (dbstart(dentry) < 0)
6204     + dbstart(dentry) = bindex;
6205     + dput(wh_lower_dentry);
6206     + break;
6207     + }
6208     + dput(wh_lower_dentry);
6209     +
6210     + /* Now do regular lookup; lookup @name */
6211     + lower_dir_mnt = unionfs_lower_mnt_idx(parent, bindex);
6212     + lower_mnt = NULL; /* XXX: needed? */
6213     +
6214     + lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
6215     + name, &lower_mnt);
6216     +
6217     + if (IS_ERR(lower_dentry)) {
6218     + err = PTR_ERR(lower_dentry);
6219     + goto out_free;
6220     + }
6221     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6222     + if (!lower_mnt)
6223     + lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6224     + bindex);
6225     + unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6226     +
6227     + /* adjust dbstart/end */
6228     + if (dbstart(dentry) < 0)
6229     + dbstart(dentry) = bindex;
6230     + if (bindex > dbend(dentry))
6231     + dbend(dentry) = bindex;
6232     + /*
6233     + * We always store the lower dentries above, and update
6234     + * dbstart/dbend, even if the whole unionfs dentry is
6235     + * negative (i.e., no lower inodes).
6236     + */
6237     + if (!lower_dentry->d_inode)
6238     + continue;
6239     + num_positive++;
6240     +
6241     + /*
6242     + * check if we just found an opaque directory, if so, stop
6243     + * lookups here.
6244     + */
6245     + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
6246     + continue;
6247     + opaque = is_opaque_dir(dentry, bindex);
6248     + if (opaque < 0) {
6249     + err = opaque;
6250     + goto out_free;
6251     + } else if (opaque) {
6252     + dbend(dentry) = dbopaque(dentry) = bindex;
6253     + break;
6254     + }
6255     + dbend(dentry) = bindex;
6256     +
6257     + /* update parent directory's atime with the bindex */
6258     + fsstack_copy_attr_atime(parent->d_inode,
6259     + lower_dir_dentry->d_inode);
6260     + }
6261     +
6262     + /* sanity checks, then decide if to process a negative dentry */
6263     + BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6264     + BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6265     +
6266     + if (num_positive > 0)
6267     + goto out_positive;
6268     +
6269     + /*** handle NEGATIVE dentries ***/
6270     +
6271     + /*
6272     + * If negative, keep only first lower negative dentry, to save on
6273     + * memory.
6274     + */
6275     + if (dbstart(dentry) < dbend(dentry)) {
6276     + path_put_lowers(dentry, dbstart(dentry) + 1,
6277     + dbend(dentry), false);
6278     + dbend(dentry) = dbstart(dentry);
6279     + }
6280     + if (lookupmode == INTERPOSE_PARTIAL)
6281     + goto out;
6282     + if (lookupmode == INTERPOSE_LOOKUP) {
6283     + /*
6284     + * If all we found was a whiteout in the first available
6285     + * branch, then create a negative dentry for a possibly new
6286     + * file to be created.
6287     + */
6288     + if (dbopaque(dentry) < 0)
6289     + goto out;
6290     + /* XXX: need to get mnt here */
6291     + bindex = dbstart(dentry);
6292     + if (unionfs_lower_dentry_idx(dentry, bindex))
6293     + goto out;
6294     + lower_dir_dentry =
6295     + unionfs_lower_dentry_idx(parent, bindex);
6296     + if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
6297     + goto out;
6298     + if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
6299     + goto out; /* XXX: should be BUG_ON */
6300     + /* XXX: do we need to cross bind mounts here? */
6301     + lower_dentry = lookup_lck_len(name, lower_dir_dentry, namelen);
6302     + if (IS_ERR(lower_dentry)) {
6303     + err = PTR_ERR(lower_dentry);
6304     + goto out;
6305     + }
6306     + /* XXX: need to mntget/mntput as needed too! */
6307     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6308     + /* XXX: wrong mnt for crossing bind mounts! */
6309     + lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
6310     + unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
6311     +
6312     + goto out;
6313     + }
6314     +
6315     + /* if we're revalidating a positive dentry, don't make it negative */
6316     + if (lookupmode != INTERPOSE_REVAL)
6317     + d_add(dentry, NULL);
6318     +
6319     + goto out;
6320     +
6321     +out_positive:
6322     + /*** handle POSITIVE dentries ***/
6323     +
6324     + /*
6325     + * This unionfs dentry is positive (at least one lower inode
6326     + * exists), so scan entire dentry from beginning to end, and remove
6327     + * any negative lower dentries, if any. Then, update dbstart/dbend
6328     + * to reflect the start/end of positive dentries.
6329     + */
6330     + pos_start = pos_end = -1;
6331     + for (bindex = bstart; bindex <= bend; bindex++) {
6332     + lower_dentry = unionfs_lower_dentry_idx(dentry,
6333     + bindex);
6334     + if (lower_dentry && lower_dentry->d_inode) {
6335     + if (pos_start < 0)
6336     + pos_start = bindex;
6337     + if (bindex > pos_end)
6338     + pos_end = bindex;
6339     + continue;
6340     + }
6341     + path_put_lowers(dentry, bindex, bindex, false);
6342     + }
6343     + if (pos_start >= 0)
6344     + dbstart(dentry) = pos_start;
6345     + if (pos_end >= 0)
6346     + dbend(dentry) = pos_end;
6347     +
6348     + /* Partial lookups need to re-interpose, or throw away older negs. */
6349     + if (lookupmode == INTERPOSE_PARTIAL) {
6350     + if (dentry->d_inode) {
6351     + unionfs_reinterpose(dentry);
6352     + goto out;
6353     + }
6354     +
6355     + /*
6356     + * This dentry has no inode yet but a lower one is
6357     + * positive: treat it as a negative revalidation.
6358     + */
6359     + lookupmode = INTERPOSE_REVAL_NEG;
6360     + update_bstart(dentry);
6361     + }
6362     +
6363     + /*
6364     + * Interpose can return a dentry if d_splice returned a different
6365     + * dentry.
6366     + */
6367     + d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6368     + if (IS_ERR(d_interposed))
6369     + err = PTR_ERR(d_interposed);
6370     + else if (d_interposed)
6371     + dentry = d_interposed;
6372     +
6373     + if (!err)
6374     + goto out;
6375     + d_drop(dentry);
6376     +
6377     +out_free:
6378     + /* should dput/mntput all the underlying dentries on error condition */
6379     + if (dbstart(dentry) >= 0)
6380     + path_put_lowers_all(dentry, false);
6381     + /* free lower_paths unconditionally */
6382     + kfree(UNIONFS_D(dentry)->lower_paths);
6383     + UNIONFS_D(dentry)->lower_paths = NULL;
6384     +
6385     +out:
6386     + if (dentry && UNIONFS_D(dentry)) {
6387     + BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
6388     + BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
6389     + }
6390     + if (d_interposed && UNIONFS_D(d_interposed)) {
6391     + BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
6392     + BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
6393     + }
6394     +
6395     + if (!err && d_interposed)
6396     + return d_interposed;
6397     + return ERR_PTR(err);
6398     +}
6399     diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6400     new file mode 100644
6401     index 0000000..258386e
6402     --- /dev/null
6403     +++ b/fs/unionfs/main.c
6404     @@ -0,0 +1,758 @@
6405     +/*
6406     + * Copyright (c) 2003-2010 Erez Zadok
6407     + * Copyright (c) 2003-2006 Charles P. Wright
6408     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6409     + * Copyright (c) 2005-2006 Junjiro Okajima
6410     + * Copyright (c) 2005 Arun M. Krishnakumar
6411     + * Copyright (c) 2004-2006 David P. Quigley
6412     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6413     + * Copyright (c) 2003 Puja Gupta
6414     + * Copyright (c) 2003 Harikesavan Krishnan
6415     + * Copyright (c) 2003-2010 Stony Brook University
6416     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
6417     + *
6418     + * This program is free software; you can redistribute it and/or modify
6419     + * it under the terms of the GNU General Public License version 2 as
6420     + * published by the Free Software Foundation.
6421     + */
6422     +
6423     +#include "union.h"
6424     +#include <linux/module.h>
6425     +#include <linux/moduleparam.h>
6426     +/* Fill @inode from @dentry's lower dentries: lower inodes, ops, attributes. */
6427     +static void unionfs_fill_inode(struct dentry *dentry,
6428     + struct inode *inode)
6429     +{
6430     + struct inode *lower_inode;
6431     + struct dentry *lower_dentry;
6432     + int bindex, bstart, bend;
6433     +
6434     + bstart = dbstart(dentry);
6435     + bend = dbend(dentry);
6436     +
6437     + for (bindex = bstart; bindex <= bend; bindex++) {
6438     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6439     + if (!lower_dentry) {
6440     + unionfs_set_lower_inode_idx(inode, bindex, NULL);
6441     + continue;
6442     + }
6443     +
6444     + /* Initialize the lower inode to the new lower inode. */
6445     + if (!lower_dentry->d_inode)
6446     + continue;
6447     +
6448     + unionfs_set_lower_inode_idx(inode, bindex,
6449     + igrab(lower_dentry->d_inode));
6450     + }
6451     +
6452     + ibstart(inode) = dbstart(dentry);
6453     + ibend(inode) = dbend(dentry);
6454     +
6455     + /* Use attributes from the first branch. */
6456     + lower_inode = unionfs_lower_inode(inode);
6457     +
6458     + /* Use different set of inode ops for symlinks & directories */
6459     + if (S_ISLNK(lower_inode->i_mode))
6460     + inode->i_op = &unionfs_symlink_iops;
6461     + else if (S_ISDIR(lower_inode->i_mode))
6462     + inode->i_op = &unionfs_dir_iops;
6463     +
6464     + /* Use different set of file ops for directories */
6465     + if (S_ISDIR(lower_inode->i_mode))
6466     + inode->i_fop = &unionfs_dir_fops;
6467     +
6468     + /* properly initialize special inodes */
6469     + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6470     + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6471     + init_special_inode(inode, lower_inode->i_mode,
6472     + lower_inode->i_rdev);
6473     +
6474     + /* all well, copy inode attributes */
6475     + unionfs_copy_attr_all(inode, lower_inode);
6476     + fsstack_copy_inode_size(inode, lower_inode);
6477     +}
6478     +
6479     +/*
6480     + * Connect a unionfs dentry/inode with several lower ones. This is
6481     + * the classic stackable file system "vnode interposition" action.
6482     + * Returns NULL (ok), ERR_PTR on error, or the dentry d_splice_alias gave us.
6483     + * @sb: unionfs's super_block; @flag: one of the INTERPOSE_* modes
6484     + */
6485     +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6486     + int flag)
6487     +{
6488     + int err = 0;
6489     + struct inode *inode;
6490     + int need_fill_inode = 1;
6491     + struct dentry *spliced = NULL;
6492     +
6493     + verify_locked(dentry);
6494     +
6495     + /*
6496     + * We allocate our new inode below by calling unionfs_iget,
6497     + * which will initialize some of the new inode's fields
6498     + */
6499     +
6500     + /*
6501     + * On revalidate we've already got our own inode and just need
6502     + * to fix it up.
6503     + */
6504     + if (flag == INTERPOSE_REVAL) {
6505     + inode = dentry->d_inode;
6506     + UNIONFS_I(inode)->bstart = -1;
6507     + UNIONFS_I(inode)->bend = -1;
6508     + atomic_set(&UNIONFS_I(inode)->generation,
6509     + atomic_read(&UNIONFS_SB(sb)->generation));
6510     +
6511     + UNIONFS_I(inode)->lower_inodes =
6512     + kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6513     + if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
6514     + err = -ENOMEM;
6515     + goto out;
6516     + }
6517     + } else {
6518     + /* get unique inode number for unionfs */
6519     + inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6520     + if (IS_ERR(inode)) {
6521     + err = PTR_ERR(inode);
6522     + goto out;
6523     + }
6524     + if (atomic_read(&inode->i_count) > 1)
6525     + goto skip;
6526     + }
6527     +
6528     + need_fill_inode = 0;
6529     + unionfs_fill_inode(dentry, inode);
6530     +
6531     +skip:
6532     + /* only (our) lookup wants to do a d_add */
6533     + switch (flag) {
6534     + case INTERPOSE_DEFAULT:
6535     + /* for operations which create new inodes */
6536     + d_add(dentry, inode);
6537     + break;
6538     + case INTERPOSE_REVAL_NEG:
6539     + d_instantiate(dentry, inode);
6540     + break;
6541     + case INTERPOSE_LOOKUP:
6542     + spliced = d_splice_alias(inode, dentry);
6543     + if (spliced && spliced != dentry) {
6544     + /*
6545     + * d_splice can return a dentry if it was
6546     + * disconnected and had to be moved. We must ensure
6547     + * that the private data of the new dentry is
6548     + * correct and that the inode info was filled
6549     + * properly. Finally we must return this new
6550     + * dentry.
6551     + */
6552     + spliced->d_op = &unionfs_dops;
6553     + spliced->d_fsdata = dentry->d_fsdata;
6554     + dentry->d_fsdata = NULL;
6555     + dentry = spliced;
6556     + if (need_fill_inode) {
6557     + need_fill_inode = 0;
6558     + unionfs_fill_inode(dentry, inode);
6559     + }
6560     + goto out_spliced;
6561     + } else if (!spliced) {
6562     + if (need_fill_inode) {
6563     + need_fill_inode = 0;
6564     + unionfs_fill_inode(dentry, inode);
6565     + goto out_spliced;
6566     + }
6567     + }
6568     + break;
6569     + case INTERPOSE_REVAL:
6570     + /* Do nothing. */
6571     + break;
6572     + default:
6573     + printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
6574     + BUG();
6575     + }
6576     + goto out;
6577     +
6578     +out_spliced:
6579     + if (!err)
6580     + return spliced;
6581     +out:
6582     + return ERR_PTR(err);
6583     +}
6584     +
6585     +/* like interpose above, but only adds missing lower inodes to d_inode */
6586     +void unionfs_reinterpose(struct dentry *dentry)
6587     +{
6588     + struct dentry *lower_dentry;
6589     + struct inode *inode;
6590     + int bindex, bstart, bend;
6591     +
6592     + verify_locked(dentry);
6593     +
6594     + /* This is pre-allocated inode */
6595     + inode = dentry->d_inode;
6596     +
6597     + bstart = dbstart(dentry);
6598     + bend = dbend(dentry);
6599     + for (bindex = bstart; bindex <= bend; bindex++) {
6600     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6601     + if (!lower_dentry)
6602     + continue;
6603     +
6604     + if (!lower_dentry->d_inode)
6605     + continue;
6606     + if (unionfs_lower_inode_idx(inode, bindex))
6607     + continue;
6608     + unionfs_set_lower_inode_idx(inode, bindex,
6609     + igrab(lower_dentry->d_inode));
6610     + }
6611     + ibstart(inode) = dbstart(dentry);
6612     + ibend(inode) = dbend(dentry);
6613     +}
6614     +
6615     +/*
6616     + * Validate the branch we just looked up (@nd): -EINVAL if it is itself on
6617     + * unionfs (no stacking), -ENOENT if it does not exist, -ENOTDIR if it is
6618     + * not a directory, 0 otherwise.
6619     + */
6620     +int check_branch(struct nameidata *nd)
6621     +{
6622     + struct dentry *branch = nd->path.dentry;
6623     +
6624     + /* XXX: remove in ODF code -- stacking unions allowed there */
6625     + if (strcmp(branch->d_sb->s_type->name, UNIONFS_NAME) == 0)
6626     + return -EINVAL;
6627     + if (branch->d_inode == NULL)
6628     + return -ENOENT;
6629     + if (!S_ISDIR(branch->d_inode->i_mode))
6630     + return -ENOTDIR;
6631     + return 0;
6632     +}
6633     +
6634     +/* returns 1 if either lower dentry is the other one or an ancestor of it */
6635     +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6636     +{
6637     + struct dentry *walk;
6638     +
6639     + for (walk = dent1; walk != dent2; walk = walk->d_parent) {
6640     + if (walk->d_parent == walk)
6641     + break;
6642     + }
6643     + if (walk == dent2)
6644     + return 1;
6645     +
6646     + for (walk = dent2; walk != dent1; walk = walk->d_parent) {
6647     + if (walk->d_parent == walk)
6648     + break;
6649     + }
6650     + return (walk == dent1);
6651     +}
6652     +
6653     +/*
6654     + * Parse a branch mode string: "rw" (also the default when @name is NULL)
6655     + * or "ro". On success store the MAY_* bits in @perms and return 0; on
6656     + * any other string return -EINVAL and leave @perms untouched.
6657     + */
6658     +int parse_branch_mode(const char *name, int *perms)
6659     +{
6660     + int mode_bits = MAY_READ;
6661     +
6662     + if (name == NULL || strcmp(name, "rw") == 0)
6663     + mode_bits |= MAY_WRITE;
6664     + else if (strcmp(name, "ro") != 0)
6665     + return -EINVAL;
6666     +
6667     + *perms = mode_bits;
6668     + return 0;
6669     +}
6670     +
6671     +/*
6672     + * parse the dirs= mount argument
6673     + *
6674     + * We don't need to lock the superblock private data's rwsem, as we get
6675     + * called only by unionfs_read_super - it is still a long time before anyone
6676     + * can even get a reference to us.
6677     + */
6678     +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6679     + *lower_root_info, char *options)
6680     +{
6681     + struct nameidata nd;
6682     + char *name;
6683     + int err = 0;
6684     + int branches = 1;
6685     + int bindex = 0;
6686     + int i = 0;
6687     + int j = 0;
6688     + struct dentry *dent1;
6689     + struct dentry *dent2;
6690     +
6691     + if (options[0] == '\0') {
6692     + printk(KERN_ERR "unionfs: no branches specified\n");
6693     + err = -EINVAL;
6694     + goto out;
6695     + }
6696     +
6697     + /*
6698     + * Each colon means we have a separator, this is really just a rough
6699     + * guess, since strsep will handle empty fields for us.
6700     + */
6701     + for (i = 0; options[i]; i++)
6702     + if (options[i] == ':')
6703     + branches++;
6704     +
6705     + /* allocate space for underlying pointers to lower dentry */
6706     + UNIONFS_SB(sb)->data =
6707     + kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6708     + if (unlikely(!UNIONFS_SB(sb)->data)) {
6709     + err = -ENOMEM;
6710     + goto out;
6711     + }
6712     +
6713     + lower_root_info->lower_paths =
6714     + kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6715     + if (unlikely(!lower_root_info->lower_paths)) {
6716     + err = -ENOMEM;
6717     + goto out;
6718     + }
6719     +
6720     + /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6721     + branches = 0;
6722     + while ((name = strsep(&options, ":")) != NULL) {
6723     + int perms;
6724     + char *mode = strchr(name, '=');
6725     +
6726     + if (!name)
6727     + continue;
6728     + if (!*name) { /* bad use of ':' (extra colons) */
6729     + err = -EINVAL;
6730     + goto out;
6731     + }
6732     +
6733     + branches++;
6734     +
6735     + /* strip off '=' if any */
6736     + if (mode)
6737     + *mode++ = '\0';
6738     +
6739     + err = parse_branch_mode(mode, &perms);
6740     + if (err) {
6741     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6742     + "branch %d\n", mode, bindex);
6743     + goto out;
6744     + }
6745     + /* ensure that leftmost branch is writeable */
6746     + if (!bindex && !(perms & MAY_WRITE)) {
6747     + printk(KERN_ERR "unionfs: leftmost branch cannot be "
6748     + "read-only (use \"-o ro\" to create a "
6749     + "read-only union)\n");
6750     + err = -EINVAL;
6751     + goto out;
6752     + }
6753     +
6754     + err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6755     + if (err) {
6756     + printk(KERN_ERR "unionfs: error accessing "
6757     + "lower directory '%s' (error %d)\n",
6758     + name, err);
6759     + goto out;
6760     + }
6761     +
6762     + err = check_branch(&nd);
6763     + if (err) {
6764     + printk(KERN_ERR "unionfs: lower directory "
6765     + "'%s' is not a valid branch\n", name);
6766     + path_put(&nd.path);
6767     + goto out;
6768     + }
6769     +
6770     + lower_root_info->lower_paths[bindex].dentry = nd.path.dentry;
6771     + lower_root_info->lower_paths[bindex].mnt = nd.path.mnt;
6772     +
6773     + set_branchperms(sb, bindex, perms);
6774     + set_branch_count(sb, bindex, 0);
6775     + new_branch_id(sb, bindex);
6776     +
6777     + if (lower_root_info->bstart < 0)
6778     + lower_root_info->bstart = bindex;
6779     + lower_root_info->bend = bindex;
6780     + bindex++;
6781     + }
6782     +
6783     + if (branches == 0) {
6784     + printk(KERN_ERR "unionfs: no branches specified\n");
6785     + err = -EINVAL;
6786     + goto out;
6787     + }
6788     +
6789     + BUG_ON(branches != (lower_root_info->bend + 1));
6790     +
6791     + /*
6792     + * Ensure that no overlaps exist in the branches.
6793     + *
6794     + * This test is required because the Linux kernel has no support
6795     + * currently for ensuring coherency between stackable layers and
6796     + * branches. If we were to allow overlapping branches, it would be
6797     + * possible, for example, to delete a file via one branch, which
6798     + * would not be reflected in another branch. Such incoherency could
6799     + * lead to inconsistencies and even kernel oopses. Rather than
6800     + * implement hacks to work around some of these cache-coherency
6801     + * problems, we prevent branch overlapping, for now. A complete
6802     + * solution will involve proper kernel/VFS support for cache
6803     + * coherency, at which time we could safely remove this
6804     + * branch-overlapping test.
6805     + */
6806     + for (i = 0; i < branches; i++) {
6807     + dent1 = lower_root_info->lower_paths[i].dentry;
6808     + for (j = i + 1; j < branches; j++) {
6809     + dent2 = lower_root_info->lower_paths[j].dentry;
6810     + if (is_branch_overlap(dent1, dent2)) {
6811     + printk(KERN_ERR "unionfs: branches %d and "
6812     + "%d overlap\n", i, j);
6813     + err = -EINVAL;
6814     + goto out;
6815     + }
6816     + }
6817     + }
6818     +
6819     +out:
6820     + if (err) {
6821     + for (i = 0; i < branches; i++)
6822     + path_put(&lower_root_info->lower_paths[i]);
6823     +
6824     + kfree(lower_root_info->lower_paths);
6825     + kfree(UNIONFS_SB(sb)->data);
6826     +
6827     + /*
6828     + * MUST clear the pointers to prevent potential double free if
6829     + * the caller dies later on
6830     + */
6831     + lower_root_info->lower_paths = NULL;
6832     + UNIONFS_SB(sb)->data = NULL;
6833     + }
6834     + return err;
6835     +}
6836     +
6837     +/*
6838     + * Parse mount options. See the manual page for usage instructions.
6839     + *
6840     + * Returns the dentry object of the lower-level (lower) directory;
6841     + * We want to mount our stackable file system on top of that lower directory.
6842     + */
6843     +static struct unionfs_dentry_info *unionfs_parse_options(
6844     + struct super_block *sb,
6845     + char *options)
6846     +{
6847     + struct unionfs_dentry_info *lower_root_info;
6848     + char *optname;
6849     + int err = 0;
6850     + int bindex;
6851     + int dirsfound = 0;
6852     +
6853     + /* allocate private data area */
6854     + err = -ENOMEM;
6855     + lower_root_info =
6856     + kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6857     + if (unlikely(!lower_root_info))
6858     + goto out_error;
6859     + lower_root_info->bstart = -1;
6860     + lower_root_info->bend = -1;
6861     + lower_root_info->bopaque = -1;
6862     +
6863     + while ((optname = strsep(&options, ",")) != NULL) {
6864     + char *optarg;
6865     +
6866     + if (!optname || !*optname)
6867     + continue;
6868     +
6869     + optarg = strchr(optname, '=');
6870     + if (optarg)
6871     + *optarg++ = '\0';
6872     +
6873     + /*
6874     + * All of our options take an argument now. Insert ones that
6875     + * don't, above this check.
6876     + */
6877     + if (!optarg) {
6878     + printk(KERN_ERR "unionfs: %s requires an argument\n",
6879     + optname);
6880     + err = -EINVAL;
6881     + goto out_error;
6882     + }
6883     +
6884     + if (!strcmp("dirs", optname)) {
6885     + if (++dirsfound > 1) {
6886     + printk(KERN_ERR
6887     + "unionfs: multiple dirs specified\n");
6888     + err = -EINVAL;
6889     + goto out_error;
6890     + }
6891     + err = parse_dirs_option(sb, lower_root_info, optarg);
6892     + if (err)
6893     + goto out_error;
6894     + continue;
6895     + }
6896     +
6897     + err = -EINVAL;
6898     + printk(KERN_ERR
6899     + "unionfs: unrecognized option '%s'\n", optname);
6900     + goto out_error;
6901     + }
6902     + if (dirsfound != 1) {
6903     + printk(KERN_ERR "unionfs: dirs option required\n");
6904     + err = -EINVAL;
6905     + goto out_error;
6906     + }
6907     + goto out;
6908     +
6909     +out_error:
6910     + if (lower_root_info && lower_root_info->lower_paths) {
6911     + for (bindex = lower_root_info->bstart;
6912     + bindex >= 0 && bindex <= lower_root_info->bend;
6913     + bindex++)
6914     + path_put(&lower_root_info->lower_paths[bindex]);
6915     + }
6916     +
6917     + kfree(lower_root_info->lower_paths);
6918     + kfree(lower_root_info);
6919     +
6920     + kfree(UNIONFS_SB(sb)->data);
6921     + UNIONFS_SB(sb)->data = NULL;
6922     +
6923     + lower_root_info = ERR_PTR(err);
6924     +out:
6925     + return lower_root_info;
6926     +}
6927     +
6928     +/*
6929     + * our custom d_alloc_root work-alike
6930     + *
6931     + * we can't use d_alloc_root if we want to use our own interpose function
6932     + * unchanged, so we simply call our own "fake" d_alloc_root
6933     + */
6934     +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
6935     +{
6936     + struct dentry *ret = NULL;
6937     +
6938     + if (sb) {
6939     + static const struct qstr name = {
6940     + .name = "/",
6941     + .len = 1
6942     + };
6943     +
6944     + ret = d_alloc(NULL, &name);
6945     + if (likely(ret)) {
6946     + ret->d_op = &unionfs_dops;
6947     + ret->d_sb = sb;
6948     + ret->d_parent = ret;
6949     + }
6950     + }
6951     + return ret;
6952     +}
6953     +
/*
 * Fill in a new unionfs superblock from the mount data.
 *
 * @sb:       superblock to initialise
 * @raw_data: mount option string; must be non-NULL, passed on to
 *            unionfs_parse_options()
 * @silent:   not referenced in this function
 *
 * Steps: allocate sb->s_fs_info, parse the options into a temporary
 * unionfs_dentry_info, bump s_active on and record each lower superblock,
 * create the root dentry with its private data, copy the lower
 * dentry/vfsmount pointers into it, and interpose to create the upper inode.
 *
 * Returns 0 on success or a negative errno.  Note that the "out" label is
 * reached on success as well, so the temporary lower_root_info (and its
 * lower_paths array, whose entries have been copied into the root dentry by
 * then) is always freed before returning.
 *
 * There is no need to lock the unionfs_super_info's rwsem as there is no
 * way anyone can have a reference to the superblock at this point in time.
 */
static int unionfs_read_super(struct super_block *sb, void *raw_data,
			      int silent)
{
	int err = 0;
	struct unionfs_dentry_info *lower_root_info = NULL;
	int bindex, bstart, bend;

	if (!raw_data) {
		printk(KERN_ERR
		       "unionfs: read_super: missing data argument\n");
		err = -EINVAL;
		goto out;
	}

	/* Allocate superblock private data */
	sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
	if (unlikely(!UNIONFS_SB(sb))) {
		printk(KERN_CRIT "unionfs: read_super: out of memory\n");
		err = -ENOMEM;
		goto out;
	}

	UNIONFS_SB(sb)->bend = -1;
	atomic_set(&UNIONFS_SB(sb)->generation, 1);
	init_rwsem(&UNIONFS_SB(sb)->rwsem);
	UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */

	lower_root_info = unionfs_parse_options(sb, raw_data);
	if (IS_ERR(lower_root_info)) {
		printk(KERN_ERR
		       "unionfs: read_super: error while parsing options "
		       "(err = %ld)\n", PTR_ERR(lower_root_info));
		err = PTR_ERR(lower_root_info);
		/* the parser already released everything it allocated */
		lower_root_info = NULL;
		goto out_free;
	}
	if (lower_root_info->bstart == -1) {
		/* lower_root_info itself is freed at the "out" label */
		err = -ENOENT;
		goto out_free;
	}

	/* set the lower superblock field of upper superblock */
	bstart = lower_root_info->bstart;
	BUG_ON(bstart != 0);
	sbend(sb) = bend = lower_root_info->bend;
	for (bindex = bstart; bindex <= bend; bindex++) {
		struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
		/* undone by the atomic_dec() under out_dput on failure */
		atomic_inc(&d->d_sb->s_active);
		unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
	}

	/* max Bytes is the maximum bytes from highest priority branch */
	sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;

	/*
	 * Our c/m/atime granularity is 1 ns because we may stack on file
	 * systems whose granularity is as good.  This is important for our
	 * time-based cache coherency.
	 */
	sb->s_time_gran = 1;

	sb->s_op = &unionfs_sops;

	/* See comment next to the definition of unionfs_d_alloc_root */
	sb->s_root = unionfs_d_alloc_root(sb);
	if (unlikely(!sb->s_root)) {
		err = -ENOMEM;
		goto out_dput;
	}

	/* link the upper and lower dentries */
	sb->s_root->d_fsdata = NULL;
	err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
	if (unlikely(err))
		goto out_freedpd;

	/* Set the lower dentries for s_root */
	for (bindex = bstart; bindex <= bend; bindex++) {
		struct dentry *d;
		struct vfsmount *m;

		d = lower_root_info->lower_paths[bindex].dentry;
		m = lower_root_info->lower_paths[bindex].mnt;

		unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
		unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
	}
	dbstart(sb->s_root) = bstart;
	dbend(sb->s_root) = bend;

	/* Set the generation number to one, since this is for the mount. */
	atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);

	/*
	 * Call interpose to create the upper level inode.  Only
	 * INTERPOSE_LOOKUP can return a value other than 0 on err.
	 */
	err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
	unionfs_unlock_dentry(sb->s_root);
	if (!err)
		goto out;
	/* else fall through */

out_freedpd:
	if (UNIONFS_D(sb->s_root)) {
		kfree(UNIONFS_D(sb->s_root)->lower_paths);
		free_dentry_private_data(sb->s_root);
	}
	/*
	 * NOTE(review): sb->s_root is not reset to NULL after this dput();
	 * confirm the VFS teardown of a failed mount does not look at it.
	 */
	dput(sb->s_root);

out_dput:
	if (lower_root_info && !IS_ERR(lower_root_info)) {
		for (bindex = lower_root_info->bstart;
		     bindex <= lower_root_info->bend; bindex++) {
			struct dentry *d;
			d = lower_root_info->lower_paths[bindex].dentry;
			/* drop refs we took earlier */
			atomic_dec(&d->d_sb->s_active);
			path_put(&lower_root_info->lower_paths[bindex]);
		}
		kfree(lower_root_info->lower_paths);
		kfree(lower_root_info);
		/* keeps the "out" label below from freeing it a second time */
		lower_root_info = NULL;
	}

out_free:
	kfree(UNIONFS_SB(sb)->data);
	kfree(UNIONFS_SB(sb));
	sb->s_fs_info = NULL;

out:
	/* reached on success too: the temporary root info is always freed */
	if (lower_root_info && !IS_ERR(lower_root_info)) {
		kfree(lower_root_info->lower_paths);
		kfree(lower_root_info);
	}
	return err;
}
7095     +
7096     +static int unionfs_get_sb(struct file_system_type *fs_type,
7097     + int flags, const char *dev_name,
7098     + void *raw_data, struct vfsmount *mnt)
7099     +{
7100     + int err;
7101     + err = get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7102     + if (!err)
7103     + UNIONFS_SB(mnt->mnt_sb)->dev_name =
7104     + kstrdup(dev_name, GFP_KERNEL);
7105     + return err;
7106     +}
7107     +
7108     +static struct file_system_type unionfs_fs_type = {
7109     + .owner = THIS_MODULE,
7110     + .name = UNIONFS_NAME,
7111     + .get_sb = unionfs_get_sb,
7112     + .kill_sb = generic_shutdown_super,
7113     + .fs_flags = FS_REVAL_DOT,
7114     +};
7115     +
7116     +static int __init init_unionfs_fs(void)
7117     +{
7118     + int err;
7119     +
7120     + pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7121     +
7122     + err = unionfs_init_filldir_cache();
7123     + if (unlikely(err))
7124     + goto out;
7125     + err = unionfs_init_inode_cache();
7126     + if (unlikely(err))
7127     + goto out;
7128     + err = unionfs_init_dentry_cache();
7129     + if (unlikely(err))
7130     + goto out;
7131     + err = init_sioq();
7132     + if (unlikely(err))
7133     + goto out;
7134     + err = register_filesystem(&unionfs_fs_type);
7135     +out:
7136     + if (unlikely(err)) {
7137     + stop_sioq();
7138     + unionfs_destroy_filldir_cache();
7139     + unionfs_destroy_inode_cache();
7140     + unionfs_destroy_dentry_cache();
7141     + }
7142     + return err;
7143     +}
7144     +
/*
 * Module exit: stop the sioq, destroy the filldir, inode and dentry caches,
 * unregister the file system type and log completion.
 *
 * NOTE(review): the file system type is unregistered only after the caches
 * have been destroyed; presumably harmless because no unionfs mount can
 * exist while the module is being unloaded -- confirm.
 */
static void __exit exit_unionfs_fs(void)
{
	stop_sioq();
	unionfs_destroy_filldir_cache();
	unionfs_destroy_inode_cache();
	unionfs_destroy_dentry_cache();
	unregister_filesystem(&unionfs_fs_type);
	pr_info("Completed unionfs module unload\n");
}
7154     +
/* Module metadata, followed by registration of the init and exit hooks. */
MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
	      " (http://www.fsl.cs.sunysb.edu)");
MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
		   " (http://unionfs.filesystems.org)");
MODULE_LICENSE("GPL");

module_init(init_unionfs_fs);
module_exit(exit_unionfs_fs);
7163     diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7164     new file mode 100644
7165     index 0000000..1f70535
7166     --- /dev/null
7167     +++ b/fs/unionfs/mmap.c
7168     @@ -0,0 +1,89 @@
7169     +/*
7170     + * Copyright (c) 2003-2010 Erez Zadok
7171     + * Copyright (c) 2003-2006 Charles P. Wright
7172     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7173     + * Copyright (c) 2005-2006 Junjiro Okajima
7174     + * Copyright (c) 2006 Shaya Potter
7175     + * Copyright (c) 2005 Arun M. Krishnakumar
7176     + * Copyright (c) 2004-2006 David P. Quigley
7177     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7178     + * Copyright (c) 2003 Puja Gupta
7179     + * Copyright (c) 2003 Harikesavan Krishnan
7180     + * Copyright (c) 2003-2010 Stony Brook University
7181     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
7182     + *
7183     + * This program is free software; you can redistribute it and/or modify
7184     + * it under the terms of the GNU General Public License version 2 as
7185     + * published by the Free Software Foundation.
7186     + */
7187     +
7188     +#include "union.h"
7189     +
7190     +
/*
 * XXX: we need a dummy readpage handler because generic_file_mmap (which we
 * use in unionfs_mmap) checks for the existence of
 * mapping->a_ops->readpage, else it returns -ENOEXEC.  The VFS will need to
 * be fixed to allow a file system to define vm_ops->fault without any
 * address_space_ops whatsoever.
 *
 * Otherwise, we don't want to use our readpage method at all: the handler
 * is a placeholder, and actually reaching it triggers BUG().
 */
static int unionfs_readpage(struct file *file, struct page *page)
{
	BUG();
	return -EINVAL;
}
7205     +
7206     +static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
7207     +{
7208     + int err;
7209     + struct file *file, *lower_file;
7210     + const struct vm_operations_struct *lower_vm_ops;
7211     + struct vm_area_struct lower_vma;
7212     +
7213     + BUG_ON(!vma);
7214     + memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
7215     + file = lower_vma.vm_file;
7216     + lower_vm_ops = UNIONFS_F(file)->lower_vm_ops;
7217     + BUG_ON(!lower_vm_ops);
7218     +
7219     + lower_file = unionfs_lower_file(file);
7220     + BUG_ON(!lower_file);
7221     + /*
7222     + * XXX: vm_ops->fault may be called in parallel. Because we have to
7223     + * resort to temporarily changing the vma->vm_file to point to the
7224     + * lower file, a concurrent invocation of unionfs_fault could see a
7225     + * different value. In this workaround, we keep a different copy of
7226     + * the vma structure in our stack, so we never expose a different
7227     + * value of the vma->vm_file called to us, even temporarily. A
7228     + * better fix would be to change the calling semantics of ->fault to
7229     + * take an explicit file pointer.
7230     + */
7231     + lower_vma.vm_file = lower_file;
7232     + err = lower_vm_ops->fault(&lower_vma, vmf);
7233     + return err;
7234     +}
7235     +
/*
 * XXX: the default address_space_ops for unionfs is empty.  We cannot set
 * our inode->i_mapping->a_ops to NULL because too many code paths expect
 * the a_ops vector to be non-NULL, so an all-NULL operations table is
 * provided instead.
 */
struct address_space_operations unionfs_aops = {
	/* empty on purpose */
};
7244     +
/*
 * XXX: we need a second, dummy address_space_ops vector, to be used
 * temporarily during unionfs_mmap, because the latter calls
 * generic_file_mmap, which checks if ->readpage exists, else returns
 * -ENOEXEC.  The only member set is ->readpage, pointing at the
 * placeholder unionfs_readpage.
 */
struct address_space_operations unionfs_dummy_aops = {
	.readpage	= unionfs_readpage,
};
7254     +
/* vm operations for unionfs mappings; only ->fault is provided. */
struct vm_operations_struct unionfs_vm_ops = {
	.fault		= unionfs_fault,
};
7258     diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7259     new file mode 100644
7260     index 0000000..f745fbc
7261     --- /dev/null
7262     +++ b/fs/unionfs/rdstate.c
7263     @@ -0,0 +1,285 @@
7264     +/*
7265     + * Copyright (c) 2003-2010 Erez Zadok
7266     + * Copyright (c) 2003-2006 Charles P. Wright
7267     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7268     + * Copyright (c) 2005-2006 Junjiro Okajima
7269     + * Copyright (c) 2005 Arun M. Krishnakumar
7270     + * Copyright (c) 2004-2006 David P. Quigley
7271     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7272     + * Copyright (c) 2003 Puja Gupta
7273     + * Copyright (c) 2003 Harikesavan Krishnan
7274     + * Copyright (c) 2003-2010 Stony Brook University
7275     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
7276     + *
7277     + * This program is free software; you can redistribute it and/or modify
7278     + * it under the terms of the GNU General Public License version 2 as
7279     + * published by the Free Software Foundation.
7280     + */
7281     +
7282     +#include "union.h"
7283     +
7284     +/* This file contains the routines for maintaining readdir state. */
7285     +
/*
 * Two structures are involved here: the rdstate (struct unionfs_dir_state),
 * which is a hash table, and struct filldir_node, which is the type of the
 * entries stored in that table.
 */
7290     +
/*
 * This is a struct kmem_cache for filldir nodes, because we allocate a lot
 * of them and they shouldn't waste memory.  If the node has a small name
 * (shorter than DNAME_INLINE_LEN_MIN, as defined by the dentry structure),
 * then we use an inline name to preserve kmalloc space.
 */
static struct kmem_cache *unionfs_filldir_cachep;
7298     +
7299     +int unionfs_init_filldir_cache(void)
7300     +{
7301     + unionfs_filldir_cachep =
7302     + kmem_cache_create("unionfs_filldir",
7303     + sizeof(struct filldir_node), 0,
7304     + SLAB_RECLAIM_ACCOUNT, NULL);
7305     +
7306     + return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7307     +}
7308     +
7309     +void unionfs_destroy_filldir_cache(void)
7310     +{
7311     + if (unionfs_filldir_cachep)
7312     + kmem_cache_destroy(unionfs_filldir_cachep);
7313     +}
7314     +
7315     +/*
7316     + * This is a tuning parameter that tells us roughly how big to make the
7317     + * hash table in directory entries per page. This isn't perfect, but
7318     + * at least we get a hash table size that shouldn't be too overloaded.
7319     + * The following averages are based on my home directory.
7320     + * 14.44693 Overall
7321     + * 12.29 Single Page Directories
7322     + * 117.93 Multi-page directories
7323     + */
7324     +#define DENTPAGE 4096
7325     +#define DENTPERONEPAGE 12
7326     +#define DENTPERPAGE 118
7327     +#define MINHASHSIZE 1
7328     +static int guesstimate_hash_size(struct inode *inode)
7329     +{
7330     + struct inode *lower_inode;
7331     + int bindex;
7332     + int hashsize = MINHASHSIZE;
7333     +
7334     + if (UNIONFS_I(inode)->hashsize > 0)
7335     + return UNIONFS_I(inode)->hashsize;
7336     +
7337     + for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7338     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
7339     + if (!lower_inode)
7340     + continue;
7341     +
7342     + if (i_size_read(lower_inode) == DENTPAGE)
7343     + hashsize += DENTPERONEPAGE;
7344     + else
7345     + hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7346     + DENTPERPAGE;
7347     + }
7348     +
7349     + return hashsize;
7350     +}
7351     +
7352     +int init_rdstate(struct file *file)
7353     +{
7354     + BUG_ON(sizeof(loff_t) !=
7355     + (sizeof(unsigned int) + sizeof(unsigned int)));
7356     + BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7357     +
7358     + UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7359     + fbstart(file));
7360     +
7361     + return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7362     +}
7363     +
7364     +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7365     +{
7366     + struct unionfs_dir_state *rdstate = NULL;
7367     + struct list_head *pos;
7368     +
7369     + spin_lock(&UNIONFS_I(inode)->rdlock);
7370     + list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7371     + struct unionfs_dir_state *r =
7372     + list_entry(pos, struct unionfs_dir_state, cache);
7373     + if (fpos == rdstate2offset(r)) {
7374     + UNIONFS_I(inode)->rdcount--;
7375     + list_del(&r->cache);
7376     + rdstate = r;
7377     + break;
7378     + }
7379     + }
7380     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7381     + return rdstate;
7382     +}
7383     +
7384     +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7385     +{
7386     + int i = 0;
7387     + int hashsize;
7388     + unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7389     + struct unionfs_dir_state *rdstate;
7390     +
7391     + hashsize = guesstimate_hash_size(inode);
7392     + mallocsize += hashsize * sizeof(struct list_head);
7393     + mallocsize = __roundup_pow_of_two(mallocsize);
7394     +
7395     + /* This should give us about 500 entries anyway. */
7396     + if (mallocsize > PAGE_SIZE)
7397     + mallocsize = PAGE_SIZE;
7398     +
7399     + hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7400     + sizeof(struct list_head);
7401     +
7402     + rdstate = kmalloc(mallocsize, GFP_KERNEL);
7403     + if (unlikely(!rdstate))
7404     + return NULL;
7405     +
7406     + spin_lock(&UNIONFS_I(inode)->rdlock);
7407     + if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7408     + UNIONFS_I(inode)->cookie = 1;
7409     + else
7410     + UNIONFS_I(inode)->cookie++;
7411     +
7412     + rdstate->cookie = UNIONFS_I(inode)->cookie;
7413     + spin_unlock(&UNIONFS_I(inode)->rdlock);
7414     + rdstate->offset = 1;
7415     + rdstate->access = jiffies;
7416     + rdstate->bindex = bindex;
7417     + rdstate->dirpos = 0;
7418     + rdstate->hashentries = 0;
7419     + rdstate->size = hashsize;
7420     + for (i = 0; i < rdstate->size; i++)
7421     + INIT_LIST_HEAD(&rdstate->list[i]);
7422     +
7423     + return rdstate;
7424     +}
7425     +
/*
 * Release one filldir node.  The name buffer is freed separately only when
 * namelen >= DNAME_INLINE_LEN_MIN, which is the case in which
 * add_filldir_node() kmalloc()s it instead of using the inline buffer.
 */
static void free_filldir_node(struct filldir_node *node)
{
	if (node->namelen >= DNAME_INLINE_LEN_MIN)
		kfree(node->name);
	kmem_cache_free(unionfs_filldir_cachep, node);
}
7432     +
7433     +void free_rdstate(struct unionfs_dir_state *state)
7434     +{
7435     + struct filldir_node *tmp;
7436     + int i;
7437     +
7438     + for (i = 0; i < state->size; i++) {
7439     + struct list_head *head = &(state->list[i]);
7440     + struct list_head *pos, *n;
7441     +
7442     + /* traverse the list and deallocate space */
7443     + list_for_each_safe(pos, n, head) {
7444     + tmp = list_entry(pos, struct filldir_node, file_list);
7445     + list_del(&tmp->file_list);
7446     + free_filldir_node(tmp);
7447     + }
7448     + }
7449     +
7450     + kfree(state);
7451     +}
7452     +
7453     +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7454     + const char *name, int namelen,
7455     + int is_whiteout)
7456     +{
7457     + int index;
7458     + unsigned int hash;
7459     + struct list_head *head;
7460     + struct list_head *pos;
7461     + struct filldir_node *cursor = NULL;
7462     + int found = 0;
7463     +
7464     + BUG_ON(namelen <= 0);
7465     +
7466     + hash = full_name_hash(name, namelen);
7467     + index = hash % rdstate->size;
7468     +
7469     + head = &(rdstate->list[index]);
7470     + list_for_each(pos, head) {
7471     + cursor = list_entry(pos, struct filldir_node, file_list);
7472     +
7473     + if (cursor->namelen == namelen && cursor->hash == hash &&
7474     + !strncmp(cursor->name, name, namelen)) {
7475     + /*
7476     + * a duplicate exists, and hence no need to create
7477     + * entry to the list
7478     + */
7479     + found = 1;
7480     +
7481     + /*
7482     + * if a duplicate is found in this branch, and is
7483     + * not due to the caller looking for an entry to
7484     + * whiteout, then the file system may be corrupted.
7485     + */
7486     + if (unlikely(!is_whiteout &&
7487     + cursor->bindex == rdstate->bindex))
7488     + printk(KERN_ERR "unionfs: filldir: possible "
7489     + "I/O error: a file is duplicated "
7490     + "in the same branch %d: %s\n",
7491     + rdstate->bindex, cursor->name);
7492     + break;
7493     + }
7494     + }
7495     +
7496     + if (!found)
7497     + cursor = NULL;
7498     +
7499     + return cursor;
7500     +}
7501     +
7502     +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7503     + int namelen, int bindex, int whiteout)
7504     +{
7505     + struct filldir_node *new;
7506     + unsigned int hash;
7507     + int index;
7508     + int err = 0;
7509     + struct list_head *head;
7510     +
7511     + BUG_ON(namelen <= 0);
7512     +
7513     + hash = full_name_hash(name, namelen);
7514     + index = hash % rdstate->size;
7515     + head = &(rdstate->list[index]);
7516     +
7517     + new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7518     + if (unlikely(!new)) {
7519     + err = -ENOMEM;
7520     + goto out;
7521     + }
7522     +
7523     + INIT_LIST_HEAD(&new->file_list);
7524     + new->namelen = namelen;
7525     + new->hash = hash;
7526     + new->bindex = bindex;
7527     + new->whiteout = whiteout;
7528     +
7529     + if (namelen < DNAME_INLINE_LEN_MIN) {
7530     + new->name = new->iname;
7531     + } else {
7532     + new->name = kmalloc(namelen + 1, GFP_KERNEL);
7533     + if (unlikely(!new->name)) {
7534     + kmem_cache_free(unionfs_filldir_cachep, new);
7535     + new = NULL;
7536     + goto out;
7537     + }
7538     + }
7539     +
7540     + memcpy(new->name, name, namelen);
7541     + new->name[namelen] = '\0';
7542     +
7543     + rdstate->hashentries++;
7544     +
7545     + list_add(&(new->file_list), head);
7546     +out:
7547     + return err;
7548     +}
7549     diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7550     new file mode 100644
7551     index 0000000..936700e
7552     --- /dev/null
7553     +++ b/fs/unionfs/rename.c
7554     @@ -0,0 +1,517 @@
7555     +/*
7556     + * Copyright (c) 2003-2010 Erez Zadok
7557     + * Copyright (c) 2003-2006 Charles P. Wright
7558     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7559     + * Copyright (c) 2005-2006 Junjiro Okajima
7560     + * Copyright (c) 2005 Arun M. Krishnakumar
7561     + * Copyright (c) 2004-2006 David P. Quigley
7562     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7563     + * Copyright (c) 2003 Puja Gupta
7564     + * Copyright (c) 2003 Harikesavan Krishnan
7565     + * Copyright (c) 2003-2010 Stony Brook University
7566     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
7567     + *
7568     + * This program is free software; you can redistribute it and/or modify
7569     + * it under the terms of the GNU General Public License version 2 as
7570     + * published by the Free Software Foundation.
7571     + */
7572     +
7573     +#include "union.h"
7574     +
7575     +/*
7576     + * This is a helper function for rename, used when rename ends up with hosed
7577     + * over dentries and we need to revert.
7578     + */
7579     +static int unionfs_refresh_lower_dentry(struct dentry *dentry,
7580     + struct dentry *parent, int bindex)
7581     +{
7582     + struct dentry *lower_dentry;
7583     + struct dentry *lower_parent;
7584     + int err = 0;
7585     +
7586     + verify_locked(dentry);
7587     +
7588     + lower_parent = unionfs_lower_dentry_idx(parent, bindex);
7589     +
7590     + BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
7591     +
7592     + lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
7593     + dentry->d_name.len);
7594     + if (IS_ERR(lower_dentry)) {
7595     + err = PTR_ERR(lower_dentry);
7596     + goto out;
7597     + }
7598     +
7599     + dput(unionfs_lower_dentry_idx(dentry, bindex));
7600     + iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
7601     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
7602     +
7603     + if (!lower_dentry->d_inode) {
7604     + dput(lower_dentry);
7605     + unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
7606     + } else {
7607     + unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
7608     + unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
7609     + igrab(lower_dentry->d_inode));
7610     + }
7611     +
7612     +out:
7613     + return err;
7614     +}
7615     +
7616     +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7617     + struct dentry *old_parent,
7618     + struct inode *new_dir, struct dentry *new_dentry,
7619     + struct dentry *new_parent,
7620     + int bindex)
7621     +{ /* Rename old_dentry's lower object to new_dentry's lower object within branch bindex. */
7622     + int err = 0;
7623     + struct dentry *lower_old_dentry;
7624     + struct dentry *lower_new_dentry;
7625     + struct dentry *lower_old_dir_dentry;
7626     + struct dentry *lower_new_dir_dentry;
7627     + struct dentry *trap; /* return value of lock_rename() */
7628     +
7629     + lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7630     + lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7631     +
7632     + if (!lower_new_dentry) { /* no lower target in this branch yet: build it */
7633     + lower_new_dentry =
7634     + create_parents(new_parent->d_inode,
7635     + new_dentry, new_dentry->d_name.name,
7636     + bindex);
7637     + if (IS_ERR(lower_new_dentry)) {
7638     + err = PTR_ERR(lower_new_dentry);
7639     + if (IS_COPYUP_ERR(err)) /* copyup-class errors are returned without logging */
7640     + goto out;
7641     + printk(KERN_ERR "unionfs: error creating directory "
7642     + "tree for rename, bindex=%d err=%d\n",
7643     + bindex, err);
7644     + goto out;
7645     + }
7646     + }
7647     +
7648     + /* check for and remove whiteout, if any */
7649     + err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex);
7650     + if (err > 0) /* ignore if whiteout found and successfully removed */
7651     + err = 0;
7652     + if (err)
7653     + goto out;
7654     +
7655     + /* check if old_dentry's branch is writable */
7656     + err = is_robranch_super(old_dentry->d_sb, bindex);
7657     + if (err)
7658     + goto out;
7659     +
7660     + dget(lower_old_dentry); /* these four references are dropped after unlock_rename() */
7661     + dget(lower_new_dentry);
7662     + lower_old_dir_dentry = dget_parent(lower_old_dentry);
7663     + lower_new_dir_dentry = dget_parent(lower_new_dentry);
7664     +
7665     + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7666     + /* source should not be ancestor of target */
7667     + if (trap == lower_old_dentry) {
7668     + err = -EINVAL;
7669     + goto out_err_unlock;
7670     + }
7671     + /* target should not be ancestor of source */
7672     + if (trap == lower_new_dentry) {
7673     + err = -ENOTEMPTY;
7674     + goto out_err_unlock;
7675     + }
7676     + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7677     + lower_new_dir_dentry->d_inode, lower_new_dentry);
7678     +out_err_unlock:
7679     + if (!err) {
7680     + /* update parent dir times */
7681     + fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode);
7682     + fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode);
7683     + }
7684     + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7685     +
7686     + dput(lower_old_dir_dentry);
7687     + dput(lower_new_dir_dentry);
7688     + dput(lower_old_dentry);
7689     + dput(lower_new_dentry);
7690     +
7691     +out:
7692     + if (!err) {
7693     + /* Fixup the new_dentry: widen its branch range to include bindex. */
7694     + if (bindex < dbstart(new_dentry))
7695     + dbstart(new_dentry) = bindex;
7696     + else if (bindex > dbend(new_dentry))
7697     + dbend(new_dentry) = bindex;
7698     + }
7699     +
7700     + return err;
7701     +}
7702     +
7703     +/*
7704     + * Main rename code. This is sufficiently complex, that it's documented in
7705     + * Documentation/filesystems/unionfs/rename.txt. This routine calls
7706     + * __unionfs_rename() above to perform some of the work.
7707     + */
7708     +static int do_unionfs_rename(struct inode *old_dir,
7709     + struct dentry *old_dentry,
7710     + struct dentry *old_parent,
7711     + struct inode *new_dir,
7712     + struct dentry *new_dentry,
7713     + struct dentry *new_parent)
7714     +{
7715     + int err = 0;
7716     + int bindex;
7717     + int old_bstart, old_bend;
7718     + int new_bstart, new_bend;
7719     + int do_copyup = -1; /* -1: no copyup needed; else first branch to try copying up to */
7720     + int local_err = 0; /* result of a single revert step */
7721     + int eio = 0; /* becomes -EIO when a revert step fails */
7722     + int revert = 0; /* 1 once the rename at old_bstart has succeeded */
7723     +
7724     + old_bstart = dbstart(old_dentry);
7725     + old_bend = dbend(old_dentry);
7726     +
7727     + new_bstart = dbstart(new_dentry);
7728     + new_bend = dbend(new_dentry);
7729     +
7730     + /* Rename source to destination. */
7731     + err = __unionfs_rename(old_dir, old_dentry, old_parent,
7732     + new_dir, new_dentry, new_parent,
7733     + old_bstart);
7734     + if (err) {
7735     + if (!IS_COPYUP_ERR(err))
7736     + goto out;
7737     + do_copyup = old_bstart - 1; /* copyup-class error: retry one branch to the left */
7738     + } else {
7739     + revert = 1;
7740     + }
7741     +
7742     + /*
7743     + * Unlink all instances of destination that exist to the left of
7744     + * bstart of source. On error, revert back, goto out.
7745     + */
7746     + for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
7747     + struct dentry *unlink_dentry;
7748     + struct dentry *unlink_dir_dentry;
7749     +
7750     + BUG_ON(bindex < 0);
7751     + unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7752     + if (!unlink_dentry)
7753     + continue;
7754     +
7755     + unlink_dir_dentry = lock_parent(unlink_dentry);
7756     + err = is_robranch_super(old_dir->i_sb, bindex);
7757     + if (!err)
7758     + err = vfs_unlink(unlink_dir_dentry->d_inode,
7759     + unlink_dentry);
7760     +
7761     + fsstack_copy_attr_times(new_parent->d_inode,
7762     + unlink_dir_dentry->d_inode);
7763     + /* propagate number of hard-links */
7764     + new_parent->d_inode->i_nlink =
7765     + unionfs_get_nlinks(new_parent->d_inode);
7766     +
7767     + unlock_dir(unlink_dir_dentry);
7768     + if (!err) {
7769     + if (bindex != new_bstart) { /* the lower dentry at new_bstart is kept */
7770     + dput(unlink_dentry);
7771     + unionfs_set_lower_dentry_idx(new_dentry,
7772     + bindex, NULL);
7773     + }
7774     + } else if (IS_COPYUP_ERR(err)) {
7775     + do_copyup = bindex - 1; /* copy up to the left of this branch instead */
7776     + } else if (revert) {
7777     + goto revert;
7778     + }
7779     + }
7780     +
7781     + if (do_copyup != -1) {
7782     + for (bindex = do_copyup; bindex >= 0; bindex--) {
7783     + /*
7784     + * copyup the file into some left directory, so that
7785     + * you can rename it
7786     + */
7787     + err = copyup_dentry(old_parent->d_inode,
7788     + old_dentry, old_bstart, bindex,
7789     + old_dentry->d_name.name,
7790     + old_dentry->d_name.len, NULL,
7791     + i_size_read(old_dentry->d_inode));
7792     + /* if copyup failed, try next branch to the left */
7793     + if (err)
7794     + continue;
7795     + /*
7796     + * create whiteout before calling __unionfs_rename
7797     + * because the latter will change the old_dentry's
7798     + * lower name and parent dir, resulting in the
7799     + * whiteout getting created in the wrong dir.
7800     + */
7801     + err = create_whiteout(old_dentry, bindex);
7802     + if (err) {
7803     + printk(KERN_ERR "unionfs: can't create a "
7804     + "whiteout for %s in rename (err=%d)\n",
7805     + old_dentry->d_name.name, err);
7806     + continue;
7807     + }
7808     + err = __unionfs_rename(old_dir, old_dentry, old_parent,
7809     + new_dir, new_dentry, new_parent,
7810     + bindex);
7811     + break; /* stop at the first branch where copyup and whiteout both worked */
7812     + }
7813     + }
7814     +
7815     + /* make it opaque */
7816     + if (S_ISDIR(old_dentry->d_inode->i_mode)) {
7817     + err = make_dir_opaque(old_dentry, dbstart(old_dentry));
7818     + if (err)
7819     + goto revert;
7820     + }
7821     +
7822     + /*
7823     + * Create whiteout for source, only if:
7824     + * (1) There is more than one underlying instance of source.
7825     + * (The case where a copyup was done is taken care of above.)
7826     + */
7827     + if ((old_bstart != old_bend) && (do_copyup == -1)) {
7828     + err = create_whiteout(old_dentry, old_bstart);
7829     + if (err) {
7830     + /* can't fix anything now, so we exit with -EIO */
7831     + printk(KERN_ERR "unionfs: can't create a whiteout for "
7832     + "%s in rename!\n", old_dentry->d_name.name);
7833     + err = -EIO;
7834     + }
7835     + }
7836     +
7837     +out:
7838     + return err;
7839     +
7840     +revert:
7841     + /* Revert: refresh both dentries at old_bstart, then rename new back to old. */
7842     + local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
7843     + old_bstart);
7844     + if (local_err) {
7845     + printk(KERN_ERR "unionfs: revert failed in rename: "
7846     + "the new refresh failed\n");
7847     + eio = -EIO;
7848     + }
7849     +
7850     + local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
7851     + old_bstart);
7852     + if (local_err) {
7853     + printk(KERN_ERR "unionfs: revert failed in rename: "
7854     + "the old refresh failed\n");
7855     + eio = -EIO;
7856     + goto revert_out;
7857     + }
7858     +
7859     + if (!unionfs_lower_dentry_idx(new_dentry, bindex) || /* NOTE(review): bindex is whatever the loops above left behind */
7860     + !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
7861     + printk(KERN_ERR "unionfs: revert failed in rename: "
7862     + "the object disappeared from under us!\n");
7863     + eio = -EIO;
7864     + goto revert_out;
7865     + }
7866     +
7867     + if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
7868     + unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
7869     + printk(KERN_ERR "unionfs: revert failed in rename: "
7870     + "the object was created underneath us!\n");
7871     + eio = -EIO;
7872     + goto revert_out;
7873     + }
7874     +
7875     + local_err = __unionfs_rename(new_dir, new_dentry, new_parent,
7876     + old_dir, old_dentry, old_parent,
7877     + old_bstart);
7878     +
7879     + /* If we can't fix it, then we cop-out with -EIO. */
7880     + if (local_err) {
7881     + printk(KERN_ERR "unionfs: revert failed in rename!\n");
7882     + eio = -EIO;
7883     + }
7884     +
7885     + local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
7886     + bindex);
7887     + if (local_err)
7888     + eio = -EIO;
7889     + local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
7890     + bindex);
7891     + if (local_err)
7892     + eio = -EIO;
7893     +
7894     +revert_out:
7895     + if (eio) /* a failed revert overrides the original error */
7896     + err = eio;
7897     + return err;
7898     +}
7899     +
7900     +/*
7901     + * We can't copyup a directory, because it may involve huge numbers of
7902     + * children, etc. Doing that in the kernel would be bad, so instead we
7903     + * return EXDEV to the user-space utility that caused this, and let the
7904     + * user-space recurse and ask us to copy up each file separately.
7905     + */
7906     +static int may_rename_dir(struct dentry *dentry, struct dentry *parent)
7907     +{ /* Returns 0 if the directory may be renamed, -EXDEV or another error otherwise. */
7908     + int err, bstart;
7909     +
7910     + err = check_empty(dentry, parent, NULL);
7911     + if (err == -ENOTEMPTY) {
7912     + if (is_robranch(dentry)) /* non-empty and is_robranch() reports an error */
7913     + return -EXDEV;
7914     + } else if (err) {
7915     + return err;
7916     + }
7917     +
7918     + bstart = dbstart(dentry);
7919     + if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
7920     + return 0; /* single branch, or opaque at bstart */
7921     +
7922     + dbstart(dentry) = bstart + 1; /* temporarily skip the first branch for the check below */
7923     + err = check_empty(dentry, parent, NULL);
7924     + dbstart(dentry) = bstart; /* restore the real start branch */
7925     + if (err == -ENOTEMPTY)
7926     + err = -EXDEV;
7927     + return err;
7928     +}
7929     +
7930     +/*
7931     + * The locking rules in unionfs_rename are complex. We could use a simpler
7932     + * superblock-level name-space lock for renames and copy-ups.
7933     + */
7934     +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7935     + struct inode *new_dir, struct dentry *new_dentry)
7936     +{ /* Lock and revalidate both dentries, then let do_unionfs_rename() do the work. */
7937     + int err = 0;
7938     + struct dentry *wh_dentry;
7939     + struct dentry *old_parent, *new_parent;
7940     + int valid = true;
7941     +
7942     + unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
7943     + old_parent = dget_parent(old_dentry);
7944     + new_parent = dget_parent(new_dentry);
7945     + /* un/lock parent dentries only if they differ from old/new_dentry */
7946     + if (old_parent != old_dentry &&
7947     + old_parent != new_dentry)
7948     + unionfs_lock_dentry(old_parent, UNIONFS_DMUTEX_REVAL_PARENT);
7949     + if (new_parent != old_dentry &&
7950     + new_parent != new_dentry &&
7951     + new_parent != old_parent)
7952     + unionfs_lock_dentry(new_parent, UNIONFS_DMUTEX_REVAL_CHILD);
7953     + unionfs_double_lock_dentry(old_dentry, new_dentry);
7954     +
7955     + valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
7956     + if (!valid) {
7957     + err = -ESTALE;
7958     + goto out;
7959     + }
7960     + if (!d_deleted(new_dentry) && new_dentry->d_inode) { /* only revalidate a live, positive target */
7961     + valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
7962     + if (!valid) {
7963     + err = -ESTALE;
7964     + goto out;
7965     + }
7966     + }
7967     +
7968     + if (!S_ISDIR(old_dentry->d_inode->i_mode))
7969     + err = unionfs_partial_lookup(old_dentry, old_parent);
7970     + else
7971     + err = may_rename_dir(old_dentry, old_parent);
7972     +
7973     + if (err)
7974     + goto out;
7975     +
7976     + err = unionfs_partial_lookup(new_dentry, new_parent);
7977     + if (err)
7978     + goto out;
7979     +
7980     + /*
7981     + * if new_dentry is already lower because of whiteout,
7982     + * simply override it even if the whited-out dir is not empty.
7983     + */
7984     + wh_dentry = find_first_whiteout(new_dentry);
7985     + if (!IS_ERR(wh_dentry)) { /* a non-error dentry came back: just release it */
7986     + dput(wh_dentry);
7987     + } else if (new_dentry->d_inode) {
7988     + if (S_ISDIR(old_dentry->d_inode->i_mode) !=
7989     + S_ISDIR(new_dentry->d_inode->i_mode)) { /* file vs. directory mismatch */
7990     + err = S_ISDIR(old_dentry->d_inode->i_mode) ?
7991     + -ENOTDIR : -EISDIR;
7992     + goto out;
7993     + }
7994     +
7995     + if (S_ISDIR(new_dentry->d_inode->i_mode)) {
7996     + struct unionfs_dir_state *namelist = NULL;
7997     + /* check if this unionfs directory is empty or not */
7998     + err = check_empty(new_dentry, new_parent, &namelist);
7999     + if (err)
8000     + goto out;
8001     +
8002     + if (!is_robranch(new_dentry))
8003     + err = delete_whiteouts(new_dentry,
8004     + dbstart(new_dentry),
8005     + namelist);
8006     +
8007     + free_rdstate(namelist);
8008     +
8009     + if (err)
8010     + goto out;
8011     + }
8012     + }
8013     +
8014     + err = do_unionfs_rename(old_dir, old_dentry, old_parent,
8015     + new_dir, new_dentry, new_parent);
8016     + if (err)
8017     + goto out;
8018     +
8019     + /*
8020     + * force re-lookup since the dir on ro branch is not renamed, and
8021     + * lower dentries still indicate the un-renamed ones.
8022     + */
8023     + if (S_ISDIR(old_dentry->d_inode->i_mode))
8024     + atomic_dec(&UNIONFS_D(old_dentry)->generation);
8025     + else
8026     + unionfs_postcopyup_release(old_dentry);
8027     + if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8028     + unionfs_postcopyup_release(new_dentry);
8029     + unionfs_postcopyup_setmnt(new_dentry);
8030     + if (!unionfs_lower_inode(new_dentry->d_inode)) {
8031     + /*
8032     + * If we get here, it means that no copyup was
8033     + * needed, and that a file by the old name already
8034     + * existed on the destination branch; that file got
8035     + * renamed earlier in this function, so all we need
8036     + * to do here is set the lower inode.
8037     + */
8038     + struct inode *inode;
8039     + inode = unionfs_lower_inode(old_dentry->d_inode);
8040     + igrab(inode);
8041     + unionfs_set_lower_inode_idx(new_dentry->d_inode,
8042     + dbstart(new_dentry),
8043     + inode);
8044     + }
8045     + }
8046     + /* if all of this renaming succeeded, update our times */
8047     + unionfs_copy_attr_times(old_dentry->d_inode);
8048     + unionfs_copy_attr_times(new_dentry->d_inode);
8049     + unionfs_check_inode(old_dir);
8050     + unionfs_check_inode(new_dir);
8051     + unionfs_check_dentry(old_dentry);
8052     + unionfs_check_dentry(new_dentry);
8053     +
8054     +out:
8055     + if (err) /* clear the new_dentry stuff created */
8056     + d_drop(new_dentry);
8057     +
8058     + unionfs_double_unlock_dentry(old_dentry, new_dentry);
8059     + if (new_parent != old_dentry && /* same conditions as when the locks were taken */
8060     + new_parent != new_dentry &&
8061     + new_parent != old_parent)
8062     + unionfs_unlock_dentry(new_parent);
8063     + if (old_parent != old_dentry &&
8064     + old_parent != new_dentry)
8065     + unionfs_unlock_dentry(old_parent);
8066     + dput(new_parent);
8067     + dput(old_parent);
8068     + unionfs_read_unlock(old_dentry->d_sb);
8069     +
8070     + return err;
8071     +}
8072     diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8073     new file mode 100644
8074     index 0000000..760c580
8075     --- /dev/null
8076     +++ b/fs/unionfs/sioq.c
8077     @@ -0,0 +1,101 @@
8078     +/*
8079     + * Copyright (c) 2006-2010 Erez Zadok
8080     + * Copyright (c) 2006 Charles P. Wright
8081     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8082     + * Copyright (c) 2006 Junjiro Okajima
8083     + * Copyright (c) 2006 David P. Quigley
8084     + * Copyright (c) 2006-2010 Stony Brook University
8085     + * Copyright (c) 2006-2010 The Research Foundation of SUNY
8086     + *
8087     + * This program is free software; you can redistribute it and/or modify
8088     + * it under the terms of the GNU General Public License version 2 as
8089     + * published by the Free Software Foundation.
8090     + */
8091     +
8092     +#include "union.h"
8093     +
8094     +/*
8095     + * Super-user IO work Queue - sometimes we need to perform actions which
8096     + * would fail due to the unix permissions on the parent directory (e.g.,
8097     + * rmdir a directory which appears empty, but in reality contains
8098     + * whiteouts).
8099     + */
8100     +
8101     +static struct workqueue_struct *superio_workqueue; /* set by init_sioq(), destroyed by stop_sioq() */
8102     +
8103     +int __init init_sioq(void)
8104     +{ /* Create the "unionfs_siod" workqueue; returns 0 on success or -ENOMEM on failure. */
8105     + int err;
8106     +
8107     + superio_workqueue = create_workqueue("unionfs_siod");
8108     + if (superio_workqueue) /* create_workqueue() reports failure as NULL, not ERR_PTR */
8109     + return 0;
8110     +
8111     + err = -ENOMEM;
8112     + printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8113     + superio_workqueue = NULL;
8114     + return err;
8115     +}
8116     +
8117     +void stop_sioq(void)
8118     +{ /* Destroy the sioq workqueue, if one exists. */
8119     + if (superio_workqueue) /* may be NULL: init_sioq() clears it on failure */
8120     + destroy_workqueue(superio_workqueue);
8121     +}
8122     +
8123     +void run_sioq(work_func_t func, struct sioq_args *args)
8124     +{ /* Queue func on the sioq workqueue and block until args->comp is completed. */
8125     + INIT_WORK(&args->work, func);
8126     +
8127     + init_completion(&args->comp);
8128     + while (!queue_work(superio_workqueue, &args->work)) { /* retry until the item is accepted */
8129     + /* TODO: do accounting if needed */
8130     + schedule();
8131     + }
8132     + wait_for_completion(&args->comp); /* the helpers in this file call complete() when done */
8133     +}
8134     +
8135     +void __unionfs_create(struct work_struct *work)
8136     +{ /* Work function: vfs_create() with the queued arguments; result goes to args->err. */
8137     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8138     + struct create_args *c = &args->create;
8139     +
8140     + args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
8141     + complete(&args->comp); /* wakes the waiter in run_sioq() */
8142     +}
8143     +
8144     +void __unionfs_mkdir(struct work_struct *work)
8145     +{ /* Work function: vfs_mkdir() with the queued arguments; result goes to args->err. */
8146     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8147     + struct mkdir_args *m = &args->mkdir;
8148     +
8149     + args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
8150     + complete(&args->comp); /* wakes the waiter in run_sioq() */
8151     +}
8152     +
8153     +void __unionfs_mknod(struct work_struct *work)
8154     +{ /* Work function: vfs_mknod() with the queued arguments; result goes to args->err. */
8155     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8156     + struct mknod_args *m = &args->mknod;
8157     +
8158     + args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
8159     + complete(&args->comp); /* wakes the waiter in run_sioq() */
8160     +}
8161     +
8162     +void __unionfs_symlink(struct work_struct *work)
8163     +{ /* Work function: vfs_symlink() with the queued arguments; result goes to args->err. */
8164     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8165     + struct symlink_args *s = &args->symlink;
8166     +
8167     + args->err = vfs_symlink(s->parent, s->dentry, s->symbuf);
8168     + complete(&args->comp); /* wakes the waiter in run_sioq() */
8169     +}
8170     +
8171     +void __unionfs_unlink(struct work_struct *work)
8172     +{ /* Work function: vfs_unlink() with the queued arguments; result goes to args->err. */
8173     + struct sioq_args *args = container_of(work, struct sioq_args, work);
8174     + struct unlink_args *u = &args->unlink;
8175     +
8176     + args->err = vfs_unlink(u->parent, u->dentry);
8177     + complete(&args->comp); /* wakes the waiter in run_sioq() */
8178     +}
8179     diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8180     new file mode 100644
8181     index 0000000..b26d248
8182     --- /dev/null
8183     +++ b/fs/unionfs/sioq.h
8184     @@ -0,0 +1,91 @@
8185     +/*
8186     + * Copyright (c) 2006-2010 Erez Zadok
8187     + * Copyright (c) 2006 Charles P. Wright
8188     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8189     + * Copyright (c) 2006 Junjiro Okajima
8190     + * Copyright (c) 2006 David P. Quigley
8191     + * Copyright (c) 2006-2010 Stony Brook University
8192     + * Copyright (c) 2006-2010 The Research Foundation of SUNY
8193     + *
8194     + * This program is free software; you can redistribute it and/or modify
8195     + * it under the terms of the GNU General Public License version 2 as
8196     + * published by the Free Software Foundation.
8197     + */
8198     +
8199     +#ifndef _SIOQ_H
8200     +#define _SIOQ_H
8201     +
8202     +struct deletewh_args { /* NOTE(review): presumably the arguments for __delete_whiteouts() - confirm */
8203     + struct unionfs_dir_state *namelist;
8204     + struct dentry *dentry;
8205     + int bindex;
8206     +};
8207     +
8208     +struct is_opaque_args { /* NOTE(review): presumably the argument for __is_opaque_dir() - confirm */
8209     + struct dentry *dentry;
8210     +};
8211     +
8212     +struct create_args { /* arguments __unionfs_create() hands to vfs_create() */
8213     + struct inode *parent;
8214     + struct dentry *dentry;
8215     + umode_t mode;
8216     + struct nameidata *nd;
8217     +};
8218     +
8219     +struct mkdir_args { /* arguments __unionfs_mkdir() hands to vfs_mkdir() */
8220     + struct inode *parent;
8221     + struct dentry *dentry;
8222     + umode_t mode;
8223     +};
8224     +
8225     +struct mknod_args { /* arguments __unionfs_mknod() hands to vfs_mknod() */
8226     + struct inode *parent;
8227     + struct dentry *dentry;
8228     + umode_t mode;
8229     + dev_t dev;
8230     +};
8231     +
8232     +struct symlink_args { /* arguments __unionfs_symlink() hands to vfs_symlink() */
8233     + struct inode *parent;
8234     + struct dentry *dentry;
8235     + char *symbuf;
8236     +};
8237     +
8238     +struct unlink_args { /* arguments __unionfs_unlink() hands to vfs_unlink() */
8239     + struct inode *parent;
8240     + struct dentry *dentry;
8241     +};
8242     +
8243     +
8244     +struct sioq_args { /* one queued request: work item, completion, result and per-operation arguments */
8245     + struct completion comp; /* completed by the work function, waited on by run_sioq() */
8246     + struct work_struct work; /* initialized and queued by run_sioq() */
8247     + int err; /* return value of the VFS call made by the work function */
8248     + void *ret; /* NOTE(review): not touched by the sioq.c helpers; presumably used elsewhere */
8249     +
8250     + union { /* each work function reads only its own member */
8251     + struct deletewh_args deletewh;
8252     + struct is_opaque_args is_opaque;
8253     + struct create_args create;
8254     + struct mkdir_args mkdir;
8255     + struct mknod_args mknod;
8256     + struct symlink_args symlink;
8257     + struct unlink_args unlink;
8258     + };
8259     +};
8260     +
8261     +/* Extern declarations for SIOQ functions */
8262     +extern int __init init_sioq(void);
8263     +extern void stop_sioq(void);
8264     +extern void run_sioq(work_func_t func, struct sioq_args *args);
8265     +
8266     +/* Extern declarations for our privilege escalation helpers (work functions for run_sioq()) */
8267     +extern void __unionfs_create(struct work_struct *work);
8268     +extern void __unionfs_mkdir(struct work_struct *work);
8269     +extern void __unionfs_mknod(struct work_struct *work);
8270     +extern void __unionfs_symlink(struct work_struct *work);
8271     +extern void __unionfs_unlink(struct work_struct *work);
8272     +extern void __delete_whiteouts(struct work_struct *work);
8273     +extern void __is_opaque_dir(struct work_struct *work);
8274     +
8275     +#endif /* not _SIOQ_H */
8276     diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8277     new file mode 100644
8278     index 0000000..570a344
8279     --- /dev/null
8280     +++ b/fs/unionfs/subr.c
8281     @@ -0,0 +1,95 @@
8282     +/*
8283     + * Copyright (c) 2003-2010 Erez Zadok
8284     + * Copyright (c) 2003-2006 Charles P. Wright
8285     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8286     + * Copyright (c) 2005-2006 Junjiro Okajima
8287     + * Copyright (c) 2005 Arun M. Krishnakumar
8288     + * Copyright (c) 2004-2006 David P. Quigley
8289     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8290     + * Copyright (c) 2003 Puja Gupta
8291     + * Copyright (c) 2003 Harikesavan Krishnan
8292     + * Copyright (c) 2003-2010 Stony Brook University
8293     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
8294     + *
8295     + * This program is free software; you can redistribute it and/or modify
8296     + * it under the terms of the GNU General Public License version 2 as
8297     + * published by the Free Software Foundation.
8298     + */
8299     +
8300     +#include "union.h"
8301     +
8302     +/*
8303     + * Returns the i_nlink value to report for a unionfs inode, based on its type.
8304     + */
8305     +int unionfs_get_nlinks(const struct inode *inode)
8306     +{
8307     + /* don't bother to do all the work since we're unlinked */
8308     + if (inode->i_nlink == 0)
8309     + return 0;
8310     +
8311     + if (!S_ISDIR(inode->i_mode)) /* non-directories report the lower inode's link count */
8312     + return unionfs_lower_inode(inode)->i_nlink;
8313     +
8314     + /*
8315     + * For directories, we return 1. The only place that could care
8316     + * about links is readdir, and there's d_type there so even that
8317     + * doesn't matter.
8318     + */
8319     + return 1;
8320     +}
8321     +
8322     +/* copy a/m/ctime from the lower branch with the newest times; a NULL upper is ignored */
8323     +void unionfs_copy_attr_times(struct inode *upper)
8324     +{
8325     + int bindex;
8326     + struct inode *lower;
8327     +
8328     + if (!upper)
8329     + return;
8330     + if (ibstart(upper) < 0) { /* negative start branch: nothing to scan */
8331     +#ifdef CONFIG_UNION_FS_DEBUG
8332     + WARN_ON(ibstart(upper) < 0);
8333     +#endif /* CONFIG_UNION_FS_DEBUG */
8334     + return;
8335     + }
8336     + for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
8337     + lower = unionfs_lower_inode_idx(upper, bindex);
8338     + if (!lower)
8339     + continue; /* not all lower dir objects may exist */
8340     + if (unlikely(timespec_compare(&upper->i_mtime, /* each time only ever moves forward */
8341     + &lower->i_mtime) < 0))
8342     + upper->i_mtime = lower->i_mtime;
8343     + if (unlikely(timespec_compare(&upper->i_ctime,
8344     + &lower->i_ctime) < 0))
8345     + upper->i_ctime = lower->i_ctime;
8346     + if (unlikely(timespec_compare(&upper->i_atime,
8347     + &lower->i_atime) < 0))
8348     + upper->i_atime = lower->i_atime;
8349     + }
8350     +}
8351     +
8352     +/*
8353     + * A unionfs/fanout version of fsstack_copy_attr_all. Uses
8354     + * unionfs_get_nlinks to properly calculate the number of links to a file.
8355     + * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
8356     + * important if the lower inode is a directory type)
8357     + */
8358     +void unionfs_copy_attr_all(struct inode *dest,
8359     + const struct inode *src)
8360     +{
8361     + dest->i_mode = src->i_mode;
8362     + dest->i_uid = src->i_uid;
8363     + dest->i_gid = src->i_gid;
8364     + dest->i_rdev = src->i_rdev;
8365     +
8366     + unionfs_copy_attr_times(dest); /* times come from dest's lower inodes, not from src */
8367     +
8368     + dest->i_blkbits = src->i_blkbits;
8369     + dest->i_flags = src->i_flags;
8370     +
8371     + /*
8372     + * Update the nlinks AFTER updating the above fields, because
8373     + * unionfs_get_nlinks() looks at i_mode to tell directories apart.
8374     + */
8375     + dest->i_nlink = unionfs_get_nlinks(dest);
8376     +}
8377     diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8378     new file mode 100644
8379     index 0000000..a8f5571
8380     --- /dev/null
8381     +++ b/fs/unionfs/super.c
8382     @@ -0,0 +1,1048 @@
8383     +/*
8384     + * Copyright (c) 2003-2010 Erez Zadok
8385     + * Copyright (c) 2003-2006 Charles P. Wright
8386     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8387     + * Copyright (c) 2005-2006 Junjiro Okajima
8388     + * Copyright (c) 2005 Arun M. Krishnakumar
8389     + * Copyright (c) 2004-2006 David P. Quigley
8390     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8391     + * Copyright (c) 2003 Puja Gupta
8392     + * Copyright (c) 2003 Harikesavan Krishnan
8393     + * Copyright (c) 2003-2010 Stony Brook University
8394     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
8395     + *
8396     + * This program is free software; you can redistribute it and/or modify
8397     + * it under the terms of the GNU General Public License version 2 as
8398     + * published by the Free Software Foundation.
8399     + */
8400     +
8401     +#include "union.h"
8402     +
8403     +/*
8404     + * The inode cache is used with alloc_inode for both our inode info and the
8405     + * vfs inode.
8406     + */
8407     +static struct kmem_cache *unionfs_inode_cachep;
8408     +
8409     +struct inode *unionfs_iget(struct super_block *sb, unsigned long ino)
8410     +{ /* Get inode ino of sb, initializing the unionfs-private part if it is new; ERR_PTR(-ENOMEM) on failure. */
8411     + int size;
8412     + struct unionfs_inode_info *info;
8413     + struct inode *inode;
8414     +
8415     + inode = iget_locked(sb, ino);
8416     + if (!inode)
8417     + return ERR_PTR(-ENOMEM);
8418     + if (!(inode->i_state & I_NEW)) /* not new: return it without re-initializing */
8419     + return inode;
8420     +
8421     + info = UNIONFS_I(inode);
8422     + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); /* zero everything before the embedded vfs_inode */
8423     + info->bstart = -1; /* branch range starts out unset */
8424     + info->bend = -1;
8425     + atomic_set(&info->generation,
8426     + atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8427     + spin_lock_init(&info->rdlock);
8428     + info->rdcount = 1;
8429     + info->hashsize = -1;
8430     + INIT_LIST_HEAD(&info->readdircache);
8431     +
8432     + size = sbmax(inode->i_sb) * sizeof(struct inode *); /* sbmax() lower-inode pointer slots */
8433     + info->lower_inodes = kzalloc(size, GFP_KERNEL);
8434     + if (unlikely(!info->lower_inodes)) {
8435     + printk(KERN_CRIT "unionfs: no kernel memory when allocating "
8436     + "lower-pointer array!\n");
8437     + iget_failed(inode);
8438     + return ERR_PTR(-ENOMEM);
8439     + }
8440     +
8441     + inode->i_version++;
8442     + inode->i_op = &unionfs_main_iops;
8443     + inode->i_fop = &unionfs_main_fops;
8444     +
8445     + inode->i_mapping->a_ops = &unionfs_aops;
8446     +
8447     + /*
8448     + * reset times so unionfs_copy_attr_all can keep our time invariants
8449     + * right (upper inode time being the max of all lower ones).
8450     + */
8451     + inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
8452     + inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
8453     + inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
8454     + unlock_new_inode(inode);
8455     + return inode;
8456     +}
8457     +
8458     +/*
8459     + * we now define delete_inode, because there are two VFS paths that may
8460     + * destroy an inode: one of them calls clear inode before doing everything
8461     + * else that's needed, and the other is fine. This way we truncate the inode
8462     + * size (and its pages) and then clear our own inode, which will do an iput
8463     + * on our and the lower inode.
8464     + *
8465     + * No need to lock sb info's rwsem.
8466     + */
8467     +static void unionfs_delete_inode(struct inode *inode)
8468     +{
8469     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8470     + spin_lock(&inode->i_lock); /* i_lock is taken around i_size_write() only on 32-bit SMP */
8471     +#endif
8472     + i_size_write(inode, 0); /* every f/s seems to do that */
8473     +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
8474     + spin_unlock(&inode->i_lock);
8475     +#endif
8476     +
8477     + if (inode->i_data.nrpages) /* drop any pages still attached to the mapping */
8478     + truncate_inode_pages(&inode->i_data, 0);
8479     +
8480     + clear_inode(inode);
8481     +}
8482     +
8483     +/*
8484     + * final actions when unmounting a file system
8485     + *
8486     + * No need to lock rwsem.
8487     + */
8488     +static void unionfs_put_super(struct super_block *sb)
8489     +{
8490     + int bindex, bstart, bend;
8491     + struct unionfs_sb_info *spd;
8492     + int leaks = 0; /* set to 1 if any branch still has references */
8493     +
8494     + spd = UNIONFS_SB(sb);
8495     + if (!spd) /* no private superblock info: nothing to release */
8496     + return;
8497     +
8498     + bstart = sbstart(sb);
8499     + bend = sbend(sb);
8500     +
8501     + /* Make sure we have no leaks of branchget/branchput. */
8502     + for (bindex = bstart; bindex <= bend; bindex++)
8503     + if (unlikely(branch_count(sb, bindex) != 0)) {
8504     + printk(KERN_CRIT
8505     + "unionfs: branch %d has %d references left!\n",
8506     + bindex, branch_count(sb, bindex));
8507     + leaks = 1;
8508     + }
8509     + WARN_ON(leaks != 0); /* warn once, after all leaking branches were reported */
8510     +
8511     + /* decrement lower super references */
8512     + for (bindex = bstart; bindex <= bend; bindex++) {
8513     + struct super_block *s;
8514     + s = unionfs_lower_super_idx(sb, bindex);
8515     + unionfs_set_lower_super_idx(sb, bindex, NULL); /* clear our pointer before dropping the reference */
8516     + atomic_dec(&s->s_active);
8517     + }
8518     +
8519     + kfree(spd->dev_name);
8520     + kfree(spd->data);
8521     + kfree(spd);
8522     + sb->s_fs_info = NULL;
8523     +}
8524     +
8525     +/*
8526     + * Since people use this to answer the "How big of a file can I write?"
8527     + * question, we report the size of the highest priority branch as the size of
8528     + * the union.
8529     + */
8530     +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8531     +{
8532     + int err = 0;
8533     + struct super_block *sb;
8534     + struct dentry *lower_dentry;
8535     + struct dentry *parent;
8536     + bool valid;
8537     +
8538     + sb = dentry->d_sb;
8539     +
8540     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
8541     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
8542     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
8543     +
8544     + valid = __unionfs_d_revalidate(dentry, parent, false);
8545     + if (unlikely(!valid)) {
8546     + err = -ESTALE;
8547     + goto out;
8548     + }
8549     + unionfs_check_dentry(dentry);
8550     +
8551     + lower_dentry = unionfs_lower_dentry(sb->s_root);
8552     + err = vfs_statfs(lower_dentry, buf);
8553     +
8554     + /* set return buf to our f/s to avoid confusing user-level utils */
8555     + buf->f_type = UNIONFS_SUPER_MAGIC;
8556     + /*
8557     + * Our maximum file name length is shorter by a few bytes because every
8558     + * file name could potentially be whited-out.
8559     + *
8560     + * XXX: this restriction goes away with ODF.
8561     + */
8562     + unionfs_set_max_namelen(&buf->f_namelen);
8563     +
8564     + /*
8565     + * reset two fields to avoid confusing user-land.
8566     + * XXX: is this still necessary?
8567     + */
8568     + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
8569     + memset(&buf->f_spare, 0, sizeof(buf->f_spare));
8570     +
8571     +out:
8572     + unionfs_check_dentry(dentry);
8573     + unionfs_unlock_dentry(dentry);
8574     + unionfs_unlock_parent(dentry, parent);
8575     + unionfs_read_unlock(sb);
8576     + return err;
8577     +}
8578     +
8579     +/* handle mode changing during remount */
8580     +static noinline_for_stack int do_remount_mode_option(
8581     + char *optarg,
8582     + int cur_branches,
8583     + struct unionfs_data *new_data,
8584     + struct path *new_lower_paths)
8585     +{
8586     + int err = -EINVAL;
8587     + int perms, idx;
8588     + char *modename = strchr(optarg, '=');
8589     + struct nameidata nd;
8590     +
8591     + /* by now, optarg contains the branch name */
8592     + if (!*optarg) {
8593     + printk(KERN_ERR
8594     + "unionfs: no branch specified for mode change\n");
8595     + goto out;
8596     + }
8597     + if (!modename) {
8598     + printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
8599     + optarg);
8600     + goto out;
8601     + }
8602     + *modename++ = '\0';
8603     + err = parse_branch_mode(modename, &perms);
8604     + if (err) {
8605     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
8606     + modename, optarg);
8607     + goto out;
8608     + }
8609     +
8610     + /*
8611     + * Find matching branch index. For now, this assumes that nothing
8612     + * has been mounted on top of this Unionfs stack. Once we have /odf
8613     + * and cache-coherency resolved, we'll address the branch-path
8614     + * uniqueness.
8615     + */
8616     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8617     + if (err) {
8618     + printk(KERN_ERR "unionfs: error accessing "
8619     + "lower directory \"%s\" (error %d)\n",
8620     + optarg, err);
8621     + goto out;
8622     + }
8623     + for (idx = 0; idx < cur_branches; idx++)
8624     + if (nd.path.mnt == new_lower_paths[idx].mnt &&
8625     + nd.path.dentry == new_lower_paths[idx].dentry)
8626     + break;
8627     + path_put(&nd.path); /* no longer needed */
8628     + if (idx == cur_branches) {
8629     + err = -ENOENT; /* err may have been reset above */
8630     + printk(KERN_ERR "unionfs: branch \"%s\" "
8631     + "not found\n", optarg);
8632     + goto out;
8633     + }
8634     + /* check/change mode for existing branch */
8635     + /* we don't warn if perms==branchperms */
8636     + new_data[idx].branchperms = perms;
8637     + err = 0;
8638     +out:
8639     + return err;
8640     +}
8641     +
8642     +/* handle branch deletion during remount */
8643     +static noinline_for_stack int do_remount_del_option(
8644     + char *optarg, int cur_branches,
8645     + struct unionfs_data *new_data,
8646     + struct path *new_lower_paths)
8647     +{
8648     + int err = -EINVAL;
8649     + int idx;
8650     + struct nameidata nd;
8651     +
8652     + /* optarg contains the branch name to delete */
8653     +
8654     + /*
8655     + * Find matching branch index. For now, this assumes that nothing
8656     + * has been mounted on top of this Unionfs stack. Once we have /odf
8657     + * and cache-coherency resolved, we'll address the branch-path
8658     + * uniqueness.
8659     + */
8660     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8661     + if (err) {
8662     + printk(KERN_ERR "unionfs: error accessing "
8663     + "lower directory \"%s\" (error %d)\n",
8664     + optarg, err);
8665     + goto out;
8666     + }
8667     + for (idx = 0; idx < cur_branches; idx++)
8668     + if (nd.path.mnt == new_lower_paths[idx].mnt &&
8669     + nd.path.dentry == new_lower_paths[idx].dentry)
8670     + break;
8671     + path_put(&nd.path); /* no longer needed */
8672     + if (idx == cur_branches) {
8673     + printk(KERN_ERR "unionfs: branch \"%s\" "
8674     + "not found\n", optarg);
8675     + err = -ENOENT;
8676     + goto out;
8677     + }
8678     + /* check if there are any open files on the branch to be deleted */
8679     + if (atomic_read(&new_data[idx].open_files) > 0) {
8680     + err = -EBUSY;
8681     + goto out;
8682     + }
8683     +
8684     + /*
8685     + * Now we have to delete the branch. First, release any handles it
8686     + * has. Then, move the remaining array entries past "idx" in
8687     + * new_data and new_lower_paths one to the left. The caller then
8688     + * decrements its own branch count.
8689     + */
8690     + path_put(&new_lower_paths[idx]);
8691     +
8692     + if (idx < cur_branches - 1) {
8693     + /* if idx==cur_branches-1, we delete last branch: easy */
8694     + memmove(&new_data[idx], &new_data[idx+1],
8695     + (cur_branches - 1 - idx) *
8696     + sizeof(struct unionfs_data));
8697     + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
8698     + (cur_branches - 1 - idx) * sizeof(struct path));
8699     + }
8700     +
8701     + err = 0;
8702     +out:
8703     + return err;
8704     +}
8705     +
8706     +/* handle branch insertion during remount */
8707     +static noinline_for_stack int do_remount_add_option(
8708     + char *optarg, int cur_branches,
8709     + struct unionfs_data *new_data,
8710     + struct path *new_lower_paths,
8711     + int *high_branch_id)
8712     +{
8713     + int err = -EINVAL;
8714     + int perms;
8715     + int idx = 0; /* default: insert at beginning */
8716     + char *new_branch , *modename = NULL;
8717     + struct nameidata nd;
8718     +
8719     + /*
8720     + * optarg can be of several forms:
8721     + *
8722     + * /bar:/foo insert /foo before /bar
8723     + * /bar:/foo=ro insert /foo in ro mode before /bar
8724     + * /foo insert /foo in the beginning (prepend)
8725     + * :/foo insert /foo at the end (append)
8726     + */
8727     + if (*optarg == ':') { /* append? */
8728     + new_branch = optarg + 1; /* skip ':' */
8729     + idx = cur_branches;
8730     + goto found_insertion_point;
8731     + }
8732     + new_branch = strchr(optarg, ':');
8733     + if (!new_branch) { /* prepend? */
8734     + new_branch = optarg;
8735     + goto found_insertion_point;
8736     + }
8737     + *new_branch++ = '\0'; /* holds path+mode of new branch */
8738     +
8739     + /*
8740     + * Find matching branch index. For now, this assumes that nothing
8741     + * has been mounted on top of this Unionfs stack. Once we have /odf
8742     + * and cache-coherency resolved, we'll address the branch-path
8743     + * uniqueness.
8744     + */
8745     + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
8746     + if (err) {
8747     + printk(KERN_ERR "unionfs: error accessing "
8748     + "lower directory \"%s\" (error %d)\n",
8749     + optarg, err);
8750     + goto out;
8751     + }
8752     + for (idx = 0; idx < cur_branches; idx++)
8753     + if (nd.path.mnt == new_lower_paths[idx].mnt &&
8754     + nd.path.dentry == new_lower_paths[idx].dentry)
8755     + break;
8756     + path_put(&nd.path); /* no longer needed */
8757     + if (idx == cur_branches) {
8758     + printk(KERN_ERR "unionfs: branch \"%s\" "
8759     + "not found\n", optarg);
8760     + err = -ENOENT;
8761     + goto out;
8762     + }
8763     +
8764     + /*
8765     + * At this point idx holds the index before which the new branch
8766     + * should be inserted.
8767     + */
8768     +found_insertion_point:
8769     + /* find the mode for the new branch */
8770     + if (new_branch)
8771     + modename = strchr(new_branch, '=');
8772     + if (modename)
8773     + *modename++ = '\0';
8774     + if (!new_branch || !*new_branch) {
8775     + printk(KERN_ERR "unionfs: null new branch\n");
8776     + err = -EINVAL;
8777     + goto out;
8778     + }
8779     + err = parse_branch_mode(modename, &perms);
8780     + if (err) {
8781     + printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
8782     + "branch \"%s\"\n", modename, new_branch);
8783     + goto out;
8784     + }
8785     + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
8786     + if (err) {
8787     + printk(KERN_ERR "unionfs: error accessing "
8788     + "lower directory \"%s\" (error %d)\n",
8789     + new_branch, err);
8790     + goto out;
8791     + }
8792     + /*
8793     + * It's probably safe to check_branch the new branch to insert. Note:
8794     + * we don't allow inserting branches which are unionfs's by
8795     + * themselves (check_branch returns EINVAL in that case). This is
8796     + * because this code base doesn't support stacking unionfs: the ODF
8797     + * code base supports that correctly.
8798     + */
8799     + err = check_branch(&nd);
8800     + if (err) {
8801     + printk(KERN_ERR "unionfs: lower directory "
8802     + "\"%s\" is not a valid branch\n", optarg);
8803     + path_put(&nd.path);
8804     + goto out;
8805     + }
8806     +
8807     + /*
8808     + * Now we have to insert the new branch. But first, move the bits
8809     + * to make space for the new branch, if needed. The caller then
8810     + * increments its own branch count.
8811     + * We don't release nd here; it's kept until umount/remount.
8812     + */
8813     + if (idx < cur_branches) {
8814     + /* if idx==cur_branches, we append: easy */
8815     + memmove(&new_data[idx+1], &new_data[idx],
8816     + (cur_branches - idx) * sizeof(struct unionfs_data));
8817     + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
8818     + (cur_branches - idx) * sizeof(struct path));
8819     + }
8820     + new_lower_paths[idx].dentry = nd.path.dentry;
8821     + new_lower_paths[idx].mnt = nd.path.mnt;
8822     +
8823     + new_data[idx].sb = nd.path.dentry->d_sb;
8824     + atomic_set(&new_data[idx].open_files, 0);
8825     + new_data[idx].branchperms = perms;
8826     + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
8827     +
8828     + err = 0;
8829     +out:
8830     + return err;
8831     +}
8832     +
8833     +
8834     +/*
8835     + * Support branch management options on remount.
8836     + *
8837     + * See Documentation/filesystems/unionfs/ for details.
8838     + *
8839     + * @flags: numeric mount options
8840     + * @options: mount options string
8841     + *
8842     + * This function can rearrange a mounted union dynamically, adding and
8843     + * removing branches, including changing branch modes. Clearly this has to
8844     + * be done safely and atomically. Luckily, the VFS already calls this
8845     + * function with lock_super(sb) and lock_kernel() held, preventing
8846     + * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
8847     + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
8848     + * to purge dentries/inodes from our superblock, and also called
8849     + * fsync_super(sb) to purge any dirty pages. So we're good.
8850     + *
8851     + * XXX: however, our remount code may also need to invalidate mapped pages
8852     + * so as to force them to be re-gotten from the (newly reconfigured) lower
8853     + * branches. This has to wait for proper mmap and cache coherency support
8854     + * in the VFS.
8855     + *
8856     + */
8857     +static int unionfs_remount_fs(struct super_block *sb, int *flags,
8858     + char *options)
8859     +{
8860     + int err = 0;
8861     + int i;
8862     + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
8863     + char *optname;
8864     + int cur_branches = 0; /* no. of current branches */
8865     + int new_branches = 0; /* no. of branches actually left in the end */
8866     + int add_branches; /* est. no. of branches to add */
8867     + int del_branches; /* est. no. of branches to del */
8868     + int max_branches; /* max possible no. of branches */
8869     + struct unionfs_data *new_data = NULL, *tmp_data = NULL;
8870     + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
8871     + struct inode **new_lower_inodes = NULL;
8872     + int new_high_branch_id; /* new high branch ID */
8873     + int size; /* memory allocation size, temp var */
8874     + int old_ibstart, old_ibend;
8875     +
8876     + unionfs_write_lock(sb);
8877     +
8878     + /*
8879     + * The VFS will take care of "ro" and "rw" flags, and we can safely
8880     + * ignore MS_SILENT, but anything else left over is an error. So we
8881     + * need to check if any other flags may have been passed (none are
8882     + * allowed/supported as of now).
8883     + */
8884     + if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
8885     + printk(KERN_ERR
8886     + "unionfs: remount flags 0x%x unsupported\n", *flags);
8887     + err = -EINVAL;
8888     + goto out_error;
8889     + }
8890     +
8891     + /*
8892     + * If 'options' is NULL or empty, it's probably because the user just
8893     + * changed the union to "ro" or "rw" and the VFS took care of it. So
8894     + * there is nothing to do and we're done.
8895     + */
8896     + if (!options || options[0] == '\0')
8897     + goto out_error;
8898     +
8899     + /*
8900     + * Find out how many branches we will have in the end, counting
8901     + * "add" and "del" commands. Copy the "options" string because
8902     + * strsep modifies the string and we need it later.
8903     + */
8904     + tmp_to_free = kstrdup(options, GFP_KERNEL);
8905     + optionstmp = tmp_to_free;
8906     + if (unlikely(!optionstmp)) {
8907     + err = -ENOMEM;
8908     + goto out_free;
8909     + }
8910     + cur_branches = sbmax(sb); /* current no. branches */
8911     + new_branches = sbmax(sb);
8912     + del_branches = 0;
8913     + add_branches = 0;
8914     + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
8915     + while ((optname = strsep(&optionstmp, ",")) != NULL) {
8916     + char *optarg;
8917     +
8918     + if (!optname || !*optname)
8919     + continue;
8920     +
8921     + optarg = strchr(optname, '=');
8922     + if (optarg)
8923     + *optarg++ = '\0';
8924     +
8925     + if (!strcmp("add", optname))
8926     + add_branches++;
8927     + else if (!strcmp("del", optname))
8928     + del_branches++;
8929     + }
8930     + kfree(tmp_to_free);
8931     + /* after all changes, will we have at least one branch left? */
8932     + if ((new_branches + add_branches - del_branches) < 1) {
8933     + printk(KERN_ERR
8934     + "unionfs: no branches left after remount\n");
8935     + err = -EINVAL;
8936     + goto out_free;
8937     + }
8938     +
8939     + /*
8940     + * Since we haven't actually parsed all the add/del options, nor
8941     + * have we checked them for errors, we don't know for sure how many
8942     + * branches we will have after all changes have taken place. In
8943     + * fact, the total number of branches left could be less than what
8944     + * we have now. So we need to allocate space for a temporary
8945     + * placeholder that is at least as large as the maximum number of
8946     + * branches we *could* have, which is the current number plus all
8947     + * the additions. Once we're done with these temp placeholders, we
8948     + * may have to re-allocate the final size, copy over from the temp,
8949     + * and then free the temps (done near the end of this function).
8950     + */
8951     + max_branches = cur_branches + add_branches;
8952     + /* allocate space for the temporary per-branch unionfs_data array */
8953     + tmp_data = kcalloc(max_branches,
8954     + sizeof(struct unionfs_data), GFP_KERNEL);
8955     + if (unlikely(!tmp_data)) {
8956     + err = -ENOMEM;
8957     + goto out_free;
8958     + }
8959     + /* allocate space for new pointers to lower paths */
8960     + tmp_lower_paths = kcalloc(max_branches,
8961     + sizeof(struct path), GFP_KERNEL);
8962     + if (unlikely(!tmp_lower_paths)) {
8963     + err = -ENOMEM;
8964     + goto out_free;
8965     + }
8966     + /* copy current info into new placeholders, incrementing refcnts */
8967     + memcpy(tmp_data, UNIONFS_SB(sb)->data,
8968     + cur_branches * sizeof(struct unionfs_data));
8969     + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
8970     + cur_branches * sizeof(struct path));
8971     + for (i = 0; i < cur_branches; i++)
8972     + path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */
8973     +
8974     + /*******************************************************************
8975     + * For each branch command, do path_lookup on the requested branch,
8976     + * and apply the change to a temp branch list. To handle errors, we
8977     + * already dup'ed the old arrays (above), and increased the refcnts
8978     + * on various f/s objects. So now we can do all the path_lookups
8979     + * and branch-management commands on the new arrays. If it fails
8980     + * midway, we free the tmp arrays and *put all objects. If we succeed,
8981     + * then we free old arrays and *put its objects, and then replace
8982     + * the arrays with the new tmp list (we may have to re-allocate the
8983     + * memory because the temp lists could have been larger than what we
8984     + * actually needed).
8985     + *******************************************************************/
8986     +
8987     + while ((optname = strsep(&options, ",")) != NULL) {
8988     + char *optarg;
8989     +
8990     + if (!optname || !*optname)
8991     + continue;
8992     + /*
8993     + * At this stage optname holds a comma-delimited option, but
8994     + * without the commas. Next, we need to break the string on
8995     + * the '=' symbol to separate CMD=ARG, where ARG itself can
8996     + * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
8997     + * KEY is "/foo", and VAL is "rw".
8998     + */
8999     + optarg = strchr(optname, '=');
9000     + if (optarg)
9001     + *optarg++ = '\0';
9002     + /* incgen remount option (instead of old ioctl) */
9003     + if (!strcmp("incgen", optname)) {
9004     + err = 0;
9005     + goto out_no_change;
9006     + }
9007     +
9008     + /*
9009     + * All of our options take an argument now. (Insert ones
9010     + * that don't above this check.) So at this stage optname
9011     + * contains the CMD part and optarg contains the ARG part.
9012     + */
9013     + if (!optarg || !*optarg) {
9014     + printk(KERN_ERR "unionfs: all remount options require "
9015     + "an argument (%s)\n", optname);
9016     + err = -EINVAL;
9017     + goto out_release;
9018     + }
9019     +
9020     + if (!strcmp("add", optname)) {
9021     + err = do_remount_add_option(optarg, new_branches,
9022     + tmp_data,
9023     + tmp_lower_paths,
9024     + &new_high_branch_id);
9025     + if (err)
9026     + goto out_release;
9027     + new_branches++;
9028     + if (new_branches > UNIONFS_MAX_BRANCHES) {
9029     + printk(KERN_ERR "unionfs: command exceeds "
9030     + "%d branches\n", UNIONFS_MAX_BRANCHES);
9031     + err = -E2BIG;
9032     + goto out_release;
9033     + }
9034     + continue;
9035     + }
9036     + if (!strcmp("del", optname)) {
9037     + err = do_remount_del_option(optarg, new_branches,
9038     + tmp_data,
9039     + tmp_lower_paths);
9040     + if (err)
9041     + goto out_release;
9042     + new_branches--;
9043     + continue;
9044     + }
9045     + if (!strcmp("mode", optname)) {
9046     + err = do_remount_mode_option(optarg, new_branches,
9047     + tmp_data,
9048     + tmp_lower_paths);
9049     + if (err)
9050     + goto out_release;
9051     + continue;
9052     + }
9053     +
9054     + /*
9055     + * When you use "mount -o remount,ro", mount(8) will
9056     + * reportedly pass the original dirs= string from
9057     + * /proc/mounts. So for now, we have to ignore dirs= and
9058     + * not consider it an error, unless we want to allow users
9059     + * to pass dirs= in remount. Note that to allow the VFS to
9060     + * actually process the ro/rw remount options, we have to
9061     + * return 0 from this function.
9062     + */
9063     + if (!strcmp("dirs", optname)) {
9064     + printk(KERN_WARNING
9065     + "unionfs: remount ignoring option \"%s\"\n",
9066     + optname);
9067     + continue;
9068     + }
9069     +
9070     + err = -EINVAL;
9071     + printk(KERN_ERR
9072     + "unionfs: unrecognized option \"%s\"\n", optname);
9073     + goto out_release;
9074     + }
9075     +
9076     +out_no_change:
9077     +
9078     + /******************************************************************
9079     + * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9080     + * see if we need to allocate a small-sized new vector, copy the
9081     + * vectors to their correct place, release the refcnt of the older
9082     + * ones, and return. Also handle invalidating any pages that will
9083     + * have to be re-read.
9084     + *******************************************************************/
9085     +
9086     + if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9087     + printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9088     + "(use \"remount,ro\" to create a read-only union)\n");
9089     + err = -EINVAL;
9090     + goto out_release;
9091     + }
9092     +
9093     + /* (re)allocate the per-branch unionfs_data array to its final size */
9094     + size = new_branches * sizeof(struct unionfs_data);
9095     + new_data = krealloc(tmp_data, size, GFP_KERNEL);
9096     + if (unlikely(!new_data)) {
9097     + err = -ENOMEM;
9098     + goto out_release;
9099     + }
9100     +
9101     + /* allocate space for new pointers to lower paths */
9102     + size = new_branches * sizeof(struct path);
9103     + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9104     + if (unlikely(!new_lower_paths)) {
9105     + err = -ENOMEM;
9106     + goto out_release;
9107     + }
9108     +
9109     + /* allocate space for new pointers to lower inodes */
9110     + new_lower_inodes = kcalloc(new_branches,
9111     + sizeof(struct inode *), GFP_KERNEL);
9112     + if (unlikely(!new_lower_inodes)) {
9113     + err = -ENOMEM;
9114     + goto out_release;
9115     + }
9116     +
9117     + /*
9118     + * OK, just before we actually put the new set of branches in place,
9119     + * we need to ensure that our own f/s has no dirty objects left.
9120     + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9121     + * fsync_super(sb), taking care of dentries, inodes, and dirty
9122     + * pages. So all that's left is for us to invalidate any leftover
9123     + * (non-dirty) pages to ensure that they will be re-read from the
9124     + * new lower branches (and to support mmap).
9125     + */
9126     +
9127     + /*
9128     + * Once we finish the remounting successfully, our superblock
9129     + * generation number will have increased. This will be detected by
9130     + * our dentry-revalidation code upon subsequent f/s operations
9131     + * through unionfs. The revalidation code will rebuild the union of
9132     + * lower inodes for a given unionfs inode and invalidate any pages
9133     + * of such "stale" inodes (by calling our purge_inode_data
9134     + * function). This revalidation will happen lazily and
9135     + * incrementally, as users perform operations on cached inodes. We
9136     + * would like to encourage this revalidation to happen sooner if
9137     + * possible, so we like to try to invalidate as many other pages in
9138     + * our superblock as we can. We used to call drop_pagecache_sb() or
9139     + * a variant thereof, but either method was racy (drop_caches alone
9140     + * is known to be racy). So now we let the revalidation happen on a
9141     + * per file basis in ->d_revalidate.
9142     + */
9143     +
9144     + /* grab new lower super references; release old ones */
9145     + for (i = 0; i < new_branches; i++)
9146     + atomic_inc(&new_data[i].sb->s_active);
9147     + for (i = 0; i < sbmax(sb); i++)
9148     + atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active);
9149     +
9150     + /* copy new vectors into their correct place */
9151     + tmp_data = UNIONFS_SB(sb)->data;
9152     + UNIONFS_SB(sb)->data = new_data;
9153     + new_data = NULL; /* so don't free good pointers below */
9154     + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9155     + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9156     + new_lower_paths = NULL; /* so don't free good pointers below */
9157     +
9158     + /* update our unionfs_sb_info and root dentry index of last branch */
9159     + i = sbmax(sb); /* save no. of branches to release at end */
9160     + sbend(sb) = new_branches - 1;
9161     + dbend(sb->s_root) = new_branches - 1;
9162     + old_ibstart = ibstart(sb->s_root->d_inode);
9163     + old_ibend = ibend(sb->s_root->d_inode);
9164     + ibend(sb->s_root->d_inode) = new_branches - 1;
9165     + UNIONFS_D(sb->s_root)->bcount = new_branches;
9166     + new_branches = i; /* no. of branches to release below */
9167     +
9168     + /*
9169     + * Update lower inodes: 3 steps
9170     + * 1. grab ref on all new lower inodes
9171     + */
9172     + for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9173     + struct dentry *lower_dentry =
9174     + unionfs_lower_dentry_idx(sb->s_root, i);
9175     + igrab(lower_dentry->d_inode);
9176     + new_lower_inodes[i] = lower_dentry->d_inode;
9177     + }
9178     + /* 2. release reference on all older lower inodes */
9179     + iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true);
9180     + /* 3. update root dentry's inode to new lower_inodes array */
9181     + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9182     + new_lower_inodes = NULL;
9183     +
9184     + /* maxbytes may have changed */
9185     + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9186     + /* update high branch ID */
9187     + sbhbid(sb) = new_high_branch_id;
9188     +
9189     + /* update our sb->generation for revalidating objects */
9190     + i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9191     + atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9192     + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9193     + if (!(*flags & MS_SILENT))
9194     + pr_info("unionfs: %s: new generation number %d\n",
9195     + UNIONFS_SB(sb)->dev_name, i);
9196     + /* finally, update the root dentry's times */
9197     + unionfs_copy_attr_times(sb->s_root->d_inode);
9198     + err = 0; /* reset to success */
9199     +
9200     + /*
9201     + * The code above falls through to the next label, and releases the
9202     + * refcnts of the older ones (stored in tmp_*): if we fell through
9203     + * here, it means success. However, if we jump directly to this
9204     + * label from any error above, then an error occurred after we
9205     + * grabbed various refcnts, and so we have to release the
9206     + * temporarily constructed structures.
9207     + */
9208     +out_release:
9209     + /* no need to cleanup/release anything in tmp_data */
9210     + if (tmp_lower_paths)
9211     + for (i = 0; i < new_branches; i++)
9212     + path_put(&tmp_lower_paths[i]);
9213     +out_free:
9214     + kfree(tmp_lower_paths);
9215     + kfree(tmp_data);
9216     + kfree(new_lower_paths);
9217     + kfree(new_data);
9218     + kfree(new_lower_inodes);
9219     +out_error:
9220     + unionfs_check_dentry(sb->s_root);
9221     + unionfs_write_unlock(sb);
9222     + return err;
9223     +}
9224     +
9225     +/*
9226     + * Called by iput() when the inode reference count reached zero
9227     + * and the inode is not hashed anywhere. Used to clear anything
9228     + * that needs to be, before the inode is completely destroyed and put
9229     + * on the inode free list.
9230     + *
9231     + * No need to lock sb info's rwsem.
9232     + */
9233     +static void unionfs_clear_inode(struct inode *inode)
9234     +{
9235     + int bindex, bstart, bend;
9236     + struct inode *lower_inode;
9237     + struct list_head *pos, *n;
9238     + struct unionfs_dir_state *rdstate;
9239     +
9240     + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9241     + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9242     + list_del(&rdstate->cache);
9243     + free_rdstate(rdstate);
9244     + }
9245     +
9246     + /*
9247     + * Decrement a reference to a lower_inode, which was incremented
9248     + * by our read_inode when it was created initially.
9249     + */
9250     + bstart = ibstart(inode);
9251     + bend = ibend(inode);
9252     + if (bstart >= 0) {
9253     + for (bindex = bstart; bindex <= bend; bindex++) {
9254     + lower_inode = unionfs_lower_inode_idx(inode, bindex);
9255     + if (!lower_inode)
9256     + continue;
9257     + unionfs_set_lower_inode_idx(inode, bindex, NULL);
9258     + /* see Documentation/filesystems/unionfs/issues.txt */
9259     + lockdep_off();
9260     + iput(lower_inode);
9261     + lockdep_on();
9262     + }
9263     + }
9264     +
9265     + kfree(UNIONFS_I(inode)->lower_inodes);
9266     + UNIONFS_I(inode)->lower_inodes = NULL;
9267     +}
9268     +
9269     +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9270     +{
9271     + struct unionfs_inode_info *i;
9272     +
9273     + i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9274     + if (unlikely(!i))
9275     + return NULL;
9276     +
9277     + /* memset everything up to the inode to 0 */
9278     + memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9279     +
9280     + i->vfs_inode.i_version = 1;
9281     + return &i->vfs_inode;
9282     +}
9283     +
9284     +static void unionfs_destroy_inode(struct inode *inode)
9285     +{
9286     + kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9287     +}
9288     +
9289     +/* unionfs inode cache constructor */
9290     +static void init_once(void *obj)
9291     +{
9292     + struct unionfs_inode_info *i = obj;
9293     +
9294     + inode_init_once(&i->vfs_inode);
9295     +}
9296     +
9297     +int unionfs_init_inode_cache(void)
9298     +{
9299     + int err = 0;
9300     +
9301     + unionfs_inode_cachep =
9302     + kmem_cache_create("unionfs_inode_cache",
9303     + sizeof(struct unionfs_inode_info), 0,
9304     + SLAB_RECLAIM_ACCOUNT, init_once);
9305     + if (unlikely(!unionfs_inode_cachep))
9306     + err = -ENOMEM;
9307     + return err;
9308     +}
9309     +
9310     +/* unionfs inode cache destructor */
9311     +void unionfs_destroy_inode_cache(void)
9312     +{
9313     + if (unionfs_inode_cachep)
9314     + kmem_cache_destroy(unionfs_inode_cachep);
9315     +}
9316     +
9317     +/*
9318     + * Called for a dirty inode.  The only work done here is pruning expired
9319     + * entries (older than RDCACHE_JIFFIES) from the inode's readdir cache.
9320     + * Always returns 0; @wbc is unused.
9321     + * No need to grab sb info's rwsem.
9322     + */
9323     +static int unionfs_write_inode(struct inode *inode,
9324     + struct writeback_control *wbc)
9325     +{
9326     + struct list_head *pos, *n;
9327     + struct unionfs_dir_state *rdstate;
9328     +
9329     + spin_lock(&UNIONFS_I(inode)->rdlock);
9330     + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9331     + rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9332     + /* LRU order: stop at the first unexpired entry (wrap-safe) */
9333     + if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9334     + break;
9335     + UNIONFS_I(inode)->rdcount--;
9336     + list_del(&rdstate->cache);
9337     + free_rdstate(rdstate);
9338     + }
9339     + spin_unlock(&UNIONFS_I(inode)->rdlock);
9340     +
9341     + return 0;
9342     +}
9343     +
9344     +/*
9345     + * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9346     + * code can actually succeed and won't leave tasks that need handling.
9347     + */
9348     +static void unionfs_umount_begin(struct super_block *sb)
9349     +{
9350     + struct super_block *lower_sb;
9351     + int bindex, bstart, bend;
9352     +
9353     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9354     +
9355     + bstart = sbstart(sb);
9356     + bend = sbend(sb);
9357     + for (bindex = bstart; bindex <= bend; bindex++) {
9358     + lower_sb = unionfs_lower_super_idx(sb, bindex);
9359     +
9360     + if (lower_sb && lower_sb->s_op &&
9361     + lower_sb->s_op->umount_begin)
9362     + lower_sb->s_op->umount_begin(lower_sb);
9363     + }
9364     +
9365     + unionfs_read_unlock(sb);
9366     +}
9367     +
9368     +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9369     +{
9370     + struct super_block *sb = mnt->mnt_sb;
9371     + int ret = 0;
9372     + char *tmp_page;
9373     + char *path;
9374     + int bindex, bstart, bend;
9375     + int perms;
9376     +
9377     + unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9378     +
9379     + unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9380     +
9381     + tmp_page = (char *) __get_free_page(GFP_KERNEL);
9382     + if (unlikely(!tmp_page)) {
9383     + ret = -ENOMEM;
9384     + goto out;
9385     + }
9386     +
9387     + bstart = sbstart(sb);
9388     + bend = sbend(sb);
9389     +
9390     + seq_printf(m, ",dirs=");
9391     + for (bindex = bstart; bindex <= bend; bindex++) {
9392     + struct path p;
9393     + p.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex);
9394     + p.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
9395     + path = d_path(&p, tmp_page, PAGE_SIZE);
9396     + if (IS_ERR(path)) {
9397     + ret = PTR_ERR(path);
9398     + goto out;
9399     + }
9400     +
9401     + perms = branchperms(sb, bindex);
9402     +
9403     + seq_printf(m, "%s=%s", path,
9404     + perms & MAY_WRITE ? "rw" : "ro");
9405     + if (bindex != bend)
9406     + seq_printf(m, ":");
9407     + }
9408     +
9409     +out:
9410     + free_page((unsigned long) tmp_page);
9411     +
9412     + unionfs_unlock_dentry(sb->s_root);
9413     +
9414     + unionfs_read_unlock(sb);
9415     +
9416     + return ret;
9417     +}
9418     +
9419     +struct super_operations unionfs_sops = {
9420     + .delete_inode = unionfs_delete_inode,
9421     + .put_super = unionfs_put_super,
9422     + .statfs = unionfs_statfs,
9423     + .remount_fs = unionfs_remount_fs,
9424     + .clear_inode = unionfs_clear_inode,
9425     + .umount_begin = unionfs_umount_begin,
9426     + .show_options = unionfs_show_options,
9427     + .write_inode = unionfs_write_inode,
9428     + .alloc_inode = unionfs_alloc_inode,
9429     + .destroy_inode = unionfs_destroy_inode,
9430     +};
9431     diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9432     new file mode 100644
9433     index 0000000..d49c834
9434     --- /dev/null
9435     +++ b/fs/unionfs/union.h
9436     @@ -0,0 +1,669 @@
9437     +/*
9438     + * Copyright (c) 2003-2010 Erez Zadok
9439     + * Copyright (c) 2003-2006 Charles P. Wright
9440     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9441     + * Copyright (c) 2005 Arun M. Krishnakumar
9442     + * Copyright (c) 2004-2006 David P. Quigley
9443     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9444     + * Copyright (c) 2003 Puja Gupta
9445     + * Copyright (c) 2003 Harikesavan Krishnan
9446     + * Copyright (c) 2003-2010 Stony Brook University
9447     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
9448     + *
9449     + * This program is free software; you can redistribute it and/or modify
9450     + * it under the terms of the GNU General Public License version 2 as
9451     + * published by the Free Software Foundation.
9452     + */
9453     +
9454     +#ifndef _UNION_H_
9455     +#define _UNION_H_
9456     +
9457     +#include <linux/dcache.h>
9458     +#include <linux/file.h>
9459     +#include <linux/list.h>
9460     +#include <linux/fs.h>
9461     +#include <linux/mm.h>
9462     +#include <linux/module.h>
9463     +#include <linux/mount.h>
9464     +#include <linux/namei.h>
9465     +#include <linux/page-flags.h>
9466     +#include <linux/pagemap.h>
9467     +#include <linux/poll.h>
9468     +#include <linux/security.h>
9469     +#include <linux/seq_file.h>
9470     +#include <linux/slab.h>
9471     +#include <linux/spinlock.h>
9472     +#include <linux/smp_lock.h>
9473     +#include <linux/statfs.h>
9474     +#include <linux/string.h>
9475     +#include <linux/vmalloc.h>
9476     +#include <linux/writeback.h>
9477     +#include <linux/buffer_head.h>
9478     +#include <linux/xattr.h>
9479     +#include <linux/fs_stack.h>
9480     +#include <linux/magic.h>
9481     +#include <linux/log2.h>
9482     +#include <linux/poison.h>
9483     +#include <linux/mman.h>
9484     +#include <linux/backing-dev.h>
9485     +#include <linux/splice.h>
9486     +
9487     +#include <asm/system.h>
9488     +
9489     +#include <linux/union_fs.h>
9490     +
9491     +/* the file system name */
9492     +#define UNIONFS_NAME "unionfs"
9493     +
9494     +/* unionfs root inode number */
9495     +#define UNIONFS_ROOT_INO 1
9496     +
9497     +/* number of times we try to get a unique temporary file name */
9498     +#define GET_TMPNAM_MAX_RETRY 5
9499     +
9500     +/* maximum number of branches we support, to avoid memory blowup */
9501     +#define UNIONFS_MAX_BRANCHES 128
9502     +
9503     +/* minimum time (seconds) required for time-based cache-coherency */
9504     +#define UNIONFS_MIN_CC_TIME 3
9505     +
9506     +/* Operations vectors defined in specific files. */
9507     +extern struct file_operations unionfs_main_fops;
9508     +extern struct file_operations unionfs_dir_fops;
9509     +extern struct inode_operations unionfs_main_iops;
9510     +extern struct inode_operations unionfs_dir_iops;
9511     +extern struct inode_operations unionfs_symlink_iops;
9512     +extern struct super_operations unionfs_sops;
9513     +extern struct dentry_operations unionfs_dops;
9514     +extern struct address_space_operations unionfs_aops, unionfs_dummy_aops;
9515     +extern struct vm_operations_struct unionfs_vm_ops;
9516     +
9517     +/* How long should an entry be allowed to persist */
9518     +#define RDCACHE_JIFFIES (5*HZ)
9519     +
9520     +/* compatibility with Real-Time patches */
9521     +#ifdef CONFIG_PREEMPT_RT
9522     +# define unionfs_rw_semaphore compat_rw_semaphore
9523     +#else /* not CONFIG_PREEMPT_RT */
9524     +# define unionfs_rw_semaphore rw_semaphore
9525     +#endif /* not CONFIG_PREEMPT_RT */
9526     +
9527     +/* file private data. */
9528     +struct unionfs_file_info {
9529     + int bstart;
9530     + int bend;
9531     + atomic_t generation;
9532     +
9533     + struct unionfs_dir_state *rdstate;
9534     + struct file **lower_files;
9535     + int *saved_branch_ids; /* IDs of branches when file was opened */
9536     + const struct vm_operations_struct *lower_vm_ops;
9537     + bool wrote_to_file; /* for delayed copyup */
9538     +};
9539     +
9540     +/* unionfs inode data in memory */
9541     +struct unionfs_inode_info {
9542     + int bstart;
9543     + int bend;
9544     + atomic_t generation;
9545     + /* Stuff for readdir over NFS. */
9546     + spinlock_t rdlock;
9547     + struct list_head readdircache;
9548     + int rdcount;
9549     + int hashsize;
9550     + int cookie;
9551     +
9552     + /* The lower inodes */
9553     + struct inode **lower_inodes;
9554     +
9555     + struct inode vfs_inode;
9556     +};
9557     +
9558     +/* unionfs dentry data in memory */
9559     +struct unionfs_dentry_info {
9560     + /*
9561     + * The mutex is used to lock the dentry as soon as we get into a
9562     + * unionfs function from the VFS. Our lock ordering is that children
9563     + * go before their parents.
9564     + */
9565     + struct mutex lock;
9566     + int bstart;
9567     + int bend;
9568     + int bopaque;
9569     + int bcount;
9570     + atomic_t generation;
9571     + struct path *lower_paths;
9572     +};
9573     +
9574     +/* These are the pointers to our various objects. */
9575     +struct unionfs_data {
9576     + struct super_block *sb; /* lower super_block */
9577     + atomic_t open_files; /* number of open files on branch */
9578     + int branchperms;
9579     + int branch_id; /* unique branch ID at re/mount time */
9580     +};
9581     +
9582     +/* unionfs super-block data in memory */
9583     +struct unionfs_sb_info {
9584     + int bend;
9585     +
9586     + atomic_t generation;
9587     +
9588     + /*
9589     + * This rwsem is used to make sure that a branch management
9590     + * operation...
9591     + * 1) will not begin before all currently in-flight operations
9592     + * complete.
9593     + * 2) any new operations do not execute until the currently
9594     + * running branch management operation completes.
9595     + *
9596     + * The write_lock_owner records the PID of the task which grabbed
9597     + * the rw_sem for writing. If the same task also tries to grab the
9598     + * read lock, we allow it. This prevents a self-deadlock when
9599     + * branch-management is used on a pivot_root'ed union, because we
9600     + * have to ->lookup paths which belong to the same union.
9601     + */
9602     + struct unionfs_rw_semaphore rwsem;
9603     + pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */
9604     + int high_branch_id; /* last unique branch ID given */
9605     + char *dev_name; /* to identify different unions in pr_debug */
9606     + struct unionfs_data *data;
9607     +};
9608     +
9609     +/*
9610     + * structure for making the linked list of entries by readdir on left branch
9611     + * to compare with entries on right branch
9612     + */
9613     +struct filldir_node {
9614     + struct list_head file_list; /* list for directory entries */
9615     + char *name; /* name entry */
9616     + int hash; /* name hash */
9617     + int namelen; /* name len since name is not 0 terminated */
9618     +
9619     + /*
9620     + * we can check for duplicate whiteouts and files in the same branch
9621     + * in order to return -EIO.
9622     + */
9623     + int bindex;
9624     +
9625     + /* is this a whiteout entry? */
9626     + int whiteout;
9627     +
9628     + /* Inline name, so we don't need to separately kmalloc small ones */
9629     + char iname[DNAME_INLINE_LEN_MIN];
9630     +};
9631     +
9632     +/* Directory hash table. */
9633     +struct unionfs_dir_state {
9634     + unsigned int cookie; /* the cookie, based off of rdversion */
9635     + unsigned int offset; /* The entry we have returned. */
9636     + int bindex;
9637     + loff_t dirpos; /* offset within the lower level directory */
9638     + int size; /* How big is the hash table? */
9639     + int hashentries; /* How many entries have been inserted? */
9640     + unsigned long access;
9641     +
9642     + /* This cache list is used when the inode keeps us around. */
9643     + struct list_head cache;
9644     + struct list_head list[0];
9645     +};
9646     +
9647     +/* externs needed for fanout.h or sioq.h */
9648     +extern int unionfs_get_nlinks(const struct inode *inode);
9649     +extern void unionfs_copy_attr_times(struct inode *upper);
9650     +extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src);
9651     +
9652     +/* include miscellaneous macros */
9653     +#include "fanout.h"
9654     +#include "sioq.h"
9655     +
9656     +/* externs for cache creation/deletion routines */
9657     +extern void unionfs_destroy_filldir_cache(void);
9658     +extern int unionfs_init_filldir_cache(void);
9659     +extern int unionfs_init_inode_cache(void);
9660     +extern void unionfs_destroy_inode_cache(void);
9661     +extern int unionfs_init_dentry_cache(void);
9662     +extern void unionfs_destroy_dentry_cache(void);
9663     +
9664     +/* Initialize and free readdir-specific state. */
9665     +extern int init_rdstate(struct file *file);
9666     +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
9667     + int bindex);
9668     +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
9669     + loff_t fpos);
9670     +extern void free_rdstate(struct unionfs_dir_state *state);
9671     +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
9672     + const char *name, int namelen, int bindex,
9673     + int whiteout);
9674     +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
9675     + const char *name, int namelen,
9676     + int is_whiteout);
9677     +
9678     +extern struct dentry **alloc_new_dentries(int objs);
9679     +extern struct unionfs_data *alloc_new_data(int objs);
9680     +
9681     +/* We can only use 32-bits of offset for rdstate --- blech! */
9682     +#define DIREOF (0xfffff)
9683     +#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
9684     +#define MAXRDCOOKIE (0xfff)
9685     +/* Turn an rdstate into an offset. */
9686     +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
9687     +{
9688     + off_t tmp;
9689     +
9690     + tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
9691     + | (buf->offset & DIREOF);
9692     + return tmp;
9693     +}
9694     +
9695     +/* Macros for locking a super_block. */
9696     +enum unionfs_super_lock_class {
9697     + UNIONFS_SMUTEX_NORMAL,
9698     + UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */
9699     + UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */
9700     +};
9701     +static inline void unionfs_read_lock(struct super_block *sb, int subclass)
9702     +{
9703     + if (UNIONFS_SB(sb)->write_lock_owner &&
9704     + UNIONFS_SB(sb)->write_lock_owner == current->pid)
9705     + return;
9706     + down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
9707     +}
9708     +static inline void unionfs_read_unlock(struct super_block *sb)
9709     +{
9710     + if (UNIONFS_SB(sb)->write_lock_owner &&
9711     + UNIONFS_SB(sb)->write_lock_owner == current->pid)
9712     + return;
9713     + up_read(&UNIONFS_SB(sb)->rwsem);
9714     +}
9715     +static inline void unionfs_write_lock(struct super_block *sb)
9716     +{
9717     + down_write(&UNIONFS_SB(sb)->rwsem);
9718     + UNIONFS_SB(sb)->write_lock_owner = current->pid;
9719     +}
9720     +static inline void unionfs_write_unlock(struct super_block *sb)
9721     +{
9722     + UNIONFS_SB(sb)->write_lock_owner = 0; /* clear while still held */
9723     + up_write(&UNIONFS_SB(sb)->rwsem);
9724     +}
9725     +
9726     +static inline void unionfs_double_lock_dentry(struct dentry *d1,
9727     + struct dentry *d2)
9728     +{
9729     + BUG_ON(d1 == d2);
9730     + if (d1 < d2) {
9731     + unionfs_lock_dentry(d1, UNIONFS_DMUTEX_PARENT);
9732     + unionfs_lock_dentry(d2, UNIONFS_DMUTEX_CHILD);
9733     + } else {
9734     + unionfs_lock_dentry(d2, UNIONFS_DMUTEX_PARENT);
9735     + unionfs_lock_dentry(d1, UNIONFS_DMUTEX_CHILD);
9736     + }
9737     +}
9738     +
9739     +static inline void unionfs_double_unlock_dentry(struct dentry *d1,
9740     + struct dentry *d2)
9741     +{
9742     + BUG_ON(d1 == d2);
9743     + if (d1 < d2) { /* lower address first, as when locking */
9744     + unionfs_unlock_dentry(d1);
9745     + unionfs_unlock_dentry(d2);
9746     + } else {
9747     + unionfs_unlock_dentry(d2);
9748     + unionfs_unlock_dentry(d1);
9749     + }
9750     +}
9751     +
9752     +static inline void unionfs_double_lock_parents(struct dentry *p1,
9753     + struct dentry *p2)
9754     +{
9755     + if (p1 == p2) {
9756     + unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9757     + return;
9758     + }
9759     + if (p1 < p2) {
9760     + unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9761     + unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_CHILD);
9762     + } else {
9763     + unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_PARENT);
9764     + unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_CHILD);
9765     + }
9766     +}
9767     +
9768     +static inline void unionfs_double_unlock_parents(struct dentry *p1,
9769     + struct dentry *p2)
9770     +{
9771     + if (p1 == p2) {
9772     + unionfs_unlock_dentry(p1);
9773     + return;
9774     + }
9775     + if (p1 < p2) { /* lower address first, as when locking */
9776     + unionfs_unlock_dentry(p1);
9777     + unionfs_unlock_dentry(p2);
9778     + } else {
9779     + unionfs_unlock_dentry(p2);
9780     + unionfs_unlock_dentry(p1);
9781     + }
9782     +}
9783     +
9784     +extern int new_dentry_private_data(struct dentry *dentry, int subclass);
9785     +extern int realloc_dentry_private_data(struct dentry *dentry);
9786     +extern void free_dentry_private_data(struct dentry *dentry);
9787     +extern void update_bstart(struct dentry *dentry);
9788     +extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
9789     +extern void release_lower_nd(struct nameidata *nd, int err);
9790     +
9791     +/*
9792     + * EXTERNALS:
9793     + */
9794     +
9795     +/* replicates the directory structure up to given dentry in given branch */
9796     +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
9797     + const char *name, int bindex);
9798     +
9799     +/* partial lookup */
9800     +extern int unionfs_partial_lookup(struct dentry *dentry,
9801     + struct dentry *parent);
9802     +extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
9803     + struct dentry *parent,
9804     + int lookupmode);
9805     +
9806     +/* copies a file from dbstart to newbindex branch */
9807     +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
9808     + int newbindex, loff_t size);
9809     +extern int copyup_named_file(struct inode *dir, struct file *file,
9810     + char *name, int bstart, int new_bindex,
9811     + loff_t len);
9812     +/* copies a dentry from dbstart to newbindex branch */
9813     +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
9814     + int bstart, int new_bindex, const char *name,
9815     + int namelen, struct file **copyup_file, loff_t len);
9816     +/* helper functions for post-copyup actions */
9817     +extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
9818     +extern void unionfs_postcopyup_release(struct dentry *dentry);
9819     +
9820     +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
9821     +extern int check_empty(struct dentry *dentry, struct dentry *parent,
9822     + struct unionfs_dir_state **namelist);
9823     +/* whiteout and opaque directory helpers */
9824     +extern char *alloc_whname(const char *name, int len);
9825     +extern bool is_whiteout_name(char **namep, int *namelenp);
9826     +extern bool is_validname(const char *name);
9827     +extern struct dentry *lookup_whiteout(const char *name,
9828     + struct dentry *lower_parent);
9829     +extern struct dentry *find_first_whiteout(struct dentry *dentry);
9830     +extern int unlink_whiteout(struct dentry *wh_dentry);
9831     +extern int check_unlink_whiteout(struct dentry *dentry,
9832     + struct dentry *lower_dentry, int bindex);
9833     +extern int create_whiteout(struct dentry *dentry, int start);
9834     +extern int delete_whiteouts(struct dentry *dentry, int bindex,
9835     + struct unionfs_dir_state *namelist);
9836     +extern int is_opaque_dir(struct dentry *dentry, int bindex);
9837     +extern int make_dir_opaque(struct dentry *dir, int bindex);
9838     +extern void unionfs_set_max_namelen(long *namelen);
9839     +
9840     +extern void unionfs_reinterpose(struct dentry *this_dentry);
9841     +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
9842     +
9843     +/* Locking functions. */
9844     +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
9845     +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
9846     +
9847     +/* Common file operations. */
9848     +extern int unionfs_file_revalidate(struct file *file, struct dentry *parent,
9849     + bool willwrite);
9850     +extern int unionfs_open(struct inode *inode, struct file *file);
9851     +extern int unionfs_file_release(struct inode *inode, struct file *file);
9852     +extern int unionfs_flush(struct file *file, fl_owner_t id);
9853     +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
9854     + unsigned long arg);
9855     +extern int unionfs_fsync(struct file *file, int datasync);
9856     +extern int unionfs_fasync(int fd, struct file *file, int flag);
9857     +
9858     +/* Inode operations */
9859     +extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino);
9860     +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9861     + struct inode *new_dir, struct dentry *new_dentry);
9862     +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
9863     +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
9864     +
9865     +extern bool __unionfs_d_revalidate(struct dentry *dentry,
9866     + struct dentry *parent, bool willwrite);
9867     +extern bool is_negative_lower(const struct dentry *dentry);
9868     +extern bool is_newer_lower(const struct dentry *dentry);
9869     +extern void purge_sb_data(struct super_block *sb);
9870     +
9871     +/* The values for unionfs_interpose's flag. */
9872     +#define INTERPOSE_DEFAULT 0
9873     +#define INTERPOSE_LOOKUP 1
9874     +#define INTERPOSE_REVAL 2
9875     +#define INTERPOSE_REVAL_NEG 3
9876     +#define INTERPOSE_PARTIAL 4
9877     +
9878     +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
9879     + struct super_block *sb, int flag);
9880     +
9881     +#ifdef CONFIG_UNION_FS_XATTR
9882     +/* Extended attribute functions. */
9883     +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
9884     +static inline void unionfs_xattr_kfree(const void *p)
9885     +{
9886     + kfree(p);
9887     +}
9888     +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
9889     + void *value, size_t size);
9890     +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
9891     +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
9892     + size_t size);
9893     +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
9894     + const void *value, size_t size, int flags);
9895     +#endif /* CONFIG_UNION_FS_XATTR */
9896     +
9897     +/* The root directory is unhashed, but isn't deleted. */
9898     +static inline int d_deleted(struct dentry *d)
9899     +{
9900     + return d_unhashed(d) && (d != d->d_sb->s_root);
9901     +}
9902     +
9903     +/* unionfs_permission, check if we should bypass error to facilitate copyup */
9904     +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
9905     +
9906     +/* unionfs_open, check if we need to copyup the file */
9907     +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
9908     +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
9909     +
9910     +static inline int branchperms(const struct super_block *sb, int index)
9911     +{
9912     + BUG_ON(index < 0);
9913     + return UNIONFS_SB(sb)->data[index].branchperms;
9914     +}
9915     +
9916     +static inline int set_branchperms(struct super_block *sb, int index, int perms)
9917     +{
9918     + BUG_ON(index < 0);
9919     + UNIONFS_SB(sb)->data[index].branchperms = perms;
9920     + return perms;
9921     +}
9922     +
9923     +/* check if readonly lower inode, but possibly unlinked (no inode->i_sb) */
9924     +static inline int __is_rdonly(const struct inode *inode)
9925     +{
9926     + /* if unlinked, can't be readonly (?) */
9927     + if (!inode->i_sb)
9928     + return 0;
9929     + return IS_RDONLY(inode);
9930     +
9931     +}
9932     +/* Is this branch read-only according to its unionfs branch permissions? */
9933     +static inline int is_robranch_super(const struct super_block *sb, int index)
9934     +{
9935     + int ret;
9936     +
9937     + ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
9938     + return ret;
9939     +}
9940     +
9941     +/* Is this file on a read-only branch? */
9942     +static inline int is_robranch_idx(const struct dentry *dentry, int index)
9943     +{
9944     + struct super_block *lower_sb;
9945     +
9946     + BUG_ON(index < 0);
9947     +
9948     + if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
9949     + return -EROFS;
9950     +
9951     + lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
9952     + BUG_ON(lower_sb == NULL);
9953     + /*
9954     + * test sb flags directly, not IS_RDONLY(lower_inode) because the
9955     + * lower_dentry could be a negative.
9956     + */
9957     + if (lower_sb->s_flags & MS_RDONLY)
9958     + return -EROFS;
9959     +
9960     + return 0;
9961     +}
9962     +
9963     +static inline int is_robranch(const struct dentry *dentry)
9964     +{
9965     + int index;
9966     +
9967     + index = UNIONFS_D(dentry)->bstart;
9968     + BUG_ON(index < 0);
9969     +
9970     + return is_robranch_idx(dentry, index);
9971     +}
9972     +
9973     +/*
9974     + * EXTERNALS:
9975     + */
9976     +extern int check_branch(struct nameidata *nd);
9977     +extern int parse_branch_mode(const char *name, int *perms);
9978     +
9979     +/* locking helpers */
9980     +static inline struct dentry *lock_parent(struct dentry *dentry)
9981     +{
9982     + struct dentry *dir = dget_parent(dentry);
9983     + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
9984     + return dir;
9985     +}
9986     +static inline struct dentry *lock_parent_wh(struct dentry *dentry)
9987     +{
9988     + struct dentry *dir = dget_parent(dentry);
9989     +
9990     + mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
9991     + return dir;
9992     +}
9993     +
9994     +static inline void unlock_dir(struct dentry *dir)
9995     +{
9996     + mutex_unlock(&dir->d_inode->i_mutex);
9997     + dput(dir);
9998     +}
9999     +
10000     +/* lock base inode mutex before calling lookup_one_len */
10001     +static inline struct dentry *lookup_lck_len(const char *name,
10002     + struct dentry *base, int len)
10003     +{
10004     + struct dentry *d;
10005     + mutex_lock(&base->d_inode->i_mutex);
10006     + d = lookup_one_len(name, base, len);
10007     + mutex_unlock(&base->d_inode->i_mutex);
10008     + return d;
10009     +}
10010     +
10011     +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10012     + int bindex)
10013     +{
10014     + struct vfsmount *mnt;
10015     +
10016     + BUG_ON(!dentry || bindex < 0);
10017     +
10018     + mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10019     +#ifdef CONFIG_UNION_FS_DEBUG
10020     + if (!mnt)
10021     + pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10022     + mnt, bindex);
10023     +#endif /* CONFIG_UNION_FS_DEBUG */
10024     +
10025     + return mnt;
10026     +}
10027     +
10028     +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10029     +{
10030     + struct vfsmount *mnt;
10031     +
10032     + if (!dentry && bindex < 0)
10033     + return;
10034     + BUG_ON(!dentry || bindex < 0);
10035     +
10036     + mnt = unionfs_lower_mnt_idx(dentry, bindex);
10037     +#ifdef CONFIG_UNION_FS_DEBUG
10038     + /*
10039     + * Directories can have NULL lower objects in between start/end, but
10040     + * NOT if at the start/end range. We cannot verify that this dentry
10041     + * is a type=DIR, because it may already be a negative dentry. But
10042     + * if dbstart is greater than dbend, we know that this couldn't have
10043     + * been a regular file: it had to have been a directory.
10044     + */
10045     + if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10046     + pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10047     +#endif /* CONFIG_UNION_FS_DEBUG */
10048     + mntput(mnt);
10049     +}
10050     +
10051     +#ifdef CONFIG_UNION_FS_DEBUG
10052     +
10053     +/* useful for tracking code reachability */
10054     +#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
10055     +
10056     +#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10057     + __FILE__, __func__, __LINE__)
10058     +#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10059     + __FILE__, __func__, __LINE__)
10060     +#define unionfs_check_file(f) __unionfs_check_file((f), \
10061     + __FILE__, __func__, __LINE__)
10062     +#define unionfs_check_nd(n) __unionfs_check_nd((n), \
10063     + __FILE__, __func__, __LINE__)
10064     +#define show_branch_counts(sb) __show_branch_counts((sb), \
10065     + __FILE__, __func__, __LINE__)
10066     +#define show_inode_times(i) __show_inode_times((i), \
10067     + __FILE__, __func__, __LINE__)
10068     +#define show_dinode_times(d) __show_dinode_times((d), \
10069     + __FILE__, __func__, __LINE__)
10070     +#define show_inode_counts(i) __show_inode_counts((i), \
10071     + __FILE__, __func__, __LINE__)
10072     +
10073     +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10074     + const char *fxn, int line);
10075     +extern void __unionfs_check_dentry(const struct dentry *dentry,
10076     + const char *fname, const char *fxn,
10077     + int line);
10078     +extern void __unionfs_check_file(const struct file *file,
10079     + const char *fname, const char *fxn, int line);
10080     +extern void __unionfs_check_nd(const struct nameidata *nd,
10081     + const char *fname, const char *fxn, int line);
10082     +extern void __show_branch_counts(const struct super_block *sb,
10083     + const char *file, const char *fxn, int line);
10084     +extern void __show_inode_times(const struct inode *inode,
10085     + const char *file, const char *fxn, int line);
10086     +extern void __show_dinode_times(const struct dentry *dentry,
10087     + const char *file, const char *fxn, int line);
10088     +extern void __show_inode_counts(const struct inode *inode,
10089     + const char *file, const char *fxn, int line);
10090     +
10091     +#else /* not CONFIG_UNION_FS_DEBUG */
10092     +
10093     +/* we leave useful hooks for these check functions throughout the code */
10094     +#define unionfs_check_inode(i) do { } while (0)
10095     +#define unionfs_check_dentry(d) do { } while (0)
10096     +#define unionfs_check_file(f) do { } while (0)
10097     +#define unionfs_check_nd(n) do { } while (0)
10098     +#define show_branch_counts(sb) do { } while (0)
10099     +#define show_inode_times(i) do { } while (0)
10100     +#define show_dinode_times(d) do { } while (0)
10101     +#define show_inode_counts(i) do { } while (0)
10102     +
10103     +#endif /* not CONFIG_UNION_FS_DEBUG */
10104     +
10105     +#endif /* not _UNION_H_ */
10106     diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10107     new file mode 100644
10108     index 0000000..542c513
10109     --- /dev/null
10110     +++ b/fs/unionfs/unlink.c
10111     @@ -0,0 +1,278 @@
10112     +/*
10113     + * Copyright (c) 2003-2010 Erez Zadok
10114     + * Copyright (c) 2003-2006 Charles P. Wright
10115     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10116     + * Copyright (c) 2005-2006 Junjiro Okajima
10117     + * Copyright (c) 2005 Arun M. Krishnakumar
10118     + * Copyright (c) 2004-2006 David P. Quigley
10119     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10120     + * Copyright (c) 2003 Puja Gupta
10121     + * Copyright (c) 2003 Harikesavan Krishnan
10122     + * Copyright (c) 2003-2010 Stony Brook University
10123     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
10124     + *
10125     + * This program is free software; you can redistribute it and/or modify
10126     + * it under the terms of the GNU General Public License version 2 as
10127     + * published by the Free Software Foundation.
10128     + */
10129     +
10130     +#include "union.h"
10131     +
10132     +/*
10133     + * Helper function for Unionfs's unlink operation.
10134     + *
10135     + * The main goal of this function is to optimize the unlinking of non-dir
10136     + * objects in unionfs by deleting all possible lower inode objects from the
10137     + * underlying branches having the same dentry name as the non-dir dentry on
10138     + * which this unlink operation is called. This way we delete as many lower
10139     + * inodes as possible, and save space. Whiteouts need to be created in
10140     + * branch0 only if unlinking fails on any of the lower branches other than
10141     + * branch0, or if a lower branch is marked read-only.
10142     + *
10143     + * Also, while unlinking a file, if we encounter any dir type entry in any
10144     + * intermediate branch, then we remove the directory by calling vfs_rmdir.
10145     + * The following special cases are also handled:
10146     + *
10147     + * (1) If an error occurs in branch0 during vfs_unlink, then we return
10148     + * the appropriate error.
10149     + *
10150     + * (2) If we get an error during unlink in any lower branch other than
10151     + * branch0, then we create a whiteout in branch0.
10152     + *
10153     + * (3) If a whiteout already exists in any intermediate branch, we delete
10154     + * all possible inodes only up to that branch (this is "opaqueness"
10155     + * as per Documentation/filesystems/unionfs/concepts.txt).
10156     + * Returns 0 on success, or the error that stopped the operation.
10157     + */
10158     +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry,
10159     + struct dentry *parent)
10160     +{
10161     + struct dentry *lower_dentry;
10162     + struct dentry *lower_dir_dentry;
10163     + int bindex;
10164     + int err = 0;
10165     +
10166     + err = unionfs_partial_lookup(dentry, parent);
10167     + if (err)
10168     + goto out;
10169     +
10170     + /* try to unlink every positive lower instance; stop at first error */
10171     + for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
10172     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10173     + if (!lower_dentry || !lower_dentry->d_inode)
10174     + continue;
10175     +
10176     + lower_dir_dentry = lock_parent(lower_dentry);
10177     +
10178     + /* avoid destroying the lower inode if the object is in use */
10179     + dget(lower_dentry);
10180     + err = is_robranch_super(dentry->d_sb, bindex);
10181     + if (!err) {
10182     + /* see Documentation/filesystems/unionfs/issues.txt */
10183     + lockdep_off();
10184     + if (!S_ISDIR(lower_dentry->d_inode->i_mode))
10185     + err = vfs_unlink(lower_dir_dentry->d_inode,
10186     + lower_dentry);
10187     + else
10188     + err = vfs_rmdir(lower_dir_dentry->d_inode,
10189     + lower_dentry);
10190     + lockdep_on();
10191     + }
10192     +
10193     + /* if lower object deletion succeeds, update inode's times */
10194     + if (!err)
10195     + unionfs_copy_attr_times(dentry->d_inode);
10196     + dput(lower_dentry);
10197     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10198     + unlock_dir(lower_dir_dentry);
10199     +
10200     + if (err)
10201     + break; /* bindex now names the branch that failed */
10202     + }
10203     +
10204     + /*
10205     + * Create the whiteout in branch 0 (highest priority) only if (a)
10206     + * there was an error in any intermediate branch other than branch 0
10207     + * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or
10208     + * mounted read-only.
10209     + */
10210     + if (err) {
10211     + if ((bindex == 0) ||
10212     + ((bindex == dbstart(dentry)) &&
10213     + (!IS_COPYUP_ERR(err))))
10214     + goto out; /* report the lower error unchanged */
10215     + else {
10216     + if (!IS_COPYUP_ERR(err))
10217     + pr_debug("unionfs: lower object deletion "
10218     + "failed in branch:%d\n", bindex);
10219     + err = create_whiteout(dentry, sbstart(dentry->d_sb));
10220     + }
10221     + }
10222     +
10223     +out:
10224     + if (!err)
10225     + inode_dec_link_count(dentry->d_inode);
10226     +
10227     + /* We don't want to leave negative leftover dentries for revalidate. */
10228     + if (!err && (dbopaque(dentry) != -1))
10229     + update_bstart(dentry);
10230     +
10231     + return err;
10232     +}
10233     +
10234     +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10235     +{
10236     + int err = 0;
10237     + struct inode *inode = dentry->d_inode;
10238     + struct dentry *parent;
10239     + int valid;
10240     +
10241     + BUG_ON(S_ISDIR(inode->i_mode)); /* this entry point is non-dir only */
10242     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10243     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10244     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10245     +
10246     + valid = __unionfs_d_revalidate(dentry, parent, false);
10247     + if (unlikely(!valid)) {
10248     + err = -ESTALE; /* dentry failed revalidation */
10249     + goto out;
10250     + }
10251     + unionfs_check_dentry(dentry);
10252     +
10253     + err = unionfs_unlink_whiteout(dir, dentry, parent);
10254     + /* on success, call d_drop so the system "forgets" about us */
10255     + if (!err) {
10256     + unionfs_postcopyup_release(dentry);
10257     + unionfs_postcopyup_setmnt(parent);
10258     + if (inode->i_nlink == 0) /* drop lower inodes */
10259     + iput_lowers_all(inode, false);
10260     + d_drop(dentry);
10261     + /*
10262     + * if unlink/whiteout succeeded, parent dir mtime has
10263     + * changed
10264     + */
10265     + unionfs_copy_attr_times(dir);
10266     + }
10267     +
10268     +out:
10269     + if (!err) {
10270     + unionfs_check_dentry(dentry);
10271     + unionfs_check_inode(dir);
10272     + }
10273     + unionfs_unlock_dentry(dentry); /* unlock in reverse order of locking */
10274     + unionfs_unlock_parent(dentry, parent);
10275     + unionfs_read_unlock(dentry->d_sb);
10276     + return err;
10277     +}
10278     +
10279     +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10280     + struct unionfs_dir_state *namelist)
10281     +{
10282     + int err;
10283     + struct dentry *lower_dentry;
10284     + struct dentry *lower_dir_dentry = NULL;
10285     +
10286     + /* first remove the whiteouts listed in @namelist for branch dbstart */
10287     + err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10288     + if (err)
10289     + goto out;
10290     +
10291     + lower_dentry = unionfs_lower_dentry(dentry);
10292     +
10293     + lower_dir_dentry = lock_parent(lower_dentry);
10294     +
10295     + /* avoid destroying the lower inode if the file is in use */
10296     + dget(lower_dentry);
10297     + err = is_robranch(dentry);
10298     + if (!err)
10299     + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10300     + dput(lower_dentry);
10301     +
10302     + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10303     + /* propagate number of hard-links */
10304     + dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10305     +
10306     +out:
10307     + if (lower_dir_dentry) /* still NULL if delete_whiteouts() failed */
10308     + unlock_dir(lower_dir_dentry);
10309     + return err;
10310     +}
10311     +
10312     +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10313     +{
10314     + int err = 0;
10315     + struct unionfs_dir_state *namelist = NULL;
10316     + struct dentry *parent;
10317     + int dstart, dend; /* only set once check_empty() has succeeded */
10318     + bool valid;
10319     +
10320     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10321     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10322     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10323     +
10324     + valid = __unionfs_d_revalidate(dentry, parent, false);
10325     + if (unlikely(!valid)) {
10326     + err = -ESTALE;
10327     + goto out;
10328     + }
10329     + unionfs_check_dentry(dentry);
10330     +
10331     + /* check if this unionfs directory is empty or not */
10332     + err = check_empty(dentry, parent, &namelist);
10333     + if (err)
10334     + goto out;
10335     +
10336     + err = unionfs_rmdir_first(dir, dentry, namelist);
10337     + dstart = dbstart(dentry);
10338     + dend = dbend(dentry);
10339     + /*
10340     + * If the rmdir of the first directory entry in the union succeeded,
10341     + * we create a whiteout only when a lower priority branch might also
10342     + * have the same named directory (dstart < dend); IOW, if there is
10343     + * no same-named directory at a lower priority branch, no whiteout
10344     + * is needed. If the rmdir failed with a copyup-type error and dstart
10345     + * is not branch 0, we try the whiteout one branch to the left.
10346     + */
10347     + if (!err) {
10348     + if (dstart < dend)
10349     + err = create_whiteout(dentry, dstart);
10350     + } else {
10351     + int new_err;
10352     +
10353     + if (dstart == 0)
10354     + goto out;
10355     +
10356     + /* exit unless the error is one IS_COPYUP_ERR() accepts */
10357     + if (!IS_COPYUP_ERR(err))
10358     + goto out;
10359     +
10360     + new_err = create_whiteout(dentry, dstart - 1);
10361     + if (new_err != -EEXIST) /* on -EEXIST keep the rmdir error */
10362     + err = new_err;
10363     + }
10364     +
10365     +out:
10366     + /*
10367     + * Drop references to lower dentry/inode so storage space for them
10368     + * can be reclaimed. Then, call d_drop so the system "forgets"
10369     + * about us.
10370     + */
10371     + if (!err) {
10372     + iput_lowers_all(dentry->d_inode, false);
10373     + dput(unionfs_lower_dentry_idx(dentry, dstart));
10374     + unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10375     + d_drop(dentry);
10376     + /* update our lower vfsmnts, in case a copyup took place */
10377     + unionfs_postcopyup_setmnt(dentry);
10378     + unionfs_check_dentry(dentry);
10379     + unionfs_check_inode(dir);
10380     + }
10381     +
10382     + if (namelist)
10383     + free_rdstate(namelist);
10384     +
10385     + unionfs_unlock_dentry(dentry);
10386     + unionfs_unlock_parent(dentry, parent);
10387     + unionfs_read_unlock(dentry->d_sb);
10388     + return err;
10389     +}
10390     diff --git a/fs/unionfs/whiteout.c b/fs/unionfs/whiteout.c
10391     new file mode 100644
10392     index 0000000..405073a
10393     --- /dev/null
10394     +++ b/fs/unionfs/whiteout.c
10395     @@ -0,0 +1,584 @@
10396     +/*
10397     + * Copyright (c) 2003-2010 Erez Zadok
10398     + * Copyright (c) 2003-2006 Charles P. Wright
10399     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10400     + * Copyright (c) 2005-2006 Junjiro Okajima
10401     + * Copyright (c) 2005 Arun M. Krishnakumar
10402     + * Copyright (c) 2004-2006 David P. Quigley
10403     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10404     + * Copyright (c) 2003 Puja Gupta
10405     + * Copyright (c) 2003 Harikesavan Krishnan
10406     + * Copyright (c) 2003-2010 Stony Brook University
10407     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
10408     + *
10409     + * This program is free software; you can redistribute it and/or modify
10410     + * it under the terms of the GNU General Public License version 2 as
10411     + * published by the Free Software Foundation.
10412     + */
10413     +
10414     +#include "union.h"
10415     +
10416     +/*
10417     + * whiteout and opaque directory helpers
10418     + */
10419     +
10420     +/* What do we use for whiteouts. */
10421     +#define UNIONFS_WHPFX ".wh."
10422     +#define UNIONFS_WHLEN 4
10423     +/*
10424     + * If a directory contains this file, then it is opaque. We start with the
10425     + * .wh. flag so that it is blocked by lookup.
10426     + */
10427     +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10428     +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10429     +
10430     +/* build "<whiteout prefix><name>" in a freshly allocated buffer */
10431     +char *alloc_whname(const char *name, int len)
10432     +{
10433     + const int size = len + UNIONFS_WHLEN + 1; /* prefix + name + NUL */
10434     + char *whname = kmalloc(size, GFP_KERNEL);
10435     +
10436     + if (unlikely(!whname))
10437     + return ERR_PTR(-ENOMEM);
10438     +
10439     + strcpy(whname, UNIONFS_WHPFX);
10440     + strlcat(whname, name, size);
10441     +
10442     + return whname;
10443     +}
10444     +
10445     +/*
10446     + * Shrink *@namelen by the length of the whiteout prefix.
10447     + * XXX: could be an inline or a CPP macro; kept here with the whiteout code.
10448     + */
10449     +void unionfs_set_max_namelen(long *namelen)
10450     +{
10451     + *namelen = *namelen - UNIONFS_WHLEN;
10452     +}
10453     +
10454     +/* if @namep has the whiteout prefix, strip it (updating @namelenp too) */
10455     +bool is_whiteout_name(char **namep, int *namelenp)
10456     +{
10457     + if (*namelenp <= UNIONFS_WHLEN ||
10458     + strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN) != 0)
10459     + return false;
10460     +
10461     + /* skip past the prefix so the caller sees the bare name */
10462     + *namep += UNIONFS_WHLEN;
10463     + *namelenp -= UNIONFS_WHLEN;
10464     + return true;
10465     +}
10465     +
10466     +/* a name is valid unless it matches a whiteout or opaque-marker pattern */
10467     +bool is_validname(const char *name)
10468     +{
10469     + /* anything carrying the whiteout prefix is reserved */
10470     + const bool wh = !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN);
10471     + /* so is anything starting with the opaque marker's base name */
10472     + const bool opq = !strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
10473     + sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1);
10474     + return !wh && !opq;
10475     +}
10476     +
10477     +/*
10478     + * Look for whiteout @name in @lower_parent. Returns a dentry the caller must
10479     + * dput(): negative if no whiteout exists, positive if a regular-file one
10480     + * does. Returns ERR_PTR() on allocation/lookup failure, or -EIO if the
10481     + * whiteout is not a regular file.
10482     + *
10483     + * XXX: callers could reuse the whname buffer; needs a different API.
10484     + */
10485     +struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent)
10486     +{
10487     + char *whname = NULL;
10488     + int err = 0, namelen;
10489     + struct dentry *wh_dentry = NULL;
10490     +
10491     + namelen = strlen(name);
10492     + whname = alloc_whname(name, namelen);
10493     + if (unlikely(IS_ERR(whname))) {
10494     + /* return directly: the "out" path would kfree() an ERR_PTR */
10495     + return ERR_PTR(PTR_ERR(whname));
10496     + }
10497     +
10498     + /* check if whiteout exists in this branch: lookup .wh.foo */
10499     + wh_dentry = lookup_lck_len(whname, lower_parent, strlen(whname));
10500     + if (IS_ERR(wh_dentry)) {
10501     + err = PTR_ERR(wh_dentry);
10502     + goto out;
10503     + }
10504     +
10505     + /* check if negative dentry (ENOENT): returned as-is, not an error */
10506     + if (!wh_dentry->d_inode)
10507     + goto out;
10508     +
10509     + /* whiteout found: check if valid type */
10510     + if (!S_ISREG(wh_dentry->d_inode->i_mode)) {
10511     + printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n",
10512     + whname, wh_dentry->d_inode->i_mode);
10513     + dput(wh_dentry);
10514     + err = -EIO;
10515     + goto out;
10516     + }
10517     +
10518     +out:
10519     + kfree(whname);
10520     + if (err)
10521     + wh_dentry = ERR_PTR(err);
10522     + return wh_dentry;
10523     +}
10524     +
10525     +/* return first whiteout of @dentry found via its parent, else an ERR_PTR */
10526     +struct dentry *find_first_whiteout(struct dentry *dentry)
10527     +{
10528     + int bindex, bstart, bend;
10529     + struct dentry *parent, *lower_parent, *wh_dentry;
10530     +
10531     + parent = dget_parent(dentry);
10532     +
10533     + bstart = dbstart(parent);
10534     + bend = dbend(parent);
10535     + wh_dentry = ERR_PTR(-ENOENT); /* result if no branch is searched */
10536     +
10537     + for (bindex = bstart; bindex <= bend; bindex++) {
10538     + lower_parent = unionfs_lower_dentry_idx(parent, bindex);
10539     + if (!lower_parent)
10540     + continue;
10541     + wh_dentry = lookup_whiteout(dentry->d_name.name, lower_parent);
10542     + if (IS_ERR(wh_dentry))
10543     + continue; /* keeps this error if it was the last branch */
10544     + if (wh_dentry->d_inode)
10545     + break; /* positive whiteout: caller must dput() it */
10546     + dput(wh_dentry);
10547     + wh_dentry = ERR_PTR(-ENOENT);
10548     + }
10549     +
10550     + dput(parent);
10551     +
10552     + return wh_dentry;
10553     +}
10554     +
10555     +/*
10556     + * Unlink a whiteout dentry. Returns 0 or -errno. Caller must hold and
10557     + * release the @wh_dentry reference.
10558     + */
10559     +int unlink_whiteout(struct dentry *wh_dentry)
10560     +{
10561     + int err;
10562     + struct dentry *lower_dir_dentry;
10563     +
10564     + /* dget and lock parent dentry */
10565     + lower_dir_dentry = lock_parent_wh(wh_dentry);
10566     +
10567     + /* see Documentation/filesystems/unionfs/issues.txt */
10568     + lockdep_off();
10569     + err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
10570     + lockdep_on();
10571     + unlock_dir(lower_dir_dentry);
10572     +
10573     + /*
10574     + * Whiteouts are special files and should be deleted no matter what
10575     + * (as if they never existed), in order to allow this create
10576     + * operation to succeed. This is especially important in sticky
10577     + * directories: a whiteout may have been created by one user, but
10578     + * the newly created file may be created by another user.
10579     + * Therefore, in order to maintain Unix semantics, if the vfs_unlink
10580     + * above failed with -EPERM, then we have to try to directly unlink
10581     + * the whiteout. Note: in the ODF version of unionfs, whiteouts are
10582     + * handled much more cleanly.
10583     + */
10584     + if (err == -EPERM) {
10585     + struct inode *inode = lower_dir_dentry->d_inode;
10586     + err = inode->i_op->unlink(inode, wh_dentry); /* NOTE(review): runs after unlock_dir() -- confirm locking */
10587     + }
10588     + if (err)
10589     + printk(KERN_ERR "unionfs: could not unlink whiteout %s, "
10590     + "err = %d\n", wh_dentry->d_name.name, err);
10591     +
10592     + return err;
10593     +
10594     +}
10595     +
10596     +/*
10597     + * Helper function when creating new objects (create, symlink, mknod, etc.).
10598     + * Checks to see if there's a whiteout in @lower_dentry's parent directory,
10599     + * whose name is taken from @dentry. Then tries to remove that whiteout, if
10600     + * found. If <dentry,bindex> is a branch marked readonly, return -EROFS.
10601     + * If it finds both a regular file and a whiteout, return -EIO (this should
10602     + * never happen).
10603     + *
10604     + * Return 0 if no whiteout was found. Return 1 if one was found and
10605     + * successfully removed. Therefore a value >= 0 tells the caller that
10606     + * @lower_dentry belongs to a good branch to create the new object in.
10607     + * Return -ERRNO if an error occurred during whiteout lookup or in trying to
10608     + * unlink the whiteout.
10609     + */
10610     +int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry,
10611     + int bindex)
10612     +{
10613     + int err;
10614     + struct dentry *wh_dentry = NULL;
10615     + struct dentry *lower_dir_dentry = NULL;
10616     +
10617     + /* look for whiteout dentry first */
10618     + lower_dir_dentry = dget_parent(lower_dentry);
10619     + wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry);
10620     + if (IS_ERR(wh_dentry)) {
10621     + err = PTR_ERR(wh_dentry);
10622     + goto out;
10623     + }
10624     +
10625     + if (!wh_dentry->d_inode) { /* no whiteout exists*/
10626     + err = 0;
10627     + goto out_dput;
10628     + }
10629     +
10630     + /* check if regular file and whiteout were both found */
10631     + if (unlikely(lower_dentry->d_inode)) {
10632     + err = -EIO;
10633     + printk(KERN_ERR "unionfs: found both whiteout and regular "
10634     + "file in directory %s (branch %d)\n",
10635     + lower_dir_dentry->d_name.name, bindex);
10636     + goto out_dput;
10637     + }
10638     +
10639     + /* check if branch is writeable */
10640     + err = is_robranch_super(dentry->d_sb, bindex);
10641     + if (err)
10642     + goto out_dput;
10643     +
10644     + /* .wh.foo has been found, so let's unlink it */
10645     + err = unlink_whiteout(wh_dentry);
10646     + if (!err)
10647     + err = 1; /* a whiteout was found and successfully removed */
10648     +out_dput:
10649     + dput(wh_dentry);
10650     +out:
10651     + dput(lower_dir_dentry); /* held until here: the -EIO message reads it */
10652     + return err;
10653     +}
10654     +
10655     +/*
10656     + * Pass an unionfs dentry and a branch index @start. It will try to create a
10657     + * whiteout for the filename in dentry in branch @start. On a copyup-type
10658     + * error it will proceed to a branch to the left. Returns 0 or -errno.
10659     + */
10660     +int create_whiteout(struct dentry *dentry, int start)
10661     +{
10662     + int bstart, bend, bindex;
10663     + struct dentry *lower_dir_dentry;
10664     + struct dentry *lower_dentry;
10665     + struct dentry *lower_wh_dentry;
10666     + struct nameidata nd;
10667     + char *name = NULL;
10668     + int err = -EINVAL;
10669     +
10670     + verify_locked(dentry);
10671     +
10672     + bstart = dbstart(dentry);
10673     + bend = dbend(dentry);
10674     +
10675     + /* create dentry's whiteout equivalent */
10676     + name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
10677     + if (unlikely(IS_ERR(name))) {
10678     + /* return directly: the "out" path would kfree() an ERR_PTR */
10679     + return PTR_ERR(name);
10680     + }
10681     +
10682     + for (bindex = start; bindex >= 0; bindex--) {
10683     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10684     +
10685     + if (!lower_dentry) {
10686     + /*
10687     + * if lower dentry is not present, create the
10688     + * entire lower dentry directory structure and go
10689     + * ahead. Since we want to just create whiteout, we
10690     + * only want the parent dentry, and hence get rid of
10691     + * this dentry.
10692     + */
10693     + lower_dentry = create_parents(dentry->d_inode,
10694     + dentry,
10695     + dentry->d_name.name,
10696     + bindex);
10697     + if (!lower_dentry || IS_ERR(lower_dentry)) {
10698     + int ret = PTR_ERR(lower_dentry);
10699     + if (!IS_COPYUP_ERR(ret))
10700     + printk(KERN_ERR
10701     + "unionfs: create_parents for "
10702     + "whiteout failed: bindex=%d "
10703     + "err=%d\n", bindex, ret);
10704     + continue;
10705     + }
10706     + }
10707     +
10708     + lower_wh_dentry =
10709     + lookup_lck_len(name, lower_dentry->d_parent,
10710     + dentry->d_name.len + UNIONFS_WHLEN);
10711     + if (IS_ERR(lower_wh_dentry))
10712     + continue;
10713     +
10714     + /*
10715     + * The whiteout already exists. This used to be impossible,
10716     + * but now is possible because of opaqueness.
10717     + */
10718     + if (lower_wh_dentry->d_inode) {
10719     + dput(lower_wh_dentry);
10720     + err = 0;
10721     + goto out;
10722     + }
10723     + err = init_lower_nd(&nd, LOOKUP_CREATE);
10724     + if (unlikely(err < 0)) {
10725     + dput(lower_wh_dentry); /* don't leak the lookup reference */
10726     + goto out;
10727     + }
10728     + lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
10729     + err = is_robranch_super(dentry->d_sb, bindex);
10730     + if (!err)
10731     + err = vfs_create(lower_dir_dentry->d_inode,
10732     + lower_wh_dentry,
10733     + current_umask() & S_IRUGO, &nd);
10734     + unlock_dir(lower_dir_dentry);
10735     + dput(lower_wh_dentry);
10736     + release_lower_nd(&nd, err);
10737     +
10738     + if (!err || !IS_COPYUP_ERR(err))
10739     + break; /* done, or an error that moving left cannot fix */
10740     + }
10741     +
10742     + /* set dbopaque so that lookup will not proceed after this branch */
10743     + if (!err)
10744     + dbopaque(dentry) = bindex;
10745     +
10746     +out:
10747     + kfree(name);
10748     + return err;
10749     +}
10750     +
10751     +/*
10752     + * Delete every whiteout that @namelist records for branch @bindex of the
10753     + * directory @dentry (for rmdir). Returns 0 or -errno; stops at first error.
10754     + * lower directory inode should be locked
10755     + */
10756     +static int do_delete_whiteouts(struct dentry *dentry, int bindex,
10757     + struct unionfs_dir_state *namelist)
10758     +{
10759     + int err = 0;
10760     + struct dentry *lower_dir_dentry = NULL;
10761     + struct dentry *lower_dentry;
10762     + char *name = NULL, *p;
10763     + struct inode *lower_dir;
10764     + int i;
10765     + struct list_head *pos;
10766     + struct filldir_node *cursor;
10767     +
10768     + /* Find out lower parent dentry */
10769     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10770     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
10771     + lower_dir = lower_dir_dentry->d_inode;
10772     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
10773     +
10774     + err = -ENOMEM;
10775     + name = __getname();
10776     + if (unlikely(!name))
10777     + goto out;
10778     + strcpy(name, UNIONFS_WHPFX); /* prefix is written once ... */
10779     + p = name + UNIONFS_WHLEN; /* ... each entry name is copied in at p */
10780     +
10781     + err = 0;
10782     + for (i = 0; !err && i < namelist->size; i++) {
10783     + list_for_each(pos, &namelist->list[i]) {
10784     + cursor =
10785     + list_entry(pos, struct filldir_node,
10786     + file_list);
10787     + /* Only operate on whiteouts in this branch. */
10788     + if (cursor->bindex != bindex)
10789     + continue;
10790     + if (!cursor->whiteout)
10791     + continue;
10792     +
10793     + strlcpy(p, cursor->name, PATH_MAX - UNIONFS_WHLEN);
10794     + lower_dentry =
10795     + lookup_lck_len(name, lower_dir_dentry,
10796     + cursor->namelen +
10797     + UNIONFS_WHLEN);
10798     + if (IS_ERR(lower_dentry)) {
10799     + err = PTR_ERR(lower_dentry);
10800     + break;
10801     + }
10802     + if (lower_dentry->d_inode)
10803     + err = vfs_unlink(lower_dir, lower_dentry);
10804     + dput(lower_dentry);
10805     + if (err)
10806     + break; /* outer loop ends too: it tests !err */
10807     + }
10808     + }
10809     +
10810     + __putname(name);
10811     +
10812     + /* After all of the removals, we should copy the attributes once. */
10813     + fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
10814     +
10815     +out:
10816     + return err;
10817     +}
10818     +
10819     +
10820     +void __delete_whiteouts(struct work_struct *work)
10821     +{
10822     + struct sioq_args *sioq = container_of(work, struct sioq_args, work);
10823     + struct deletewh_args *req = &sioq->deletewh;
10824     +
10825     + sioq->err = do_delete_whiteouts(req->dentry, req->bindex, req->namelist);
10826     + complete(&sioq->comp); /* result is in ->err; signal completion */
10827     +}
10828     +
10829     +/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
10830     +int delete_whiteouts(struct dentry *dentry, int bindex,
10831     + struct unionfs_dir_state *namelist)
10832     +{
10833     + int err;
10834     + struct super_block *sb;
10835     + struct dentry *lower_dir_dentry;
10836     + struct inode *lower_dir;
10837     + struct sioq_args args;
10838     +
10839     + sb = dentry->d_sb;
10840     +
10841     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
10842     + BUG_ON(bindex < dbstart(dentry));
10843     + BUG_ON(bindex > dbend(dentry));
10844     + err = is_robranch_super(sb, bindex); /* refuse on a read-only branch */
10845     + if (err)
10846     + goto out;
10847     +
10848     + lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10849     + BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
10850     + lower_dir = lower_dir_dentry->d_inode;
10851     + BUG_ON(!S_ISDIR(lower_dir->i_mode));
10852     +
10853     + if (!inode_permission(lower_dir, MAY_WRITE | MAY_EXEC)) {
10854     + err = do_delete_whiteouts(dentry, bindex, namelist);
10855     + } else { /* permission check failed: hand the same work to sioq */
10856     + args.deletewh.namelist = namelist;
10857     + args.deletewh.dentry = dentry;
10858     + args.deletewh.bindex = bindex;
10859     + run_sioq(__delete_whiteouts, &args);
10860     + err = args.err; /* filled in by __delete_whiteouts() */
10861     + }
10862     +
10863     +out:
10864     + return err;
10865     +}
10866     +
10867     +/****************************************************************************
10868     + * Opaque directory helpers *
10869     + ****************************************************************************/
10870     +
10871     +/*
10872     + * is_opaque_dir: returns 0 if it is NOT an opaque dir, 1 if it is, and
10873     + * -errno if an error occurred trying to figure this out.
10874     + */
10875     +int is_opaque_dir(struct dentry *dentry, int bindex)
10876     +{
10877     + int err = 0;
10878     + struct dentry *lower_dentry;
10879     + struct dentry *wh_lower_dentry;
10880     + struct inode *lower_inode;
10881     + struct sioq_args args;
10882     +
10883     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10884     + lower_inode = lower_dentry->d_inode;
10885     +
10886     + BUG_ON(!S_ISDIR(lower_inode->i_mode));
10887     +
10888     + mutex_lock(&lower_inode->i_mutex); /* held across the marker lookup */
10889     +
10890     + if (!inode_permission(lower_inode, MAY_EXEC)) {
10891     + wh_lower_dentry =
10892     + lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
10893     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
10894     + } else { /* permission check failed: do the same lookup via sioq */
10895     + args.is_opaque.dentry = lower_dentry;
10896     + run_sioq(__is_opaque_dir, &args);
10897     + wh_lower_dentry = args.ret; /* filled in by __is_opaque_dir() */
10898     + }
10899     +
10900     + mutex_unlock(&lower_inode->i_mutex);
10901     +
10902     + if (IS_ERR(wh_lower_dentry)) {
10903     + err = PTR_ERR(wh_lower_dentry);
10904     + goto out;
10905     + }
10906     +
10907     + /* This is an opaque dir iff wh_lower_dentry is positive */
10908     + err = !!wh_lower_dentry->d_inode;
10909     +
10910     + dput(wh_lower_dentry);
10911     +out:
10912     + return err;
10913     +}
10914     +
10915     +void __is_opaque_dir(struct work_struct *work)
10916     +{
10917     + struct sioq_args *sioq = container_of(work, struct sioq_args, work);
10918     +
10919     + sioq->ret = lookup_one_len(UNIONFS_DIR_OPAQUE,
10920     + sioq->is_opaque.dentry, sizeof(UNIONFS_DIR_OPAQUE) - 1);
10921     + complete(&sioq->comp); /* result is in ->ret; signal completion */
10922     +}
10923     +
10924     +int make_dir_opaque(struct dentry *dentry, int bindex)
10925     +{
10926     + int err = 0;
10927     + struct dentry *lower_dentry, *diropq;
10928     + struct inode *lower_dir;
10929     + struct nameidata nd;
10930     + const struct cred *old_creds;
10931     + struct cred *new_creds;
10932     +
10933     + /*
10934     + * Opaque directory whiteout markers are special files (like regular
10935     + * whiteouts), and should appear to the users as if they don't
10936     + * exist. They should be created/deleted regardless of directory
10937     + * search/create permissions, but only for the duration of this
10938     + * creation of the .wh.__dir_opaque file. Note, this does not
10939     + * circumvent normal ->permission.
10940     + */
10941     + new_creds = prepare_creds();
10942     + if (unlikely(!new_creds)) {
10943     + err = -ENOMEM;
10944     + goto out_err;
10945     + }
10946     + cap_raise(new_creds->cap_effective, CAP_DAC_READ_SEARCH);
10947     + cap_raise(new_creds->cap_effective, CAP_DAC_OVERRIDE);
10948     + old_creds = override_creds(new_creds);
10949     +
10950     + lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10951     + lower_dir = lower_dentry->d_inode;
10952     + BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
10953     + !S_ISDIR(lower_dir->i_mode));
10954     +
10955     + mutex_lock(&lower_dir->i_mutex);
10956     + diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
10957     + sizeof(UNIONFS_DIR_OPAQUE) - 1);
10958     + if (IS_ERR(diropq)) {
10959     + err = PTR_ERR(diropq);
10960     + goto out;
10961     + }
10962     +
10963     + err = init_lower_nd(&nd, LOOKUP_CREATE);
10964     + if (unlikely(err < 0))
10965     + goto out_dput; /* still drop the marker dentry reference */
10966     + if (!diropq->d_inode)
10967     + err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
10968     + if (!err)
10969     + dbopaque(dentry) = bindex;
10970     + release_lower_nd(&nd, err);
10971     +
10972     +out_dput:
10973     + dput(diropq);
10974     +out:
10975     + mutex_unlock(&lower_dir->i_mutex);
10976     + revert_creds(old_creds);
10977     +out_err:
10978     + return err;
10979     +}
10980     diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
10981     new file mode 100644
10982     index 0000000..9002e06
10983     --- /dev/null
10984     +++ b/fs/unionfs/xattr.c
10985     @@ -0,0 +1,173 @@
10986     +/*
10987     + * Copyright (c) 2003-2010 Erez Zadok
10988     + * Copyright (c) 2003-2006 Charles P. Wright
10989     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10990     + * Copyright (c) 2005-2006 Junjiro Okajima
10991     + * Copyright (c) 2005 Arun M. Krishnakumar
10992     + * Copyright (c) 2004-2006 David P. Quigley
10993     + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10994     + * Copyright (c) 2003 Puja Gupta
10995     + * Copyright (c) 2003 Harikesavan Krishnan
10996     + * Copyright (c) 2003-2010 Stony Brook University
10997     + * Copyright (c) 2003-2010 The Research Foundation of SUNY
10998     + *
10999     + * This program is free software; you can redistribute it and/or modify
11000     + * it under the terms of the GNU General Public License version 2 as
11001     + * published by the Free Software Foundation.
11002     + */
11003     +
11004     +#include "union.h"
11005     +
11006     +/* This is lifted from fs/xattr.c: returns a kmalloc'ed buffer of @size bytes, NULL when @size is 0, or an ERR_PTR */
11007     +void *unionfs_xattr_alloc(size_t size, size_t limit)
11008     +{
11009     + void *ptr;
11010     +
11011     + if (size > limit) /* request exceeds the caller-supplied maximum */
11012     + return ERR_PTR(-E2BIG);
11013     +
11014     + if (!size) /* size request, no buffer is needed */
11015     + return NULL;
11016     +
11017     + ptr = kmalloc(size, GFP_KERNEL);
11018     + if (unlikely(!ptr))
11019     + return ERR_PTR(-ENOMEM);
11020     + return ptr;
11021     +}
11022     +
11023     +/*
11024     + * Get xattr @name via vfs_getxattr() on the lower dentry. BKL held by caller.
11025     + * dentry->d_inode->i_mutex locked. Returns -ESTALE if revalidation fails.
11026     + */
11027     +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
11028     + size_t size)
11029     +{
11030     + struct dentry *lower_dentry = NULL;
11031     + struct dentry *parent;
11032     + int err = -EOPNOTSUPP; /* always overwritten below: -ESTALE or the vfs_getxattr() result */
11033     + bool valid;
11034     +
11035     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); /* lock order: superblock (read), parent, then dentry */
11036     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11037     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11038     +
11039     + valid = __unionfs_d_revalidate(dentry, parent, false);
11040     + if (unlikely(!valid)) {
11041     + err = -ESTALE;
11042     + goto out;
11043     + }
11044     +
11045     + lower_dentry = unionfs_lower_dentry(dentry);
11046     +
11047     + err = vfs_getxattr(lower_dentry, (char *) name, value, size); /* the cast drops const from @name */
11048     +
11049     +out:
11050     + unionfs_check_dentry(dentry);
11051     + unionfs_unlock_dentry(dentry); /* unlock in the reverse of the acquisition order */
11052     + unionfs_unlock_parent(dentry, parent);
11053     + unionfs_read_unlock(dentry->d_sb);
11054     + return err;
11055     +}
11056     +
11057     +/*
11058     + * Set xattr @name via vfs_setxattr() on the lower dentry. BKL held by caller.
11059     + * dentry->d_inode->i_mutex locked. Returns -ESTALE if revalidation fails.
11060     + */
11061     +int unionfs_setxattr(struct dentry *dentry, const char *name,
11062     + const void *value, size_t size, int flags)
11063     +{
11064     + struct dentry *lower_dentry = NULL;
11065     + struct dentry *parent;
11066     + int err = -EOPNOTSUPP; /* always overwritten below: -ESTALE or the vfs_setxattr() result */
11067     + bool valid;
11068     +
11069     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); /* lock order: superblock (read), parent, then dentry */
11070     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11071     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11072     +
11073     + valid = __unionfs_d_revalidate(dentry, parent, false);
11074     + if (unlikely(!valid)) {
11075     + err = -ESTALE;
11076     + goto out;
11077     + }
11078     +
11079     + lower_dentry = unionfs_lower_dentry(dentry);
11080     +
11081     + err = vfs_setxattr(lower_dentry, (char *) name, (void *) value,
11082     + size, flags); /* the casts drop const from @name and @value */
11083     +
11084     +out:
11085     + unionfs_check_dentry(dentry);
11086     + unionfs_unlock_dentry(dentry); /* unlock in the reverse of the acquisition order */
11087     + unionfs_unlock_parent(dentry, parent);
11088     + unionfs_read_unlock(dentry->d_sb);
11089     + return err;
11090     +}
11091     +
11092     +/*
11093     + * Remove xattr @name via vfs_removexattr() on the lower dentry. BKL held by caller.
11094     + * dentry->d_inode->i_mutex locked. Returns -ESTALE if revalidation fails.
11095     + */
11096     +int unionfs_removexattr(struct dentry *dentry, const char *name)
11097     +{
11098     + struct dentry *lower_dentry = NULL;
11099     + struct dentry *parent;
11100     + int err = -EOPNOTSUPP; /* always overwritten below: -ESTALE or the vfs_removexattr() result */
11101     + bool valid;
11102     +
11103     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); /* lock order: superblock (read), parent, then dentry */
11104     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11105     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11106     +
11107     + valid = __unionfs_d_revalidate(dentry, parent, false);
11108     + if (unlikely(!valid)) {
11109     + err = -ESTALE;
11110     + goto out;
11111     + }
11112     +
11113     + lower_dentry = unionfs_lower_dentry(dentry);
11114     +
11115     + err = vfs_removexattr(lower_dentry, (char *) name); /* the cast drops const from @name */
11116     +
11117     +out:
11118     + unionfs_check_dentry(dentry);
11119     + unionfs_unlock_dentry(dentry); /* unlock in the reverse of the acquisition order */
11120     + unionfs_unlock_parent(dentry, parent);
11121     + unionfs_read_unlock(dentry->d_sb);
11122     + return err;
11123     +}
11124     +
11125     +/*
11126     + * List xattr names via vfs_listxattr() on the lower dentry. BKL held by caller.
11127     + * dentry->d_inode->i_mutex locked. Returns -ESTALE if revalidation fails.
11128     + */
11129     +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
11130     +{
11131     + struct dentry *lower_dentry = NULL;
11132     + struct dentry *parent;
11133     + int err = -EOPNOTSUPP; /* always overwritten below: -ESTALE or the vfs_listxattr() result */
11134     + char *encoded_list = NULL;
11135     + bool valid;
11136     +
11137     + unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); /* lock order: superblock (read), parent, then dentry */
11138     + parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11139     + unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11140     +
11141     + valid = __unionfs_d_revalidate(dentry, parent, false);
11142     + if (unlikely(!valid)) {
11143     + err = -ESTALE;
11144     + goto out;
11145     + }
11146     +
11147     + lower_dentry = unionfs_lower_dentry(dentry);
11148     +
11149     + encoded_list = list; /* plain alias of @list; no encoding is done in this function */
11150     + err = vfs_listxattr(lower_dentry, encoded_list, size);
11151     +
11152     +out:
11153     + unionfs_check_dentry(dentry);
11154     + unionfs_unlock_dentry(dentry); /* unlock in the reverse of the acquisition order */
11155     + unionfs_unlock_parent(dentry, parent);
11156     + unionfs_read_unlock(dentry->d_sb);
11157     + return err;
11158     +}
11159     diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
11160     index da317c7..64f1ced 100644
11161     --- a/include/linux/fs_stack.h
11162     +++ b/include/linux/fs_stack.h
11163     @@ -1,7 +1,19 @@
11164     +/*
11165     + * Copyright (c) 2006-2009 Erez Zadok
11166     + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
11167     + * Copyright (c) 2006-2009 Stony Brook University
11168     + * Copyright (c) 2006-2009 The Research Foundation of SUNY
11169     + *
11170     + * This program is free software; you can redistribute it and/or modify
11171     + * it under the terms of the GNU General Public License version 2 as
11172     + * published by the Free Software Foundation.
11173     + */
11174     +
11175     #ifndef _LINUX_FS_STACK_H
11176     #define _LINUX_FS_STACK_H
11177    
11178     -/* This file defines generic functions used primarily by stackable
11179     +/*
11180     + * This file defines generic functions used primarily by stackable
11181     * filesystems; none of these functions require i_mutex to be held.
11182     */
11183    
11184     diff --git a/include/linux/magic.h b/include/linux/magic.h
11185     index eb9800f..9770154 100644
11186     --- a/include/linux/magic.h
11187     +++ b/include/linux/magic.h
11188     @@ -47,6 +47,8 @@
11189     #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
11190     #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
11191    
11192     +#define UNIONFS_SUPER_MAGIC 0xf15f083d
11193     +
11194     #define SMB_SUPER_MAGIC 0x517B
11195     #define USBDEVICE_SUPER_MAGIC 0x9fa2
11196     #define CGROUP_SUPER_MAGIC 0x27e0eb
11197     diff --git a/include/linux/namei.h b/include/linux/namei.h
11198     index 05b441d..dca6f9a 100644
11199     --- a/include/linux/namei.h
11200     +++ b/include/linux/namei.h
11201     @@ -72,6 +72,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
11202    
11203     extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
11204     int (*open)(struct inode *, struct file *));
11205     +extern void release_open_intent(struct nameidata *);
11206    
11207     extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
11208    
11209     diff --git a/include/linux/splice.h b/include/linux/splice.h
11210     index 997c3b4..54f5501 100644
11211     --- a/include/linux/splice.h
11212     +++ b/include/linux/splice.h
11213     @@ -81,6 +81,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
11214     struct splice_pipe_desc *);
11215     extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
11216     splice_direct_actor *);
11217     +extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
11218     + loff_t *ppos, size_t len, unsigned int flags);
11219     +extern long vfs_splice_to(struct file *in, loff_t *ppos,
11220     + struct pipe_inode_info *pipe, size_t len,
11221     + unsigned int flags);
11222    
11223     /*
11224     * for dynamic pipe sizing
11225     diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
11226     new file mode 100644
11227     index 0000000..c84d97e
11228     --- /dev/null
11229     +++ b/include/linux/union_fs.h
11230     @@ -0,0 +1,22 @@
11231     +/*
11232     + * Copyright (c) 2003-2009 Erez Zadok
11233     + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11234     + * Copyright (c) 2003-2009 Stony Brook University
11235     + * Copyright (c) 2003-2009 The Research Foundation of SUNY
11236     + *
11237     + * This program is free software; you can redistribute it and/or modify
11238     + * it under the terms of the GNU General Public License version 2 as
11239     + * published by the Free Software Foundation.
11240     + */
11241     +
11242     +#ifndef _LINUX_UNION_FS_H
11243     +#define _LINUX_UNION_FS_H
11244     +
11245     +/*
11246     + * DEFINITIONS FOR USER AND KERNEL CODE: ioctl commands (type 0x15, int argument)
11247     + */
11248     +# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
11249     +# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
11250     +
11251     +#endif /* _LINUX_UNION_FS_H */
11252     +
11253     diff --git a/security/security.c b/security/security.c
11254     index 351942a..69505f7 100644
11255     --- a/security/security.c
11256     +++ b/security/security.c
11257     @@ -529,6 +529,7 @@ int security_inode_permission(struct inode *inode, int mask)
11258     return 0;
11259     return security_ops->inode_permission(inode, mask);
11260     }
11261     +EXPORT_SYMBOL(security_inode_permission);
11262    
11263     int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
11264     {