5 |
* |
* |
6 |
* Licensed under GPL version 2, see file LICENSE in this tarball for details. |
* Licensed under GPL version 2, see file LICENSE in this tarball for details. |
7 |
*/ |
*/ |
|
|
|
|
#include "libbb.h" |
|
8 |
#include <sys/vfs.h> |
#include <sys/vfs.h> |
9 |
|
#include <sys/mount.h> |
10 |
// Make up for header deficiencies. |
#include "libbb.h" |
11 |
|
// Make up for header deficiencies |
12 |
#ifndef RAMFS_MAGIC |
#ifndef RAMFS_MAGIC |
13 |
#define RAMFS_MAGIC ((unsigned)0x858458f6) |
# define RAMFS_MAGIC ((unsigned)0x858458f6) |
14 |
#endif |
#endif |
|
|
|
15 |
#ifndef TMPFS_MAGIC |
#ifndef TMPFS_MAGIC |
16 |
#define TMPFS_MAGIC ((unsigned)0x01021994) |
# define TMPFS_MAGIC ((unsigned)0x01021994) |
17 |
#endif |
#endif |
|
|
|
18 |
#ifndef MS_MOVE |
#ifndef MS_MOVE |
19 |
#define MS_MOVE 8192 |
# define MS_MOVE 8192 |
20 |
#endif |
#endif |
21 |
|
|
22 |
// Recursively delete contents of rootfs. |
// Recursively delete contents of rootfs |
23 |
static void delete_contents(const char *directory, dev_t rootdev) |
static void delete_contents(const char *directory, dev_t rootdev) |
24 |
{ |
{ |
25 |
DIR *dir; |
DIR *dir; |
30 |
if (lstat(directory, &st) || st.st_dev != rootdev) |
if (lstat(directory, &st) || st.st_dev != rootdev) |
31 |
return; |
return; |
32 |
|
|
33 |
// Recursively delete the contents of directories. |
// Recursively delete the contents of directories |
34 |
if (S_ISDIR(st.st_mode)) { |
if (S_ISDIR(st.st_mode)) { |
35 |
dir = opendir(directory); |
dir = opendir(directory); |
36 |
if (dir) { |
if (dir) { |
48 |
} |
} |
49 |
closedir(dir); |
closedir(dir); |
50 |
|
|
51 |
// Directory should now be empty. Zap it. |
// Directory should now be empty, zap it |
52 |
rmdir(directory); |
rmdir(directory); |
53 |
} |
} |
54 |
|
} else { |
55 |
// It wasn't a directory. Zap it. |
// It wasn't a directory, zap it |
56 |
} else unlink(directory); |
unlink(directory); |
57 |
|
} |
58 |
} |
} |
59 |
|
|
60 |
int switch_root_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
int switch_root_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
61 |
int switch_root_main(int argc UNUSED_PARAM, char **argv) |
int switch_root_main(int argc UNUSED_PARAM, char **argv) |
62 |
{ |
{ |
63 |
char *newroot, *console = NULL; |
char *newroot, *console = NULL; |
64 |
struct stat st1, st2; |
struct stat st; |
65 |
struct statfs stfs; |
struct statfs stfs; |
66 |
dev_t rootdev; |
dev_t rootdev; |
67 |
|
|
68 |
// Parse args (-c console) |
// Parse args (-c console) |
69 |
opt_complementary = "-2"; // minimum 2 params |
opt_complementary = "-2"; // minimum 2 params |
70 |
getopt32(argv, "+c:", &console); // '+': stop parsing at first non-option |
getopt32(argv, "+c:", &console); // '+': stop at first non-option |
71 |
argv += optind; |
argv += optind; |
|
|
|
|
// Change to new root directory and verify it's a different fs. |
|
72 |
newroot = *argv++; |
newroot = *argv++; |
73 |
|
|
74 |
|
// Change to new root directory and verify it's a different fs |
75 |
xchdir(newroot); |
xchdir(newroot); |
76 |
if (lstat(".", &st1) || lstat("/", &st2) || st1.st_dev == st2.st_dev) { |
xstat("/", &st); |
77 |
bb_error_msg_and_die("bad newroot %s", newroot); |
rootdev = st.st_dev; |
78 |
|
xstat(".", &st); |
79 |
|
if (st.st_dev == rootdev || getpid() != 1) { |
80 |
|
// Show usage, it says new root must be a mountpoint |
81 |
|
// and we must be PID 1 |
82 |
|
bb_show_usage(); |
83 |
} |
} |
|
rootdev = st2.st_dev; |
|
84 |
|
|
85 |
// Additional sanity checks: we're about to rm -rf /, so be REALLY SURE |
// Additional sanity checks: we're about to rm -rf /, so be REALLY SURE |
86 |
// we mean it. (I could make this a CONFIG option, but I would get email |
// we mean it. I could make this a CONFIG option, but I would get email |
87 |
// from all the people who WILL eat their filesystems.) |
// from all the people who WILL destroy their filesystems. |
88 |
if (lstat("/init", &st1) || !S_ISREG(st1.st_mode) || statfs("/", &stfs) |
if (stat("/init", &st) != 0 || !S_ISREG(st.st_mode)) { |
89 |
|| (((unsigned)stfs.f_type != RAMFS_MAGIC) && ((unsigned)stfs.f_type != TMPFS_MAGIC)) |
bb_error_msg_and_die("/init is not a regular file"); |
90 |
|| (getpid() != 1) |
} |
91 |
|
statfs("/", &stfs); // this never fails |
92 |
|
if ((unsigned)stfs.f_type != RAMFS_MAGIC |
93 |
|
&& (unsigned)stfs.f_type != TMPFS_MAGIC |
94 |
) { |
) { |
95 |
bb_error_msg_and_die("not rootfs"); |
bb_error_msg_and_die("root filesystem is not ramfs/tmpfs"); |
96 |
} |
} |
97 |
|
|
98 |
// Zap everything out of rootdev |
// Zap everything out of rootdev |
99 |
delete_contents("/", rootdev); |
delete_contents("/", rootdev); |
100 |
|
|
101 |
// Overmount / with newdir and chroot into it. The chdir is needed to |
// Overmount / with newdir and chroot into it |
102 |
// recalculate "." and ".." links. |
if (mount(".", "/", NULL, MS_MOVE, NULL)) { |
103 |
if (mount(".", "/", NULL, MS_MOVE, NULL)) |
// For example, fails when newroot is not a mountpoint |
104 |
bb_error_msg_and_die("error moving root"); |
bb_perror_msg_and_die("error moving root"); |
105 |
|
} |
106 |
xchroot("."); |
xchroot("."); |
107 |
|
// The chdir is needed to recalculate "." and ".." links |
108 |
xchdir("/"); |
xchdir("/"); |
109 |
|
|
110 |
// If a new console specified, redirect stdin/stdout/stderr to that. |
// If a new console specified, redirect stdin/stdout/stderr to it |
111 |
if (console) { |
if (console) { |
112 |
close(0); |
close(0); |
113 |
xopen(console, O_RDWR); |
xopen(console, O_RDWR); |
115 |
xdup2(0, 2); |
xdup2(0, 2); |
116 |
} |
} |
117 |
|
|
118 |
// Exec real init. (This is why we must be pid 1.) |
// Exec real init |
119 |
execv(argv[0], argv); |
execv(argv[0], argv); |
120 |
bb_perror_msg_and_die("bad init %s", argv[0]); |
bb_perror_msg_and_die("can't execute '%s'", argv[0]); |
121 |
} |
} |
122 |
|
|
123 |
|
/* |
124 |
|
From: Rob Landley <rob@landley.net> |
125 |
|
Date: Tue, Jun 16, 2009 at 7:47 PM |
126 |
|
Subject: Re: switch_root... |
127 |
|
|
128 |
|
... |
129 |
|
... |
130 |
|
... |
131 |
|
|
132 |
|
If you're _not_ running out of init_ramfs (if for example you're using initrd |
133 |
|
instead), you probably shouldn't use switch_root because it's the wrong tool. |
134 |
|
|
135 |
|
Basically what the sucker does is something like the following shell script: |
136 |
|
|
137 |
|
find / -xdev | xargs rm -rf |
138 |
|
cd "$1" |
139 |
|
shift |
140 |
|
mount --move . / |
141 |
|
exec chroot . "$@" |
142 |
|
|
143 |
|
There are a couple reasons that won't work as a shell script: |
144 |
|
|
145 |
|
1) If you delete the commands out of your $PATH, your shell scripts can't run |
146 |
|
more commands, but you can't start using dynamically linked _new_ commands |
147 |
|
until after you do the chroot because the path to the dynamic linker is wrong. |
148 |
|
So there's a step that needs to be sort of atomic but can't be as a shell |
149 |
|
script. (You can work around this with static linking or very carefully laid |
150 |
|
out paths and sequencing, but it's brittle, ugly, and non-obvious.) |
151 |
|
|
152 |
|
2) The "find | rm" bit will actually delete everything because the mount points |
153 |
|
still show up (even if their contents don't), and rm -rf will then happily zap |
154 |
|
that. So the first line is an oversimplification of what you need to do _not_ |
155 |
|
to descend into other filesystems and delete their contents. |
156 |
|
|
157 |
|
The reason we do this is to free up memory, by the way. Since initramfs is a |
158 |
|
ramfs, deleting its contents frees up the memory it uses. (We leave it with |
159 |
|
one remaining dentry for the new mount point, but that's ok.) |
160 |
|
|
161 |
|
Note that you cannot ever umount rootfs, for approximately the same reason you |
162 |
|
can't kill PID 1. The kernel tracks mount points as a doubly linked list, and |
163 |
|
the pointer to the start/end of that list always points to an entry that's |
164 |
|
known to be there (rootfs), so it never has to worry about moving that pointer |
165 |
|
and it never has to worry about the list being empty. (Back around 2.6.13 |
166 |
|
there _was_ a bug that let you umount rootfs, and the system locked hard the |
167 |
|
instant you did so endlessly looping to find the end of the mount list and |
168 |
|
never stopping. They fixed it.) |
169 |
|
|
170 |
|
Oh, and the reason we mount --move _and_ do the chroot is due to the way "/" |
171 |
|
works. Each process has two special symlinks, ".", and "/". Each of them |
172 |
|
points to the dentry of a directory, and gives you a location paths can start |
173 |
|
from. (Historically ".." was also special, because you could enter a |
174 |
|
directory via a symlink so backing out to the directory you came from doesn't |
175 |
|
necessarily mean the one physically above where "." points to. These days I |
176 |
|
think it's just handed off to the filesystem.) |
177 |
|
|
178 |
|
Anyway, path resolution starts with "." or "/" (although the "./" at the start |
179 |
|
of the path may be implicit), meaning it's relative to one of those two |
180 |
|
directories. Your current directory, and your current root directory. The |
181 |
|
chdir() syscall changes where "." points to, and the chroot() syscall changes |
182 |
|
where "/" points to. (Again, both are per-process which is why chroot only |
183 |
|
affects your current process and its child processes.) |
184 |
|
|
185 |
|
Note that chroot() does _not_ change where "." points to, and back before they |
186 |
|
put crazy security checks into the kernel your current directory could be |
187 |
|
somewhere you could no longer access after the chroot. (The command line |
188 |
|
chroot does a cd as well, the chroot _syscall_ is what I'm talking about.) |
189 |
|
|
190 |
|
The reason mounting something new over / has no obvious effect is the same |
191 |
|
reason mounting something over your current directory has no obvious effect: |
192 |
|
the . and / links aren't recalculated after a mount, so they still point to |
193 |
|
the same dentry they did before, even if that dentry is no longer accessible |
194 |
|
by other means. Note that "cd ." is a NOP, and "chroot /" is a nop; both look |
195 |
|
up the cached dentry and set it right back. They don't re-parse any paths, |
196 |
|
because they're what all paths your process uses would be relative to. |
197 |
|
|
198 |
|
That's why the careful sequencing above: we cd into the new mount point before |
199 |
|
we do the mount --move. Moving the mount point would otherwise make it |
200 |
|
totally inaccessible to us because cd-ing to the old path wouldn't give it to |
201 |
|
us anymore, and cd "/" just gives us the cached dentry from when the process |
202 |
|
was created (in this case the old initramfs one). But the "." symlink gives |
203 |
|
us the dentry of the filesystem we just moved, so we can then "chroot ." to |
204 |
|
copy that dentry to "/" and get the new filesystem. If we _didn't_ save that |
205 |
|
dentry in "." we couldn't get it back after the mount --move. |
206 |
|
|
207 |
|
(Yes, this is all screwy and I had to email questions to Linus Torvalds to get |
208 |
|
it straight myself. I keep meaning to write up a "how mount actually works" |
209 |
|
document someday...) |
210 |
|
*/ |