Contents of /trunk/mage/usr/lib/mage/compressdoc
Parent Directory | Revision Log
Revision 24 -
(show annotations)
(download)
Wed Jan 5 05:08:01 2005 UTC (19 years, 8 months ago) by niro
File size: 14613 byte(s)
Wed Jan 5 05:08:01 2005 UTC (19 years, 8 months ago) by niro
File size: 14613 byte(s)
mage-0.3.6-r8 import
1 | #!/bin/bash |
2 | # VERSION: 20040320.0026 |
3 | # |
4 | # Compress (with bzip2 or gzip) all man pages in a hierarchy and |
5 | # update symlinks - By Marc Heerdink <marc @ koelkast.net> |
6 | # Modified to be able to gzip or bzip2 files as an option and to deal |
7 | # with all symlinks properly by Mark Hymers <markh @ linuxfromscratch.org> |
8 | # |
9 | # Modified 20030930 by Yann E. Morin <yann.morin.1998 @ anciens.enib.fr> |
10 | # to accept compression/decompression, to correctly handle hard-links, |
11 | # to allow for changing hard-links into soft- ones, to specify the |
12 | # compression level, to parse the man.conf for all occurrences of MANPATH, |
13 | # to allow for a backup, to allow to keep the newest version of a page. |
14 | # Modified 20040330 by Tushar Teredesai to replace $0 by the name of the script. |
15 | # (Note: It is assumed that the script is in the user's PATH) |
16 | # |
17 | # TODO: |
18 | # - choose a default compress method to be based on the available |
19 | # tool : gzip or bzip2; |
20 | # - offer an option to automagically choose the best compression method |
21 | # on a per page basis (eg. check which of gzip/bzip2/whatever is the |
22 | # most effective, page per page); |
23 | # - when a MANPATH env var exists, use this instead of /etc/man.conf |
24 | # (useful for users to (de)compress their man pages); |
25 | # - offer an option to restore a previous backup; |
26 | # - add other compression engines (compress, zip, etc?). Needed? |
27 | |
28 | # version: 0.3.6-r8 |
29 | |
# Print the usage text through a pager.
#   $1  (optional) an unrecognised option; when non-empty it is reported
#       on a line of its own before the usage text is paged.
# Reads the globals MY_NAME (script name shown in the usage line) and
# MAN_CONF (directory holding man.conf); both are expanded inside the
# here-document, which is why its delimiter is left unquoted.
# The pager is taken from $PAGER, falling back to `less' when unset.
function help ()
{
  if [ -n "$1" ]; then
    echo "Unknown option : $1"
  fi
  {
    echo "Usage: $MY_NAME <comp_method> [options] [dirs]"
    cat << EOT
Where comp_method is one of :
  --gzip, --gz, -g
  --bzip2, --bz2, -b
                Compress using gzip or bzip2.

  --decompress, -d
                Decompress the man pages.

  --backup      Specify a .tar backup shall be done for every directories.
                In case a backup already exists, it is saved as .tar.old prior
                to making the new backup. If a .tar.old backup exists, it is
                removed prior to saving the backup.
                In backup mode, no other action is performed.

And where options are :
  -1 to -9, --fast, --best
                The compression level, as accepted by gzip and bzip2. When not
                specified, uses the default compression level for the given
                method (-6 for gzip, and -9 for bzip2). Not used when in backup
                or decompress modes.

  --force, -F   Force (re-)compression, even if the previous one was the same
                method. Useful when changing the compression ratio. By default,
                a page will not be re-compressed if it ends with the same suffix
                as the method adds (.bz2 for bzip2, .gz for gzip).

  --soft, -S    Change hard-links into soft-links. Use with _caution_ as the
                first encountered file will be used as a reference. Not used
                when in backup mode.

  --hard, -H    Change soft-links into hard-links. Not used when in backup mode.

  --conf=dir, --conf dir
                Specify the location of man.conf. Defaults to /etc.

  --verbose, -v Verbose mode, print the name of the directory being processed.
                Double the flag to turn it even more verbose, and to print the
                name of the file being processed.

  --fake, -f    Fakes it. Print the actual parameters ${MY_NAME} will use.

  dirs          A list of space-separated _absolute_ pathname to the man
                directories.
                When empty, and only then, parse ${MAN_CONF}/man.conf for all
                occurrences of MANPATH.

Note about compression
  There has been a discussion on blfs-support about compression ratios of
  both gzip and bzip2 on man pages, taking into account the hosting fs,
  the architecture, etc... On the overall, the conclusion was that gzip
  was more efficient on 'small' files, and bzip2 on 'big' files, small and
  big being very dependent on the content of the files.

  See the original post from Mickael A. Peters, titled "Bootable Utility CD",
  and dated 20030409.1816(+0200), and subsequent posts:
  http://linuxfromscratch.org/pipermail/blfs-support/2003-April/038817.html

  On my system (x86, ext3), man pages were 35564kiB before compression. gzip -9
  compressed them down to 20372kiB (57.28%), bzip2 -9 got down to 19812kiB
  (55.71%). That is a 1.57% gain in space. YMMV.

  What was not taken into consideration was the decompression speed. But does
  it make sense to? You gain fast access with uncompressed man pages, or you
  gain space at the expense of a slight overhead in time. Well, my P4-2.5GHz
  does not even let me notice this... :-)
EOT
  } | ${PAGER:-less}   # deliberately unquoted so "less -R"-style values split
}
106 | |
# Make sure a man page exists in only one of its three possible forms
# (uncompressed, gzip'd, bzip2'd), keeping the most recently modified one.
#   $1  the directory in which the file resides
#   $2  the file name of the man page (with or without .gz/.bz2 suffix)
# Returns 0 (true) if $2 is the most recent form and must be processed by
# the caller, and 1 (false) if it is not (in which case it has been deleted).
# All working variables are local so that the caller's DIR and loop index
# are never clobbered.
function check_unique ()
{
  # NB. When there are hard-links to this file, these are
  #     _not_ deleted. In fact, if there are hard-links, they
  #     all have the same date/time, thus making them ready
  #     for deletion later on.
  local DIR BASENAME GZ_FILE BZ_FILE LATEST i

  # Build the list of all man pages with the same name
  DIR=$1
  BASENAME=$(basename -- "${2}" .bz2)
  BASENAME=$(basename -- "${BASENAME}" .gz)
  GZ_FILE="$BASENAME".gz
  BZ_FILE="$BASENAME".bz2

  # Look for the most recent one. `&&' (not `;') so that a failed cd never
  # makes ls inspect whatever directory we happen to be in.
  LATEST=$(cd "$DIR" && ls -1rt -- "${BASENAME}" "${GZ_FILE}" "${BZ_FILE}" 2>/dev/null | tail -n 1)

  # Nothing could be listed: do not delete anything on a mere guess.
  [ -n "$LATEST" ] || return 1

  # Keep the most recent one, remove every other form
  for i in "${BASENAME}" "${GZ_FILE}" "${BZ_FILE}"; do
    [ "$LATEST" != "$i" ] && rm -f -- "$DIR"/"$i"
  done

  # In case the specified file was the latest, return 0
  [ "$LATEST" = "$2" ] && return 0
  # If the file was not the latest, return 1
  return 1
}
138 | |
# Name of the script (used in the usage text and to re-invoke ourselves
# when recursing into sub-directories).
MY_NAME=$(basename -- "$0")

# OK, parse the command-line for arguments, and initialize to some sensible
# state, that is : don't change links state, parse /etc/man.conf, be most
# silent, search man.conf in /etc, and don't force (re-)compression.
COMP_METHOD=
COMP_SUF=
COMP_LVL=
FORCE_OPT=
LN_OPT=
MAN_DIR=
VERBOSE_LVL=0
BACKUP=no
FAKE=no
MAN_CONF=/etc
# Parsing stops at the first empty argument (or when none are left).
while [ -n "$1" ]; do
  case $1 in
    --gzip|--gz|-g)
      COMP_SUF=.gz
      COMP_METHOD=$1
      shift
      ;;
    --bzip2|--bz2|-b)
      COMP_SUF=.bz2
      COMP_METHOD=$1
      shift
      ;;
    --decompress|-d)
      COMP_SUF=
      COMP_LVL=
      COMP_METHOD=$1
      shift
      ;;
    -[1-9]|--fast|--best)
      COMP_LVL=$1
      shift
      ;;
    --force|-F)
      FORCE_OPT=-F
      shift
      ;;
    --soft|-S)
      LN_OPT=-S
      shift
      ;;
    --hard|-H)
      LN_OPT=-H
      shift
      ;;
    --conf=*)
      # Everything after the first '=' is the directory
      MAN_CONF=${1#--conf=}
      shift
      ;;
    --conf)
      # `shift 2' with a single remaining argument fails without shifting,
      # which would make this loop spin forever: check first.
      if [ $# -lt 2 ]; then
        echo "Option $1 requires a directory argument"
        exit 1
      fi
      MAN_CONF="$2"
      shift 2
      ;;
    --verbose|-v)
      let VERBOSE_LVL++
      shift
      ;;
    --backup)
      BACKUP=yes
      shift
      ;;
    --fake|-f)
      FAKE=yes
      shift
      ;;
    --help|-h)
      help
      exit 0
      ;;
    /*)
      # Absolute path: one more directory to process
      MAN_DIR="${MAN_DIR} ${1}"
      shift
      ;;
    -*)
      help "$1"
      exit 1
      ;;
    *)
      echo "\"$1\" is not an absolute path name"
      exit 1
      ;;
  esac
done
227 | |
# Redirections: choose where progress messages go, and which verbosity
# flags are handed down to recursive invocations.
#   DEST_FD0  receives per-directory messages
#   DEST_FD1  receives per-file messages
# Start from the chattiest setting (level 2 and above), then tone it down
# for levels 0 and 1. The level is compared as a string on purpose.
DEST_FD0=/dev/stdout
DEST_FD1=/dev/stdout
VERBOSE_OPT="-v -v"
if [ "$VERBOSE_LVL" = "0" ]; then
  # 0, be silent
  DEST_FD0=/dev/null
  DEST_FD1=/dev/null
  VERBOSE_OPT=
elif [ "$VERBOSE_LVL" = "1" ]; then
  # 1, be a bit verbose: directories only
  DEST_FD1=/dev/null
  VERBOSE_OPT=-v
fi
249 | |
# When no directory was given on the command line, ask man for its search
# path: split it on ':', normalise each entry through dirname (so that
# "dir" and "dir/" collapse into one), de-duplicate, and join the result
# with spaces.
# Note: on my machine, 'man --path' gives /usr/share/man twice, once with a
# trailing '/', once without.
if [ -z "$MAN_DIR" ]; then
  MAN_DIR=$(
    man --path -C "$MAN_CONF"/man.conf \
      | tr ':' '\n' \
      | while read entry; do dirname "$entry"/.; done \
      | sort -u \
      | while read unique_dir; do echo -n "$unique_dir "; done
  )
fi

# If no MANPATH in ${MAN_CONF}/man.conf, abort as well
[ -n "$MAN_DIR" ] || {
  echo "No directory specified, and no directory found with \`man --path'"
  exit 1
}
264 | |
# Fake mode: only report the parameters that would be used, then exit
# successfully without touching any file.
if [ "$FAKE" != "no" ]; then
  echo "Actual parameters used:"
  echo -n "Compression.......: "
  # The patterns below must mirror those accepted by the option parser.
  case $COMP_METHOD in
    --bzip2|--bz2|-b) echo -n "bzip2";;
    --gzip|--gz|-g) echo -n "gzip";;
    --decompress|-d) echo -n "decompressing";;
    *) echo -n "unknown";;
  esac
  echo " ($COMP_METHOD)"
  echo "Compression level.: $COMP_LVL"
  echo "Compression suffix: $COMP_SUF"
  echo -n "Force compression.: "
  [ "foo$FORCE_OPT" = "foo-F" ] && echo "yes" || echo "no"
  echo "man.conf is.......: ${MAN_CONF}/man.conf"
  echo -n "Hard-links........: "
  [ "foo$LN_OPT" = "foo-S" ] && echo "convert to soft-links" || echo "leave as is"
  echo -n "Soft-links........: "
  [ "foo$LN_OPT" = "foo-H" ] && echo "convert to hard-links" || echo "leave as is"
  echo "Backup............: $BACKUP"
  echo "Faking (yes!).....: $FAKE"
  echo "Directories.......: $MAN_DIR"
  echo "Verbosity level...: $VERBOSE_LVL"
  exit 0
fi
291 | |
# If no method was specified (and we are not merely backing up), print help
if [ -z "${COMP_METHOD}" ] && [ "${BACKUP}" = "no" ]; then
  help
  exit 1
fi

# In backup mode, do the backup solely: for every directory, create
# <name>.tar next to it, rotating an existing archive to <name>.tar.old.
if [ "$BACKUP" = "yes" ]; then
  for DIR in $MAN_DIR; do
    # Abort rather than rotate/create archives in the wrong place
    cd "${DIR}/.." || exit 1
    DIR_NAME=$(basename -- "${DIR}")
    echo "Backing up $DIR..." > "$DEST_FD0"
    [ -f "${DIR_NAME}.tar.old" ] && rm -f "${DIR_NAME}.tar.old"
    [ -f "${DIR_NAME}.tar" ] && mv "${DIR_NAME}.tar" "${DIR_NAME}.tar.old"
    tar cfv "${DIR_NAME}.tar" "${DIR_NAME}" > "$DEST_FD1"
  done
  exit 0
fi
310 | |
# Main processing loop.
# I know MAN_DIR has only absolute path names
# I need to take into account the localized man, so I'm going recursive:
# sub-directories are handled by re-invoking this very script on them.
for DIR in $MAN_DIR; do
  MEM_DIR=$(pwd)
  # Never go on if we could not enter the directory: everything below works
  # on relative names and would otherwise hit the current directory instead.
  cd "$DIR" || exit 1
  for FILE in *; do
    # Fixes the case where the directory is empty
    if [ "foo$FILE" = "foo*" ]; then continue; fi

    # Fixes the case when hard-links see their compression scheme change
    # (from not compressed to compressed, or from bz2 to gz, or from gz to bz2)
    # Also fixes the case when multiple version of the page are present, which
    # are either compressed or not.
    # (i.e. the entry vanished since the glob was expanded)
    if [ ! -L "$FILE" ] && [ ! -e "$FILE" ]; then continue; fi

    # Do not compress whatis files
    if [ "$FILE" = "whatis" ]; then continue; fi

    if [ -d "$FILE" ]; then
      cd "${MEM_DIR}"  # Go back to where we ran "$0", in case "$0"=="./compressdoc" ...
      # We are going recursive to that directory
      echo "-> Entering ${DIR}/${FILE}..." > "$DEST_FD0"
      # I need not pass --conf, as I specify the directory to work on
      # But I need exit in case of error
      "$MY_NAME" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} ${VERBOSE_OPT} ${FORCE_OPT} "${DIR}/${FILE}" || exit 1
      echo "<- Leaving ${DIR}/${FILE}." > "$DEST_FD1"
      cd "$DIR" || exit 1  # Needed for the next iteration of the loop

    else # !dir
      if ! check_unique "$DIR" "$FILE"; then continue; fi

      # Check if the file is already compressed with the specified method
      BASE_FILE=$(basename -- "$FILE" .gz)
      BASE_FILE=$(basename -- "$BASE_FILE" .bz2)
      if [ "${FILE}" = "${BASE_FILE}${COMP_SUF}" ] && [ "foo${FORCE_OPT}" = "foo" ]; then continue; fi

      # If we have a symlink
      if [ -h "$FILE" ]; then
        case "$FILE" in
          *.bz2)
            EXT=bz2 ;;
          *.gz)
            EXT=gz ;;
          *)
            EXT=none ;;
        esac

        # Read the target straight from the link instead of parsing `ls -l'
        LINK=$(readlink "$FILE")
        if [ ! "$EXT" = "none" ]; then
          # Strip the literal ".$EXT" suffix from both target and link name
          LINK=${LINK%".$EXT"}
          NEWNAME=${FILE%".$EXT"}
          mv "$FILE" "$NEWNAME"
          FILE="$NEWNAME"
        fi

        if [ "$LN_OPT" = "-H" ]; then
          # Change this soft-link into a hard- one
          rm -f "$FILE" && ln "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
          chmod --reference "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
        else
          # Keep this soft-link a soft- one.
          rm -f "$FILE" && ln -s "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
        fi
        echo "Relinked $FILE" > "$DEST_FD1"

      # else if we have a plain file
      elif [ -f "$FILE" ]; then
        # Take care of hard-links: build the list of files hard-linked
        # to the one we are {de,}compressing.
        # NB. This is not optimum as the file will eventually be compressed
        # as many times it has hard-links. But for now, that's the safe way.
        # NB. HLINKS is a whitespace-separated list, so hard-link names
        # containing whitespace are not supported.
        inode=$(ls -li "$FILE" | awk '{print $1}')
        HLINKS=$(find . \! -name "$FILE" -inum $inode)

        if [ -n "$HLINKS" ]; then
          # We have hard-links! Remove them now.
          for i in $HLINKS; do rm -f "$i"; done
        fi

        # Now take care of the file that has no hard-link
        # We do decompress first to re-compress with the selected
        # compression ratio later on...
        case "$FILE" in
          *.bz2)
            bunzip2 "$FILE"
            FILE=$(basename -- "$FILE" .bz2)
          ;;
          *.gz)
            gunzip "$FILE"
            FILE=$(basename -- "$FILE" .gz)
          ;;
        esac

        # Compress the file with the given compression ratio, if needed
        case $COMP_SUF in
          *bz2)
            bzip2 ${COMP_LVL} "$FILE" && chmod 644 "${FILE}${COMP_SUF}"
            echo "Compressed $FILE" > "$DEST_FD1"
            ;;
          *gz)
            gzip ${COMP_LVL} "$FILE" && chmod 644 "${FILE}${COMP_SUF}"
            echo "Compressed $FILE" > "$DEST_FD1"
            ;;
          *)
            echo "Uncompressed $FILE" > "$DEST_FD1"
            ;;
        esac

        # If the file had hard-links, recreate those (either hard or soft)
        if [ -n "$HLINKS" ]; then
          for i in $HLINKS; do
            # Drop a literal .gz, then a literal .bz2, suffix from the old name
            NEWFILE=${i%.gz}
            NEWFILE=${NEWFILE%.bz2}
            if [ "$LN_OPT" = "-S" ]; then
              # Make this hard-link a soft- one
              ln -s "${FILE}$COMP_SUF" "${NEWFILE}$COMP_SUF"
            else
              # Keep the hard-link a hard- one
              ln "${FILE}$COMP_SUF" "${NEWFILE}$COMP_SUF"
            fi
            chmod 644 "${NEWFILE}$COMP_SUF" # Really work only for hard-links. Harmless for soft-links
          done
        fi

      else
        # There is a problem when we get neither a symlink nor a plain file
        # Obviously, we shall never ever come here... :-(
        echo "Whaooo... \"${DIR}/${FILE}\" is neither a symlink nor a plain file. Please check:"
        ls -l "${DIR}/${FILE}"
        exit 1
      fi
    fi
  done # for FILE
done # for DIR
Properties
Name | Value |
---|---|
svn:executable | * |