Contents of /trunk/mage/usr/lib/mage/compressdoc
Parent Directory | Revision Log
Revision 1585 -
(show annotations)
(download)
Wed Dec 28 12:25:54 2011 UTC (12 years, 9 months ago) by niro
File size: 16972 byte(s)
Wed Dec 28 12:25:54 2011 UTC (12 years, 9 months ago) by niro
File size: 16972 byte(s)
-included several upstream fixes and prepare for man-db support
1 | #!/bin/bash |
2 | # VERSION: 20080421.1623 |
3 | # $LastChangedBy: dnicholson $ |
4 | # $Date: 2008-04-21 16:27:43 -0700 (Mon, 21 Apr 2008) $ |
5 | # |
6 | # Compress (with bzip2 or gzip) all man pages in a hierarchy and |
7 | # update symlinks - By Marc Heerdink <marc @ koelkast.net> |
8 | # |
9 | # Modified to be able to gzip or bzip2 files as an option and to deal |
10 | # with all symlinks properly by Mark Hymers <markh @ linuxfromscratch.org> |
11 | # |
12 | # Modified 20030930 by Yann E. Morin <yann.morin.1998 @ anciens.enib.fr> |
13 | # to accept compression/decompression, to correctly handle hard-links, |
14 | # to allow for changing hard-links into soft- ones, to specify the |
15 | # compression level, to parse the man.conf for all occurrences of MANPATH, |
16 | # to allow for a backup, to allow to keep the newest version of a page. |
17 | # |
18 | # Modified 20040330 by Tushar Teredesai to replace $0 by the name of the |
19 | # script. |
20 | # (Note: It is assumed that the script is in the user's PATH) |
21 | # |
22 | # Modified 20050112 by Randy McMurchy to shorten line lengths and |
23 | # correct grammar errors. |
24 | # |
25 | # Modified 20060128 by Alexander E. Patrakov for compatibility with Man-DB. |
26 | # |
27 | # Modified 20060311 by Archaic to use Man-DB manpath utility which is a |
28 | # replacement for man --path from Man. |
29 | # |
30 | # Modified 20080421 by Dan Nicholson to properly execute the correct |
31 | # compressdoc when working recursively. This means the same compressdoc |
32 | # will be used whether a full path was given or it was resolved from PATH. |
33 | # |
34 | # Modified 20080421 by Dan Nicholson to be more robust with directories |
35 | # that don't exist or don't have sufficient permissions. |
36 | # |
37 | # Modified 20080421 by Lars Bamberger to (sort of) automatically choose |
38 | # a compression method based on the size of the manpage. A couple bug |
39 | # fixes were added by Dan Nicholson. |
40 | # |
41 | # Modified 20080421 by Dan Nicholson to suppress warnings from manpath |
42 | # since these are emitted when $MANPATH is set. Removed the TODO for |
43 | # using the $MANPATH variable since manpath(1) handles this already. |
44 | # |
45 | # TODO: |
46 | # - choose a default compress method to be based on the available |
47 | # tool : gzip or bzip2; |
48 | # - offer an option to restore a previous backup; |
49 | # - add other compression engines (compress, zip, etc?). Needed? |
50 | |
# Print the usage text, optionally preceded by an "Unknown option" notice.
#   $1  (optional) the command-line option that was not recognised
# Reads the globals MY_NAME and MAN_CONF (both are expanded in the text).
# The text is paged through less only when stdout is a terminal and less
# is installed; otherwise it is written straight to stdout, so that the
# help still works in pipes, in redirections and on minimal systems.
function help ()
{
  local pager=cat

  if [ -n "$1" ]; then
    # A diagnostic belongs on stderr, where the pager cannot swallow it.
    echo "Unknown option : $1" >&2
  fi

  if [ -t 1 ] && command -v less > /dev/null 2>&1; then
    pager=less
  fi

  {
    echo "Usage: $MY_NAME <comp_method> [options] [dirs]"
    cat << EOT
Where comp_method is one of :
  --gzip, --gz, -g
  --bzip2, --bz2, -b
                Compress using gzip or bzip2.
  --automatic
                Compress using either gzip or bzip2, depending on the
                size of the file to be compressed. Files larger than 5
                kB are bzipped, files larger than 1 kB are gzipped and
                files smaller than 1 kB are not compressed.

  --decompress, -d
                Decompress the man pages.

  --backup      Specify a .tar backup shall be done for all directories.
                In case a backup already exists, it is saved as .tar.old
                prior to making the new backup. If a .tar.old backup
                exists, it is removed prior to saving the backup.
                In backup mode, no other action is performed.

And where options are :
  -1 to -9, --fast, --best
                The compression level, as accepted by gzip and bzip2.
                When not specified, uses the default compression level
                for the given method (-6 for gzip, and -9 for bzip2).
                Not used when in backup or decompress modes.

  --force, -F   Force (re-)compression, even if the previous one was
                the same method. Useful when changing the compression
                ratio. By default, a page will not be re-compressed if
                it ends with the same suffix as the method adds
                (.bz2 for bzip2, .gz for gzip).

  --soft, -S    Change hard-links into soft-links. Use with _caution_
                as the first encountered file will be used as a
                reference. Not used when in backup mode.

  --hard, -H    Change soft-links into hard-links. Not used when in
                backup mode.

  --conf=dir, --conf dir
                Specify the location of man_db.conf. Defaults to /etc.

  --verbose, -v Verbose mode, print the name of the directory being
                processed. Double the flag to turn it even more verbose,
                and to print the name of the file being processed.

  --fake, -f    Fakes it. Print the actual parameters compressdoc will use.

  dirs          A list of space-separated _absolute_ pathnames to the
                man directories. When empty, and only then, use manpath
                to parse ${MAN_CONF}/man_db.conf for all valid occurrences
                of MANDATORY_MANPATH.

Note about compression:
  There has been a discussion on blfs-support about compression ratios of
  both gzip and bzip2 on man pages, taking into account the hosting fs,
  the architecture, etc... On the overall, the conclusion was that gzip
  was much more efficient on 'small' files, and bzip2 on 'big' files,
  small and big being very dependent on the content of the files.

  See the original post from Mickael A. Peters, titled
  "Bootable Utility CD", dated 20030409.1816(+0200), and subsequent posts:
  http://linuxfromscratch.org/pipermail/blfs-support/2003-April/038817.html

  On my system (x86, ext3), man pages were 35564KB before compression.
  gzip -9 compressed them down to 20372KB (57.28%), bzip2 -9 got down to
  19812KB (55.71%). That is a 1.57% gain in space. YMMV.

  What was not taken into consideration was the decompression speed. But
  does it make sense to? You gain fast access with uncompressed man
  pages, or you gain space at the expense of a slight overhead in time.
  Well, my P4-2.5GHz does not even let me notice this... :-)

EOT
  } | "$pager"
}
135 | |
# Make sure a man page exists in only one of its three possible forms
# (uncompressed, gzip'd, bzip2'd) by keeping the most recently modified
# one and deleting the others.
#   $1  the directory in which the file resides
#   $2  the file name of the man page (without directory part)
# Returns 0 (true) if the file is the latest and must be taken care of,
# and 1 (false) if the file is not the latest (and has therefore been
# deleted), or if no version of the page could be found at all.
function check_unique ()
{
  # NB. When there are hard-links to this file, these are
  # _not_ deleted. In fact, if there are hard-links, they
  # all have the same date/time, thus making them ready
  # for deletion later on.

  # Everything is local: the caller has its own DIR and loop variables.
  local dir=$1
  local base gz_file bz_file latest candidate

  # Build the list of all man pages with the same name.  Parameter
  # expansion is used instead of basename(1) so that a name starting
  # with '-' is never mistaken for an option.
  base=${2##*/}
  base=${base%.bz2}
  base=${base%.gz}
  [ -n "$base" ] || return 1
  gz_file=${base}.gz
  bz_file=${base}.bz2

  # Look for the most recent one.  -d keeps ls from listing the contents
  # of a directory that happens to share the base name; errors about the
  # versions that do not exist are expected and discarded.
  latest=$(cd "$dir" 2>/dev/null \
    && ls -1rtd -- "$base" "$gz_file" "$bz_file" 2>/dev/null | tail -n 1)

  # Nothing found (or the directory is inaccessible): do not delete
  # anything on the strength of an empty answer.
  [ -n "$latest" ] || return 1

  # Keep the most recent one, drop the others
  for candidate in "$base" "$gz_file" "$bz_file"; do
    if [ "$latest" != "$candidate" ]; then
      rm -f -- "$dir/$candidate"
    fi
  done

  # In case the specified file was the latest, return 0
  [ "$latest" = "$2" ] && return 0
  # If the file was not the latest, return 1
  return 1
}
169 | |
# Name of the script
MY_NAME=${0##*/}

# OK, parse the command-line for arguments, and initialize to some
# sensible state, that is: don't change links state, parse
# /etc/man_db.conf, be most silent, search man_db.conf in /etc, and don't
# force (re-)compression.
COMP_METHOD=
COMP_SUF=
COMP_LVL=
FORCE_OPT=
LN_OPT=
MAN_DIR=
VERBOSE_LVL=0
BACKUP=no
FAKE=no
MAN_CONF=/etc

# Walk the given arguments and record them in the globals initialised
# above.  Absolute path names are appended to the space-separated list
# MAN_DIR.  Exits 0 after --help, and 1 on an unknown option, a relative
# path name or a --conf that lacks its argument.
#   $@  the command-line arguments of the script
parse_args ()
{
  while [ $# -gt 0 ]; do
    case "$1" in
      "")
        # Skip an empty argument instead of silently dropping
        # everything that follows it.
        ;;
      --gzip|--gz|-g)
        COMP_SUF=.gz
        COMP_METHOD=$1
        ;;
      --bzip2|--bz2|-b)
        COMP_SUF=.bz2
        COMP_METHOD=$1
        ;;
      --automatic)
        # The real suffix is chosen per file, from its size.
        COMP_SUF=TBD
        COMP_METHOD=$1
        ;;
      --decompress|-d)
        COMP_SUF=
        COMP_LVL=
        COMP_METHOD=$1
        ;;
      -[1-9]|--fast|--best)
        COMP_LVL=$1
        ;;
      --force|-F)
        FORCE_OPT=-F
        ;;
      --soft|-S)
        LN_OPT=-S
        ;;
      --hard|-H)
        LN_OPT=-H
        ;;
      --conf=*)
        MAN_CONF=${1#--conf=}
        ;;
      --conf)
        # 'shift 2' with a single argument left fails without shifting,
        # which used to turn this loop into an endless one.
        if [ $# -lt 2 ]; then
          echo "Option --conf requires a directory argument" >&2
          exit 1
        fi
        MAN_CONF=$2
        shift
        ;;
      --verbose|-v)
        VERBOSE_LVL=$((VERBOSE_LVL + 1))
        ;;
      --backup)
        BACKUP=yes
        ;;
      --fake|-f)
        FAKE=yes
        ;;
      --help|-h)
        help
        exit 0
        ;;
      /*)
        MAN_DIR="${MAN_DIR} ${1}"
        ;;
      -*)
        help "$1"
        exit 1
        ;;
      *)
        echo "\"$1\" is not an absolute path name"
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
264 | |
# Redirections
# DEST_FD0 receives the per-directory messages, DEST_FD1 the per-file
# ones, and VERBOSE_OPT carries the same verbosity to recursive calls.
# Start from the most verbose setting (level 2 and above), then tone it
# down for the two lower levels.
DEST_FD0=/dev/stdout
DEST_FD1=/dev/stdout
VERBOSE_OPT="-v -v"
if [ "$VERBOSE_LVL" = "1" ]; then
  # 1, be a bit verbose: directories only
  DEST_FD1=/dev/null
  VERBOSE_OPT=-v
elif [ "$VERBOSE_LVL" = "0" ]; then
  # 0, be silent
  DEST_FD0=/dev/null
  DEST_FD1=/dev/null
  VERBOSE_OPT=
fi
286 | |
# Print the directories reported by manpath for ${MAN_CONF}/man_db.conf
# as a single line, each one followed by a space.
# Note: on my machine, 'man --path' gives /usr/share/man twice, once
# with a trailing '/', once without.  'dirname <dir>/.' normalises both
# spellings so that 'sort -u' can drop the duplicate.
find_man_dirs ()
{
  local entry

  manpath -q -C "$MAN_CONF"/man_db.conf \
    | tr ':' '\n' \
    | while IFS= read -r entry; do
        # An empty component would turn into '/' and get the whole
        # file system processed: skip it.
        if [ -n "$entry" ]; then
          dirname "$entry"/.
        fi
      done \
    | sort -u \
    | tr '\n' ' '
}

# Ask manpath only when no directory was given on the command line.
if [ -z "$MAN_DIR" ]; then
  MAN_DIR=$(find_man_dirs)
fi
296 | |
# If no MANDATORY_MANPATH in ${MAN_CONF}/man_db.conf, abort as well:
# at this point an empty MAN_DIR means that neither the command line nor
# manpath produced a directory to work on.
[ -n "$MAN_DIR" ] || {
  echo "No directory specified, and no directory found with \`manpath'"
  exit 1
}

# Check that the specified directories actually exist and are readable;
# the first one that fails either test ends the script.
for DIR in $MAN_DIR; do
  if [ -d "$DIR" ] && [ -r "$DIR" ]; then
    continue
  fi
  echo "Directory '$DIR' does not exist or is not readable"
  exit 1
done
310 | |
# Fake?
# In fake mode, only report the settings that would be used, then stop.
if [ "$FAKE" != "no" ]; then
  # Human-readable name of the selected method
  case $COMP_METHOD in
    --bzip2|--bz2|-b) fake_method="bzip2" ;;
    --gzip|--gz|-g)   fake_method="gzip" ;;
    --automatic)      fake_method="compressing" ;;
    --decompress|-d)  fake_method="decompressing" ;;
    *)                fake_method="unknown" ;;
  esac

  if [ "foo$FORCE_OPT" = "foo-F" ]; then
    fake_force="yes"
  else
    fake_force="no"
  fi

  # What happens to each kind of link depends on LN_OPT only
  fake_hard="leave as is"
  fake_soft="leave as is"
  if [ "foo$LN_OPT" = "foo-S" ]; then
    fake_hard="convert to soft-links"
  fi
  if [ "foo$LN_OPT" = "foo-H" ]; then
    fake_soft="convert to hard-links"
  fi

  printf '%s\n' \
    "Actual parameters used:" \
    "Compression.......: ${fake_method} ($COMP_METHOD)" \
    "Compression level.: $COMP_LVL" \
    "Compression suffix: $COMP_SUF" \
    "Force compression.: ${fake_force}" \
    "man_db.conf is....: ${MAN_CONF}/man_db.conf" \
    "Hard-links........: ${fake_hard}" \
    "Soft-links........: ${fake_soft}" \
    "Backup............: $BACKUP" \
    "Faking (yes!).....: $FAKE" \
    "Directories.......: $MAN_DIR" \
    "Verbosity level...: $VERBOSE_LVL"
  exit 0
fi
340 | |
# If no method was specified, print help: carry on only when there is a
# compression method to apply, or when BACKUP is anything but "no".
[ -n "${COMP_METHOD}" ] || [ "${BACKUP}" != "no" ] || {
  help
  exit 1
}
346 | |
# Archive one man directory as <name>.tar in its parent directory.
# An existing <name>.tar is kept as <name>.tar.old, replacing any older
# <name>.tar.old.
#   $1  absolute path of the directory to back up
# Reads DEST_FD0 and DEST_FD1 for the progress messages.  Leaves the
# working directory set to the parent of $1.
# Returns 0 on success and 1 as soon as one step fails, so that a failed
# cd can never cause archives to be removed or created somewhere else.
backup_man_dir ()
{
  local dir=$1
  local parent name

  if ! cd "${dir}/.."; then
    echo "Cannot change to the parent directory of '$dir'" >&2
    return 1
  fi
  parent=$(pwd)
  if [ ! -w "$parent" ]; then
    echo "Directory '$parent' is not writable"
    return 1
  fi

  name=$(basename "$dir")
  echo "Backing up $dir..." > "$DEST_FD0"
  if [ -f "${name}.tar.old" ]; then
    rm -f "${name}.tar.old"
  fi
  if [ -f "${name}.tar" ]; then
    mv "${name}.tar" "${name}.tar.old" || return 1
  fi
  tar -cvf "${name}.tar" "${name}" > "$DEST_FD1" || return 1
}

# In backup mode, do the backup solely
if [ "$BACKUP" = "yes" ]; then
  for DIR in $MAN_DIR; do
    backup_man_dir "$DIR" || exit 1
  done
  exit 0
fi
364 | |
# The directory the script was started from.  A relative $0 can only be
# resolved from there, so it is recorded once, before any cd is done.
START_DIR=$(pwd)

# {De,}compress every man page found directly inside one directory and
# fix up the symlinks and hard-links that point at them.  Sub-directories
# are handled by running this very script on them.
#   $1  absolute path name of the directory to process
# Reads COMP_METHOD, COMP_LVL, FORCE_OPT, LN_OPT, VERBOSE_OPT, DEST_FD0,
# DEST_FD1 and START_DIR.  COMP_SUF is read and, with --automatic,
# rewritten for every file.  Calls check_unique.
# Leaves the working directory set to $1.  Ends the script (exit) when
# the directory cannot be written to or entered, when a recursive run
# fails, or when an entry is neither a symlink nor a plain file.
process_man_dir ()
{
  local dir=$1
  local FILE base_file ext link target newfile inode hl tool suf
  local -i size
  local -a hlinks

  if [ ! -w "$dir" ]; then
    echo "Directory '$dir' is not writable"
    exit 1
  fi
  cd "$dir" || exit 1

  for FILE in *; do
    # Fixes the case where the directory is empty
    if [ "foo$FILE" = "foo*" ]; then continue; fi

    # Fixes the case when hard-links see their compression scheme change
    # (from not compressed to compressed, or from bz2 to gz, or from gz
    # to bz2)
    # Also fixes the case when multiple versions of the page are present,
    # which are either compressed or not.
    if [ ! -L "$FILE" ] && [ ! -e "$FILE" ]; then continue; fi

    # Do not compress whatis files
    if [ "$FILE" = "whatis" ]; then continue; fi

    if [ -d "$FILE" ]; then
      # We are going recursive to that directory
      echo "-> Entering ${dir}/${FILE}..." > "$DEST_FD0"
      # I need not pass --conf, as I specify the directory to work on
      # But I need exit in case of error. We must change back to the
      # original directory so $0 is resolved correctly.
      # The option variables are left unquoted on purpose: empty ones
      # must vanish and VERBOSE_OPT may hold two flags.  No eval here,
      # it would split and glob the directory name a second time.
      (cd "$START_DIR" && "$0" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} \
        ${VERBOSE_OPT} ${FORCE_OPT} "${dir}/${FILE}") || exit $?
      echo "<- Leaving ${dir}/${FILE}." > "$DEST_FD1"
      continue
    fi

    if ! check_unique "$dir" "$FILE"; then continue; fi

    # With automatic compression, get the uncompressed file size of
    # the file (dereferencing symlinks), and choose an appropriate
    # compression method.
    if [ "$COMP_METHOD" = "--automatic" ]; then
      case "$FILE" in
        *.bz2) size=$(bzcat -- "$FILE" | wc -c) ;;
        *.gz)  size=$(zcat -- "$FILE" | wc -c) ;;
        *)     size=$(wc -c < "$FILE") ;;
      esac
      if (( size >= 5 * 1024 )); then
        COMP_SUF=.bz2
      elif (( size >= 1024 )); then
        COMP_SUF=.gz
      else
        COMP_SUF=
      fi
    fi

    # Check if the file is already compressed with the specified method
    base_file=${FILE%.gz}
    base_file=${base_file%.bz2}
    if [ "$FILE" = "${base_file}${COMP_SUF}" ] && [ -z "$FORCE_OPT" ]; then
      continue
    fi

    # If we have a symlink
    if [ -h "$FILE" ]; then
      case "$FILE" in
        *.bz2) ext=.bz2 ;;
        *.gz)  ext=.gz ;;
        *)     ext= ;;
      esac
      newfile=${FILE%"$ext"}

      # Read the target with readlink rather than from 'ls -l', and drop
      # whatever compression suffix it carries: the page it points at
      # gets (or already got) the suffix selected for this run.
      link=$(readlink -- "$FILE")
      link=${link%.gz}
      link=${link%.bz2}
      target=${link}${COMP_SUF}

      if rm -f -- "$FILE"; then
        if [ "$LN_OPT" = "-H" ] && [ -e "$target" ]; then
          # Change this soft-link into a hard- one
          ln -- "$target" "${newfile}${COMP_SUF}"
          chmod --reference "$target" "${newfile}${COMP_SUF}"
        else
          # Keep this soft-link a soft- one.  This is also the fallback
          # for -H when the target has not been converted yet: a hard-
          # link cannot be made to a file that does not exist, and the
          # link must not be lost.
          ln -s -- "$target" "${newfile}${COMP_SUF}"
        fi
      fi
      echo "Relinked $newfile" > "$DEST_FD1"

    # else if we have a plain file
    elif [ -f "$FILE" ]; then
      # Take care of hard-links: build the list of files hard-linked
      # to the one we are {de,}compressing.
      # NB. This is not optimum as the file will eventually be
      # compressed as many times it has hard-links. But for now,
      # that's the safe way.
      inode=$(ls -di -- "$FILE" | awk '{print $1}')
      hlinks=()
      while IFS= read -r -d '' hl; do
        # Leave out the file itself (and anything bearing its name)
        if [ "${hl##*/}" = "$FILE" ]; then continue; fi
        hlinks+=("$hl")
      done < <(find . -inum "$inode" -print0)

      # We have hard-links! Remove them now, as gzip and bzip2 refuse
      # to work on a file that has other links.
      for hl in "${hlinks[@]}"; do
        rm -f -- "$hl"
      done

      # Now take care of the file that has no hard-link
      # We do decompress first to re-compress with the selected
      # compression ratio later on...
      tool=
      case "$FILE" in
        *.bz2) tool=bunzip2 ;;
        *.gz)  tool=gunzip ;;
      esac
      if [ -n "$tool" ]; then
        if "$tool" -- "$FILE"; then
          FILE=${FILE%.*}
        else
          # Put the hard-links back and leave this page untouched
          echo "Failed to decompress '${dir}/${FILE}'" >&2
          for hl in "${hlinks[@]}"; do
            ln -- "$FILE" "$hl"
          done
          continue
        fi
      fi

      # Compress the file with the given compression ratio, if needed
      suf=$COMP_SUF
      case $COMP_SUF in
        *bz2) tool=bzip2 ;;
        *gz)  tool=gzip ;;
        *)    tool= ;;
      esac
      if [ -z "$tool" ]; then
        echo "Uncompressed $FILE" > "$DEST_FD1"
      elif "$tool" ${COMP_LVL} -- "$FILE"; then
        chmod 644 "${FILE}${suf}"
        echo "Compressed $FILE" > "$DEST_FD1"
      else
        # The page is still there, uncompressed: link to that one
        echo "Failed to compress '${dir}/${FILE}'" >&2
        suf=
      fi

      # If the file had hard-links, recreate those (either hard or soft)
      for hl in "${hlinks[@]}"; do
        newfile=${hl%.gz}
        newfile=${newfile%.bz2}
        if [ "$LN_OPT" = "-S" ]; then
          # Make this hard-link a soft- one
          ln -s -- "${FILE}${suf}" "${newfile}${suf}"
        else
          # Keep the hard-link a hard- one
          ln -- "${FILE}${suf}" "${newfile}${suf}"
        fi
        # Really work only for hard-links. Harmless for soft-links
        chmod 644 "${newfile}${suf}"
      done

    else
      # There is a problem when we get neither a symlink nor a plain
      # file. Obviously, we shall never ever come here... :-(
      echo -n "Whaooo... \"${dir}/${FILE}\" is neither a symlink "
      echo "nor a plain file. Please check:"
      ls -l "${dir}/${FILE}"
      exit 1
    fi
  done # for FILE
}

# I know MAN_DIR has only absolute path names
# I need to take into account the localized man, so I'm going recursive
for DIR in $MAN_DIR; do
  process_man_dir "$DIR"
done # for DIR
Properties
Name | Value |
---|---|
svn:executable | * |