Annotation of /trunk/mage/usr/lib/mage/compressdoc
Parent Directory | Revision Log
Revision 24 -
(hide annotations)
(download)
Wed Jan 5 05:08:01 2005 UTC (19 years, 8 months ago) by niro
File size: 14613 byte(s)
Wed Jan 5 05:08:01 2005 UTC (19 years, 8 months ago) by niro
File size: 14613 byte(s)
mage-0.3.6-r8 import
1 | niro | 24 | #!/bin/bash |
2 | # VERSION: 20040320.0026 | ||
3 | # | ||
4 | # Compress (with bzip2 or gzip) all man pages in a hierarchy and | ||
5 | # update symlinks - By Marc Heerdink <marc @ koelkast.net> | ||
6 | # Modified to be able to gzip or bzip2 files as an option and to deal | ||
7 | # with all symlinks properly by Mark Hymers <markh @ linuxfromscratch.org> | ||
8 | # | ||
9 | # Modified 20030930 by Yann E. Morin <yann.morin.1998 @ anciens.enib.fr> | ||
10 | # to accept compression/decompression, to correctly handle hard-links, | ||
11 | # to allow for changing hard-links into soft- ones, to specify the | ||
12 | # compression level, to parse the man.conf for all occurrences of MANPATH, | ||
13 | # to allow for a backup, to allow to keep the newest version of a page. | ||
14 | # Modified 20040330 by Tushar Teredesai to replace $0 by the name of the script. | ||
15 | # (Note: It is assumed that the script is in the user's PATH) | ||
16 | # | ||
17 | # TODO: | ||
18 | # - choose a default compress method to be based on the available | ||
19 | # tool : gzip or bzip2; | ||
20 | # - offer an option to automagically choose the best compression method | ||
21 | # on a per page basis (eg. check which of gzip/bzip2/whatever is the | ||
22 | # most effective, page per page); | ||
23 | # - when a MANPATH env var exists, use this instead of /etc/man.conf | ||
24 | # (useful for users to (de)compress their man pages); | ||
25 | # - offer an option to restore a previous backup; | ||
26 | # - add other compression engines (compress, zip, etc?). Needed? | ||
27 | |||
28 | # version: 0.3.6-r8 | ||
29 | |||
# Print the usage text, optionally preceded by an "unknown option" error.
#   $1  (optional) the option that was not recognised
# Reads the globals MY_NAME (script name shown in the usage text) and MAN_CONF
# (expanded inside the here-document, which is deliberately left unquoted).
# The error message is written to stderr so that it is neither paged nor mixed
# into the usage text. The usage text is piped through 'less' only when stdout
# is a terminal and 'less' is installed; otherwise it is written directly.
function help ()
{
  if [ -n "$1" ]; then
    echo "Unknown option : $1" >&2
  fi

  # Fall back to plain output when paging is impossible or pointless.
  local pager=less
  if [ ! -t 1 ] || ! command -v less >/dev/null 2>&1; then
    pager=cat
  fi

  {
    echo "Usage: $MY_NAME <comp_method> [options] [dirs]"
    cat << EOT
Where comp_method is one of :
--gzip, --gz, -g
--bzip2, --bz2, -b
Compress using gzip or bzip2.

--decompress, -d
Decompress the man pages.

--backup Specify a .tar backup shall be done for every directories.
In case a backup already exists, it is saved as .tar.old prior
to making the new backup. If an .tar.old backup exist, it is
removed prior to saving the backup.
In backup mode, no other action is performed.

And where options are :
-1 to -9, --fast, --best
The compression level, as accepted by gzip and bzip2. When not
specified, uses the default compression level for the given
method (-6 for gzip, and -9 for bzip2). Not used when in backup
or decompress modes.

--force, -F Force (re-)compression, even if the previous one was the same
method. Useful when changing the compression ratio. By default,
a page will not be re-compressed if it ends with the same suffix
as the method adds (.bz2 for bzip2, .gz for gzip).

--soft, -S Change hard-links into soft-links. Use with _caution_ as the
first encountered file will be used as a reference. Not used
when in backup mode.

--hard, -H Change soft-links into hard-links. Not used when in backup mode.

--conf=dir, --conf dir
Specify the location of man.conf. Defaults to /etc.

--verbose, -v Verbose mode, print the name of the directory being processed.
Double the flag to turn it even more verbose, and to print the
name of the file being processed.

--fake, -f Fakes it. Print the actual parameters $MY_NAME will use.

dirs A list of space-separated _absolute_ pathname to the man
directories.
When empty, and only then, parse ${MAN_CONF}/man.conf for all
occurrences of MANPATH.

Note about compression
There has been a discussion on blfs-support about compression ratios of
both gzip and bzip2 on man pages, taking into account the hosting fs,
the architecture, etc... On the overall, the conclusion was that gzip
was much efficient on 'small' files, and bzip2 on 'big' files, small and
big being very dependent on the content of the files.

See the original post from Mickael A. Peters, titled "Bootable Utility CD",
and dated 20030409.1816(+0200), and subsequent posts:
http://linuxfromscratch.org/pipermail/blfs-support/2003-April/038817.html

On my system (x86, ext3), man pages were 35564kiB before compression. gzip -9
compressed them down to 20372kiB (57.28%), bzip2 -9 got down to 19812kiB
(55.71%). That is a 1.57% gain in space. YMMV.

What was not taken into consideration was the decompression speed. But does
it make sense to? You gain fast access with uncompressed man pages, or you
gain space at the expense of a slight overhead in time. Well, my P4-2.5GHz
does not even let me notice this... :-)
EOT
  } | "$pager"
}
106 | |||
# Make sure only one variant (plain, .gz or .bz2) of a man page survives.
#   $1  the directory in which the file resides
#   $2  the file name of the man page (with or without .gz/.bz2 suffix)
# The most recently modified of the three possible variants is kept and the
# other two are removed.
# Returns 0 (true) if $2 is the variant that was kept and must be processed,
# and 1 (false) otherwise (in which case $2 has been deleted, or did not
# exist).
# All working variables are local so that the caller's own variables (notably
# its DIR loop variable) are never overwritten.
function check_unique ()
{
  # NB. When there are hard-links to this file, these are
  # _not_ deleted. In fact, if there are hard-links, they
  # all have the same date/time, thus making them ready
  # for deletion later on.
  local dir=$1
  local page gz_page bz_page latest candidate

  # Build the list of all man pages with the same name
  page=$(basename -- "${2}" .bz2)
  page=$(basename -- "${page}" .gz)
  gz_page="${page}.gz"
  bz_page="${page}.bz2"

  # Look for, and keep, the most recent one. 'ls -rt' lists oldest first, so
  # the last line is the newest; missing variants are silently ignored.
  latest=$(cd "$dir" && ls -1rt -- "${page}" "${gz_page}" "${bz_page}" 2>/dev/null | tail -n 1)
  for candidate in "${page}" "${gz_page}" "${bz_page}"; do
    [ "$latest" != "$candidate" ] && rm -f "$dir"/"$candidate"
  done

  # 0 when the specified file was the latest, 1 otherwise
  [ "$latest" = "$2" ]
}
138 | |||
# Name of the script (used in the usage text and for the recursive calls)
MY_NAME=$(basename -- "$0")

# Parse the command-line, starting from a sensible default state: don't
# change the state of links, be silent, look for man.conf in /etc, don't
# force (re-)compression, no backup, no fake run.
COMP_METHOD=
COMP_SUF=
COMP_LVL=
FORCE_OPT=
LN_OPT=
MAN_DIR=
VERBOSE_LVL=0
BACKUP=no
FAKE=no
MAN_CONF=/etc

# Loop on the argument count rather than on "$1" being non-empty, so that an
# empty argument is reported instead of silently ending the parsing.
while [ $# -gt 0 ]; do
  case "$1" in
    --gzip|--gz|-g)
      COMP_SUF=.gz
      COMP_METHOD=$1
      shift
      ;;
    --bzip2|--bz2|-b)
      COMP_SUF=.bz2
      COMP_METHOD=$1
      shift
      ;;
    --decompress|-d)
      COMP_SUF=
      COMP_LVL=
      COMP_METHOD=$1
      shift
      ;;
    -[1-9]|--fast|--best)
      COMP_LVL=$1
      shift
      ;;
    --force|-F)
      FORCE_OPT=-F
      shift
      ;;
    --soft|-S)
      LN_OPT=-S
      shift
      ;;
    --hard|-H)
      LN_OPT=-H
      shift
      ;;
    --conf=*)
      # Everything after the first '=' is the directory
      MAN_CONF=${1#--conf=}
      shift
      ;;
    --conf)
      # Without this check, 'shift 2' fails when --conf is the last argument,
      # leaves "$1" untouched and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "Option --conf requires a directory argument" >&2
        exit 1
      fi
      MAN_CONF="$2"
      shift 2
      ;;
    --verbose|-v)
      VERBOSE_LVL=$((VERBOSE_LVL + 1))
      shift
      ;;
    --backup)
      BACKUP=yes
      shift
      ;;
    --fake|-f)
      FAKE=yes
      shift
      ;;
    --help|-h)
      help
      exit 0
      ;;
    /*)
      # Directories are accumulated in a space-separated list
      MAN_DIR="${MAN_DIR} ${1}"
      shift
      ;;
    -*)
      help "$1"
      exit 1
      ;;
    *)
      echo "\"$1\" is not an absolute path name" >&2
      exit 1
      ;;
  esac
done
227 | |||
# Redirections: translate the verbosity level into the two message sinks.
# DEST_FD0 receives the directory-level messages, DEST_FD1 the file-level
# ones; VERBOSE_OPT is the equivalent flag list handed to recursive calls.
if [ "$VERBOSE_LVL" = "0" ]; then
  # Level 0: stay silent
  VERBOSE_OPT=
  DEST_FD0=/dev/null
  DEST_FD1=/dev/null
elif [ "$VERBOSE_LVL" = "1" ]; then
  # Level 1: only the directory-level messages are shown
  VERBOSE_OPT=-v
  DEST_FD0=/dev/stdout
  DEST_FD1=/dev/null
else
  # Level 2 and above: show everything
  VERBOSE_OPT="-v -v"
  DEST_FD0=/dev/stdout
  DEST_FD1=/dev/stdout
fi
249 | |||
# When no directory was given on the command-line, ask man for its search path.
# Note: 'man --path' may list the same directory twice, once with a trailing
# '/' and once without; 'dirname "$entry"/.' normalises both spellings and
# 'sort -u' then removes the duplicate.
if [ -z "$MAN_DIR" ]; then
  MAN_DIR=$(man --path -C "$MAN_CONF"/man.conf \
    | tr ':' '\n' \
    | while IFS= read -r entry; do
        # An empty component would otherwise turn into "/" through dirname
        [ -n "$entry" ] || continue
        dirname "$entry"/.
      done \
    | sort -u \
    | tr '\n' ' ')
fi

# If no MANPATH in ${MAN_CONF}/man.conf, abort as well
if [ -z "$MAN_DIR" ]; then
  echo "No directory specified, and no directory found with \`man --path'" >&2
  exit 1
fi
264 | |||
# Fake mode: print the parameters that would be used, then stop without
# touching any file.
if [ "$FAKE" != "no" ]; then
  echo "Actual parameters used:"
  echo -n "Compression.......: "
  # The patterns must mirror the spellings accepted on the command-line
  # (--gz included), otherwise a valid method is reported as "unknown".
  case $COMP_METHOD in
    --bzip2|--bz2|-b) echo -n "bzip2";;
    --gzip|--gz|-g) echo -n "gzip";;
    --decompress|-d) echo -n "decompressing";;
    *) echo -n "unknown";;
  esac
  echo " ($COMP_METHOD)"
  echo "Compression level.: $COMP_LVL"
  echo "Compression suffix: $COMP_SUF"
  echo -n "Force compression.: "
  [ "foo$FORCE_OPT" = "foo-F" ] && echo "yes" || echo "no"
  echo "man.conf is.......: ${MAN_CONF}/man.conf"
  echo -n "Hard-links........: "
  [ "foo$LN_OPT" = "foo-S" ] && echo "convert to soft-links" || echo "leave as is"
  echo -n "Soft-links........: "
  [ "foo$LN_OPT" = "foo-H" ] && echo "convert to hard-links" || echo "leave as is"
  echo "Backup............: $BACKUP"
  echo "Faking (yes!).....: $FAKE"
  echo "Directories.......: $MAN_DIR"
  echo "Verbosity level...: $VERBOSE_LVL"
  exit 0
fi
291 | |||
# If no method was specified (and no backup was requested), print help
if [ -z "${COMP_METHOD}" ] && [ "${BACKUP}" = "no" ]; then
  help
  exit 1
fi

# In backup mode, do the backup solely: each directory is archived as
# <name>.tar next to itself, the previous archive being kept as <name>.tar.old.
if [ "$BACKUP" = "yes" ]; then
  # MAN_DIR is a space-separated list, hence the deliberate word splitting
  for DIR in $MAN_DIR; do
    # Never rotate or create archives from the wrong working directory
    if ! cd "${DIR}/.."; then
      echo "Cannot change to the parent directory of $DIR" >&2
      exit 1
    fi
    DIR_NAME=$(basename "${DIR}")
    echo "Backing up $DIR..." > "$DEST_FD0"
    [ -f "${DIR_NAME}.tar.old" ] && rm -f "${DIR_NAME}.tar.old"
    [ -f "${DIR_NAME}.tar" ] && mv "${DIR_NAME}.tar" "${DIR_NAME}.tar.old"
    if ! tar cfv "${DIR_NAME}.tar" "${DIR_NAME}" > "$DEST_FD1"; then
      echo "Backup of $DIR failed" >&2
      exit 1
    fi
  done
  exit 0
fi
310 | |||
# Main loop: (de)compress every page of every directory in MAN_DIR.
# MAN_DIR holds only absolute path names, separated by spaces (hence the
# deliberate word splitting). Localized man pages live in sub-directories,
# which are handled by re-invoking this script on each of them.
for DIR in $MAN_DIR; do
  MEM_DIR=$(pwd)
  # Without this check a failed cd would make the loop below work on the
  # files of whatever the current directory happens to be.
  if ! cd "$DIR"; then
    echo "Cannot change to directory $DIR, skipping it" >&2
    continue
  fi
  for FILE in *; do
    # Fixes the case where the directory is empty (the glob stays literal)
    if [ "foo$FILE" = "foo*" ]; then continue; fi

    # Fixes the case when hard-links see their compression scheme change
    # (from not compressed to compressed, or from bz2 to gz, or from gz to bz2)
    # Also fixes the case when multiple versions of the page are present, which
    # are either compressed or not: the entry may be gone by the time we reach it.
    if [ ! -L "$FILE" ] && [ ! -e "$FILE" ]; then continue; fi

    # Do not compress whatis files
    if [ "$FILE" = "whatis" ]; then continue; fi

    if [ -d "$FILE" ]; then
      cd "${MEM_DIR}"  # Go back to where we were started from before recursing
      # We are going recursive to that directory
      echo "-> Entering ${DIR}/${FILE}..." > "$DEST_FD0"
      # I need not pass --conf, as I specify the directory to work on
      # But I need exit in case of error
      # The option variables are left unquoted on purpose: empty ones must
      # vanish and VERBOSE_OPT may hold two flags.
      "$MY_NAME" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} ${VERBOSE_OPT} ${FORCE_OPT} "${DIR}/${FILE}" || exit 1
      echo "<- Leaving ${DIR}/${FILE}." > "$DEST_FD1"
      cd "$DIR" || exit 1  # Needed for the next iteration of the loop

    else # !dir
      # Keep only the newest of the plain/.gz/.bz2 variants of this page
      if ! check_unique "$DIR" "$FILE"; then continue; fi

      # Check if the file is already compressed with the specified method
      BASE_FILE=$(basename "$FILE" .gz)
      BASE_FILE=$(basename "$BASE_FILE" .bz2)
      if [ "${FILE}" = "${BASE_FILE}${COMP_SUF}" ] && [ "foo${FORCE_OPT}" = "foo" ]; then continue; fi

      # If we have a symlink
      if [ -h "$FILE" ]; then
        case "$FILE" in
          *.bz2)
            EXT=bz2 ;;
          *.gz)
            EXT=gz ;;
          *)
            EXT=none ;;
        esac

        # Read the target as stored in the link, then drop any compression
        # suffix from it: the new link must point to <target>$COMP_SUF
        # whatever suffix the old target carried.
        LINK=$(readlink "$FILE")
        LINK=${LINK%.gz}
        LINK=${LINK%.bz2}

        if [ ! "$EXT" = "none" ]; then
          # Rename the link itself to its suffix-less name
          NEWNAME=${FILE%.$EXT}
          mv "$FILE" "$NEWNAME"
          FILE="$NEWNAME"
        fi

        if [ "$LN_OPT" = "-H" ]; then
          # Change this soft-link into a hard- one
          rm -f "$FILE" && ln "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
          chmod --reference "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
        else
          # Keep this soft-link a soft- one.
          rm -f "$FILE" && ln -s "${LINK}$COMP_SUF" "${FILE}$COMP_SUF"
        fi
        echo "Relinked $FILE" > "$DEST_FD1"

      # else if we have a plain file
      elif [ -f "$FILE" ]; then
        # Take care of hard-links: build the list of files hard-linked
        # to the one we are {de,}compressing.
        # NB. This is not optimum as the file will eventually be compressed
        # as many times it has hard-links. But for now, that's the safe way.
        inode=$(ls -li "$FILE" | awk '{print $1}')
        # NUL-separated reading keeps names containing blanks in one piece
        HLINKS=()
        while IFS= read -r -d '' hlink; do
          HLINKS[${#HLINKS[@]}]=$hlink
        done < <(find . \! -name "$FILE" -inum "$inode" -print0)

        # We may have hard-links! Remove them now.
        for hlink in "${HLINKS[@]}"; do rm -f "$hlink"; done

        # Now take care of the file that has no hard-link
        # We do decompress first to re-compress with the selected
        # compression ratio later on...
        case "$FILE" in
          *.bz2)
            bunzip2 "$FILE"
            FILE=$(basename "$FILE" .bz2)
            ;;
          *.gz)
            gunzip "$FILE"
            FILE=$(basename "$FILE" .gz)
            ;;
        esac

        # Compress the file with the given compression ratio, if needed
        case $COMP_SUF in
          *bz2)
            bzip2 ${COMP_LVL} "$FILE" && chmod 644 "${FILE}${COMP_SUF}"
            echo "Compressed $FILE" > "$DEST_FD1"
            ;;
          *gz)
            gzip ${COMP_LVL} "$FILE" && chmod 644 "${FILE}${COMP_SUF}"
            echo "Compressed $FILE" > "$DEST_FD1"
            ;;
          *)
            echo "Uncompressed $FILE" > "$DEST_FD1"
            ;;
        esac

        # If the file had hard-links, recreate those (either hard or soft)
        for hlink in "${HLINKS[@]}"; do
          # Strip the old compression suffix literally
          NEWFILE=${hlink%.gz}
          NEWFILE=${NEWFILE%.bz2}
          if [ "$LN_OPT" = "-S" ]; then
            # Make this hard-link a soft- one
            ln -s "${FILE}$COMP_SUF" "${NEWFILE}$COMP_SUF"
          else
            # Keep the hard-link a hard- one
            ln "${FILE}$COMP_SUF" "${NEWFILE}$COMP_SUF"
          fi
          chmod 644 "${NEWFILE}$COMP_SUF" # Really work only for hard-links. Harmless for soft-links
        done

      else
        # There is a problem when we get neither a symlink nor a plain file
        # Obviously, we shall never ever come here... :-(
        echo "Whaooo... \"${DIR}/${FILE}\" is neither a symlink nor a plain file. Please check:" >&2
        ls -l "${DIR}/${FILE}" >&2
        exit 1
      fi
    fi
  done # for FILE
done # for DIR
Properties
Name | Value |
---|---|
svn:executable | * |