Magellan Linux

Annotation of /branches/mage-next/src/compressdoc.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 33 - (hide annotations) (download)
Thu Jan 6 02:57:12 2005 UTC (19 years, 4 months ago) by niro
Original Path: trunk/mage/usr/lib/mage/compressdoc
File size: 14613 byte(s)
fixed version tags

1 niro 24 #!/bin/bash
2     # VERSION: 20040320.0026
3     #
4     # Compress (with bzip2 or gzip) all man pages in a hierarchy and
5     # update symlinks - By Marc Heerdink <marc @ koelkast.net>
6     # Modified to be able to gzip or bzip2 files as an option and to deal
7     # with all symlinks properly by Mark Hymers <markh @ linuxfromscratch.org>
8     #
9     # Modified 20030930 by Yann E. Morin <yann.morin.1998 @ anciens.enib.fr>
10     # to accept compression/decompression, to correctly handle hard-links,
11     # to allow for changing hard-links into soft- ones, to specify the
12     # compression level, to parse the man.conf for all occurrences of MANPATH,
13     # to allow for a backup, to allow to keep the newest version of a page.
14     # Modified 20040330 by Tushar Teredesai to replace $0 by the name of the script.
15     # (Note: It is assumed that the script is in the user's PATH)
16     #
17     # TODO:
18     # - choose a default compress method to be based on the available
19     # tool : gzip or bzip2;
20     # - offer an option to automagically choose the best compression method
21     # on a per page basis (eg. check which of gzip/bzip2/whatever is the
22     # most effective, page per page);
23     # - when a MANPATH env var exists, use this instead of /etc/man.conf
24     # (useful for users to (de)compress their man pages);
25     # - offer an option to restore a previous backup;
26     # - add other compression engines (compress, zip, etc?). Needed?
27    
28 niro 33 # version: 0.3.6-r9
29 niro 24
#######################################
# Print the usage text, paged when a pager is available.
# Globals:
#   MY_NAME  (read) - script name shown in the usage line and option text
#   MAN_CONF (read) - directory holding man.conf, interpolated into the text
# Arguments:
#   $1 - optional; an unrecognised option to report before the usage text
# Outputs:
#   The "Unknown option" diagnostic on stderr; the help text on stdout,
#   through less when it is installed and through cat otherwise.
#######################################
help() {
  local pager=cat

  if [ -n "$1" ]; then
    echo "Unknown option : $1" >&2
  fi

  # less may be absent on a minimal system; piping into a missing command
  # would swallow the whole help text, so fall back to cat.
  if command -v less >/dev/null 2>&1; then
    pager=less
  fi

  {
    echo "Usage: $MY_NAME <comp_method> [options] [dirs]"
    # The delimiter is deliberately unquoted: ${MY_NAME} and ${MAN_CONF}
    # must be expanded inside the text.
    cat << EOT
Where comp_method is one of :
  --gzip, --gz, -g
  --bzip2, --bz2, -b
                Compress using gzip or bzip2.

  --decompress, -d
                Decompress the man pages.

  --backup      Specify a .tar backup shall be done for every directories.
                In case a backup already exists, it is saved as .tar.old prior
                to making the new backup. If an .tar.old backup exist, it is
                removed prior to saving the backup.
                In backup mode, no other action is performed.

And where options are :
  -1 to -9, --fast, --best
                The compression level, as accepted by gzip and bzip2. When not
                specified, uses the default compression level for the given
                method (-6 for gzip, and -9 for bzip2). Not used when in backup
                or decompress modes.

  --force, -F   Force (re-)compression, even if the previous one was the same
                method. Useful when changing the compression ratio. By default,
                a page will not be re-compressed if it ends with the same suffix
                as the method adds (.bz2 for bzip2, .gz for gzip).

  --soft, -S    Change hard-links into soft-links. Use with _caution_ as the
                first encountered file will be used as a reference. Not used
                when in backup mode.

  --hard, -H    Change soft-links into hard-links. Not used when in backup mode.

  --conf=dir, --conf dir
                Specify the location of man.conf. Defaults to /etc.

  --verbose, -v Verbose mode, print the name of the directory being processed.
                Double the flag to turn it even more verbose, and to print the
                name of the file being processed.

  --fake, -f    Fakes it. Print the actual parameters ${MY_NAME} will use.

  dirs          A list of space-separated _absolute_ pathname to the man
                directories.
                When empty, and only then, parse ${MAN_CONF}/man.conf for all
                occurrences of MANPATH.

Note about compression
  There has been a discussion on blfs-support about compression ratios of
  both gzip and bzip2 on man pages, taking into account the hosting fs,
  the architecture, etc... On the overall, the conclusion was that gzip
  was much efficient on 'small' files, and bzip2 on 'big' files, small and
  big being very dependent on the content of the files.

  See the original post from Mickael A. Peters, titled "Bootable Utility CD",
  and dated 20030409.1816(+0200), and subsequent posts:
  http://linuxfromscratch.org/pipermail/blfs-support/2003-April/038817.html

  On my system (x86, ext3), man pages were 35564kiB before compression. gzip -9
  compressed them down to 20372kiB (57.28%), bzip2 -9 got down to 19812kiB
  (55.71%). That is a 1.57% gain in space. YMMV.

  What was not taken into consideration was the decompression speed. But does
  it make sense to? You gain fast access with uncompressed man pages, or you
  gain space at the expense of a slight overhead in time. Well, my P4-2.5GHz
  does not even let me notice this... :-)
EOT
  } | "$pager"
}
106    
#######################################
# Make a man page unique amongst its uncompressed, gzip'd and bzip2'd
# variants: the most recently modified variant is kept, the others are
# deleted.
#
# NB. Hard-links to a deleted variant are _not_ deleted here. They share its
# date/time, so they get deleted when they are checked in turn.
#
# Arguments:
#   $1 - the directory in which the file resides
#   $2 - the file name of the man page (with or without .gz/.bz2)
# Returns:
#   0 if $2 is the latest variant and must be taken care of,
#   1 if it is not (it has then been deleted), or if no variant could be
#     listed in $1 (nothing is deleted in that case).
#######################################
check_unique() {
  local dir=$1
  local page=$2
  local base gz_file bz_file latest candidate

  # Build the list of all man pages with the same name
  base=$(basename -- "$page" .bz2)
  base=$(basename -- "$base" .gz)
  gz_file=${base}.gz
  bz_file=${base}.bz2

  # Look for the most recent one. The listing only runs when the cd worked,
  # otherwise names from an unrelated directory would drive the deletion.
  latest=$(cd -- "$dir" 2>/dev/null \
    && ls -1rt -- "$base" "$gz_file" "$bz_file" 2>/dev/null | tail -n 1)

  # Nothing found (missing directory or no variant at all): leave things be.
  if [ -z "$latest" ]; then
    return 1
  fi

  for candidate in "$base" "$gz_file" "$bz_file"; do
    if [ "$latest" != "$candidate" ]; then
      rm -f -- "${dir}/${candidate}"
    fi
  done

  # True only when the specified file was the latest
  [ "$latest" = "$page" ]
}
138    
# Name of the script (used in the usage text and for the recursive calls)
MY_NAME=$(basename -- "$0")

# Initialize to some sensible state, that is : don't change links state,
# parse /etc/man.conf, be most silent, search man.conf in /etc, and don't
# force (re-)compression.
COMP_METHOD=
COMP_SUF=
COMP_LVL=
FORCE_OPT=
LN_OPT=
MAN_DIR=
VERBOSE_LVL=0
BACKUP=no
FAKE=no
MAN_CONF=/etc

#######################################
# Parse the command line into the option globals initialised above.
# Globals (written):
#   COMP_METHOD, COMP_SUF, COMP_LVL, FORCE_OPT, LN_OPT, MAN_DIR,
#   VERBOSE_LVL, BACKUP, FAKE, MAN_CONF
# Arguments:
#   the script's command-line arguments
# Exits:
#   0 after printing the help for --help/-h,
#   1 on an unknown option, a non-absolute directory, an empty argument,
#     or --conf given without a value.
#######################################
parse_args() {
  # Loop on the argument count, not on "$1" being non-empty: an empty
  # argument must not silently end the parsing.
  while [ $# -gt 0 ]; do
    case "$1" in
      --gzip|--gz|-g)
        COMP_SUF=.gz
        COMP_METHOD=$1
        ;;
      --bzip2|--bz2|-b)
        COMP_SUF=.bz2
        COMP_METHOD=$1
        ;;
      --decompress|-d)
        COMP_SUF=
        COMP_LVL=
        COMP_METHOD=$1
        ;;
      -[1-9]|--fast|--best)
        COMP_LVL=$1
        ;;
      --force|-F)
        FORCE_OPT=-F
        ;;
      --soft|-S)
        LN_OPT=-S
        ;;
      --hard|-H)
        LN_OPT=-H
        ;;
      --conf=*)
        MAN_CONF=${1#--conf=}
        ;;
      --conf)
        # Without this check a trailing --conf made "shift 2" fail without
        # shifting anything, and the loop never ended.
        if [ $# -lt 2 ]; then
          echo "Option --conf requires a directory argument" >&2
          exit 1
        fi
        MAN_CONF=$2
        shift
        ;;
      --verbose|-v)
        VERBOSE_LVL=$((VERBOSE_LVL + 1))
        ;;
      --backup)
        BACKUP=yes
        ;;
      --fake|-f)
        FAKE=yes
        ;;
      --help|-h)
        help
        exit 0
        ;;
      /*)
        # Directories are kept as a space-separated list
        MAN_DIR="${MAN_DIR} ${1}"
        ;;
      -*)
        help "$1"
        exit 1
        ;;
      *)
        echo "\"$1\" is not an absolute path name"
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
227    
# Redirections
# DEST_FD0 receives the per-directory messages and DEST_FD1 the per-file
# ones; VERBOSE_OPT is the matching flag set handed to the recursive calls.
# Start from the most verbose setting (level 2 and above), then quieten
# down for the two lower levels.
DEST_FD0=/dev/stdout
DEST_FD1=/dev/stdout
VERBOSE_OPT="-v -v"

if [ "$VERBOSE_LVL" = "0" ]; then
  # 0, be silent
  DEST_FD0=/dev/null
  DEST_FD1=/dev/null
  VERBOSE_OPT=
elif [ "$VERBOSE_LVL" = "1" ]; then
  # 1, be a bit verbose: directories only
  DEST_FD1=/dev/null
  VERBOSE_OPT=-v
fi
249    
# When no directory was given on the command line, take the search path
# reported by 'man --path' for ${MAN_CONF}/man.conf.
# Note: on my machine, 'man --path' gives /usr/share/man twice, once with a
# trailing '/', once without. 'dirname "$entry"/.' drops the trailing slash
# so that 'sort -u' can remove the duplicate.
if [ -z "$MAN_DIR" ]; then
  # tr does the ':' to newline split portably; read -r keeps backslashes in
  # path names intact. The result is a space-separated list with a trailing
  # space.
  MAN_DIR=$(man --path -C "$MAN_CONF"/man.conf \
    | tr ':' '\n' \
    | while IFS= read -r entry; do dirname -- "$entry"/.; done \
    | sort -u \
    | tr '\n' ' ')
fi

# If no MANPATH in ${MAN_CONF}/man.conf, abort as well
if [ -z "$MAN_DIR" ]; then
  echo "No directory specified, and no directory found with \`man --path'" >&2
  exit 1
fi
264    
#######################################
# Print the parameters the script would run with (--fake mode).
# Globals (read):
#   COMP_METHOD, COMP_LVL, COMP_SUF, FORCE_OPT, MAN_CONF, LN_OPT, BACKUP,
#   FAKE, MAN_DIR, VERBOSE_LVL
# Outputs:
#   One "label: value" line per parameter on stdout.
#######################################
show_fake_parameters() {
  local method_name=unknown
  local force_state=no
  local hard_links="leave as is"
  local soft_links="leave as is"

  # Every spelling accepted on the command line must be listed here;
  # "--gz" used to be misspelt "__gz" and was reported as "unknown".
  case "$COMP_METHOD" in
    --bzip2|--bz2|-b) method_name=bzip2 ;;
    --gzip|--gz|-g) method_name=gzip ;;
    --decompress|-d) method_name=decompressing ;;
  esac

  if [ "$FORCE_OPT" = "-F" ]; then
    force_state=yes
  fi
  if [ "$LN_OPT" = "-S" ]; then
    hard_links="convert to soft-links"
  fi
  if [ "$LN_OPT" = "-H" ]; then
    soft_links="convert to hard-links"
  fi

  printf '%s\n' \
    "Actual parameters used:" \
    "Compression.......: ${method_name} (${COMP_METHOD})" \
    "Compression level.: ${COMP_LVL}" \
    "Compression suffix: ${COMP_SUF}" \
    "Force compression.: ${force_state}" \
    "man.conf is.......: ${MAN_CONF}/man.conf" \
    "Hard-links........: ${hard_links}" \
    "Soft-links........: ${soft_links}" \
    "Backup............: ${BACKUP}" \
    "Faking (yes!).....: ${FAKE}" \
    "Directories.......: ${MAN_DIR}" \
    "Verbosity level...: ${VERBOSE_LVL}"
}

# Fake? Then only report, and do nothing else.
if [ "${FAKE:-no}" != "no" ]; then
  show_fake_parameters
  exit 0
fi
291    
# Without a (de)compression method there is nothing to do, unless a backup
# was requested: show the usage and fail.
if [[ -z ${COMP_METHOD} && ${BACKUP} == "no" ]]; then
  help
  exit 1
fi
297    
#######################################
# Archive every directory of MAN_DIR as <name>.tar next to it. An existing
# <name>.tar becomes <name>.tar.old, replacing any previous .tar.old.
# Globals (read):
#   MAN_DIR  - space-separated list of absolute directories
#   DEST_FD0 - where the per-directory message goes
#   DEST_FD1 - where tar's verbose listing goes
# Returns:
#   0 when every directory was archived; 1 as soon as a directory is
#   missing, its parent cannot be entered, or tar fails.
#######################################
backup_man_dirs() {
  local dir dir_name

  # MAN_DIR is a space-separated list: word splitting is intended here.
  for dir in $MAN_DIR; do
    if [ ! -d "$dir" ]; then
      echo "Cannot back up ${dir}: not a directory" >&2
      return 1
    fi
    dir_name=$(basename -- "$dir")
    echo "Backing up $dir..." > "$DEST_FD0"
    # Work in a subshell so the caller's working directory is untouched.
    # Nothing is rotated or archived unless the cd succeeded, otherwise the
    # rm/mv/tar below would hit whatever directory we happen to be in.
    (
      cd -- "${dir}/.." || exit 1
      if [ -f "${dir_name}.tar.old" ]; then
        rm -f -- "${dir_name}.tar.old"
      fi
      if [ -f "${dir_name}.tar" ]; then
        mv -- "${dir_name}.tar" "${dir_name}.tar.old"
      fi
      tar -cvf "${dir_name}.tar" -- "${dir_name}" > "$DEST_FD1"
    ) || return 1
  done
  return 0
}

# In backup mode, do the backup solely
if [ "$BACKUP" = "yes" ]; then
  backup_man_dirs || exit 1
  exit 0
fi
310    
#######################################
# (De)compress every man page below the directories listed in MAN_DIR.
# Sub-directories (localized man pages) are handled by re-invoking the
# script, found through PATH as $MY_NAME, on each of them.
# Globals (read):
#   MAN_DIR     - space-separated list of absolute directories
#   MY_NAME     - name of this script, for the recursive calls
#   COMP_METHOD, COMP_LVL, LN_OPT, VERBOSE_OPT, FORCE_OPT - passed on
#   COMP_SUF    - suffix of the target format (.gz, .bz2, empty = none)
#   DEST_FD0, DEST_FD1 - where progress messages go
# Calls:
#   check_unique (defined earlier in this script)
# Exits:
#   1 when a directory cannot be entered, a recursive call fails, or an
#     entry is neither a directory, a symlink nor a plain file.
#######################################
process_man_dirs() {
  local start_dir dir entry base_file ext link_target new_name
  local inode hlink new_link
  local -a hlinks

  # Where we were started from; we go back there for the recursive calls.
  start_dir=$(pwd)

  # MAN_DIR has only absolute path names, separated by spaces: word
  # splitting is intended here.
  for dir in $MAN_DIR; do
    cd -- "$dir" || exit 1
    for entry in *; do
      # Covers the empty directory (the glob stays a literal '*'), and the
      # entries removed meanwhile: hard-links whose compression scheme
      # changed, or older versions of a page deleted by check_unique.
      if [ ! -L "$entry" ] && [ ! -e "$entry" ]; then continue; fi

      # Do not compress whatis files
      if [ "$entry" = "whatis" ]; then continue; fi

      if [ -d "$entry" ]; then
        cd -- "$start_dir" || exit 1
        # We are going recursive to that directory
        echo "-> Entering ${dir}/${entry}..." > "$DEST_FD0"
        # No need to pass --conf, as the directory to work on is given.
        # The options are left unquoted on purpose: empty ones must vanish
        # and VERBOSE_OPT may hold two flags.
        # shellcheck disable=SC2086
        "$MY_NAME" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} ${VERBOSE_OPT} ${FORCE_OPT} "${dir}/${entry}" || exit 1
        echo "<- Leaving ${dir}/${entry}." > "$DEST_FD1"
        cd -- "$dir" || exit 1 # Needed for the next iteration of the loop
        continue
      fi

      if ! check_unique "$dir" "$entry"; then continue; fi

      # Skip the file when it is already compressed with the specified
      # method, unless re-compression is forced
      base_file=$(basename -- "$entry" .gz)
      base_file=$(basename -- "$base_file" .bz2)
      if [ "$entry" = "${base_file}${COMP_SUF}" ] && [ -z "$FORCE_OPT" ]; then
        continue
      fi

      if [ -h "$entry" ]; then
        # A symlink: point it at the target's new name, under its own new
        # name. readlink gives the target verbatim, spaces included.
        link_target=$(readlink -- "$entry")
        case "$entry" in
          *.bz2) ext=bz2 ;;
          *.gz) ext=gz ;;
          *) ext= ;;
        esac

        if [ -n "$ext" ]; then
          link_target=${link_target%."$ext"}
          new_name=${entry%."$ext"}
          mv -- "$entry" "$new_name"
          entry=$new_name
        fi

        if [ "$LN_OPT" = "-H" ]; then
          # Change this soft-link into a hard- one
          rm -f -- "$entry" && ln -- "${link_target}${COMP_SUF}" "${entry}${COMP_SUF}"
          chmod --reference="${link_target}${COMP_SUF}" -- "${entry}${COMP_SUF}"
        else
          # Keep this soft-link a soft- one.
          rm -f -- "$entry" && ln -s -- "${link_target}${COMP_SUF}" "${entry}${COMP_SUF}"
        fi
        echo "Relinked $entry" > "$DEST_FD1"

      elif [ -f "$entry" ]; then
        # Take care of hard-links: build the list of files hard-linked
        # to the one we are {de,}compressing.
        # NB. This is not optimum as the file will eventually be compressed
        # as many times as it has hard-links. But for now, that's the safe way.
        inode=$(ls -di -- "$entry" | awk '{print $1}')
        hlinks=()
        while IFS= read -r -d '' hlink; do
          hlinks+=("$hlink")
        done < <(find . ! -name "$entry" -inum "$inode" -print0)

        # We may have hard-links: remove them now, they are recreated below.
        for hlink in "${hlinks[@]}"; do
          rm -f -- "$hlink"
        done

        # Now take care of the file that has no hard-link any more.
        # We do decompress first to re-compress with the selected
        # compression ratio later on...
        case "$entry" in
          *.bz2)
            bunzip2 -- "$entry"
            entry=$(basename -- "$entry" .bz2)
            ;;
          *.gz)
            gunzip -- "$entry"
            entry=$(basename -- "$entry" .gz)
            ;;
        esac

        # Compress the file with the given compression ratio, if needed
        case "$COMP_SUF" in
          *bz2)
            bzip2 ${COMP_LVL:+"$COMP_LVL"} -- "$entry" && chmod 644 -- "${entry}${COMP_SUF}"
            echo "Compressed $entry" > "$DEST_FD1"
            ;;
          *gz)
            gzip ${COMP_LVL:+"$COMP_LVL"} -- "$entry" && chmod 644 -- "${entry}${COMP_SUF}"
            echo "Compressed $entry" > "$DEST_FD1"
            ;;
          *)
            echo "Uncompressed $entry" > "$DEST_FD1"
            ;;
        esac

        # If the file had hard-links, recreate those (either hard or soft)
        for hlink in "${hlinks[@]}"; do
          new_link=${hlink%.gz}
          new_link=${new_link%.bz2}
          if [ "$LN_OPT" = "-S" ]; then
            # Make this hard-link a soft- one
            ln -s -- "${entry}${COMP_SUF}" "${new_link}${COMP_SUF}"
          else
            # Keep the hard-link a hard- one
            ln -- "${entry}${COMP_SUF}" "${new_link}${COMP_SUF}"
          fi
          # Really works only for hard-links. Harmless for soft-links
          chmod 644 -- "${new_link}${COMP_SUF}"
        done

      else
        # There is a problem when we get neither a symlink nor a plain file
        # Obviously, we shall never ever come here... :-(
        echo "Whaooo... \"${dir}/${entry}\" is neither a symlink nor a plain file. Please check:" >&2
        ls -l -- "${dir}/${entry}" >&2
        exit 1
      fi
    done # for entry
  done # for dir
}

process_man_dirs

Properties

Name Value
svn:executable *