]> git.decadent.org.uk Git - dak.git/blob - scripts/debian/ddtp-i18n-check.sh
8f5c6c08ffc746bf2d924dbd285f5bcd06547526
[dak.git] / scripts / debian / ddtp-i18n-check.sh
1 #!/bin/bash
2 #
3 # $Id: ddtp_i18n_check.sh 2535 2011-02-19 14:20:52Z nekral-guest $
4
5 # Copyright (C) 2008, 2011 Felipe Augusto van de Wiel <faw@funlabs.org>
6 # Copyright (C) 2008, 2009 Nicolas François <nicolas.francois@centraliens.net>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # On Debian systems, you can find the full text of the license in
14 # /usr/share/common-licenses/GPL-2
15
# Abort on any unhandled command failure (-e) and on the expansion of an
# unset variable (-u).
set -e -u

# Run every command started from here on in the C locale.
LC_ALL=C
export LC_ALL

# Positional arguments.  Both start out empty; an empty
# PACKAGES_LISTS_DIR means that no packages lists directory was given,
# in which case '.' is to be used.
dists_parent_dir=
PACKAGES_LISTS_DIR=

# Runtime switches.  Each one must be either 0 or 1.
#
# DEBUG:   0 - fail after the first error.
#          1 - list all the errors.
# DRY_RUN: 0 - generate the compressed version of the Translation-*
#              files.
# GEN_IDX: 1 - create the Index files.  A runtime option allows to not
#              create/generate them.
DEBUG=0
DRY_RUN=0
GEN_IDX=1
35
# Print the command line synopsis on stderr and terminate the script
# with exit status 1.  This function never returns.
usage () {
        # One printf call; each argument becomes one output line.
        printf '%s\n' \
                "Usage: $0 [options] <dists_parent_dir> [<packages_lists_directory>]" \
                "" \
                "    --debug      Debug mode: do not stop after the first error" \
                "    --dry-run    Do not generate the compressed version of the " \
                "                 Translation files" \
                "    --no-index   Do not generate the Index files" >&2
        exit 1
}
45
# Parse options
#
# Flags may appear anywhere on the command line.  The first non-flag
# argument is the dists parent directory (one trailing '/' is removed),
# the second one is the packages lists directory.  Anything beyond that,
# an empty argument, or an unknown flag prints the usage and exits.
for opt; do
        case "$opt" in
                --debug)
                        DEBUG=1
                        ;;
                --dry-run)
                        DRY_RUN=1
                        ;;
                --no-index)
                        GEN_IDX=0
                        ;;
                -*)
                        # The pattern must stay unquoted: a quoted "-*"
                        # only matches the literal two-character string
                        # and unknown flags would then be taken as
                        # directory names.
                        echo "$0: Invalid option: $opt" >&2
                        usage
                        ;;
                "")
                        echo "Empty parameter" >&2
                        echo "" >&2
                        usage
                        ;;
                *)
                        if [ -z "$dists_parent_dir" ]; then
                                # Removing trailing /
                                dists_parent_dir=${opt%/}
                        elif [ -z "$PACKAGES_LISTS_DIR" ]; then
                                PACKAGES_LISTS_DIR=$opt
                        else
                                echo "$0: Invalid option: $opt" >&2
                                usage
                        fi
                        ;;
        esac
done

# Fall back to the current directory only when no packages lists
# directory was given.  The default is derived from PACKAGES_LISTS_DIR
# itself, not from $opt, which merely holds the last argument seen.
PACKAGES_LISTS_DIR=${PACKAGES_LISTS_DIR:-.}

# Both locations must be existing directories.
if [ ! -d "$dists_parent_dir" ]; then
        echo "missing dists_parent_dir, or not a directory" >&2
        echo "" >&2
        usage
elif [ ! -d "$PACKAGES_LISTS_DIR" ]; then
        echo "missing packages_lists_directory, or not a directory" >&2
        echo "" >&2
        usage
fi
90
# Suites handled by this script.  The stable suite is currently
# disabled.
#STABLE="squeeze"
TESTING="wheezy"
UNSTABLE="sid"

# Files that must exist on the top of dists_parent_dir:
#   CHECKSUMS - original SHA256SUMS, generated by i18n.debian.net
#   TIMESTAMP - DAK timestamp (a detached TIMESTAMP.gpg goes with it)
CHECKSUMS="SHA256SUMS"
TIMESTAMP="timestamp"
SPECIAL_FILES="$CHECKSUMS $TIMESTAMP $TIMESTAMP.gpg"

# Temporary working directory.  TMP_WORK_DIR holds its full path as
# reported by pwd from inside the directory; a full path reduces the
# complexity of checking CHECKSUMS and of cleaning/removing the
# directory.  The subshell keeps the current directory of the script
# untouched.
TEMP_WORK_DIR=$(mktemp -d -t ddtp_dinstall_tmpdir.XXXXXX)
TMP_WORK_DIR=$(cd "$TEMP_WORK_DIR" && pwd)
unset TEMP_WORK_DIR
111
# If it's trapped, something bad happened.
#
# Failure handler: removes the temporary working directory and every
# file this script may have generated in the tree (compressed
# Translation-* files and Index files), then exits with status 1.
# All three components accepted by is_dirname_okay are cleaned, not
# only main.
#
# Globals:   TMP_WORK_DIR (read), dists_parent_dir (read)
# Returns:   never returns; the script exits with status 1
trap_exit () {
        local te_component

        # Disarm the traps first: after a signal, the "exit 1" below
        # would otherwise fire the EXIT trap and run this handler a
        # second time.
        trap - EXIT HUP INT QUIT TERM

        # Never hand an empty path to rm.
        if [ -n "${TMP_WORK_DIR:-}" ]; then
                rm -rf -- "$TMP_WORK_DIR"
        fi
        if [ -n "${dists_parent_dir:-}" ]; then
                for te_component in main contrib non-free; do
                        rm -f -- "$dists_parent_dir"/dists/*/"$te_component"/i18n/Translation-*.bz2
                        rm -f -- "$dists_parent_dir"/dists/*/"$te_component"/i18n/Index
                done
        fi
        exit 1
}
trap trap_exit EXIT HUP INT QUIT TERM
120
# Check that a path designates a Translation-$lang file located directly
# in a directory named "i18n".
#
# Arguments: $1 - path of the file to check (may contain spaces)
# Returns:   0 if the name is acceptable, 1 otherwise
is_filename_okay () {
        local ifo_file="$1"
        local ifo_d ifo_f

        # Check that the file is in an "i18n" directory
        # This ensures that the Translation-$lang files are not e.g. in
        # dists/etch/ or dists/etch/main/
        # The two steps are kept separate and quoted so that a path
        # containing whitespace is not split into several arguments.
        ifo_d=$(dirname -- "$ifo_file")
        ifo_d=$(basename -- "$ifo_d")
        if [ "x$ifo_d" != "xi18n" ]; then
                return 1
        fi

        # Check that the file is named Translation-$lang, where $lang is
        # a 2 or 3 letter lowercase code, optionally followed by '_' and
        # a 2 letter uppercase code.
        ifo_f=$(basename -- "$ifo_file")
        case "$ifo_f" in
                Translation-[a-z][a-z][a-z]_[A-Z][A-Z]) return 0;;
                Translation-[a-z][a-z]_[A-Z][A-Z])      return 0;;
                Translation-[a-z][a-z][a-z])            return 0;;
                Translation-[a-z][a-z])                 return 0;;
        esac

        return 1
}
142
# Check a directory name against the directory whitelist.
#
# Accepted are the tree root, its dists/ directory and, for each of the
# $TESTING and $UNSTABLE suites, the suite directory itself plus
# <component> and <component>/i18n for main, contrib and non-free.
# Names are compared literally.
#
# TODO/FIXME: It is undecided how to update at stable/point-releases, so
#             we don't allow files to $STABLE; it is left out of the
#             suite list below on purpose.
#
# Arguments: $1 - directory name to check
# Returns:   0 if the directory is whitelisted, 1 otherwise
is_dirname_okay () {
        local candidate="$1"
        local suite_root suite component

        case "$candidate" in
                "$dists_parent_dir"|"$dists_parent_dir/dists")
                        return 0;;
        esac

        for suite in "$TESTING" "$UNSTABLE"; do
                suite_root="$dists_parent_dir/dists/$suite"
                case "$candidate" in
                        "$suite_root")
                                return 0;;
                esac
                for component in main contrib non-free; do
                        case "$candidate" in
                                "$suite_root/$component"|"$suite_root/$component/i18n")
                                        return 0;;
                        esac
                done
        done

        return 1
}
177
# Validate the stanza structure of one Translation-$lang file.
#
# Each stanza must consist of, in this order: one "Package: " line, one
# "Description-md5: " line, one "Description-$lang: " line (the short
# description) and at least one line starting with a space (the long
# description).  Stanzas are separated by empty lines; any other line
# is an error.
#
# The language and the debug switch are handed to awk with -v and the
# file name is taken from awk's FILENAME, so none of them is spliced
# into the awk program or into a printf format string.
#
# Globals:   DEBUG (read) - 0: stop at the first error; otherwise report
#            all the errors.
# Arguments: $1 - path of the file to check; the language is whatever
#            follows the last '-' of this path.
# Outputs:   diagnostics are written to stdout
# Returns:   0 if the file is well-formed, 1 otherwise
has_valid_fields () {
        local hvf_file="$1"
        local hvf_lang="${hvf_file##*-}"

        awk -v debug="$DEBUG" -v lang="$hvf_lang" '
function print_status() {
        printf("p: %d, m: %d, s: %d, l: %d\n", package, md5, s_description, l_description)
}

# Report one failure for the current line.  In debug mode the failure is
# counted and parsing goes on, otherwise awk stops right away.
function fail(what) {
        printf("%s in %s (line %d)\n", what, FILENAME, NR)
        print_status()
        failed = 1
        if (debug) { failures++ } else { exit 1 }
}

BEGIN {
        package       = 0 # Indicates if a Package field was found
        md5           = 0 # Indicates if a Description-md5 field was found
        s_description = 0 # Indicates if a short description was found
        l_description = 0 # Indicates if a long description was found

        failures      = 0 # Number of failures (debug only)
        failed        = 0 # Failure already reported for the block

        # Literal prefix of the short description line
        short_prefix  = "Description-" lang ": "
}

/^Package: / {
        if (0 == failed) {
                if (   (0 != package)       ||
                       (0 != md5)           ||
                       (0 != s_description) ||
                       (0 != l_description)) {
                        fail("Package field unexpected")
                }
                package++
        }
        # Next input line
        next
}

/^Description-md5: / {
        if (0 == failed) {
                if (   (1 != package)       ||
                       (0 != md5)           ||
                       (0 != s_description) ||
                       (0 != l_description)) {
                        fail("Description-md5 field unexpected")
                }
                md5++
        }
        # Next input line
        next
}

substr($0, 1, length(short_prefix)) == short_prefix {
        if (0 == failed) {
                if (   (1 != package)       ||
                       (1 != md5)           ||
                       (0 != s_description) ||
                       (0 != l_description)) {
                        fail("Description-" lang " field unexpected")
                }
                s_description++
        }
        # Next input line
        next
}

/^ / {
        if (0 == failed) {
                if (   (1 != package)       ||
                       (1 != md5)           ||
                       (1 != s_description)) {
                        fail("Long description unexpected")
                }
                l_description = 1 # There can be any number of long description
                                  # lines. Do not count.
        }
        # Next line
        next
}

/^$/ {
        if (0 == failed) {
                if (   (1 != package)       ||
                       (1 != md5)           ||
                       (1 != s_description) ||
                       (1 != l_description)) {
                        fail("End of block unexpected")
                }
        }

        # Next package
        package = 0; md5 = 0; s_description = 0; l_description = 0
        failed = 0

        # Next input line
        next
}

# Anything else: fail.  \047 is the single quote character; the
# offending line itself is included in the message.
{
        fail("Unexpected line \047" $0 "\047")
}

END {
        if (0 == failed) {
                # They must be all set to 0 or all set to 1
                if (   (   (0 == package)        ||
                           (0 == md5)            ||
                           (0 == s_description)  ||
                           (0 == l_description)) &&
                       (   (0 != package)        ||
                           (0 != md5)            ||
                           (0 != s_description)  ||
                           (0 != l_description))) {
                        printf("End of file unexpected in %s (line %d)\n", FILENAME, NR)
                        print_status()
                        exit 1
                }
        }

        if (failures > 0) {
                exit 1
        }
}
' "$hvf_file" || return 1

        return 0
}
318
# $SPECIAL_FILES must exist
#
# Every name listed in $SPECIAL_FILES (a space separated list, hence the
# unquoted expansion) has to be a regular file on the top of
# dists_parent_dir.  The error goes to stderr like every other
# diagnostic of this script.
for sf in $SPECIAL_FILES; do
        if [ ! -f "$dists_parent_dir/$sf" ]; then
                echo "Special file ($sf) doesn't exist" >&2
                exit 1
        fi
done
326
# Comparing CHECKSUMS
#
# sha256sum -c is not used because it would not notice a file that
# exists in the directory tree but is missing from the CHECKSUMS.
# Instead a fresh list is computed for every file below dists/, both
# lists are sorted, and the two sorted lists must be identical;
# otherwise we don't want to continue.
cd "$dists_parent_dir"
sums_computed="$TMP_WORK_DIR/$CHECKSUMS.new"
sums_expected_sorted="$TMP_WORK_DIR/$CHECKSUMS.sorted"
sums_computed_sorted="$TMP_WORK_DIR/$CHECKSUMS.new.sorted"

find dists -type f -print0 |xargs --null sha256sum > "$sums_computed"
sort "$CHECKSUMS" > "$sums_expected_sorted"
sort "$sums_computed" > "$sums_computed_sorted"

cmp --quiet "$sums_expected_sorted" "$sums_computed_sorted" || {
        echo "Failed to compare the $CHECKSUMS, they are not identical!" >&2
        diff -au "$sums_expected_sorted" "$sums_computed_sorted" >&2
        exit 1
}
cd "$OLDPWD"
342
# Get the list of valid packages (sorted, uniq)
#
# For each suite, the first space separated field of every line of
# $PACKAGES_LISTS_DIR/<suite> is written, sorted and without
# duplicates, to $TMP_WORK_DIR/<suite>.pkgs.  The loop variable keeps
# the name t because later code in this file uses a variable of that
# name as well.
for t in "$TESTING" "$UNSTABLE"; do
        suite_list="$PACKAGES_LISTS_DIR/$t"
        [ -f "$suite_list" ] || {
                echo "Missing $PACKAGES_LISTS_DIR/$t" >&2
                exit 1
        }
        cut -d' ' -f 1 "$suite_list" | sort -u > "$TMP_WORK_DIR/$t.pkgs"
done
351
# Walk the whole tree and validate every entry reported by find.
#
#  - Directories must be whitelisted by is_dirname_okay.
#  - Regular files, except $SPECIAL_FILES, must pass is_filename_okay
#    and has_valid_fields, may only mention packages known for their
#    suite, and must be valid UTF-8.
#  - Unless DRY_RUN is set, each Translation file is compressed with
#    bzip2, and if GEN_IDX is set an entry is added to the Index file of
#    its directory.
#
# Paths are read one per line with IFS= and -r, so backslashes and
# leading or trailing blanks are preserved.  File names containing a
# newline are still not supported.
#
# NOTE(review): find runs concurrently with this loop, which creates
# Index files inside the directories being listed.  Should find ever
# report such an Index, is_filename_okay would reject it -- confirm that
# this cannot happen in practice.
/usr/bin/find "$dists_parent_dir" |
while IFS= read -r f; do
        if   [ -d "$f" ]; then
                if ! is_dirname_okay "$f"; then
                        echo "Wrong directory name: $f" >&2
                        exit 1
                else
                        # If the directory name is OK, and if its name is i18n
                        # and GEN_IDX is enabled, we generate the header of the
                        # Index file
                        if [ "$(basename -- "$f")" = "i18n" ] && [ "$GEN_IDX" = "1" ];
                        then
                                echo "SHA1:" > "$f/Index"
                        fi
                fi
        elif [ -f "$f" ]; then
                # If $f is in $SPECIAL_FILES, we skip to the next loop because
                # we won't check it for format, fields and encoding.
                for sf in $SPECIAL_FILES; do
                        if [ "$f" = "$dists_parent_dir/$sf" ]; then
                                continue 2
                        fi
                done

                if ! is_filename_okay "$f"; then
                        echo "Wrong file: $f" >&2
                        exit 1
                fi

                # Check that all entries contain the right fields
                if ! has_valid_fields "$f"; then
                        echo "File $f has an invalid format" >&2
                        exit 1
                fi

                # Check that every package in Translation-$lang exists
                TPKGS=$(basename -- "$f").pkgs
                grep "^Package: " "$f" | cut -d' ' -f 2 | sort -u > "$TMP_WORK_DIR/$TPKGS"
                case "$f" in
                        */$TESTING/*)  t="$TESTING";;
                        */$UNSTABLE/*) t="$UNSTABLE";;
                esac
                if diff "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" | grep -q "^>"; then
                        diff -au "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" |grep "^+"
                        echo "$f contains packages which are not in $t" >&2
                        exit 1
                fi

                # Check encoding
                iconv -f utf-8 -t utf-8 < "$f" > /dev/null 2>&1 || {
                        echo "$f is not an UTF-8 file" >&2
                        exit 1
                }

                # We do not check if the md5 in Translation-$lang are
                # correct.

                if [ "$DRY_RUN" = "0" ]; then
                        # Now generate the compressed files.  The status is
                        # checked by hand: this loop sits on the left-hand
                        # side of "||", where "set -e" is ignored.
                        bzip2 "$f" || {
                                echo "Failed to compress $f" >&2
                                exit 1
                        }
                fi

                # Create Index
                # NOTE(review): with --dry-run and Index generation both
                # enabled, no .bz2 file exists at this point, so sha1sum
                # and du fail and an incomplete entry is written --
                # confirm whether that combination is meant to be
                # supported.
                if [ "$GEN_IDX" = "1" ]; then
                        fbz=${f}.bz2
                        IDX=$(dirname -- "$f")
                        tf_name=$(basename -- "$fbz")
                        # Hashing stdin makes the output "<hash>  -", so the
                        # stripping below does not depend on the path.
                        tf_sha1=$(sha1sum < "$fbz")
                        tf_size=$(du --bytes -- "$fbz")
                        # du separates size and name with a tab; cut at the
                        # first whitespace character of any kind.
                        printf ' %s % 7s %s\n' "${tf_sha1% *}" \
                                "${tf_size%%[[:space:]]*}" "${tf_name}" >> "$IDX/Index"
                fi
        else
                echo "Neither a file or directory: $f" >&2
                exit 1
        fi
done || false
# The while will just fail if an internal check "exit 1", but the script
# is not exited. "|| false" makes the script fail (and exit) in that case.
431
# Report success, together with the current date and time.
printf '%s structure validated successfully (%s)\n' \
        "$dists_parent_dir" "$(date +%c)"

# Everything went fine if this point is reached: disarm the EXIT
# handler and remove the temporary working directory.
trap - EXIT
rm -rf "$TMP_WORK_DIR"