]> git.decadent.org.uk Git - dak.git/blob - scripts/debian/ddtp-i18n-check.sh
daklib/dbconn.py: DBBinary.get_component_name is gone, use PoolFile.component instead
[dak.git] / scripts / debian / ddtp-i18n-check.sh
1 #!/bin/bash
2 #
3 # $Id: ddtp_i18n_check.sh 2535 2011-02-19 14:20:52Z nekral-guest $
4
5 # Copyright (C) 2008, 2011 Felipe Augusto van de Wiel <faw@funlabs.org>
6 # Copyright (C) 2008, 2009 Nicolas François <nicolas.francois@centraliens.net>
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # On Debian systems, you can find the full text of the license in
14 # /usr/share/common-licenses/GPL-2
15
# Abort on any unhandled command failure and on use of an unset variable.
set -e -u
# Run every tool in the C locale.
LC_ALL=C
export LC_ALL

# DEBUG must be either 0 or 1:
#   0 - fail after the first error
#   1 - keep going and list all the errors
DEBUG=0

# With DRY_RUN=0 the compressed version of the Translation-* files is
# generated; any other value skips that step.
DRY_RUN=0

# Positional arguments; both are filled in while parsing the options.
dists_parent_dir=
# When no argument names the packages lists directory, '.' is used.
PACKAGES_LISTS_DIR=
31
# Print the command line synopsis on stderr and terminate the script
# with exit status 1.
usage () {
        printf '%s\n' \
                "Usage: $0 [options] <dists_parent_dir> [<packages_lists_directory>]" \
                "" \
                "    --debug      Debug mode: do not stop after the first error" \
                "    --dry-run    Do not generate the compressed version of the " \
                "                 Translation files" \
                >&2
        exit 1
}
40
# Parse the command line.
#
# Arguments: the script's command line ("$@").
# Globals written:
#   DEBUG               set to 1 by --debug
#   DRY_RUN             set to 1 by --dry-run
#   dists_parent_dir    first non-option argument, one trailing / removed
#   PACKAGES_LISTS_DIR  second non-option argument, '.' when absent
# Any other word starting with '-', an empty argument, or a third
# non-option argument ends the script through usage().
parse_options () {
        for opt; do
                case "$opt" in
                        --debug)
                                DEBUG=1
                                ;;
                        --dry-run)
                                DRY_RUN=1
                                ;;
                        # The pattern must stay unquoted: a quoted "-*"
                        # only matches the two literal characters.
                        -*)
                                usage
                                ;;
                        "")
                                echo "Empty parameter" >&2
                                echo "" >&2
                                usage
                                ;;
                        *)
                                if [ -z "$dists_parent_dir" ]; then
                                        # Removing trailing /
                                        dists_parent_dir=${opt%/}
                                elif [ -z "$PACKAGES_LISTS_DIR" ]; then
                                        PACKAGES_LISTS_DIR=$opt
                                else
                                        echo "$0: Invalid option: $opt" >&2
                                        usage
                                fi
                                ;;
                esac
        done

        # Default to the current directory only when no packages lists
        # directory was given; never reuse the last word of the command
        # line (which may be an option or the dists parent directory).
        PACKAGES_LISTS_DIR=${PACKAGES_LISTS_DIR:-.}
}
parse_options "$@"
72
# Both directories have to exist before any work is done. usage() ends
# the script, so at most one of the two messages is ever printed.
if [[ ! -d "$dists_parent_dir" ]]; then
        printf '%s\n' "missing dists_parent_dir, or not a directory" "" >&2
        usage
elif [[ ! -d "$PACKAGES_LISTS_DIR" ]]; then
        printf '%s\n' "missing packages_lists_directory, or not a directory" "" >&2
        usage
fi
82
# Suite code names. STABLE stays disabled, matching the commented-out
# entries of the directory whitelist in is_dirname_okay.
#STABLE="squeeze"
TESTING=wheezy
UNSTABLE=sid

# Name of the original SHA256SUMS file, generated by i18n.debian.net
CHECKSUMS=SHA256SUMS

# Name of the DAK timestamp file
TIMESTAMP=timestamp

# Files that must exist at the top of dists_parent_dir: the checksums,
# the timestamp and the timestamp's .gpg companion.
SPECIAL_FILES="${CHECKSUMS} ${TIMESTAMP} ${TIMESTAMP}.gpg"
95
# Create the temporary working directory and remember it by its full
# path: an absolute TMP_WORK_DIR keeps the CHECKSUMS comparison and the
# final cleanup simple, whatever the current directory is at that time.
TEMP_WORK_DIR=$(mktemp -d -t ddtp_dinstall_tmpdir.XXXXXX)
# Enter the directory, record what the shell calls it, then come back.
cd "$TEMP_WORK_DIR"
TMP_WORK_DIR=$PWD
cd "$OLDPWD"
# Only TMP_WORK_DIR is used from here on.
unset TEMP_WORK_DIR
103
# Failure handler. It stays armed until the very end of the script, so
# whenever it runs something went wrong: remove the temporary working
# directory together with the generated main/i18n Translation-*.bz2 and
# Index files of every suite, then leave with exit status 1.
trap_exit () {
        rm -rf "$TMP_WORK_DIR"
        rm -f "$dists_parent_dir"/dists/*/main/i18n/Translation-*.bz2
        rm -f "$dists_parent_dir"/dists/*/main/i18n/Index
        exit 1
}
# Arm the handler for script exit and for the usual termination signals.
for ddtp_trap_condition in EXIT HUP INT QUIT TERM; do
        trap trap_exit "$ddtp_trap_condition"
done
unset ddtp_trap_condition
112
# Check that a path names a Translation file inside an "i18n" directory.
#
# Arguments: $1 - path of the file to check
# Returns:   0 when the parent directory is called "i18n" and the file
#            name is Translation-<lang>, where <lang> is two or three
#            lower case letters optionally followed by "_" and two
#            upper case letters (e.g. de, ast, pt_BR); 1 otherwise.
is_filename_okay () {
        local ifo_file ifo_d ifo_f
        ifo_file="$1"

        # Check that the file is in an "i18n" directory.
        # This ensures that the Translation-$lang files are not e.g. in
        # dists/etch/ or dists/etch/main/
        # The inner substitution is quoted so that a path containing
        # whitespace is not split into several basename arguments.
        ifo_d=$(basename "$(dirname "$ifo_file")")
        [ "$ifo_d" = "i18n" ] || return 1

        # Check that the file is named Translation-$lang
        ifo_f=$(basename "$ifo_file")
        case "$ifo_f" in
                Translation-[a-z][a-z][a-z]_[A-Z][A-Z] | \
                Translation-[a-z][a-z]_[A-Z][A-Z] | \
                Translation-[a-z][a-z][a-z] | \
                Translation-[a-z][a-z])
                        return 0
                        ;;
        esac

        return 1
}
134
# Check a directory name against the directory whitelist.
#
# The whitelist is $dists_parent_dir, $dists_parent_dir/dists and, for
# each of the $TESTING and $UNSTABLE suites, the suite directory plus
# its main, contrib and non-free directories and their i18n directory.
#
# TODO/FIXME: It is undecided how to update at stable/point-releases, so we
#             don't allow files to $STABLE (it is not part of the suite
#             list below).
#
# Arguments: $1 - directory name, compared literally
# Returns:   0 when the directory is whitelisted, 1 otherwise
is_dirname_okay () {
        local ido_suite_dir ido_suite ido_component
        ido_dir="$1"

        if [ "$ido_dir" = "$dists_parent_dir" ] ||
           [ "$ido_dir" = "$dists_parent_dir/dists" ]; then
                return 0
        fi

        for ido_suite in "$TESTING" "$UNSTABLE"; do
                ido_suite_dir="$dists_parent_dir/dists/$ido_suite"
                if [ "$ido_dir" = "$ido_suite_dir" ]; then
                        return 0
                fi
                for ido_component in main contrib non-free; do
                        case "$ido_dir" in
                                "$ido_suite_dir/$ido_component" | \
                                "$ido_suite_dir/$ido_component/i18n")
                                        return 0
                                        ;;
                        esac
                done
        done

        return 1
}
169
# Check that every entry of a Translation-<lang> file has the expected
# fields in the expected order.
#
# An entry (block) must consist of, in this order:
#   Package: ...
#   Description-md5: ...
#   Description-<lang>: ...       (short description)
#    ...                          (one or more long description lines,
#                                  each starting with a space)
# and is ended by an empty line; the last block of the file may also be
# ended by the end of the file. <lang> is whatever follows the last "-"
# of the given path.
#
# Arguments: $1 - path of the Translation-<lang> file
# Globals:   DEBUG (read) - 0: stop at the first error,
#                           otherwise: report all the errors
# Outputs:   one message plus a status line on stdout for each error
# Returns:   0 when the file is valid, 1 otherwise
has_valid_fields () {
        local hvf_file hvf_lang
        hvf_file="$1"
        hvf_lang=${hvf_file##*-}

        # The awk program is single-quoted and gets its parameters through
        # -v and FILENAME: the file name is never pasted into the program
        # text or into a printf format, so quotes, backslashes or percent
        # signs in the path cannot alter the program or its messages.
        awk -v lang="$hvf_lang" -v debug="${DEBUG:-0}" '
function print_status() {
        printf("p: %d, m: %d, s: %d, l: %d\n", package, md5, s_description, l_description)
}

# Report a problem on the current line and mark the current block as
# failed. Outside of debug mode this ends the program with status 1.
function report(msg) {
        printf("%s in %s (line %d)\n", msg, FILENAME, NR)
        print_status()
        failed = 1
        if (debug + 0) { failures++ } else { exit 1 }
}

BEGIN {
        package       = 0 # Indicates if a Package field was found
        md5           = 0 # Indicates if a Description-md5 field was found
        s_description = 0 # Indicates if a short description was found
        l_description = 0 # Indicates if a long description was found

        failures      = 0 # Number of failures (debug only)
        failed        = 0 # Failure already reported for the block
}

# Package must be the first field of a block
/^Package: / {
        if (0 == failed) {
                if (0 != package || 0 != md5 ||
                    0 != s_description || 0 != l_description) {
                        report("Package field unexpected")
                }
                package++
        }
        next
}

# Description-md5 must directly follow the Package field
/^Description-md5: / {
        if (0 == failed) {
                if (1 != package || 0 != md5 ||
                    0 != s_description || 0 != l_description) {
                        report("Description-md5 field unexpected")
                }
                md5++
        }
        next
}

# The short description must follow the Description-md5 field
index($0, "Description-" lang ": ") == 1 {
        if (0 == failed) {
                if (1 != package || 1 != md5 ||
                    0 != s_description || 0 != l_description) {
                        report("Description-" lang " field unexpected")
                }
                s_description++
        }
        next
}

# Long description lines are only valid after the short description
/^ / {
        if (0 == failed) {
                if (1 != package || 1 != md5 || 1 != s_description) {
                        report("Long description unexpected")
                }
                l_description = 1 # There can be any number of long description
                                  # lines. Do not count.
        }
        next
}

# An empty line ends a block, which must be complete at that point
/^$/ {
        if (0 == failed) {
                if (1 != package || 1 != md5 ||
                    1 != s_description || 1 != l_description) {
                        report("End of block unexpected")
                }
        }

        # Next package
        package = 0; md5 = 0; s_description = 0; l_description = 0
        failed = 0

        next
}

# Anything else: fail, showing the offending line itself
{
        report("Unexpected line \047" $0 "\047")
}

END {
        if (0 == failed) {
                # They must be all set to 0 or all set to 1
                some_missing = (0 == package || 0 == md5 ||
                                0 == s_description || 0 == l_description)
                some_found   = (0 != package || 0 != md5 ||
                                0 != s_description || 0 != l_description)
                if (some_missing && some_found) {
                        printf("End of file unexpected in %s (line %d)\n", FILENAME, NR)
                        print_status()
                        exit 1
                }
        }

        if (failures > 0) {
                exit 1
        }
}
' "$hvf_file" || return 1

        return 0
}
310
# Every file listed in $SPECIAL_FILES must exist at the top of
# dists_parent_dir. $SPECIAL_FILES is a space separated list, so it is
# expanded unquoted on purpose.
for sf in $SPECIAL_FILES; do
        if [ ! -f "$dists_parent_dir/$sf" ]; then
                # Reported on stderr, like every other error of this script
                echo "Special file ($sf) doesn't exist" >&2
                exit 1
        fi
done
318
# Comparing CHECKSUMS
# sha256sum -c is not used because it would not notice a file which
# exists in the directory tree but is not listed in CHECKSUMS. Instead a
# new checksum list is computed for every file below dists/, both lists
# are sorted, and any difference between them stops the script.
cd "$dists_parent_dir"
find dists -type f -print0 | xargs -0 sha256sum > "$TMP_WORK_DIR/$CHECKSUMS.new"
sort -o "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$CHECKSUMS"
sort -o "$TMP_WORK_DIR/$CHECKSUMS.new.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new"
if ! cmp -s "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted"; then
        echo "Failed to compare the $CHECKSUMS, they are not identical!" >&2
        # Show what differs before giving up
        diff -au "$TMP_WORK_DIR/$CHECKSUMS.sorted" "$TMP_WORK_DIR/$CHECKSUMS.new.sorted" >&2
        exit 1
fi
cd "$OLDPWD"
334
# Build the list of valid package names (sorted, without duplicates)
# for each suite: the first space separated field of every line of
# $PACKAGES_LISTS_DIR/<suite> is written to $TMP_WORK_DIR/<suite>.pkgs
for t in "$TESTING" "$UNSTABLE"; do
        [ -f "$PACKAGES_LISTS_DIR/$t" ] || {
                echo "Missing $PACKAGES_LISTS_DIR/$t" >&2
                exit 1
        }
        cut -d ' ' -f 1 "$PACKAGES_LISTS_DIR/$t" |
                sort -u > "$TMP_WORK_DIR/$t.pkgs"
done
343
# Walk the whole tree below dists_parent_dir and validate every entry:
# directories against the whitelist, files against the naming rules,
# the field layout, the list of known packages and the encoding.
# The names are passed NUL-delimited and read with "IFS= read -r" so
# that whitespace or backslashes in a name cannot be altered before the
# name is checked.
/usr/bin/find "$dists_parent_dir" -print0 |
while IFS= read -r -d '' f; do
        if   [ -d "$f" ]; then
                if ! is_dirname_okay "$f"; then
                        echo "Wrong directory name: $f" >&2
                        exit 1
                fi
        elif [ -f "$f" ]; then
                # If $f is in $SPECIAL_FILES, we skip to the next loop because
                # we won't check it for format, fields and encoding.
                for sf in $SPECIAL_FILES; do
                        if [ "$f" = "$dists_parent_dir/$sf" ]; then
                                continue 2
                        fi
                done

                if ! is_filename_okay "$f"; then
                        echo "Wrong file: $f" >&2
                        exit 1
                fi

                # Check that all entries contain the right fields
                if ! has_valid_fields "$f"; then
                        echo "File $f has an invalid format" >&2
                        exit 1
                fi

                # Check that every package in Translation-$lang exists
                TPKGS=$(basename "$f").pkgs
                grep "^Package: " "$f" | cut -d' ' -f 2 | sort -u > "$TMP_WORK_DIR/$TPKGS"
                case "$f" in
                        */$TESTING/*)  t="$TESTING";;
                        */$UNSTABLE/*) t="$UNSTABLE";;
                        *)
                                # Never compare against a suite left over
                                # from an earlier iteration.
                                echo "$f does not belong to a known suite" >&2
                                exit 1
                                ;;
                esac
                if diff "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" | grep -q "^>"; then
                        diff -au "$TMP_WORK_DIR/$t.pkgs" "$TMP_WORK_DIR/$TPKGS" |grep "^+"
                        echo "$f contains packages which are not in $t" >&2
                        exit 1
                fi

                # Check encoding
                iconv -f utf-8 -t utf-8 < "$f" > /dev/null 2>&1 || {
                        echo "$f is not an UTF-8 file" >&2
                        exit 1
                }

                # We do not check if the md5 in Translation-$lang are
                # correct.

                if [ "$DRY_RUN" = "0" ]; then
                        # Now generate the compressed files
                        bzip2 "$f"
                fi
        else
                echo "Neither a file or directory: $f" >&2
                exit 1
        fi
done || false
# The while loop runs as part of a pipeline: an "exit 1" inside it only
# makes the loop fail, it does not exit the script. "|| false" makes
# the script fail (and exit) in that case.
404
# Reaching this point means that every check passed.
printf '%s\n' "$dists_parent_dir structure validated successfully ($(date +%c))"

# Disarm the exit handler first, so that leaving the script normally is
# not handled as a failure, then remove the temporary working directory.
trap - EXIT
rm -rf "$TMP_WORK_DIR"
410