# HG changeset patch # User Aleksej Bobylev # Date 1481514971 -7200 # Node ID 47cacd72d7ceb13a277455b5d99c3268f2cec61f # Parent a3a05fc499d0d4ce80edf61880897357503aca9e cook: add normalize_mo(); update doc/cookopts.txt; tiny edits. diff -r a3a05fc499d0 -r 47cacd72d7ce cook --- a/cook Sun Dec 11 15:46:38 2016 +0100 +++ b/cook Mon Dec 12 05:56:11 2016 +0200 @@ -629,7 +629,7 @@ [ ! -d "$fs/usr/share/locale" ] && return [ -z "$(find $fs/usr/share/locale -type f -name '*.mo')" ] && return - action 'Stripping translations files...' + action 'Thin out translation files...' local size0=$(find $fs/usr/share/locale -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}') local time0=$(date +%s) @@ -660,6 +660,136 @@ } +# Normalize all *.mo files: unconditionally convert to UTF-8; remove strings that do not actually add a +# translation (msgid = msgstr) +# Normalization can be disabled with COOKOPTS="!monorm" + +normalize_mo() { + [ "${COOKOPTS/!monorm/}" != "$COOKOPTS" ] && return + [ -z "$(find $install -type f -name '*.mo')" ] && return + + action 'Normalizing mo files...' + local size0=$(find $install -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}') + local time0=$(date +%s) + + # Gettext functions: msgunfmt, msguniq, msgconv, msgfmt + tazpkg -gi gettext --quiet + # Gconv modules (convert to UTF-8) + tazpkg -gi glibc-locale --quiet + + # Process all existing *.mo files + for mo in $(find "$install" -type f -name '*.mo'); do + tmpfile="$(mktemp)" + + msgunfmt "$mo" | msguniq | msgconv -o "$tmpfile" -t 'UTF-8' + # add newline + echo >> "$tmpfile" + + # get Plural-Forms + awk ' + BEGIN { skip = ""; } + { + if (! skip) { + s = $0; + gsub(/^[^\"]*\"/, "", s); + gsub(/\"$/, "", s); + printf("%s", s); + } + if (! $0) skip = "yes"; + } + ' "$tmpfile" | sed 's|\\n|\n|g' | grep "^Plural-Forms:" > "$tmpfile.pf" + + if ! 
grep -q 'msgid_plural' "$tmpfile"; then + echo > "$tmpfile.pf" + fi + + # main + awk -v pf="$(cat "$tmpfile.pf")" ' + function clean() { + mode = msgctxt = msgid = msgid_plural = msgstr = msgstr0 = msgstr1 = msgstr2 = msgstr3 = msgstr4 = msgstr5 = ""; + } + + function getstring() { + # Skip unquoted words at the beginning (msgid, msgstr...) and get string from inside quotes + s = $0; + gsub(/^[^\"]*\"/, "", s); + gsub(/\"$/, "", s); + return s; + } + + BEGIN { + printf("msgid \"\"\nmsgstr \"\"\n\"Content-Type: text/plain; charset=UTF-8\\n\"\n"); + if (pf) + printf("\"%s\\n\"\n", pf); + printf("\n"); + skip = 1; + clean(); + } + + { + # Skip the entire header + if (!skip) { + if ($1 == "msgctxt" || $1 == "msgid" || $1 == "msgstr" || $1 == "msgid_plural") + mode = $1; + if ($1 == "msgstr[0]") mode = "msgstr0"; + if ($1 == "msgstr[1]") mode = "msgstr1"; + if ($1 == "msgstr[2]") mode = "msgstr2"; + if ($1 == "msgstr[3]") mode = "msgstr3"; + if ($1 == "msgstr[4]") mode = "msgstr4"; + if ($1 == "msgstr[5]") mode = "msgstr5"; + + if (mode == "msgctxt") msgctxt = msgctxt getstring(); + if (mode == "msgid") msgid = msgid getstring(); + if (mode == "msgstr") msgstr = msgstr getstring(); + if (mode == "msgid_plural") msgid_plural = msgid_plural getstring(); + if (mode == "msgstr0") msgstr0 = msgstr0 getstring(); + if (mode == "msgstr1") msgstr1 = msgstr1 getstring(); + if (mode == "msgstr2") msgstr2 = msgstr2 getstring(); + if (mode == "msgstr3") msgstr3 = msgstr3 getstring(); + if (mode == "msgstr4") msgstr4 = msgstr4 getstring(); + if (mode == "msgstr5") msgstr5 = msgstr5 getstring(); + + if (! 
$0) { + if (msgid != msgstr) { + if (msgctxt) printf("msgctxt \"%s\"\n", msgctxt); + printf("msgid \"%s\"\n", msgid); + if (msgid_plural) printf("msgid_plural \"%s\"\n", msgid_plural); + if (msgstr) printf("msgstr \"%s\"\n", msgstr); + if (msgstr0) printf("msgstr[0] \"%s\"\n", msgstr0); + if (msgstr1) printf("msgstr[1] \"%s\"\n", msgstr1); + if (msgstr2) printf("msgstr[2] \"%s\"\n", msgstr2); + if (msgstr3) printf("msgstr[3] \"%s\"\n", msgstr3); + if (msgstr4) printf("msgstr[4] \"%s\"\n", msgstr4); + if (msgstr5) printf("msgstr[5] \"%s\"\n", msgstr5); + printf("\n"); + } + clean(); + } + } + if ($0 == "") skip = ""; + } + ' "$tmpfile" > "$tmpfile.awk" + + msgfmt "$tmpfile.awk" -o "$tmpfile.mo" + + if [ -s "$tmpfile.mo" ]; then + rm "$mo"; mv "$tmpfile.mo" "$mo" + else + _ 'Error processing %s' "$mo" + [ -e "$tmpfile.mo" ] && rm "$tmpfile.mo" + fi + + # Clean + rm "$tmpfile" "$tmpfile.pf" "$tmpfile.awk" + done + + local size1=$(find $install -type f -name '*.mo' -exec ls -l \{\} \; | awk '{s+=$5}END{print s}') + local time1=$(date +%s) + status + comp_summary "$time0" "$time1" "$size0" "$size1" +} + + # Update installed.cook.diff update_installed_cook_diff() { @@ -740,7 +870,7 @@ # Function used after compile_rules() to compress all png images # Compressing can be disabled with COOKOPTS="!pngz" -cook_compress_png() { +compress_png() { [ "${COOKOPTS/!pngz/}" != "$COOKOPTS" ] && return case "$ARCH" in arm*) return;; # While SliTaz-arm miss `pngquant` and `optipng` @@ -777,7 +907,7 @@ # Function used after compile_rules() to compress all svg images # Compressing can be disabled with COOKOPTS="!svgz" -cook_compress_svg() { +compress_svg() { [ "${COOKOPTS/!svgz/}" != "$COOKOPTS" ] && return case "$ARCH" in arm*) return;; # While SliTaz-arm miss `svgcleaner` @@ -1053,10 +1183,11 @@ if [ -z "$WANTED" ]; then footer compress_manpages - cook_compress_png - cook_compress_svg + compress_png + compress_svg compress_ui fix_desktop_files + normalize_mo fi footer diff -r 
a3a05fc499d0 -r 47cacd72d7ce doc/cookopts.txt --- a/doc/cookopts.txt Sun Dec 11 15:46:38 2016 +0100 +++ b/doc/cookopts.txt Mon Dec 12 05:56:11 2016 +0200 @@ -37,15 +37,15 @@ *.mo files. The presence of this option overrides the default action (all existing *.mo files will remain). - Please note, you can add all the translations to the package, for example, using - the command: `cook_copy_files *.mo` (then by default only the supported locales - will be left). + Please note, you can add all the translations to the package, for example, + using the command: `cook_copy_files *.mo` (then by default only the + supported locales will be left). !extradesktops - Default action is to remove extra information from the desktop files: entries - such as 'GenericName', 'X-GNOME-FullName' (all X-* are exposed), - 'Terminal=false', Keywords and other sections. This extra information just isn't - supported in the current (LXDE) environment. + Default action is to remove extra information from the desktop files: + entries such as 'GenericName', 'X-GNOME-FullName' (all X-* are exposed), + 'Terminal=false', Keywords and other sections. This extra information just + isn't supported in the current (LXDE) environment. The presence of this option overrides the default action (extra information will not be removed from the .desktop files). @@ -60,8 +60,8 @@ !manz Default action is to compress all man pages. The presence of this option overrides the default action (all man pages will - be left "as is"). Please note that these files may be installed in a compressed - format. + be left "as is"). Please note that these files may be installed in a + compressed format. !pngz Default action is to compress all PNG images. Image compression allows you @@ -87,9 +87,9 @@ op0 to op8 Default action is to use optimization level 2 while processing PNG images by `optipng`. Using this option you can set the desired `optipng` optimization - level. 
The higher the level, the slower the compression and the smaller the file - size. Please note that this option has no exclamation mark, because it means - "don't do", and here, on the contrary, "to do". + level. The higher the level, the slower the compression and the smaller the + file size. Please note that this option has no exclamation mark, because it + means "don't do", and here, on the contrary, "to do". !svgz Default action is to compress all SVG images. @@ -101,3 +101,11 @@ of insignificant spaces and comments. The presence of this option overrides the default action (all *.ui and *.glade files will be left "as is"). + +!monorm + Default action is to normalize *.mo files. Normalization means the + unconditional conversion to UTF-8 and the removal of accidental duplicates, + most of the header lines, and catalog entries that add no actual + translation (when the string in msgid equals the string in msgstr). + The presence of this option overrides the default action (all *.mo files + will be left "as is").