Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 1 | # Copyright 2018 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Helpers for dealing with translation files.""" |
| 6 | |
Raul Tambre | ca9124e4 | 2019-09-27 04:13:35 | [diff] [blame] | 7 | from __future__ import print_function |
| 8 | |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 9 | import ast |
| 10 | import os |
| 11 | import re |
| 12 | import xml.etree.cElementTree as ElementTree |
| 13 | |
| 14 | |
class GRDFile(object):
  """Class representing a grd xml file.

  Attributes:
    path: the path to the grd file.
    dir: the path to the grd's parent directory.
    name: the base name of the grd file.
    grdp_paths: the list of grdp files included in the grd via <part>.
    structure_paths: the paths of any <structure> elements in the grd file.
    xtb_paths: the xtb paths where the grd's translations live.
    lang_to_xtb_path: maps each language to the xtb path for that language.
        Empty until _populate_lang_to_xtb_path() has been called.
    appears_translatable: whether the contents of the grd indicate that it's
        supposed to be translated.
    expected_languages: the languages that this grd is expected to have
        translations for, based on the translation expectations file. None
        until a caller assigns it.
  """

  def __init__(self, path):
    """Parses the grd file at |path| and records the paths it references.

    Args:
      path: the path to the grd file.
    """
    self.path = path
    self.dir, self.name = os.path.split(path)
    dom, self.grdp_paths = _parse_grd_file(path)
    # The 'file' / 'path' attributes are joined onto the grd's own directory,
    # including those of elements that were inlined from <part> files.
    self.structure_paths = [os.path.join(self.dir, s.get('file'))
                            for s in dom.findall('.//structure')]
    self.xtb_paths = [os.path.join(self.dir, f.get('path'))
                      for f in dom.findall('.//file')]
    self.lang_to_xtb_path = {}
    # A grd looks translatable if it references any xtb file or contains at
    # least one <message> element.
    self.appears_translatable = (bool(self.xtb_paths) or
                                 dom.find('.//message') is not None)
    self.expected_languages = None

  def _populate_lang_to_xtb_path(self, errors):
    """Populates the lang_to_xtb_path attribute from xtb_paths.

    Any previously computed mapping is discarded first, so calling this method
    more than once does not report every xtb file as a duplicate.

    Args:
      errors: a list to which a message is appended for each xtb whose name
          does not look like <grd name>_<lang>.xtb, and for each language
          that is listed more than once.

    Returns:
      The same errors list that was passed in.
    """
    self.lang_to_xtb_path = {}
    grd_root = os.path.splitext(self.name)[0]
    lang_pattern = re.compile(r'%s_([^_]+)\.xtb$' % re.escape(grd_root))
    for xtb_path in self.xtb_paths:
      xtb_basename = os.path.basename(xtb_path)
      xtb_lang_match = lang_pattern.match(xtb_basename)
      if not xtb_lang_match:
        errors.append('%s: invalid xtb name: %s. xtb name must be %s_<lang>'
                      '.xtb where <lang> is the language code.' %
                      (self.name, xtb_basename, grd_root))
        continue
      xtb_lang = xtb_lang_match.group(1)
      if xtb_lang in self.lang_to_xtb_path:
        errors.append('%s: %s is listed twice' % (self.name, xtb_basename))
        continue
      self.lang_to_xtb_path[xtb_lang] = xtb_path

    return errors
| 64 | |
| 65 | |
def get_translatable_grds(repo_root, all_grd_paths,
                          translation_expectations_path):
  """Returns all the grds that should be translated as a list of GRDFiles.

  This verifies that every grd file that appears translatable is listed in
  the translation expectations, and that every grd in the translation
  expectations actually exists.

  Args:
    repo_root: The path to the root of the repository.
    all_grd_paths: All grd paths in the repository relative to repo_root.
        This list is modified in place: every path listed under
        internal_grds in the expectations is removed from it.
    translation_expectations_path: The path to the translation expectations
        file, which specifies which grds to translate and into which languages.

  Returns:
    A list with one GRDFile per grd that the expectations map to languages.
    Each has expected_languages assigned and lang_to_xtb_path populated.

  Raises:
    Exception: if the expectations and the grd files disagree (a translatable
        grd is not listed, a listed grd does not exist, an xtb file is
        misnamed or listed twice, or the languages a grd references differ
        from the expected ones). The message lists every problem found.
  """
  grd_to_langs, untranslated_grds, internal_grds = (
      _parse_translation_expectations(translation_expectations_path))

  errors = []
  # Make sure that grds in internal_grds aren't processed, since they might
  # contain pieces not available publicly.
  for internal_grd in internal_grds:
    try:
      all_grd_paths.remove(internal_grd)
    except ValueError:
      errors.append(
          '%s is listed in translation expectations as an internal file to be '
          'ignored, but this grd file does not exist.' % internal_grd)

  # Check that every grd that appears translatable is listed in
  # the translation expectations.
  grds_with_expectations = set(grd_to_langs).union(untranslated_grds)
  all_grds = {p: GRDFile(os.path.join(repo_root, p)) for p in all_grd_paths}
  # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
  for path, grd in all_grds.items():
    if grd.appears_translatable and path not in grds_with_expectations:
      errors.append('%s appears to be translatable (because it contains '
                    '<file> or <message> elements), but is not listed in the '
                    'translation expectations.' % path)

  # Check that every file in translation_expectations exists. all_grds is
  # keyed by exactly the paths in all_grd_paths, so the dict lookup gives the
  # same answer as scanning the list.
  for path in grds_with_expectations:
    if path not in all_grds:
      errors.append('%s is listed in the translation expectations, but this '
                    'grd file does not exist.' % path)

  if errors:
    raise Exception('%s needs to be updated. Please fix these issues:\n - %s' %
                    (translation_expectations_path, '\n - '.join(errors)))

  translatable_grds = []
  for path, expected_languages_list in grd_to_langs.items():
    grd = all_grds[path]
    grd.expected_languages = expected_languages_list
    grd._populate_lang_to_xtb_path(errors)
    translatable_grds.append(grd)

    # Ensure each grd lists the expected languages.
    expected_languages = set(expected_languages_list)
    actual_languages = set(grd.lang_to_xtb_path)
    missing_languages = expected_languages.difference(actual_languages)
    unexpected_languages = actual_languages.difference(expected_languages)
    if missing_languages:
      errors.append('%s: missing translations for these languages: %s. Add '
                    '<file> and <output> elements to the grd file, or update '
                    'the translation expectations.' %
                    (grd.name, sorted(missing_languages)))
    if unexpected_languages:
      errors.append('%s: references translations for unexpected languages: %s. '
                    'Remove the offending <file> and <output> elements from the'
                    ' grd file, or update the translation expectations.' %
                    (grd.name, sorted(unexpected_languages)))

  if errors:
    raise Exception('Please fix these issues:\n - %s' %
                    ('\n - '.join(errors)))

  return translatable_grds
| 142 | |
| 143 | |
| 144 | def _parse_grd_file(grd_path): |
| 145 | """Reads a grd(p) file and any subfiles included via <part file="..." />. |
| 146 | |
| 147 | Args: |
| 148 | grd_path: The path of the .grd or .grdp file. |
| 149 | Returns: |
| 150 | A tuple (grd_dom, grdp_paths). dom is an ElementTree DOM for the grd file, |
| 151 | with the <part> elements inlined. grdp_paths is the list of grdp files that |
| 152 | were included via <part> elements. |
| 153 | """ |
| 154 | grdp_paths = [] |
| 155 | grd_dom = ElementTree.parse(grd_path) |
| 156 | # We modify grd in the loop, so listify this iterable to be safe. |
| 157 | part_nodes = list(grd_dom.findall('.//part')) |
| 158 | for part_node in part_nodes: |
| 159 | grdp_rel_path = part_node.get('file') |
| 160 | grdp_path = os.path.join(os.path.dirname(grd_path), grdp_rel_path) |
| 161 | grdp_paths.append(grdp_path) |
Josiah K | 8c97ce7 | 2020-09-03 16:58:23 | [diff] [blame] | 162 | grdp_dom, grdp_grdp_paths = _parse_grd_file(grdp_path) |
| 163 | grdp_paths.extend(grdp_grdp_paths) |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 164 | part_node.append(grdp_dom.getroot()) |
| 165 | return grd_dom, grdp_paths |
| 166 | |
| 167 | |
| 168 | def _parse_translation_expectations(path): |
| 169 | """Parses a translations expectations file. |
| 170 | |
| 171 | Example translations expectations file: |
| 172 | { |
| 173 | "desktop_grds": { |
| 174 | "languages": ["es", "fr"], |
| 175 | "files": [ |
| 176 | "ash/ash_strings.grd", |
| 177 | "ui/strings/ui_strings.grd", |
| 178 | ], |
| 179 | }, |
| 180 | "android_grds": { |
| 181 | "languages": ["de", "pt-BR"], |
| 182 | "files": [ |
| 183 | "chrome/android/android_chrome_strings.grd", |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 184 | ], |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 185 | }, |
| 186 | "untranslated_grds": { |
| 187 | "chrome/locale_settings.grd": "Not UI strings; localized separately", |
| 188 | "chrome/locale_settings_mac.grd": "Not UI strings; localized separately", |
| 189 | }, |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 190 | "internal_grds": [ |
| 191 | "chrome/internal.grd", |
| 192 | ], |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 193 | } |
| 194 | |
| 195 | Returns: |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 196 | A tuple (grd_to_langs, untranslated_grds, internal_grds). |
| 197 | grd_to_langs maps each grd path to the list of languages into which |
| 198 | that grd should be translated. untranslated_grds is a list of grds |
| 199 | that "appear translatable" but should not be translated. |
| 200 | internal_grds is a list of grds that are internal only and should |
| 201 | not be read by this helper (since they might contain parts not |
| 202 | available publicly). |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 203 | """ |
| 204 | with open(path) as f: |
| 205 | file_contents = f.read() |
| 206 | |
| 207 | def assert_list_of_strings(l, name): |
| 208 | assert isinstance(l, list) and all(isinstance(s, basestring) for s in l), ( |
| 209 | '%s must be a list of strings' % name) |
| 210 | |
| 211 | try: |
| 212 | translations_expectations = ast.literal_eval(file_contents) |
| 213 | assert isinstance(translations_expectations, dict), ( |
| 214 | '%s must be a python dict' % path) |
| 215 | |
| 216 | grd_to_langs = {} |
| 217 | untranslated_grds = [] |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 218 | internal_grds = [] |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 219 | |
meacer | ff8a9b6 | 2019-12-10 19:43:58 | [diff] [blame] | 220 | for group_name, settings in translations_expectations.items(): |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 221 | if group_name == 'untranslated_grds': |
| 222 | untranslated_grds = list(settings.keys()) |
| 223 | assert_list_of_strings(untranslated_grds, 'untranslated_grds') |
| 224 | continue |
| 225 | |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 226 | if group_name == 'internal_grds': |
| 227 | internal_grds = settings |
| 228 | assert_list_of_strings(internal_grds, 'internal_grds') |
| 229 | continue |
| 230 | |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 231 | languages = settings['languages'] |
| 232 | files = settings['files'] |
| 233 | assert_list_of_strings(languages, group_name + '.languages') |
| 234 | assert_list_of_strings(files, group_name + '.files') |
| 235 | for grd in files: |
| 236 | grd_to_langs[grd] = languages |
| 237 | |
Bruno Santos | ab8144d | 2019-05-07 18:54:41 | [diff] [blame] | 238 | return grd_to_langs, untranslated_grds, internal_grds |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 239 | |
| 240 | except Exception: |
Raul Tambre | ca9124e4 | 2019-09-27 04:13:35 | [diff] [blame] | 241 | print('Error: failed to parse', path) |
Mustafa Emre Acer | ac34055c | 2018-03-08 22:07:44 | [diff] [blame] | 242 | raise |