Ian Vollick | 00424e5 | 2023-02-03 10:35:50 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright 2023 The Chromium Authors |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """ |
| 6 | Updates .filelist files using data from corresponding .globlist files (or |
| 7 | checks whether they are up to date). |
| 8 | |
| 9 | bundle_data targets require an explicit source list, but maintaining these large |
| 10 | lists can be cumbersome. This script aims to simplify the process of updating |
| 11 | these lists by either expanding globs to update file lists or check that an |
| 12 | existing file list matches such an expansion (i.e., checking during presubmit). |
| 13 | |
| 14 | The .globlist file contains a list of globs that will be expanded to either |
| 15 | compare or replace a corresponding .filelist. It is possible to exclude items |
| 16 | from the file list with globs as well. These lines are prefixed with '-' and are |
processed in order, so be sure that exclusions come after the inclusions they
refine. Comments and empty lines are permitted in .globlist files; comments are
prefixed with '#'.
| 20 | |
| 21 | By convention, the base name of the .globlist and .filelist files matches the |
| 22 | label of their corresponding bundle_data from the .gn file. In order to ensure |
| 23 | that these filelists don't get stale, there should also be a PRESUBMIT.py |
which uses this script to check that the list is up to date.
| 25 | |
| 26 | By default, the script will update the file list to match the expanded globs. |
| 27 | """ |
| 28 | |
| 29 | import argparse |
| 30 | import datetime |
| 31 | import difflib |
| 32 | import glob |
| 33 | import os.path |
| 34 | import re |
Ian Vollick | 4ff0174 | 2023-02-14 12:50:03 | [diff] [blame] | 35 | import subprocess |
Ian Vollick | 00424e5 | 2023-02-03 10:35:50 | [diff] [blame] | 36 | import sys |
| 37 | |
# Escape sequences that switch terminal text to the error color and back to
# the default. Taken, along with the printing routine below, from
# update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# Text of the generated-file banner with a single '%s' placeholder where the
# copyright year goes. Both the banner written to file lists and the pattern
# used to recognize an existing banner are derived from it.
_HEADER_TEMPLATE = """# Copyright %s The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
# If it requires updating, you should get a presubmit error with
# instructions on how to regenerate. Otherwise, do not edit.
"""

# Banner stamped with the current year.
_HEADER = _HEADER_TEMPLATE % datetime.datetime.now().year

# Matches a banner carrying any numeric copyright year.
_HEADER_PATTERN = re.compile(_HEADER_TEMPLATE % '[0-9]+')

# Number of lines at the top of a .filelist that are treated as its banner.
_HEADER_HEIGHT = 6

# Comment directives in a .globlist that switch the "expansion outside the
# globlist directory" check off and back on.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'
| 63 | |
Ian Vollick | 00424e5 | 2023-02-03 10:35:50 | [diff] [blame] | 64 | |
def parse_filelist(filelist_name):
  """Reads a .filelist and splits it into its entries and its header.

  The first _HEADER_HEIGHT lines are treated as the header and returned
  verbatim. Every remaining line is stripped of surrounding whitespace, and the
  resulting entries are returned sorted.

  Args:
    filelist_name: path of the .filelist file to read.

  Returns:
    A (files, header) tuple. If the file cannot be read, the error is printed
    via print_error and ([], '') is returned, so callers can always unpack the
    result into two values.
  """
  try:
    # write_filelist emits UTF-8, so read the file back the same way instead of
    # relying on the platform's default encoding.
    with open(filelist_name, encoding='utf-8') as filelist:
      lines = filelist.readlines()
  except Exception as e:
    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
    return ([], '')

  header = ''.join(lines[:_HEADER_HEIGHT])
  files = sorted(l.strip() for l in lines[_HEADER_HEIGHT:])
  return (files, header)
| 75 | |
| 76 | |
def get_git_command_name():
  """Returns the name of the git executable to invoke on this platform.

  'git.bat' is used when sys.platform starts with 'win'; 'git' otherwise.
  """
  on_windows = sys.platform.startswith('win')
  return 'git.bat' if on_windows else 'git'
| 81 | |
| 82 | |
def get_tracked_files(directory, globroot, repository_root_relative, verbose):
  """Returns the set of files that git tracks under |directory|.

  Args:
    directory: directory to list; passed to `git ls-files`, which is run with
      |globroot| as its working directory.
    globroot: directory used as the working directory of the git command.
    repository_root_relative: if True, every path reported by git is prefixed
      with '//'.
    verbose: if True, failures are reported via print_error.

  Returns:
    A set of file paths. Paths obtained from git use '/' separators. If the
    list cannot be gathered (git exits with an error, or any exception is
    raised), an empty set is returned.
  """
  try:
    if os.getcwd().startswith('/google/cog/cloud'):
      # When running from this location git is not queried at all: every file
      # found on disk below |directory| is reported instead. The paths are
      # returned exactly as os.path.join builds them.
      return {
          os.path.join(root, f)
          for root, _, filenames in os.walk(directory)
          for f in filenames
      }

    # --error-unmatch makes git exit with an error if |directory| matches no
    # tracked file.
    cmd = [get_git_command_name(), 'ls-files', '--error-unmatch', directory]
    with subprocess.Popen(cmd,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          cwd=globroot) as p:
      (stdout, stderr) = p.communicate()
      returncode = p.returncode

    if returncode != 0:
      if verbose:
        # stderr is captured as bytes; decode it so the message is readable
        # rather than being printed as a b'...' literal.
        print_error(f'Could not gather a list of tracked files in {directory}',
                    stderr.decode('utf-8', errors='replace'))
      return set()

    files = [f.decode('utf-8') for f in stdout.splitlines()]

    # Need paths to be relative to directory in order to match expansions.
    # This should happen naturally due to cwd above, but we need to take
    # special care if relative to the repository root.
    if repository_root_relative:
      files = ['//' + f for f in files]

    # Handle Windows backslashes.
    return {f.replace('\\', '/') for f in files}

  except Exception as e:
    # Gathering tracked files is best effort: report (if asked to) and fall
    # back to an empty set instead of aborting.
    if verbose:
      print_error(f'Could not gather a list of tracked files in {directory}',
                  f'{type(e)}: {e}')
    return set()
| 121 | |
| 122 | |
def combine_potentially_repository_root_relative_paths(a, b):
  """Combines |a| and |b|, where |b| may be repository-root relative.

  Args:
    a: the leading path.
    b: the path to append; a leading '//' marks it as relative to the
      repository root.

  Returns:
    A (combined, root_relative) tuple. root_relative is True when |b| starts
    with '//'.
  """
  root_relative = b.startswith('//')
  # os.path treats a '//'-prefixed |b| as absolute, so os.path.join cannot be
  # used for it; plain concatenation is used in that case instead.
  combined = a + b if root_relative else os.path.join(a, b)
  return (combined, root_relative)
| 131 | |
| 132 | |
def parse_and_expand_globlist(globlist_name, glob_root):
  """Expands the globs listed in a .globlist into a sorted list of files.

  Lines are processed in order. Blank lines and lines starting with '#' are
  skipped. A line starting with '-' removes its expansion from the result;
  any other line adds its expansion. Only regular files are kept from an
  expansion.

  Unless disabled by the push/pop(ignore-relative) comment directives, every
  expanded file must be located in the directory containing the globlist (or
  below it).

  Args:
    globlist_name: path of the .globlist file to read.
    glob_root: directory that the globs are relative to.

  Returns:
    A sorted list of paths, relative to |glob_root| and using '/' separators.

  Raises:
    Exception: if an expansion lies outside the globlist's directory while
      that check is enabled. Errors from opening or reading the globlist
      propagate unchanged.
  """
  # The prefix arithmetic below expects glob_root not to end in a trailing
  # slash.
  if glob_root.endswith('/'):
    glob_root = glob_root[:-1]

  check_expansions_outside_globlist_dir = True
  globlist_dir = os.path.dirname(globlist_name)

  # A relative path escapes |globlist_dir| only if its first component is
  # '..'. A bare startswith('..') would also reject files whose own name
  # merely begins with two dots.
  parent_prefix = os.pardir + os.sep

  # Paths in |files| must use unix separators. Using a set ensures no unwanted
  # duplicates.
  files = set()

  with open(globlist_name) as globlist:
    for g in globlist:
      g = g.strip()

      # Ignore blank lines.
      if not g:
        continue

      # Toggle error checking.
      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = False
        continue
      if g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = True
        continue

      # Ignore comments.
      if g.startswith('#'):
        continue

      # Exclusions are prefixed with '-'.
      is_exclusion = g.startswith('-')
      if is_exclusion:
        g = g[1:]

      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           glob_root, g)

      # Number of leading characters to drop to make a path relative to
      # |glob_root|. Root-relative globs keep their leading '//'; otherwise
      # the separator following |glob_root| is dropped as well.
      prefix_size = len(glob_root)
      if not root_relative:
        prefix_size += 1

      # Expand the glob and filter out directories.
      expansion = [
          f for f in glob.glob(combined, recursive=True) if os.path.isfile(f)
      ]

      if check_expansions_outside_globlist_dir:
        for f in expansion:
          relative = os.path.relpath(f, globlist_dir)
          if relative == os.pardir or relative.startswith(parent_prefix):
            raise Exception(f'Globlist expansion outside globlist dir: {f}')

      # Make relative to |glob_root| and handle Windows backslashes. Since the
      # resulting paths only use unix separators, it is safe to compare them
      # for both the purpose of exclusion and addition.
      expansion = {f[prefix_size:].replace('\\', '/') for f in expansion}

      if is_exclusion:
        files -= expansion
      else:
        files |= expansion

  # Return a sorted list.
  return sorted(files)
Ian Vollick | 00424e5 | 2023-02-03 10:35:50 | [diff] [blame] | 204 | |
| 205 | |
def compare_lists(a, b):
  """Reports how list |b| differs from list |a|.

  Args:
    a: the baseline sequence of strings.
    b: the sequence of strings to compare against the baseline.

  Returns:
    An (additions, removals) tuple of lists: the entries that difflib reports
    as present only in |b| and only in |a| respectively, in diff order.
  """
  added = []
  removed = []
  for entry in difflib.Differ().compare(a, b):
    # Each diff line starts with a two-character code. Only '+ ' and '- ' are
    # of interest; unchanged ('  ') and hint ('? ') lines are skipped.
    code, item = entry[:2], entry[2:]
    if code == '+ ':
      added.append(item)
    elif code == '- ':
      removed.append(item)
  return (added, removed)
Ian Vollick | 00424e5 | 2023-02-03 10:35:50 | [diff] [blame] | 213 | |
| 214 | |
def write_filelist(filelist_name, files, header):
  """Writes a header followed by one entry per line to |filelist_name|.

  Args:
    filelist_name: path of the .filelist file to (over)write.
    files: iterable of entries; each is written on its own line.
    header: header text to write. If it does not match _HEADER_PATTERN, the
      default _HEADER is written in its place.

  Returns:
    None on success. If anything goes wrong the error is printed via
    print_error and an empty list is returned.
  """
  try:
    # newline='' turns off newline translation, so the '\n' written below is
    # stored as-is on every platform.
    with open(filelist_name, 'w', encoding='utf-8', newline='') as out:
      banner = header if _HEADER_PATTERN.search(header) else _HEADER
      out.write(banner)
      out.writelines(f'{entry}\n' for entry in files)
  except Exception as e:
    print_error(f'Could not write file list: {filelist_name}',
                f'{type(e)}: {e}')
    return []
| 227 | |
| 228 | |
def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
  """Checks or updates |filelist| against the expansion of |globlist|.

  Files produced by the glob expansion that are missing from the file list
  are only considered if get_tracked_files reports them as tracked; other
  such additions are ignored.

  Args:
    filelist: path of the .filelist file.
    globlist: path of the .globlist file.
    globroot: directory that the globs are relative to.
    check: if True, the file list is only compared with the expansion and
      never modified. If False, the file list is rewritten.
    verbose: if True, details about failures and differences are printed.

  Returns:
    0 on success (the list is up to date, or was written successfully), and
    1 otherwise.
  """
  try:
    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
  except Exception as e:
    if verbose:
      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    return 1

  # parse_filelist may hand back an empty result when the file list cannot be
  # read; treat that as "no entries, no header" rather than failing to unpack.
  parsed = parse_filelist(filelist)
  (files, header) = parsed if parsed else ([], '')

  (additions, removals) = compare_lists(files, files_from_globlist)

  # Ignore additions of untracked files.
  if additions:
    directories = {os.path.dirname(f) for f in additions}
    tracked_files = set()
    for d in directories:
      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           globroot, d)
      relative = os.path.relpath(combined, globroot)
      tracked_files |= get_tracked_files(relative, globroot, root_relative,
                                         verbose)
    to_ignore = set(additions) - tracked_files
    additions = [f for f in additions if f in tracked_files]
    files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]

  if check:
    if not _HEADER_PATTERN.search(header):
      if verbose:
        print_error(f'Unexpected header for {filelist}', f'{header}')
      return 1
    if not additions and not removals:
      return 0
    if verbose:
      pretty_additions = ['+ ' + f for f in additions]
      pretty_removals = ['- ' + f for f in removals]
      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
      print_error('File list does not match glob expansion', f'{pretty_diff}')
    return 1

  # write_filelist returns None on success and a list once it has reported a
  # failure; make sure a failed write is reflected in the exit status.
  if write_filelist(filelist, files_from_globlist, header) is not None:
    return 1
  return 0
| 275 | |
| 276 | |
def main(args):
  """Parses command-line arguments and processes the file list.

  Args:
    args: list of command-line arguments, without the program name.

  Returns:
    The exit status produced by process_filelist.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('filelist', help='Contains one file per line')
  parser.add_argument('globlist',
                      help='Contains globs that, when expanded, '
                      'should match the filelist. Use '
                      '--help for details on syntax')
  parser.add_argument('globroot',
                      help='Directory from which globs are relative')
  parser.add_argument('-c',
                      '--check',
                      action='store_true',
                      help='Prevents modifying the file list')
  parser.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Use this to print details on differences')
  # Parse the arguments that were handed in rather than implicitly re-reading
  # sys.argv, so the function honors its parameter.
  options = parser.parse_args(args)
  return process_filelist(options.filelist,
                          options.globlist,
                          options.globroot,
                          check=options.check,
                          verbose=options.verbose)
| 301 | |
| 302 | |
def print_error(error_message, error_info):
  """Prints `error_message` to stderr, followed by `error_info` if non-empty.

  The message is prefixed with 'ERROR: ' and wrapped in the color codes
  returned by adapted_color_for_output. `error_info`, when it is not empty, is
  printed on the following line.
  """
  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
                                                    TERMINAL_RESET_COLOR)

  parts = [''.join((color_start, 'ERROR: ', error_message, color_end))]
  if len(error_info) > 0:
    parts.append(error_info)
  print('\n'.join(parts), file=sys.stderr)
| 312 | |
| 313 | |
def adapted_color_for_output(color_start, color_end):
  """Returns the `color_start`, `color_end` tuple if error output goes to a
  terminal, or a pair of empty strings otherwise.

  The colors are used by print_error, which writes to sys.stderr, so that is
  the stream inspected here. Checking sys.stdout instead would leak escape
  codes into a redirected stderr whenever stdout is still a terminal.
  """
  if sys.stderr.isatty():
    return color_start, color_end
  return '', ''
| 320 | |
| 321 | |
# When run as a script, pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))