blob: f8d8f12cc31be1a3c47049fc1d5a00a284e2f5b8 [file] [log] [blame]
Ian Vollick00424e52023-02-03 10:35:501#!/usr/bin/env python3
2# Copyright 2023 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""
6Updates .filelist files using data from corresponding .globlist files (or
7checks whether they are up to date).
8
bundle_data targets require an explicit source list, but maintaining these large
lists can be cumbersome. This script aims to simplify the process of updating
these lists by either expanding globs to update file lists or checking that an
existing file list matches such an expansion (i.e., checking during presubmit).
13
The .globlist file contains a list of globs that will be expanded to either
compare against or replace a corresponding .filelist. It is possible to exclude
items from the file list with globs as well. These lines are prefixed with '-'
and are processed in order, so be sure that exclusions come after the
inclusions they refine in the list of globs. Comments and empty lines are
permitted in .globlist files; comments are prefixed with '#'.
20
By convention, the base name of the .globlist and .filelist files matches the
label of their corresponding bundle_data from the .gn file. In order to ensure
that these filelists don't get stale, there should also be a PRESUBMIT.py
which uses this script to check that the list is up to date.
25
26By default, the script will update the file list to match the expanded globs.
27"""
28
29import argparse
30import datetime
31import difflib
32import glob
33import os.path
34import re
Ian Vollick4ff01742023-02-14 12:50:0335import subprocess
Ian Vollick00424e52023-02-03 10:35:5036import sys
37
# ANSI escape sequences that switch the terminal colour on and off for error
# output. Taken, along with the printing routine below, from update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# The lines of the generated-file banner that follow the copyright line. They
# are shared verbatim by the banner that gets written (_HEADER) and by the
# pattern used to recognise an existing banner (_HEADER_PATTERN).
_HEADER_TAIL = """# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
# If it requires updating, you should get a presubmit error with
# instructions on how to regenerate. Otherwise, do not edit.
"""

# Banner written at the top of a regenerated .filelist, stamped with the
# current year.
_HEADER = (
    f'# Copyright {datetime.datetime.now().year} The Chromium Authors\n' +
    _HEADER_TAIL)

# Matches the banner regardless of the copyright year it carries.
_HEADER_PATTERN = re.compile('# Copyright [0-9]+ The Chromium Authors\n' +
                             _HEADER_TAIL)

# Number of lines the banner occupies at the top of a .filelist.
_HEADER_HEIGHT = 6

# Comment directives in a .globlist that switch the "expansion must stay
# inside the globlist's directory" check off and back on again.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'
63
Ian Vollick00424e52023-02-03 10:35:5064
def parse_filelist(filelist_name):
  """Reads an existing .filelist and splits it into entries and header.

  The first `_HEADER_HEIGHT` lines are taken as the header and returned
  verbatim. Every remaining line is stripped of surrounding whitespace and the
  resulting entries are sorted.

  Args:
    filelist_name: Path of the .filelist file to read.

  Returns:
    A `(files, header)` tuple. If the file cannot be read, the error is
    reported through `print_error` and `([], '')` is returned, so callers can
    always unpack the result into two values.
  """
  try:
    # write_filelist() emits UTF-8, so read the file back the same way rather
    # than relying on the platform's default encoding.
    with open(filelist_name, encoding='utf-8') as filelist:
      lines = filelist.readlines()
  except Exception as e:
    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
    # Keep the two-element shape of the success path; returning a bare list
    # here would make `(files, header) = parse_filelist(...)` raise.
    return ([], '')

  header = ''.join(lines[:_HEADER_HEIGHT])
  files = sorted(line.strip() for line in lines[_HEADER_HEIGHT:])
  return (files, header)
75
76
def get_git_command_name():
  """Returns the name of the git executable to invoke on this platform.

  The result is 'git.bat' when `sys.platform` starts with 'win' and 'git'
  everywhere else.
  """
  on_windows = sys.platform.startswith('win')
  return 'git.bat' if on_windows else 'git'
81
82
def get_tracked_files(directory, globroot, repository_root_relative, verbose):
  """Returns the set of files under `directory` that git tracks.

  Args:
    directory: Directory handed to `git ls-files`, which is run with
      `globroot` as its working directory.
    globroot: Directory the globs (and therefore the returned paths) are
      relative to.
    repository_root_relative: If true, every path reported by git is prefixed
      with '//'.
    verbose: If true, failures are reported through `print_error`.

  Returns:
    A set of path strings. Paths obtained from git use '/' separators. An
    empty set is returned if the list could not be gathered.
  """
  try:
    # When the working directory is under /google/cog/cloud, git is not
    # queried; every file found by walking |directory| counts as tracked.
    if os.getcwd().startswith('/google/cog/cloud'):
      return {
          os.path.join(root, name)
          for root, _, names in os.walk(directory)
          for name in names
      }

    cmd = [get_git_command_name(), 'ls-files', '--error-unmatch', directory]
    result = subprocess.run(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            cwd=globroot)
    if result.returncode != 0:
      if verbose:
        # stderr is captured as bytes; decode it so the report contains the
        # actual git message instead of a b'...' literal.
        print_error(
            f'Could not gather a list of tracked files in {directory}',
            result.stderr.decode('utf-8', errors='replace'))
      return set()

    files = [f.decode('utf-8') for f in result.stdout.splitlines()]

    # Need paths to be relative to directory in order to match expansions.
    # This should happen naturally due to cwd above, but we need to take
    # special care if relative to the repository root.
    if repository_root_relative:
      files = ['//' + f for f in files]

    # Handle Windows backslashes.
    return {f.replace('\\', '/') for f in files}

  except Exception as e:
    if verbose:
      print_error(f'Could not gather a list of tracked files in {directory}',
                  f'{type(e)}: {e}')
    return set()
121
122
def combine_potentially_repository_root_relative_paths(a, b):
  """Joins `a` and `b`, where `b` may be repository-root relative ('//...').

  Returns:
    A `(path, root_relative)` tuple. `root_relative` is True exactly when `b`
    starts with '//'; in that case `path` is the plain concatenation `a + b`,
    otherwise it is `os.path.join(a, b)`.
  """
  root_relative = b.startswith('//')
  # os.path treats a '//'-prefixed path as absolute, so os.path.join would not
  # produce the combined path; plain concatenation is used for that case.
  joined = a + b if root_relative else os.path.join(a, b)
  return (joined, root_relative)
131
132
def parse_and_expand_globlist(globlist_name, glob_root):
  """Expands the globs listed in a .globlist into a sorted list of files.

  Each non-blank, non-comment line is a glob relative to `glob_root` (or to the
  repository root when it starts with '//'). Lines prefixed with '-' remove
  their expansion from the result; lines are processed in order. The
  push/pop(ignore-relative) comment directives switch off and on the check
  that expansions stay inside the directory containing the .globlist.

  Args:
    globlist_name: Path of the .globlist file to read.
    glob_root: Directory the globs are relative to.

  Returns:
    A sorted list of paths relative to `glob_root`, using '/' separators.

  Raises:
    Exception: If, while the check is enabled, a glob expands to a file
      outside the directory containing the .globlist. Errors from opening or
      reading the .globlist propagate as well.
  """
  # The following expects glob_root not to end in a trailing slash.
  if glob_root.endswith('/'):
    glob_root = glob_root[:-1]

  check_expansions_outside_globlist_dir = True
  globlist_dir = os.path.dirname(globlist_name)
  # A relative path escapes |globlist_dir| only if its first component is
  # '..'. A file merely named e.g. '..foo' is still inside the directory.
  parent_prefix = os.pardir + os.sep

  # Paths in |files| must use unix separators. Using a set ensures no unwanted
  # duplicates.
  files = set()
  with open(globlist_name) as globlist:
    for g in globlist:
      g = g.strip()

      # Ignore blank lines.
      if not g:
        continue

      # Toggle error checking.
      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = False
      elif g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = True

      # Ignore comments (the directives above are comments too).
      if g.startswith('#'):
        continue

      # Exclusions are prefixed with '-'.
      is_exclusion = g.startswith('-')
      if is_exclusion:
        g = g[1:]

      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           glob_root, g)

      prefix_size = len(glob_root)
      if not root_relative:
        # We need to account for the separator.
        prefix_size += 1

      # Expand the glob and filter out directories.
      expansion = [
          f for f in glob.glob(combined, recursive=True) if os.path.isfile(f)
      ]

      if check_expansions_outside_globlist_dir:
        for f in expansion:
          relative = os.path.relpath(f, globlist_dir)
          if relative == os.pardir or relative.startswith(parent_prefix):
            raise Exception(f'Globlist expansion outside globlist dir: {f}')

      # Make relative to |glob_root| and handle Windows backslashes. Since the
      # resulting paths only use unix separators, it is safe to compare them
      # for both the purpose of exclusion and addition.
      expansion = [f[prefix_size:].replace('\\', '/') for f in expansion]

      if is_exclusion:
        files.difference_update(expansion)
      else:
        files.update(expansion)

  # Return a sorted list.
  return sorted(files)
Ian Vollick00424e52023-02-03 10:35:50204
205
def compare_lists(a, b):
  """Diffs two lists of strings with `difflib.Differ`.

  Returns:
    An `(additions, removals)` tuple: the entries the diff marks with '+ '
    (present in `b` but not `a`) and with '- ' (present in `a` but not `b`),
    each in diff order and with the two-character marker stripped.
  """
  additions = []
  removals = []
  for entry in difflib.Differ().compare(a, b):
    marker, value = entry[:2], entry[2:]
    if marker == '+ ':
      additions.append(value)
    elif marker == '- ':
      removals.append(value)
    # Unchanged ('  ') and hint ('? ') lines are of no interest.
  return (additions, removals)
Ian Vollick00424e52023-02-03 10:35:50213
214
def write_filelist(filelist_name, files, header):
  """Writes `header` followed by one entry of `files` per line.

  If `header` does not match `_HEADER_PATTERN`, the freshly generated
  `_HEADER` is written in its place. The file is written as UTF-8 with '\\n'
  line endings.

  Returns:
    None on success. If writing fails, the error is reported through
    `print_error` and an empty list is returned.
  """
  try:
    with open(filelist_name, 'w', encoding='utf-8', newline='') as out:
      banner = header if _HEADER_PATTERN.search(header) else _HEADER
      out.write(banner)
      out.writelines(f'{name}\n' for name in files)
  except Exception as e:
    print_error(f'Could not write file list: {filelist_name}',
                f'{type(e)}: {e}')
    return []
227
228
def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
  """Updates `filelist` from `globlist`, or checks that it is up to date.

  Files produced by the glob expansion that are missing from the file list are
  only considered if git tracks them; untracked files are ignored.

  Args:
    filelist: Path of the .filelist file.
    globlist: Path of the .globlist file.
    globroot: Directory the globs are relative to.
    check: If true, the file list is only compared against the expansion and
      never modified.
    verbose: If true, details about failures and differences are printed.

  Returns:
    0 on success. 1 if the glob list cannot be expanded, if (in check mode) the
    header is unexpected or the list differs from the expansion, or if (in
    update mode) the file list could not be written.
  """
  try:
    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
  except Exception as e:
    if verbose:
      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    return 1

  # parse_filelist() reports read failures itself and may then hand back an
  # empty value instead of a (files, header) pair. Treat that as an empty,
  # header-less list rather than crashing while unpacking.
  parsed = parse_filelist(filelist)
  (files, header) = parsed if parsed else ([], '')

  (additions, removals) = compare_lists(files, files_from_globlist)

  # Ignore additions of untracked files.
  if additions:
    tracked_files = set()
    for d in {os.path.dirname(f) for f in additions}:
      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           globroot, d)
      relative = os.path.relpath(combined, globroot)
      tracked_files.update(
          get_tracked_files(relative, globroot, root_relative, verbose))
    to_ignore = set(additions) - tracked_files
    additions = [f for f in additions if f in tracked_files]
    files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]

  if check:
    if not _HEADER_PATTERN.search(header):
      if verbose:
        print_error(f'Unexpected header for {filelist}', f'{header}')
      return 1
    if not additions and not removals:
      return 0
    if verbose:
      pretty_additions = ['+ ' + f for f in additions]
      pretty_removals = ['- ' + f for f in removals]
      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
      print_error('File list does not match glob expansion', f'{pretty_diff}')
    return 1

  # write_filelist() returns None on success and an empty list after it has
  # reported a failure; surface the failure through the exit status.
  if write_filelist(filelist, files_from_globlist, header) is not None:
    return 1
  return 0
275
276
def main(args):
  """Parses the command line and updates or checks the file list.

  Args:
    args: Command-line arguments without the program name (e.g.
      `sys.argv[1:]`). Passing None makes argparse fall back to `sys.argv`.

  Returns:
    The exit status produced by `process_filelist`.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('filelist', help='Contains one file per line')
  parser.add_argument('globlist',
                      help='Contains globs that, when expanded, '
                      'should match the filelist. Use '
                      '--help for details on syntax')
  parser.add_argument('globroot',
                      help='Directory from which globs are relative')
  parser.add_argument('-c',
                      '--check',
                      action='store_true',
                      help='Prevents modifying the file list')
  parser.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Use this to print details on differences')
  # Parse the arguments that were actually passed in, rather than silently
  # re-reading sys.argv and ignoring |args|.
  options = parser.parse_args(args)
  return process_filelist(options.filelist,
                          options.globlist,
                          options.globroot,
                          check=options.check,
                          verbose=options.verbose)
301
302
def print_error(error_message, error_info):
  """Prints `error_message` to stderr, followed by `error_info` if non-empty.

  The message is prefixed with 'ERROR: ' and wrapped in whatever colour
  sequences `adapted_color_for_output` returns; `error_info` goes on its own
  line below it.
  """
  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
                                                    TERMINAL_RESET_COLOR)

  parts = [''.join([color_start, 'ERROR: ', error_message, color_end])]
  if len(error_info) > 0:
    parts.append(error_info)
  print('\n'.join(parts), file=sys.stderr)
312
313
def adapted_color_for_output(color_start, color_end, stream=None):
  """Returns the colour sequences to use when writing to `stream`.

  Args:
    color_start: Sequence that switches the colour on.
    color_end: Sequence that resets the colour.
    stream: The stream the coloured text will be written to. Defaults to
      `sys.stderr`, which is where `print_error` writes its output.

  Returns:
    The `(color_start, color_end)` tuple if `stream` is a terminal, or a
    tuple of two empty strings otherwise.
  """
  if stream is None:
    # Errors are printed to stderr, so that is the stream whose tty-ness
    # decides whether escape sequences are appropriate (not stdout).
    stream = sys.stderr
  if not stream.isatty():
    return '', ''
  return color_start, color_end
320
321
# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)