Add check_gn_headers.py to find missing headers in GN

BUG=661774

Review-Url: https://codereview.chromium.org/2510303002
Cr-Commit-Position: refs/heads/master@{#442617}
diff --git a/build/check_gn_headers.py b/build/check_gn_headers.py
new file mode 100755
index 0000000..ae1ef49
--- /dev/null
+++ b/build/check_gn_headers.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Find header files missing in GN.
+
+This script collects all the header files from ninja_deps, i.e. the true
+dependencies generated by the compiler, and reports those missing from GN.
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+
+
+def GetHeadersFromNinja(out_dir):
+  """Return the header files that `ninja -t deps` reports for out_dir."""
+  deps_command = ['ninja', '-C', out_dir, '-t', 'deps']
+  return ParseNinjaDepsOutput(subprocess.check_output(deps_command))
+
+
+def ParseNinjaDepsOutput(ninja_out):
+  """Parse `ninja -t deps` output and return the set of header paths.
+
+  Only entries whose heading line ends in (VALID) are read. A header is kept
+  when it starts with the '../../' prefix, which is stripped from the result.
+  """
+  all_headers = set()
+  prefix = '..' + os.sep + '..' + os.sep
+  # build/ only contains build-specific files like build_config.h and
+  # buildflag.h, and system header files, so they should be skipped.
+  skipped_dir = 'build' + os.sep
+
+  is_valid = False
+  for line in ninja_out.split('\n'):
+    line = line.rstrip('\r')  # Tolerate CRLF line endings.
+    if not line.startswith('    '):
+      # An unindented line starts a new entry; note if it is marked (VALID).
+      is_valid = line.endswith('(VALID)')
+    elif is_valid and line.endswith(('.h', '.hh')):
+      f = line.strip()
+      if f.startswith(prefix) and not f[len(prefix):].startswith(skipped_dir):
+        all_headers.add(f[len(prefix):])
+
+  return all_headers
+
+
+def GetHeadersFromGN(out_dir):
+  """Run `gn gen --ide=json` on out_dir and return the headers GN lists."""
+  subprocess.check_call(['gn', 'gen', out_dir, '--ide=json', '-q'])
+  with open(os.path.join(out_dir, 'project.json')) as f:  # Closed on exit.
+    return ParseGNProjectJSON(json.load(f))
+
+
+def ParseGNProjectJSON(gn):
+  """Return the set of headers listed as sources in a GN project description.
+
+  gn is the parsed project.json; only its 'targets' mapping is read. Source
+  entries ending in .h/.hh and starting with '//' are kept, with the '//'
+  prefix stripped; targets without 'sources' contribute nothing.
+  """
+  all_headers = set()
+  for properties in gn['targets'].values():
+    all_headers.update(f[2:] for f in properties.get('sources', [])
+                       if f.endswith(('.h', '.hh')) and f.startswith('//'))
+  return all_headers
+
+
+def GetDepsPrefixes():
+  """Return the set of folders controlled by the DEPS file.
+
+  Runs `gclient recurse` to print each dependency's GCLIENT_DEP_PATH, keeps
+  the paths under 'src/' and returns them with that prefix removed.
+  """
+  gclient_out = subprocess.check_output(
+      ['gclient', 'recurse', '--no-progress', '-j1',
+       'python', '-c', 'import os;print os.environ["GCLIENT_DEP_PATH"]'])
+  lines = (line.rstrip('\r') for line in gclient_out.split('\n'))  # CRLF-safe.
+  return {line[4:] for line in lines if line.startswith('src/')}
+
+
+def ParseWhiteList(whitelist):
+  """Return the non-empty whitelist entries with '#' comments removed."""
+  # Drop everything from the first '#' onwards, then trim whitespace.
+  stripped = (re.sub(r'#.*', '', entry).strip()
+              for entry in whitelist.split('\n'))
+  # Blank and comment-only lines collapse to '' and are filtered out.
+  return {entry for entry in stripped if entry}
+
+
+def main():
+  """Report headers the compiler used that no GN target lists.
+
+  Returns 0 when nothing is missing, 1 otherwise (after printing the list).
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--out-dir', default='out/Release')
+  parser.add_argument('--json')
+  parser.add_argument('--whitelist')
+  parser.add_argument('args', nargs=argparse.REMAINDER)
+  args, _extras = parser.parse_known_args()
+
+  ninja_headers = GetHeadersFromNinja(args.out_dir)
+  gn_headers = GetHeadersFromGN(args.out_dir)
+  # Headers under DEPS-managed folders are skipped.
+  deps = tuple(GetDepsPrefixes())
+  missing = {h for h in ninja_headers - gn_headers if not h.startswith(deps)}
+
+  if args.whitelist:
+    with open(args.whitelist) as f:
+      missing -= ParseWhiteList(f.read())
+
+  missing = sorted(missing)
+  if args.json:
+    with open(args.json, 'w') as f:
+      json.dump(missing, f)
+  if not missing:
+    return 0
+
+  print('The following files should be included in gn files:')
+  for header in missing:
+    print(header)
+  return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main())  # main() returns 1 when headers are missing, 0 otherwise.
diff --git a/build/check_gn_headers_unittest.py b/build/check_gn_headers_unittest.py
new file mode 100755
index 0000000..7272ea9
--- /dev/null
+++ b/build/check_gn_headers_unittest.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import logging
+import json
+import os
+import unittest
+import check_gn_headers
+
+
+ninja_input = r'''
+obj/a.o: #deps 1, deps mtime 123 (VALID)
+    ../../a.cc
+    ../../dir/path/b.h
+    ../../c.hh
+
+obj/b.o: #deps 1, deps mtime 123 (STALE)
+    ../../b.cc
+    ../../dir2/path/b.h
+    ../../c2.hh
+
+obj/c.o: #deps 1, deps mtime 123 (VALID)
+    ../../c.cc
+    ../../build/a.h
+    gen/b.h
+    ../../dir3/path/b.h
+    ../../c3.hh
+'''  # Sample deps listing: two (VALID) entries and one (STALE) entry.
+ninja_input_win = ninja_input.replace('/', '\\')  # Backslash-separated copy.
+
+
+gn_input = json.loads(r'''
+{
+   "others": [],
+   "targets": {
+      "//:All": {
+      },
+      "//:base": {
+         "sources": [ "//base/a.cc", "//base/a.h", "//base/b.hh" ],
+         "visibility": [ "*" ]
+      }
+    }
+}
+''')  # Two targets; only //:base declares any sources.
+
+
+whitelist = r'''
+   white-front.c
+a/b/c/white-end.c # comment
+ dir/white-both.c  #more comment
+
+# empty line above
+a/b/c
+'''  # Entries padded with whitespace and/or followed by '#' comments.
+
+
+class CheckGnHeadersTest(unittest.TestCase):
+  """Tests for the pure parsing helpers in check_gn_headers."""
+
+  def testNinja(self):
+    # Headers from the (STALE) entry, build/ and gen/ must not be reported.
+    headers = check_gn_headers.ParseNinjaDepsOutput(ninja_input)
+    expected = set([
+        'dir/path/b.h',
+        'c.hh',
+        'dir3/path/b.h',
+        'c3.hh',
+    ])
+    self.assertEqual(headers, expected)
+
+  def testNinjaWin(self):
+    # Restore os.sep even when an assertion below fails, so the patched
+    # separator cannot leak into other tests.
+    self.addCleanup(setattr, os, 'sep', os.sep)
+    os.sep = '\\'
+
+    headers = check_gn_headers.ParseNinjaDepsOutput(ninja_input_win)
+    expected = set([
+        'dir\\path\\b.h',
+        'c.hh',
+        'dir3\\path\\b.h',
+        'c3.hh',
+    ])
+    self.assertEqual(headers, expected)
+
+  def testGn(self):
+    headers = check_gn_headers.ParseGNProjectJSON(gn_input)
+    self.assertEqual(headers, set(['base/a.h', 'base/b.hh']))
+
+  def testWhitelist(self):
+    # Surrounding whitespace, '#' comments and blank lines are all dropped.
+    output = check_gn_headers.ParseWhiteList(whitelist)
+    expected = set([
+        'white-front.c',
+        'a/b/c/white-end.c',
+        'dir/white-both.c',
+        'a/b/c',
+    ])
+    self.assertEqual(output, expected)
+
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.DEBUG)  # Root logger at DEBUG level.
+  unittest.main(verbosity=2)  # verbosity=2 lists each test as it runs.