| #!/usr/bin/python |
| # Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| # memcheck_analyze.py |
| |
| ''' Given a valgrind XML file, parses errors and uniques them.''' |
| |
| import gdb_helper |
| |
| import logging |
| import optparse |
| import os |
| import subprocess |
| import sys |
| import time |
| from xml.dom.minidom import parse |
| from xml.parsers.expat import ExpatError |
| |
| # Global symbol table (yuck) |
| TheAddressTable = None |
| |
| |
| # These are functions (using C++ mangled names) that we look for in stack |
| # traces. We don't show stack frames while pretty printing when they are below |
| # any of the following: |
| _TOP_OF_STACK_POINTS = [ |
| # Don't show our testing framework. |
| "testing::Test::Run()", |
| # Also don't show the internals of libc/pthread. |
| "start_thread" |
| ] |
| |
| def getTextOf(top_node, name): |
| ''' Returns all text in all DOM nodes with a certain |name| that are children |
| of |top_node|. |
| ''' |
| |
| text = "" |
| for nodes_named in top_node.getElementsByTagName(name): |
| text += "".join([node.data for node in nodes_named.childNodes |
| if node.nodeType == node.TEXT_NODE]) |
| return text |
| |
| def getCDATAOf(top_node, name): |
| ''' Returns all CDATA in all DOM nodes with a certain |name| that are children |
| of |top_node|. |
| ''' |
| |
| text = "" |
| for nodes_named in top_node.getElementsByTagName(name): |
| text += "".join([node.data for node in nodes_named.childNodes |
| if node.nodeType == node.CDATA_SECTION_NODE]) |
| if (text == ""): |
| return None |
| return text |
| |
| def removeCommonRoot(source_dir, directory): |
| '''Returns a string with the string prefix |source_dir| removed from |
| |directory|.''' |
| if source_dir: |
| # Do this for safety, just in case directory is an absolute path outside of |
| # source_dir. |
| prefix = os.path.commonprefix([source_dir, directory]) |
| return directory[len(prefix) + 1:] |
| |
| return directory |
| |
| # Constants that give real names to the abbreviations in valgrind XML output. |
| INSTRUCTION_POINTER = "ip" |
| OBJECT_FILE = "obj" |
| FUNCTION_NAME = "fn" |
| SRC_FILE_DIR = "dir" |
| SRC_FILE_NAME = "file" |
| SRC_LINE = "line" |
| |
| def gatherFrames(node, source_dir): |
| frames = [] |
| for frame in node.getElementsByTagName("frame"): |
| frame_dict = { |
| INSTRUCTION_POINTER : getTextOf(frame, INSTRUCTION_POINTER), |
| OBJECT_FILE : getTextOf(frame, OBJECT_FILE), |
| FUNCTION_NAME : getTextOf(frame, FUNCTION_NAME), |
| SRC_FILE_DIR : removeCommonRoot( |
| source_dir, getTextOf(frame, SRC_FILE_DIR)), |
| SRC_FILE_NAME : getTextOf(frame, SRC_FILE_NAME), |
| SRC_LINE : getTextOf(frame, SRC_LINE) |
| } |
| frames += [frame_dict] |
| if frame_dict[FUNCTION_NAME] in _TOP_OF_STACK_POINTS: |
| break |
| global TheAddressTable |
| if TheAddressTable != None and frame_dict[SRC_LINE] == "": |
| # Try using gdb |
| TheAddressTable.Add(frame_dict[OBJECT_FILE], frame_dict[INSTRUCTION_POINTER]) |
| return frames |
| |
| class ValgrindError: |
| ''' Takes a <DOM Element: error> node and reads all the data from it. A |
| ValgrindError is immutable and is hashed on its pretty printed output. |
| ''' |
| |
| def __init__(self, source_dir, error_node, commandline): |
| ''' Copies all the relevant information out of the DOM and into object |
| properties. |
| |
| Args: |
| error_node: The <error></error> DOM node we're extracting from. |
| source_dir: Prefix that should be stripped from the <dir> node. |
| commandline: The command that was run under valgrind |
| ''' |
| |
| # Valgrind errors contain one <what><stack> pair, plus an optional |
| # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>, |
| # plus (since 3.5.0) a <suppression></suppression> pair. |
| # (Origin is nicely enclosed; too bad the other two aren't.) |
| # The most common way to see all three in one report is |
| # a syscall with a parameter that points to uninitialized memory, e.g. |
| # Format: |
| # <error> |
| # <unique>0x6d</unique> |
| # <tid>1</tid> |
| # <kind>SyscallParam</kind> |
| # <what>Syscall param write(buf) points to uninitialised byte(s)</what> |
| # <stack> |
| # <frame> |
| # ... |
| # </frame> |
| # </stack> |
| # <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat> |
| # <stack> |
| # <frame> |
| # ... |
| # </frame> |
| # </stack> |
| # <origin> |
| # <what>Uninitialised value was created by a heap allocation</what> |
| # <stack> |
| # <frame> |
| # ... |
| # </frame> |
| # </stack> |
| # </origin> |
| # <suppression> |
| # <sname>insert_a_suppression_name_here</sname> |
| # <skind>Memcheck:Param</skind> |
| # <skaux>write(buf)</skaux> |
| # <sframe> <fun>__write_nocancel</fun> </sframe> |
| # ... |
| # <sframe> <fun>main</fun> </sframe> |
| # <rawtext> |
| # <![CDATA[ |
| # { |
| # <insert_a_suppression_name_here> |
| # Memcheck:Param |
| # write(buf) |
| # fun:__write_nocancel |
| # ... |
| # fun:main |
| # } |
| # ]]> |
| # </rawtext> |
| # </suppression> |
| # </error> |
| # |
| # Each frame looks like this: |
| # <frame> |
| # <ip>0x83751BC</ip> |
| # <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj> |
| # <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn> |
| # <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir> |
| # <file>gtest-internal-inl.h</file> |
| # <line>655</line> |
| # </frame> |
| # although the dir, file, and line elements are missing if there is |
| # no debug info. |
| |
| self._kind = getTextOf(error_node, "kind") |
| self._backtraces = [] |
| self._suppression = None |
| self._commandline = commandline |
| |
| # Iterate through the nodes, parsing <what|auxwhat><stack> pairs. |
| description = None |
| for node in error_node.childNodes: |
| if node.localName == "what" or node.localName == "auxwhat": |
| description = "".join([n.data for n in node.childNodes |
| if n.nodeType == n.TEXT_NODE]) |
| elif node.localName == "xwhat": |
| description = getTextOf(node, "text") |
| elif node.localName == "stack": |
| self._backtraces.append([description, gatherFrames(node, source_dir)]) |
| description = None |
| elif node.localName == "origin": |
| description = getTextOf(node, "what") |
| stack = node.getElementsByTagName("stack")[0] |
| frames = gatherFrames(stack, source_dir) |
| self._backtraces.append([description, frames]) |
| description = None |
| stack = None |
| frames = None |
| elif node.localName == "suppression": |
| self._suppression = getCDATAOf(node, "rawtext"); |
| |
| def __str__(self): |
| ''' Pretty print the type and backtrace(s) of this specific error, |
| including suppression (which is just a mangled backtrace).''' |
| output = self._kind + "\n" |
| if (self._commandline): |
| output += self._commandline + "\n" |
| |
| for backtrace in self._backtraces: |
| output += backtrace[0] + "\n" |
| filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE, |
| stdout=subprocess.PIPE, |
| stderr=subprocess.STDOUT, |
| shell=True, |
| close_fds=True) |
| buf = "" |
| for frame in backtrace[1]: |
| buf += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n" |
| (stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1')) |
| demangled_names = stdoutbuf.split("\n") |
| |
| i = 0 |
| for frame in backtrace[1]: |
| output += (" " + demangled_names[i]) |
| i = i + 1 |
| |
| global TheAddressTable |
| if TheAddressTable != None and frame[SRC_FILE_DIR] == "": |
| # Try using gdb |
| foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE], |
| frame[INSTRUCTION_POINTER]) |
| if foo[0] != None: |
| output += (" (" + foo[0] + ":" + foo[1] + ")") |
| elif frame[SRC_FILE_DIR] != "": |
| output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] + |
| ":" + frame[SRC_LINE] + ")") |
| else: |
| output += " (" + frame[OBJECT_FILE] + ")" |
| output += "\n" |
| |
| # TODO(dank): stop synthesizing suppressions once everyone has |
| # valgrind-3.5 and we can rely on xml |
| if (self._suppression == None): |
| output += "Suppression:\n" |
| for frame in backtrace[1]: |
| output += " fun:" + (frame[FUNCTION_NAME] or "*") + "\n" |
| |
| if (self._suppression != None): |
| output += "Suppression:" |
| # Widen suppression slightly to make portable between mac and linux |
| supp = self._suppression; |
| supp = supp.replace("fun:_Znwj", "fun:_Znw*") |
| supp = supp.replace("fun:_Znwm", "fun:_Znw*") |
| # Split into lines so we can enforce length limits |
| supplines = supp.split("\n") |
| |
| # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type) |
| # or at the first 'boring' caller. |
| # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising |
| # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.) |
| # TODO(dkegel): add more boring callers |
| newlen = 26; |
| try: |
| newlen = min(newlen, supplines.index(" fun:_ZN11MessageLoop3RunEv")) |
| except ValueError: |
| pass |
| if (len(supplines) > newlen): |
| supplines = supplines[0:newlen] |
| supplines.append("}") |
| |
| output += "\n".join(supplines) + "\n" |
| |
| return output |
| |
| def UniqueString(self): |
| ''' String to use for object identity. Don't print this, use str(obj) |
| instead.''' |
| rep = self._kind + " " |
| for backtrace in self._backtraces: |
| for frame in backtrace[1]: |
| rep += frame[FUNCTION_NAME] |
| |
| if frame[SRC_FILE_DIR] != "": |
| rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] |
| else: |
| rep += frame[OBJECT_FILE] |
| |
| return rep |
| |
| def __hash__(self): |
| return hash(self.UniqueString()) |
| def __eq__(self, rhs): |
| return self.UniqueString() == rhs |
| |
| def find_and_truncate(f): |
| f.seek(0) |
| while True: |
| line = f.readline() |
| if line == "": |
| return False |
| if '</valgrindoutput>' in line: |
| # valgrind often has garbage after </valgrindoutput> upon crash |
| f.truncate() |
| return True |
| |
| class MemcheckAnalyze: |
| ''' Given a set of Valgrind XML files, parse all the errors out of them, |
| unique them and output the results.''' |
| |
| SANITY_TEST_SUPPRESSION = "Memcheck sanity test" |
| def __init__(self, source_dir, files, show_all_leaks=False, use_gdb=False): |
| '''Reads in a set of files. |
| |
| Args: |
| source_dir: Path to top of source tree for this build |
| files: A list of filenames. |
| show_all_leaks: whether to show even less important leaks |
| ''' |
| |
| # Beyond the detailed errors parsed by ValgrindError above, |
| # the xml file contain records describing suppressions that were used: |
| # <suppcounts> |
| # <pair> |
| # <count>28</count> |
| # <name>pango_font_leak_todo</name> |
| # </pair> |
| # <pair> |
| # <count>378</count> |
| # <name>bug_13243</name> |
| # </pair> |
| # </suppcounts |
| # Collect these and print them at the end. |
| # |
| # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in, |
| # the file also includes records of the form |
| # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj> |
| # giving the filename and load address of each binary that was mapped |
| # into the process. |
| |
| global TheAddressTable |
| if use_gdb: |
| TheAddressTable = gdb_helper.AddressTable() |
| self._errors = set() |
| self._suppcounts = {} |
| badfiles = set() |
| start = time.time() |
| self._parse_failed = False |
| for file in files: |
| # Wait up to three minutes for valgrind to finish writing all files, |
| # but after that, just skip incomplete files and warn. |
| f = open(file, "r+") |
| found = False |
| firstrun = True |
| origsize = os.path.getsize(file) |
| while (not found and (firstrun or ((time.time() - start) < 180.0))): |
| firstrun = False |
| f.seek(0) |
| found = find_and_truncate(f) |
| if not found: |
| time.sleep(1) |
| f.close() |
| if not found: |
| badfiles.add(file) |
| else: |
| newsize = os.path.getsize(file) |
| if origsize > newsize+1: |
| logging.warn(str(origsize - newsize) + " bytes of junk were after </valgrindoutput> in %s!" % file) |
| try: |
| parsed_file = parse(file); |
| except ExpatError, e: |
| self._parse_failed = True |
| logging.warn("could not parse %s: %s" % (file, e)) |
| lineno = e.lineno - 1 |
| context_lines = 5 |
| context_start = max(0, lineno - context_lines) |
| context_end = lineno + context_lines + 1 |
| context_file = open(file, "r") |
| for i in range(0, context_start): |
| context_file.readline() |
| for i in range(context_start, context_end): |
| context_data = context_file.readline().rstrip() |
| if i != lineno: |
| logging.warn(" %s" % context_data) |
| else: |
| logging.warn("> %s" % context_data) |
| context_file.close() |
| continue |
| if TheAddressTable != None: |
| load_objs = parsed_file.getElementsByTagName("load_obj") |
| for load_obj in load_objs: |
| obj = getTextOf(load_obj, "obj") |
| ip = getTextOf(load_obj, "ip") |
| TheAddressTable.AddBinaryAt(obj, ip) |
| |
| commandline = None |
| preamble = parsed_file.getElementsByTagName("preamble")[0]; |
| for node in preamble.getElementsByTagName("line"): |
| if node.localName == "line": |
| for x in node.childNodes: |
| if x.nodeType == node.TEXT_NODE and "Command" in x.data: |
| commandline = x.data |
| break |
| |
| raw_errors = parsed_file.getElementsByTagName("error") |
| for raw_error in raw_errors: |
| # Ignore "possible" leaks for now by default. |
| if (show_all_leaks or |
| getTextOf(raw_error, "kind") != "Leak_PossiblyLost"): |
| error = ValgrindError(source_dir, raw_error, commandline) |
| self._errors.add(error) |
| |
| suppcountlist = parsed_file.getElementsByTagName("suppcounts") |
| if len(suppcountlist) > 0: |
| suppcountlist = suppcountlist[0] |
| for node in suppcountlist.getElementsByTagName("pair"): |
| count = getTextOf(node, "count"); |
| name = getTextOf(node, "name"); |
| if name in self._suppcounts: |
| self._suppcounts[name] += int(count) |
| else: |
| self._suppcounts[name] = int(count) |
| |
| if len(badfiles) > 0: |
| logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles)) |
| for file in badfiles: |
| logging.warn("Last 20 lines of %s :" % file) |
| os.system("tail -n 20 '%s' 1>&2" % file) |
| |
| def Report(self, check_sanity=False): |
| if self._parse_failed: |
| logging.error("FAIL! Couldn't parse Valgrind output file") |
| return -2 |
| |
| is_sane = False |
| print "-----------------------------------------------------" |
| print "Suppressions used:" |
| print " count name" |
| for item in sorted(self._suppcounts.items(), key=lambda (k,v): (v,k)): |
| print "%7s %s" % (item[1], item[0]) |
| if item[0].startswith(MemcheckAnalyze.SANITY_TEST_SUPPRESSION): |
| is_sane = True |
| print "-----------------------------------------------------" |
| sys.stdout.flush() |
| |
| retcode = 0 |
| if self._errors: |
| logging.error("FAIL! There were %s errors: " % len(self._errors)) |
| |
| global TheAddressTable |
| if TheAddressTable != None: |
| TheAddressTable.ResolveAll() |
| |
| for error in self._errors: |
| logging.error(error) |
| |
| retcode = -1 |
| |
| # Report tool's insanity even if there were errors. |
| if check_sanity and not is_sane: |
| logging.error("FAIL! Sanity check failed!") |
| retcode = -3 |
| |
| if retcode != 0: |
| return retcode |
| |
| logging.info("PASS! No errors found!") |
| return 0 |
| |
| def _main(): |
| '''For testing only. The MemcheckAnalyze class should be imported instead.''' |
| retcode = 0 |
| parser = optparse.OptionParser("usage: %prog [options] <files to analyze>") |
| parser.add_option("", "--source_dir", |
| help="path to top of source tree for this build" |
| "(used to normalize source paths in baseline)") |
| |
| (options, args) = parser.parse_args() |
| if not len(args) >= 1: |
| parser.error("no filename specified") |
| filenames = args |
| |
| analyzer = MemcheckAnalyze(options.source_dir, filenames, use_gdb=True) |
| retcode = analyzer.Report() |
| |
| sys.exit(retcode) |
| |
| if __name__ == "__main__": |
| _main() |