blob: c70df99c0662c0bffa6f70960a1019539649f190 [file] [log] [blame]
#!/usr/bin/python
# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# memcheck_analyze.py
''' Given a valgrind XML file, parses errors and uniques them.'''
import gdb_helper
import logging
import optparse
import os
import subprocess
import sys
import time
from xml.dom.minidom import parse
from xml.parsers.expat import ExpatError
# Global symbol table (yuck)
TheAddressTable = None
# These are functions (using C++ mangled names) that we look for in stack
# traces. We don't show stack frames while pretty printing when they are below
# any of the following:
_TOP_OF_STACK_POINTS = [
# Don't show our testing framework.
"testing::Test::Run()",
# Also don't show the internals of libc/pthread.
"start_thread"
]
def getTextOf(top_node, name):
''' Returns all text in all DOM nodes with a certain |name| that are children
of |top_node|.
'''
text = ""
for nodes_named in top_node.getElementsByTagName(name):
text += "".join([node.data for node in nodes_named.childNodes
if node.nodeType == node.TEXT_NODE])
return text
def getCDATAOf(top_node, name):
''' Returns all CDATA in all DOM nodes with a certain |name| that are children
of |top_node|.
'''
text = ""
for nodes_named in top_node.getElementsByTagName(name):
text += "".join([node.data for node in nodes_named.childNodes
if node.nodeType == node.CDATA_SECTION_NODE])
if (text == ""):
return None
return text
def removeCommonRoot(source_dir, directory):
'''Returns a string with the string prefix |source_dir| removed from
|directory|.'''
if source_dir:
# Do this for safety, just in case directory is an absolute path outside of
# source_dir.
prefix = os.path.commonprefix([source_dir, directory])
return directory[len(prefix) + 1:]
return directory
# Constants that give real names to the abbreviations in valgrind XML output.
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"
def gatherFrames(node, source_dir):
frames = []
for frame in node.getElementsByTagName("frame"):
frame_dict = {
INSTRUCTION_POINTER : getTextOf(frame, INSTRUCTION_POINTER),
OBJECT_FILE : getTextOf(frame, OBJECT_FILE),
FUNCTION_NAME : getTextOf(frame, FUNCTION_NAME),
SRC_FILE_DIR : removeCommonRoot(
source_dir, getTextOf(frame, SRC_FILE_DIR)),
SRC_FILE_NAME : getTextOf(frame, SRC_FILE_NAME),
SRC_LINE : getTextOf(frame, SRC_LINE)
}
frames += [frame_dict]
if frame_dict[FUNCTION_NAME] in _TOP_OF_STACK_POINTS:
break
global TheAddressTable
if TheAddressTable != None and frame_dict[SRC_LINE] == "":
# Try using gdb
TheAddressTable.Add(frame_dict[OBJECT_FILE], frame_dict[INSTRUCTION_POINTER])
return frames
class ValgrindError:
''' Takes a <DOM Element: error> node and reads all the data from it. A
ValgrindError is immutable and is hashed on its pretty printed output.
'''
def __init__(self, source_dir, error_node, commandline):
''' Copies all the relevant information out of the DOM and into object
properties.
Args:
error_node: The <error></error> DOM node we're extracting from.
source_dir: Prefix that should be stripped from the <dir> node.
commandline: The command that was run under valgrind
'''
# Valgrind errors contain one <what><stack> pair, plus an optional
# <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
# plus (since 3.5.0) a <suppression></suppression> pair.
# (Origin is nicely enclosed; too bad the other two aren't.)
# The most common way to see all three in one report is
# a syscall with a parameter that points to uninitialized memory, e.g.
# Format:
# <error>
# <unique>0x6d</unique>
# <tid>1</tid>
# <kind>SyscallParam</kind>
# <what>Syscall param write(buf) points to uninitialised byte(s)</what>
# <stack>
# <frame>
# ...
# </frame>
# </stack>
# <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
# <stack>
# <frame>
# ...
# </frame>
# </stack>
# <origin>
# <what>Uninitialised value was created by a heap allocation</what>
# <stack>
# <frame>
# ...
# </frame>
# </stack>
# </origin>
# <suppression>
# <sname>insert_a_suppression_name_here</sname>
# <skind>Memcheck:Param</skind>
# <skaux>write(buf)</skaux>
# <sframe> <fun>__write_nocancel</fun> </sframe>
# ...
# <sframe> <fun>main</fun> </sframe>
# <rawtext>
# <![CDATA[
# {
# <insert_a_suppression_name_here>
# Memcheck:Param
# write(buf)
# fun:__write_nocancel
# ...
# fun:main
# }
# ]]>
# </rawtext>
# </suppression>
# </error>
#
# Each frame looks like this:
# <frame>
# <ip>0x83751BC</ip>
# <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
# <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
# <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
# <file>gtest-internal-inl.h</file>
# <line>655</line>
# </frame>
# although the dir, file, and line elements are missing if there is
# no debug info.
self._kind = getTextOf(error_node, "kind")
self._backtraces = []
self._suppression = None
self._commandline = commandline
# Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
description = None
for node in error_node.childNodes:
if node.localName == "what" or node.localName == "auxwhat":
description = "".join([n.data for n in node.childNodes
if n.nodeType == n.TEXT_NODE])
elif node.localName == "xwhat":
description = getTextOf(node, "text")
elif node.localName == "stack":
self._backtraces.append([description, gatherFrames(node, source_dir)])
description = None
elif node.localName == "origin":
description = getTextOf(node, "what")
stack = node.getElementsByTagName("stack")[0]
frames = gatherFrames(stack, source_dir)
self._backtraces.append([description, frames])
description = None
stack = None
frames = None
elif node.localName == "suppression":
self._suppression = getCDATAOf(node, "rawtext");
def __str__(self):
''' Pretty print the type and backtrace(s) of this specific error,
including suppression (which is just a mangled backtrace).'''
output = self._kind + "\n"
if (self._commandline):
output += self._commandline + "\n"
for backtrace in self._backtraces:
output += backtrace[0] + "\n"
filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True,
close_fds=True)
buf = ""
for frame in backtrace[1]:
buf += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
(stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1'))
demangled_names = stdoutbuf.split("\n")
i = 0
for frame in backtrace[1]:
output += (" " + demangled_names[i])
i = i + 1
global TheAddressTable
if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
# Try using gdb
foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
frame[INSTRUCTION_POINTER])
if foo[0] != None:
output += (" (" + foo[0] + ":" + foo[1] + ")")
elif frame[SRC_FILE_DIR] != "":
output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
":" + frame[SRC_LINE] + ")")
else:
output += " (" + frame[OBJECT_FILE] + ")"
output += "\n"
# TODO(dank): stop synthesizing suppressions once everyone has
# valgrind-3.5 and we can rely on xml
if (self._suppression == None):
output += "Suppression:\n"
for frame in backtrace[1]:
output += " fun:" + (frame[FUNCTION_NAME] or "*") + "\n"
if (self._suppression != None):
output += "Suppression:"
# Widen suppression slightly to make portable between mac and linux
supp = self._suppression;
supp = supp.replace("fun:_Znwj", "fun:_Znw*")
supp = supp.replace("fun:_Znwm", "fun:_Znw*")
# Split into lines so we can enforce length limits
supplines = supp.split("\n")
# Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
# or at the first 'boring' caller.
# (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
# VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
# TODO(dkegel): add more boring callers
newlen = 26;
try:
newlen = min(newlen, supplines.index(" fun:_ZN11MessageLoop3RunEv"))
except ValueError:
pass
if (len(supplines) > newlen):
supplines = supplines[0:newlen]
supplines.append("}")
output += "\n".join(supplines) + "\n"
return output
def UniqueString(self):
''' String to use for object identity. Don't print this, use str(obj)
instead.'''
rep = self._kind + " "
for backtrace in self._backtraces:
for frame in backtrace[1]:
rep += frame[FUNCTION_NAME]
if frame[SRC_FILE_DIR] != "":
rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
else:
rep += frame[OBJECT_FILE]
return rep
def __hash__(self):
return hash(self.UniqueString())
def __eq__(self, rhs):
return self.UniqueString() == rhs
def find_and_truncate(f):
f.seek(0)
while True:
line = f.readline()
if line == "":
return False
if '</valgrindoutput>' in line:
# valgrind often has garbage after </valgrindoutput> upon crash
f.truncate()
return True
class MemcheckAnalyze:
''' Given a set of Valgrind XML files, parse all the errors out of them,
unique them and output the results.'''
SANITY_TEST_SUPPRESSION = "Memcheck sanity test"
def __init__(self, source_dir, files, show_all_leaks=False, use_gdb=False):
'''Reads in a set of files.
Args:
source_dir: Path to top of source tree for this build
files: A list of filenames.
show_all_leaks: whether to show even less important leaks
'''
# Beyond the detailed errors parsed by ValgrindError above,
# the xml file contain records describing suppressions that were used:
# <suppcounts>
# <pair>
# <count>28</count>
# <name>pango_font_leak_todo</name>
# </pair>
# <pair>
# <count>378</count>
# <name>bug_13243</name>
# </pair>
# </suppcounts
# Collect these and print them at the end.
#
# With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
# the file also includes records of the form
# <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
# giving the filename and load address of each binary that was mapped
# into the process.
global TheAddressTable
if use_gdb:
TheAddressTable = gdb_helper.AddressTable()
self._errors = set()
self._suppcounts = {}
badfiles = set()
start = time.time()
self._parse_failed = False
for file in files:
# Wait up to three minutes for valgrind to finish writing all files,
# but after that, just skip incomplete files and warn.
f = open(file, "r+")
found = False
firstrun = True
origsize = os.path.getsize(file)
while (not found and (firstrun or ((time.time() - start) < 180.0))):
firstrun = False
f.seek(0)
found = find_and_truncate(f)
if not found:
time.sleep(1)
f.close()
if not found:
badfiles.add(file)
else:
newsize = os.path.getsize(file)
if origsize > newsize+1:
logging.warn(str(origsize - newsize) + " bytes of junk were after </valgrindoutput> in %s!" % file)
try:
parsed_file = parse(file);
except ExpatError, e:
self._parse_failed = True
logging.warn("could not parse %s: %s" % (file, e))
lineno = e.lineno - 1
context_lines = 5
context_start = max(0, lineno - context_lines)
context_end = lineno + context_lines + 1
context_file = open(file, "r")
for i in range(0, context_start):
context_file.readline()
for i in range(context_start, context_end):
context_data = context_file.readline().rstrip()
if i != lineno:
logging.warn(" %s" % context_data)
else:
logging.warn("> %s" % context_data)
context_file.close()
continue
if TheAddressTable != None:
load_objs = parsed_file.getElementsByTagName("load_obj")
for load_obj in load_objs:
obj = getTextOf(load_obj, "obj")
ip = getTextOf(load_obj, "ip")
TheAddressTable.AddBinaryAt(obj, ip)
commandline = None
preamble = parsed_file.getElementsByTagName("preamble")[0];
for node in preamble.getElementsByTagName("line"):
if node.localName == "line":
for x in node.childNodes:
if x.nodeType == node.TEXT_NODE and "Command" in x.data:
commandline = x.data
break
raw_errors = parsed_file.getElementsByTagName("error")
for raw_error in raw_errors:
# Ignore "possible" leaks for now by default.
if (show_all_leaks or
getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
error = ValgrindError(source_dir, raw_error, commandline)
self._errors.add(error)
suppcountlist = parsed_file.getElementsByTagName("suppcounts")
if len(suppcountlist) > 0:
suppcountlist = suppcountlist[0]
for node in suppcountlist.getElementsByTagName("pair"):
count = getTextOf(node, "count");
name = getTextOf(node, "name");
if name in self._suppcounts:
self._suppcounts[name] += int(count)
else:
self._suppcounts[name] = int(count)
if len(badfiles) > 0:
logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
for file in badfiles:
logging.warn("Last 20 lines of %s :" % file)
os.system("tail -n 20 '%s' 1>&2" % file)
def Report(self, check_sanity=False):
if self._parse_failed:
logging.error("FAIL! Couldn't parse Valgrind output file")
return -2
is_sane = False
print "-----------------------------------------------------"
print "Suppressions used:"
print " count name"
for item in sorted(self._suppcounts.items(), key=lambda (k,v): (v,k)):
print "%7s %s" % (item[1], item[0])
if item[0].startswith(MemcheckAnalyze.SANITY_TEST_SUPPRESSION):
is_sane = True
print "-----------------------------------------------------"
sys.stdout.flush()
retcode = 0
if self._errors:
logging.error("FAIL! There were %s errors: " % len(self._errors))
global TheAddressTable
if TheAddressTable != None:
TheAddressTable.ResolveAll()
for error in self._errors:
logging.error(error)
retcode = -1
# Report tool's insanity even if there were errors.
if check_sanity and not is_sane:
logging.error("FAIL! Sanity check failed!")
retcode = -3
if retcode != 0:
return retcode
logging.info("PASS! No errors found!")
return 0
def _main():
'''For testing only. The MemcheckAnalyze class should be imported instead.'''
retcode = 0
parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
parser.add_option("", "--source_dir",
help="path to top of source tree for this build"
"(used to normalize source paths in baseline)")
(options, args) = parser.parse_args()
if not len(args) >= 1:
parser.error("no filename specified")
filenames = args
analyzer = MemcheckAnalyze(options.source_dir, filenames, use_gdb=True)
retcode = analyzer.Report()
sys.exit(retcode)
if __name__ == "__main__":
_main()