/* Pedantic checking of DWARF files
Copyright (C) 2009, 2010, 2011 Red Hat, Inc.
This file is part of elfutils.
This file is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
elfutils is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see . */
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include
#include "../libdw/c++/dwarf"
#include "check_debug_info.hh"
#include "check_debug_abbrev.hh"
#include "pri.hh"
#include "dwarf_version.hh"
#include "sections.hh"
#include "checked_read.hh"
#include "messages.hh"
#include "misc.hh"
char const *
locus_simple_fmt::abbr_offset_n ()
{
return "abbr. offset";
}
abbrev_attrib_locus::abbrev_attrib_locus (uint64_t abbr_offset,
uint64_t attr_offset,
int a_name)
: _m_abbr_offset (abbr_offset)
, _m_attr_offset (attr_offset)
, _m_name (a_name)
{}
abbrev_attrib_locus::abbrev_attrib_locus (abbrev_attrib_locus const ©)
: _m_abbr_offset (copy._m_abbr_offset)
, _m_attr_offset (copy._m_attr_offset)
, _m_name (copy._m_name)
{}
std::string
abbrev_attrib_locus::name () const
{
return pri::attr_name (_m_name);
}
void
abbrev_attrib_locus::set_name (int a_name)
{
_m_name = a_name;
}
abbrev_attrib_locus
abbrev_attrib_locus::non_symbolic ()
{
return abbrev_attrib_locus (_m_abbr_offset, _m_attr_offset);
}
std::string
abbrev_attrib_locus::format (bool brief) const
{
std::stringstream ss;
if (!brief)
ss << section_name[sec_abbrev] << ": ";
if (_m_name != -1)
ss << "abbr. 0x" << std::hex << _m_abbr_offset << ", attr. " << name ();
else
ss << "abbr. attribute 0x" << std::hex << _m_attr_offset;
return ss.str ();
}
checkdescriptor const *
check_debug_abbrev::descriptor ()
{
static checkdescriptor cd
(checkdescriptor::create ("check_debug_abbrev")
.groups ("@low")
.schedule (false)
.description (
"Checks for low-level structure of .debug_abbrev. In addition it "
"checks:\n"
" - that all abbreviation tables are non-empty\n"
" - that certain attribute forms match expectations (mainly those that "
"we have to work with in subsequent check passes. For example we "
"check that DW_AT_low_pc has a form of DW_FORM_{,ref_}addr)\n"
" - that all CUs that share an abbrev table are of the same DWARF "
"version\n"
" - that each abbrev table is used\n"
" - that abbrevs don't share abbrev codes\n"
" - that abbrev tags, attribute names and attribute forms are all known "
"(note that this assumes that elfutils know about all tags used in "
"practice. Be sure to build against recent-enough version)\n"
" - that the value of has_children is either 0 or 1\n"
" - that DW_AT_sibling isn't formed as DW_FORM_ref_addr, and that it "
"isn't present at childless abbrevs\n"
" - that attributes are not duplicated at abbrev\n"
" - that DW_AT_high_pc is never used without DW_AT_low_pc. If both are "
"used, that DW_AT_ranges isn't also used\n"
"This check generally requires CU headers to be readable, i.e. that the "
".debug_info section is roughly well-defined. If that isn't the case, "
"many checks will still be done, operating under assumption that what "
"we see is the latest DWARF format. This may render some checks "
"inaccurate.\n"));
return &cd;
}
static reg reg_debug_abbrev;
abbrev *
abbrev_table::find_abbrev (uint64_t abbrev_code) const
{
size_t a = 0;
size_t b = size;
struct abbrev *ab = NULL;
while (a < b)
{
size_t i = (a + b) / 2;
ab = abbr + i;
if (ab->code > abbrev_code)
b = i;
else if (ab->code < abbrev_code)
a = i + 1;
else
return ab;
}
return NULL;
}
namespace
{
struct cmp_abbrev
{
bool operator () (abbrev const &a, abbrev const &b) const
{
return a.code < b.code;
}
};
void
complain (locus const &loc, int form_name,
bool indirect, char const *qualifier)
{
wr_error (loc)
<< "attribute with " << qualifier << (indirect ? " indirect" : "")
<< " form " << elfutils::dwarf::forms::identifier (form_name)
<< '.' << std::endl;
}
bool
check_no_abbreviations (check_debug_abbrev::abbrev_map const &abbrevs)
{
bool ret = abbrevs.begin () == abbrevs.end ();
// It's not an error when the abbrev table contains no abbrevs.
// But since we got here, apparently there was a .debug_abbrev
// section with size of more than 0 bytes, which is wasteful.
if (ret)
wr_message (section_locus (sec_abbrev),
mc_abbrevs | mc_impact_1 | mc_acc_bloat)
<< "no abbreviations." << std::endl;
return ret;
}
check_debug_abbrev::abbrev_map
load_debug_abbrev (sec §, elf_file &file,
read_cu_headers *cu_headers)
{
check_debug_abbrev::abbrev_map abbrevs;
read_ctx ctx;
read_ctx_init (&ctx, sect.data, file.other_byte_order);
struct abbrev_table *section = NULL;
uint64_t first_attr_off = 0;
// Tolerate failure here.
dwarf_version const *ver = NULL;
static dwarf_version const *latest_ver = dwarf_version::get_latest ();
bool failed = false;
while (true)
{
/* If we get EOF at this point, either the CU was improperly
terminated, or there were no data to begin with. */
if (read_ctx_eof (&ctx))
{
if (!check_no_abbreviations (abbrevs))
wr_error (section_locus (sec_abbrev))
<< "missing zero to mark end-of-table.\n";
break;
}
uint64_t abbr_off;
uint64_t abbr_code;
{
uint64_t prev_abbr_code = (uint64_t)-1;
uint64_t zero_seq_off = (uint64_t)-1;
do
{
abbr_off = read_ctx_get_offset (&ctx);
/* Abbreviation code. */
if (!checked_read_uleb128 (&ctx, &abbr_code,
section_locus (sec_abbrev, abbr_off),
"abbrev code"))
throw check_base::failed ();
/* Note: we generally can't tell the difference between
empty table and (excessive) padding. But NUL byte(s)
at the very beginning of section are almost certainly
the first case. */
if (zero_seq_off == (uint64_t)-1
&& abbr_code == 0
&& (prev_abbr_code == 0
|| abbrevs.empty ()))
zero_seq_off = abbr_off;
if (abbr_code != 0)
break;
else
section = NULL;
prev_abbr_code = abbr_code;
}
while (!read_ctx_eof (&ctx)
/* On EOF, shift the offset so that beyond-EOF
end-position is printed for padding warning.
Necessary as our end position is exclusive. */
|| ((abbr_off += 1), false));
if (zero_seq_off != (uint64_t)-1)
wr_message_padding_0 (mc_abbrevs | mc_header,
section_locus (sec_abbrev),
zero_seq_off, abbr_off);
}
if (read_ctx_eof (&ctx))
{
/* It still could have been empty. */
check_no_abbreviations (abbrevs);
break;
}
abbrev_locus where (abbr_off);
/* OK, we got some genuine abbreviation. See if we need to
allocate a new section. */
if (section == NULL)
{
abbrev_table t;
section = &abbrevs.insert (std::make_pair (abbr_off, t)).first->second;
section->offset = abbr_off;
// Find CU that uses this abbrev table, so that we know what
// version to validate against.
if (cu_headers != NULL)
{
ver = NULL;
cu_head const *other_head = NULL;
for (std::vector ::const_iterator it
= cu_headers->cu_headers.begin ();
it != cu_headers->cu_headers.end (); ++it)
if (it->abbrev_offset == abbr_off)
{
section->used = true;
dwarf_version const *nver
= dwarf_version::get (it->version);
if (ver == NULL)
ver = nver;
else if (nver != ver)
{
wr_error (it->where)
<< " and " << other_head->where << " both use "
<< where << ", but each has a different version ("
<< it->version << " vs. " << other_head->version
<< ")." << std::endl;
// Arbitrarily pick newer version.
if (it->version > other_head->version)
ver = nver;
}
other_head = &*it;
}
if (ver == NULL)
{
// This is hard error, we can't validate abbrev
// table without knowing what version to use.
wr_error (where)
<< "abbreviation table is never used." << std::endl;
ver = dwarf_version::get_latest ();
}
}
else if (ver == NULL) // Only emit this once.
{
wr_error (section_locus (sec_info))
<< "couldn't load CU headers for processing .debug_abbrev; "
"assuming latest DWARF flavor."
<< std::endl;
ver = latest_ver;
}
assert (ver != NULL);
}
abbrev *original = section->find_abbrev (abbr_code);
abbrev *cur;
abbrev fake (where);
if (unlikely (original != NULL))
{
wr_error (where) << "duplicate abbrev code (first was at "
<< original->where << ").\n";
/* Don't actually save this abbrev if it's duplicate. */
cur = &fake;
}
else
{
REALLOC (section, abbr);
cur = section->abbr + section->size++;
new (cur) abbrev (where);
}
cur->code = abbr_code;
/* Abbreviation tag. */
uint64_t abbr_tag;
if (!checked_read_uleb128 (&ctx, &abbr_tag, where, "abbrev tag"))
throw check_base::failed ();
if (abbr_tag > DW_TAG_hi_user)
{
wr_error (where)
<< "invalid abbrev tag " << pri::hex (abbr_tag)
<< '.' << std::endl;
throw check_base::failed ();
}
cur->tag = (typeof (cur->tag))abbr_tag;
/* Abbreviation has_children. */
uint8_t has_children;
if (!read_ctx_read_ubyte (&ctx, &has_children))
{
wr_error (&where, ": can't read abbrev has_children.\n");
throw check_base::failed ();
}
if (has_children != DW_CHILDREN_no
&& has_children != DW_CHILDREN_yes)
{
wr_error (where)
<< "invalid has_children value " << pri::hex (cur->has_children)
<< '.' << std::endl;
throw check_base::failed ();
}
cur->has_children = has_children == DW_CHILDREN_yes;
bool null_attrib;
bool low_pc = false;
bool high_pc = false;
bool ranges = false;
std::map seen;
do
{
uint64_t attr_off = read_ctx_get_offset (&ctx);
uint64_t attrib_name, attrib_form;
if (first_attr_off == 0)
first_attr_off = attr_off;
/* Shift to match elfutils reporting. */
attr_off -= first_attr_off;
abbrev_attrib_locus attr_locus (abbr_off, attr_off);
/* Load attribute name and form. */
if (!checked_read_uleb128 (&ctx, &attrib_name, attr_locus,
"attribute name"))
throw check_base::failed ();
if (!checked_read_uleb128 (&ctx, &attrib_form, attr_locus,
"attribute form"))
throw check_base::failed ();
/* Now if both are zero, this was the last attribute. */
null_attrib = attrib_name == 0 && attrib_form == 0;
REALLOC (cur, attribs);
attr_locus.set_name (attrib_name);
struct abbrev_attrib *acur = cur->attribs + cur->size++;
new (acur) abbrev_attrib ();
acur->name = attrib_name;
acur->form = attrib_form;
acur->where = attr_locus;
if (null_attrib)
break;
/* Otherwise validate name and form. */
if (attrib_name == 0)
{
wr_error (attr_locus.non_symbolic ())
<< "invalid attribute code 0." << std::endl;
// We can handle this, so keep going. But this is not
// kosher for high-level checks.
failed = true;
continue;
}
attribute const *attribute = ver->get_attribute (attrib_name);
if (attribute == NULL)
{
// GCC commonly emits DWARF 2 with trivial extensions
// (such as attribute names) from newer versions. In
// GNU mode, don't even mind this. In non-gnu, emit
// warning. We explicitly don't do this for forms,
// where the consumer wouldn't know how to read or
// skip the datum.
attribute = latest_ver->get_attribute (attrib_name);
if (attribute == NULL)
// libdw should handle unknown attribute, as long as
// the form is kosher, so don't fail the check.
wr_message (attr_locus.non_symbolic (),
mc_abbrevs | mc_impact_1)
<< "invalid or unknown name " << pri::hex (attrib_name)
<< '.' << std::endl;
else if (opt_nognu)
wr_message (attr_locus, mc_abbrevs | mc_impact_1)
<< "attribute from later DWARF version."
<< std::endl;
}
form const *form = check_debug_abbrev::check_form
(ver, attribute, attrib_form, attr_locus, false);
if (form == NULL)
{
// Error message has been emitted in check_form.
failed = true;
continue;
}
std::pair::iterator, bool> inserted
= seen.insert (std::make_pair (attrib_name, attr_off));
if (!inserted.second)
{
wr_error (attr_locus.non_symbolic ())
<< "duplicate attribute " << attr_locus.name ()
<< " (first was at " << pri::hex (inserted.first->second)
<< ")." << std::endl;
// I think we may allow such files for high-level
// consumption, so don't fail the check...
if (attrib_name == DW_AT_sibling)
// ... unless it's DW_AT_sibling.
failed = true;
}
if (attrib_name == DW_AT_sibling && !cur->has_children)
wr_message (attr_locus, mc_die_rel | mc_acc_bloat | mc_impact_1)
<< "superfluous DW_AT_sibling attribute at childless abbrev."
<< std::endl;
if (attrib_name == DW_AT_ranges)
ranges = true;
else if (attrib_name == DW_AT_low_pc)
low_pc = true;
else if (attrib_name == DW_AT_high_pc)
high_pc = true;
}
while (!null_attrib);
if (high_pc && !low_pc)
wr_error (where)
<< "the abbrev has DW_AT_high_pc without also having DW_AT_low_pc."
<< std::endl;
else if (high_pc && ranges)
wr_error (where)
<< "the abbrev has DW_AT_high_pc & DW_AT_low_pc, "
<< "but also has DW_AT_ranges." << std::endl;
}
if (failed)
throw check_base::failed ();
abbrev_table *last = NULL;
for (check_debug_abbrev::abbrev_map::iterator it = abbrevs.begin ();
it != abbrevs.end (); ++it)
{
std::sort (it->second.abbr, it->second.abbr + it->second.size,
cmp_abbrev ());
if (last != NULL)
last->next = &it->second;
last = &it->second;
}
return abbrevs;
}
}
check_debug_abbrev::check_debug_abbrev (checkstack &stack, dwarflint &lint)
: _m_sec_abbr (lint.check (stack, _m_sec_abbr))
, _m_cu_headers (lint.toplev_check (stack, _m_cu_headers))
, abbrevs (load_debug_abbrev (_m_sec_abbr->sect,
_m_sec_abbr->file,
_m_cu_headers))
{
}
form const *
check_debug_abbrev::check_form (dwarf_version const *ver,
attribute const *attribute,
int form_name, locus const &loc, bool indirect)
{
form const *form = ver->get_form (form_name);
if (form == NULL)
{
wr_error (loc)
<< "invalid form " << pri::hex (form_name)
<< '.' << std::endl;
return NULL;
}
if (attribute != NULL)
{
int attrib_name = attribute->name ();
if (!ver->form_allowed (attribute, form))
{
complain (loc, form_name, indirect, "invalid");
return NULL;
}
else if (attrib_name == DW_AT_sibling
&& sibling_form_suitable (ver, form) == sfs_long)
complain (loc, form_name, indirect, "unsuitable");
}
return form;
}
check_debug_abbrev::~check_debug_abbrev ()
{
// xxx So using new[]/delete[] would be nicer (delete ignores
// const-ness), but I'm not dipping into that right now. Just cast
// away the const, we're in the dtor so what the heck.
abbrev_map &my_abbrevs = const_cast (abbrevs);
for (abbrev_map::iterator it = my_abbrevs.begin ();
it != my_abbrevs.end (); ++it)
{
for (size_t i = 0; i < it->second.size; ++i)
free (it->second.abbr[i].attribs);
free (it->second.abbr);
}
}