Diffstat (limited to 'debuginfod')
 -rw-r--r--   debuginfod/ChangeLog            | 116
 -rw-r--r--   debuginfod/Makefile.am          |  32
 -rw-r--r--   debuginfod/debuginfod-client.c  |  95
 -rw-r--r--   debuginfod/debuginfod-find.c    |  11
 -rw-r--r--   debuginfod/debuginfod.cxx       | 319
5 files changed, 433 insertions(+), 140 deletions(-)
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index bc3bce32..34363e7e 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,119 @@
+2020-10-31  Frank Ch. Eigler  <[email protected]>
+
+        * debuginfod.cxx (scan_source_file, scan_archive_file): Add new
+        scanned_bytes_total, scanned_files_total metrics.
+        (archive_classify): Exit early if interrupted.
+        (scan_source_paths): Perform realpath/regex checks only on FTS_F
+        files.  Tweak metrics.
+
+2020-10-30  Frank Ch. Eigler  <[email protected]>
+
+        PR26775 cont'd.
+        * debuginfod.cxx (thread_main_scanner): Ensure control doesn't
+        leave infinite loop until program exit, even if SIGUSR2.
+        (scan_source_paths): Have traverser clean scanq on
+        SIGUSR2.  Emit additional traversed_total metrics.
+        (groom): Emit additional groomed_total metrics.
+        (thread_main_groom): Restore previous thread_work_total
+        metric.
+
+2020-10-29  Frank Ch. Eigler  <[email protected]>
+
+        PR26775
+        * debuginfod.cxx (forced_*_count): Make these global.
+        (runq::clear): New function.
+        (thread_main_scanner): Check for pending SIGUSR2; interrupt.
+        (scan_source_paths): Check for pending SIGUSR2; interrupt.
+        (groom): Report prometheus stats before groom also.  Check for
+        pending SIGUSR1; interrupt.  Increment thread_work_total for
+        each file scanned, not the entire cycle.
+
+2020-10-29  Frank Ch. Eigler  <[email protected]>
+
+        PR26810
+        * debuginfod.cxx (handle_buildid_*_match): Throw exceptions for
+        more lower level libc errors.
+        (handle_buildid_match): Catch & report exceptions but return 0
+        for continued iteration in the caller.
+
+2020-10-25  Mark Wielaard  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_query_server): Translate
+        CURLE_PEER_FAILED_VERIFICATION to ECONNREFUSED.
+
+2020-10-20  Frank Ch. Eigler  <[email protected]>
+
+        PR26756: more prometheus metrics
+        * debuginfod.cxx (*_exception): Add counters for error occurrences.
+        (fdcache::*): Add counters for fdcache operations and status.
+        (fdcache::set_metric): New fn for overall stat counts.
+        (fdcache::limit): ... allow metric-less use from dtors.
+
+2020-10-20  Frank Ch. Eigler  <[email protected]>
+
+        * debuginfod.cxx (handle_buildid*): Add a parameter for detecting
+        internally-originated lookups for dwz resolution.
+
+2020-09-18  Frank Ch. Eigler  <[email protected]>
+
+        * debuginfod.cxx (scan_source_file, archive_classify): Store only
+        canonicalized file names in sdef & sref records in the database.
+
+2020-09-08  Mark Wielaard  <[email protected]>
+
+        * Makefile.am (BUILD_STATIC): Include libcurl_LIBS in libdebuginfod
+        when NOT DUMMY_LIBDEBUGINFOD.
+
+2020-09-16  Mark Wielaard  <[email protected]>
+
+        * debuginfod-find.c: Fix license block comment.
+
+2020-09-15  Mark Wielaard  <[email protected]>
+
+        * debuginfod-find.c (main): Use dwelf_elf_begin.
+
+2020-07-03  Alice Zhang  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_query_server): Use strncasecmp
+        to compare effective_url.  Try CURLINFO_SCHEME as fallback.
+
+2020-06-19  Mark Wielaard  <[email protected]>
+
+        * Makefile.am (bin_PROGRAMS): Guard with DEBUGINFOD and
+        LIBDEBUGINFOD.
+        (debuginfod_LDADD): Remove libcurl.
+        (libdebuginfod): When static and DUMMY_LIBDEBUGINFO remove libcurl.
+        (noinst_LIBRARIES): Guard with LIBDEBUGINFOD.
+        (AM_CPPFLAGS): Add -Wno-unused-parameter when DUMMY_LIBDEBUGINFOD.
+        (pkginclude_headers): Guard with LIBDEBUGINFOD
+        (libdebuginfod_so_LIBS): Likewise.
+        (+libdebuginfod_so_LDLIBS): Likewise.
+        (install): Likewise.
+        (uninstall): Likewise.
+        * debuginfod-client.c: Include dummy functions when
+        DUMMY_LIBDEBUGINFOD.
+        * debuginfod.cxx: Remove curl.h include.
+
+2020-06-16  Mark Wielaard  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_query_server): Check malloc.
+        Move curl_multi_init call before handle_data malloc call.
+
+2020-06-16  Mark Wielaard  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_query_server): Replace sizeof
+        build_id_bytes check with strlen build_id check.
+
+2020-06-16  Mark Wielaard  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_query_server): Increase suffix
+        array and prepare having to escape 1 character with 2.
+
+2020-06-16  Mark Wielaard  <[email protected]>
+
+        * debuginfod-client.c (debuginfod_clean_cache): Handle failing
+        fopen (interval_path).
+
 2020-03-29  Mark Wielaard  <[email protected]>
 
         * debuginfod-client.c (debuginfod_add_http_header): Check header
diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am
index 47b6e431..8b07ed0b 100644
--- a/debuginfod/Makefile.am
+++ b/debuginfod/Makefile.am
@@ -45,7 +45,11 @@ if BUILD_STATIC
 libasm = ../libasm/libasm.a
 libdw = ../libdw/libdw.a -lz $(zip_LIBS) $(libelf) $(libebl) -ldl -lpthread
 libelf = ../libelf/libelf.a -lz
+if DUMMY_LIBDEBUGINFOD
+libdebuginfod = ./libdebuginfod.a
+else
 libdebuginfod = ./libdebuginfod.a $(libcurl_LIBS)
+endif
 else
 libasm = ../libasm/libasm.so
 libdw = ../libdw/libdw.so
@@ -57,25 +61,46 @@ libeu = ../lib/libeu.a
 
 AM_LDFLAGS = -Wl,-rpath-link,../libelf:../libdw:.
 
-bin_PROGRAMS = debuginfod debuginfod-find
+bin_PROGRAMS =
+if DEBUGINFOD
+bin_PROGRAMS += debuginfod
+endif
+
+if LIBDEBUGINFOD
+bin_PROGRAMS += debuginfod-find
+endif
+
 debuginfod_SOURCES = debuginfod.cxx
-debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(libmicrohttpd_LIBS) $(libcurl_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) -lpthread -ldl
+debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) -lpthread -ldl
 
 debuginfod_find_SOURCES = debuginfod-find.c
 debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod)
 
+if LIBDEBUGINFOD
 lib_LIBRARIES = libdebuginfod.a
 noinst_LIBRARIES = libdebuginfod_pic.a
+endif
 
 libdebuginfod_a_CFLAGS = -fPIC $(AM_CFLAGS)
 libdebuginfod_a_SOURCES = debuginfod-client.c
 
 libdebuginfod_pic_a_SOURCES = debuginfod-client.c
 am_libdebuginfod_pic_a_OBJECTS = $(libdebuginfod_a_SOURCES:.c=.os)
 
+if DUMMY_LIBDEBUGINFOD
+AM_CPPFLAGS += -Wno-unused-parameter
+endif
+
+if LIBDEBUGINFOD
 pkginclude_HEADERS = debuginfod.h
+endif
 
+if LIBDEBUGINFOD
 libdebuginfod_so_LIBS = libdebuginfod_pic.a
+if DUMMY_LIBDEBUGINFOD
+libdebuginfod_so_LDLIBS =
+else
 libdebuginfod_so_LDLIBS = $(libcurl_LIBS)
+endif
 libdebuginfod.so$(EXEEXT): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS)
 	$(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \
 		-Wl,--soname,$@.$(VERSION) \
@@ -84,7 +109,9 @@ libdebuginfod.so$(EXEEXT): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS)
 		$(libdebuginfod_so_LDLIBS)
 	@$(textrel_check)
 	$(AM_V_at)ln -fs $@ $@.$(VERSION)
+endif
 
+if LIBDEBUGINFOD
 install: install-am libdebuginfod.so
 	$(mkinstalldirs) $(DESTDIR)$(libdir)
 	$(INSTALL_PROGRAM) libdebuginfod.so $(DESTDIR)$(libdir)/libdebuginfod-$(PACKAGE_VERSION).so
@@ -96,6 +123,7 @@ uninstall: uninstall-am
 	rm -f $(DESTDIR)$(libdir)/libdebuginfod.so.$(VERSION)
 	rm -f $(DESTDIR)$(libdir)/libdebuginfod.so
 	rmdir --ignore-fail-on-non-empty $(DESTDIR)$(includedir)/elfutils
+endif
 
 EXTRA_DIST = libdebuginfod.map
 MOSTLYCLEANFILES = $(am_libdebuginfod_pic_a_OBJECTS) libdebuginfod.so.$(VERSION)
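The new conditionals let elfutils bootstrap itself with a stub ("dummy") libdebuginfod whose entry points all fail with -ENOSYS (see the debuginfod-client.c hunk that follows). A minimal sketch, not part of the patch, of a caller that behaves the same against the real or the dummy library; the build-id string is only an example value, and the program is assumed to be linked with -ldebuginfod:

// Degrade gracefully whether the library is real, dummy, or the server is down.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <elfutils/debuginfod.h>

int main()
{
  debuginfod_client *c = debuginfod_begin();
  if (c == NULL)
    {
      // The dummy library sets errno = ENOSYS and returns NULL here.
      fprintf(stderr, "debuginfod unavailable: %s\n", strerror(errno));
      return 1;
    }

  char *path = NULL;
  // build_id_len == 0 means the id is passed as a clean hex string.
  int fd = debuginfod_find_debuginfo(c,
             (const unsigned char *) "0123456789abcdef0123456789abcdef01234567",
             0, &path);
  if (fd >= 0)
    {
      printf("debuginfo cached at %s\n", path);  // path is malloc'd by the library
      free(path);
      close(fd);
    }
  else
    fprintf(stderr, "lookup failed: %s\n", strerror(-fd));  // -ENOSYS from the dummy

  debuginfod_end(c);
  return 0;
}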
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index a7dfbfb1..0e5177bc 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -41,13 +41,35 @@
 #include "config.h"
 #include "debuginfod.h"
 #include "system.h"
+#include <errno.h>
+#include <stdlib.h>
+
+/* We might be building a bootstrap dummy library, which is really simple. */
+#ifdef DUMMY_LIBDEBUGINFOD
+
+debuginfod_client *debuginfod_begin (void) { errno = ENOSYS; return NULL; }
+int debuginfod_find_debuginfo (debuginfod_client *c, const unsigned char *b,
+                               int s, char **p) { return -ENOSYS; }
+int debuginfod_find_executable (debuginfod_client *c, const unsigned char *b,
+                                int s, char **p) { return -ENOSYS; }
+int debuginfod_find_source (debuginfod_client *c, const unsigned char *b,
+                            int s, const char *f, char **p) { return -ENOSYS; }
+void debuginfod_set_progressfn(debuginfod_client *c,
+                               debuginfod_progressfn_t fn) { }
+void debuginfod_set_user_data (debuginfod_client *c, void *d) { }
+void* debuginfod_get_user_data (debuginfod_client *c) { return NULL; }
+const char* debuginfod_get_url (debuginfod_client *c) { return NULL; }
+int debuginfod_add_http_header (debuginfod_client *c,
+                                const char *h) { return -ENOSYS; }
+void debuginfod_end (debuginfod_client *c) { }
+
+#else /* DUMMY_LIBDEBUGINFOD */
+
 #include <assert.h>
 #include <dirent.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
-#include <errno.h>
 #include <fcntl.h>
 #include <fts.h>
 #include <regex.h>
@@ -244,9 +266,14 @@ debuginfod_clean_cache(debuginfod_client *c,
   /* Check timestamp of interval file to see whether cleaning is necessary.  */
   time_t clean_interval;
   interval_file = fopen(interval_path, "r");
-  if (fscanf(interval_file, "%ld", &clean_interval) != 1)
+  if (interval_file)
+    {
+      if (fscanf(interval_file, "%ld", &clean_interval) != 1)
+        clean_interval = cache_clean_default_interval_s;
+      fclose(interval_file);
+    }
+  else
     clean_interval = cache_clean_default_interval_s;
-  fclose(interval_file);
 
   if (time(NULL) - st.st_mtime < clean_interval)
     /* Interval has not passed, skip cleaning. */
@@ -469,7 +496,7 @@ debuginfod_query_server (debuginfod_client *c,
   char *target_cache_dir = NULL;
   char *target_cache_path = NULL;
   char *target_cache_tmppath = NULL;
-  char suffix[PATH_MAX];
+  char suffix[PATH_MAX + 1]; /* +1 for zero terminator.  */
   char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
   int rc;
 
@@ -491,7 +518,7 @@ debuginfod_query_server (debuginfod_client *c,
   /* Copy lowercase hex representation of build_id into buf.  */
   if ((build_id_len >= MAX_BUILD_ID_BYTES) ||
       (build_id_len == 0 &&
-       sizeof(build_id_bytes) > MAX_BUILD_ID_BYTES*2 + 1))
+       strlen ((const char *) build_id) > MAX_BUILD_ID_BYTES*2))
     return -EINVAL;
   if (build_id_len == 0) /* expect clean hexadecimal */
     strcpy (build_id_bytes, (const char *) build_id);
@@ -506,7 +533,7 @@ debuginfod_query_server (debuginfod_client *c,
 
       /* copy the filename to suffix, s,/,#,g */
       unsigned q = 0;
-      for (unsigned fi=0; q < PATH_MAX-1; fi++)
+      for (unsigned fi=0; q < PATH_MAX-2; fi++) /* -2, escape is 2 chars.  */
        switch (filename[fi])
          {
          case '\0':
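The suffix[PATH_MAX + 1] and "q < PATH_MAX-2" changes above are about headroom: once a single input character may be escaped into two output characters, the loop must stop two bytes early to leave room for the escape pair plus the terminating NUL. A standalone sketch of that bounds reasoning, with made-up escaping rules rather than the client's real ones:

// Copy a path into a fixed buffer where one input char may become two output chars.
#include <cstdio>
#include <cstring>

static void escape_path(const char *in, char *out, size_t outsz)  // assumes outsz >= 3
{
  size_t q = 0;
  for (size_t i = 0; q < outsz - 2; i++)   // -2: worst case writes 2 chars per input char
    {
      char ch = in[i];
      if (ch == '\0')
        break;
      else if (ch == '/')                  // example rule: s,/,#,g
        out[q++] = '#';
      else if (ch == '#')                  // example rule: escape a literal '#' as two chars
        { out[q++] = '%'; out[q++] = '#'; }
      else
        out[q++] = ch;
    }
  out[q] = '\0';                           // always room left for the terminator
}

int main()
{
  char buf[64];
  escape_path("/usr/src/debug/foo#bar.c", buf, sizeof buf);
  std::printf("%s\n", buf);                // prints: #usr#src#debug#foo%#bar.c
  return 0;
}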
@@ -660,10 +687,24 @@ debuginfod_query_server (debuginfod_client *c,
         && (i == 0 || server_urls[i - 1] == url_delim_char))
       num_urls++;
 
+  CURLM *curlm = curl_multi_init();
+  if (curlm == NULL)
+    {
+      rc = -ENETUNREACH;
+      goto out0;
+    }
+
   /* Tracks which handle should write to fd. Set to the first
      handle that is ready to write the target file to the cache.  */
   CURL *target_handle = NULL;
   struct handle_data *data = malloc(sizeof(struct handle_data) * num_urls);
+  if (data == NULL)
+    {
+      rc = -ENOMEM;
+      goto out0;
+    }
+
+  /* thereafter, goto out1 on error.  */
 
   /* Initalize handle_data with default values. */
   for (int i = 0; i < num_urls; i++)
@@ -672,14 +713,6 @@ debuginfod_query_server (debuginfod_client *c,
       data[i].fd = -1;
     }
 
-  CURLM *curlm = curl_multi_init();
-  if (curlm == NULL)
-    {
-      rc = -ENETUNREACH;
-      goto out0;
-    }
-  /* thereafter, goto out1 on error.  */
-
   char *strtok_saveptr;
   char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
 
@@ -838,6 +871,7 @@ debuginfod_query_server (debuginfod_client *c,
            case CURLE_COULDNT_RESOLVE_HOST: rc = -EHOSTUNREACH; break; // no NXDOMAIN
            case CURLE_URL_MALFORMAT: rc = -EINVAL; break;
            case CURLE_COULDNT_CONNECT: rc = -ECONNREFUSED; break;
+           case CURLE_PEER_FAILED_VERIFICATION: rc = -ECONNREFUSED; break;
            case CURLE_REMOTE_ACCESS_DENIED: rc = -EACCES; break;
            case CURLE_WRITE_ERROR: rc = -EIO; break;
            case CURLE_OUT_OF_MEMORY: rc = -ENOMEM; break;
@@ -866,19 +900,44 @@ debuginfod_query_server (debuginfod_client *c,
                                             &resp_code);
 
          if(ok1 == CURLE_OK && ok2 == CURLE_OK && effective_url)
            {
-             if (strncmp (effective_url, "http", 4) == 0)
+             if (strncasecmp (effective_url, "HTTP", 4) == 0)
                if (resp_code == 200)
                  {
                    verified_handle = msg->easy_handle;
                    break;
                  }
-             if (strncmp (effective_url, "file", 4) == 0)
+             if (strncasecmp (effective_url, "FILE", 4) == 0)
                if (resp_code == 0)
                  {
                    verified_handle = msg->easy_handle;
                    break;
                  }
+             /* - libcurl since 7.52.0 version start to support
+                  CURLINFO_SCHEME;
+                - before 7.61.0, effective_url would give us a
+                  url with upper case SCHEME added in the front;
+                - effective_url between 7.61 and 7.69 can be lack
+                  of scheme if the original url doesn't include one;
+                - since version 7.69 effective_url will be provide
+                  a scheme in lower case.  */
+             #if LIBCURL_VERSION_NUM >= 0x073d00 /* 7.61.0 */
+             #if LIBCURL_VERSION_NUM <= 0x074500 /* 7.69.0 */
+             char *scheme = NULL;
+             CURLcode ok3 = curl_easy_getinfo (target_handle,
+                                               CURLINFO_SCHEME,
+                                               &scheme);
+             if(ok3 == CURLE_OK && scheme)
+               {
+                 if (strncmp (scheme, "HTTP", 4) == 0)
+                   if (resp_code == 200)
+                     {
+                       verified_handle = msg->easy_handle;
+                       break;
+                     }
+               }
+             #endif
+             #endif
            }
        }
     }
@@ -1079,3 +1138,5 @@ __attribute__((destructor)) attribute_hidden void libdebuginfod_dtor(void)
   /* ... so don't do this: */
   /* curl_global_cleanup(); */
 }
+
+#endif /* DUMMY_LIBDEBUGINFOD */
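The new CURLE_PEER_FAILED_VERIFICATION case extends the existing translation of libcurl result codes into negative errno values, so a caller can test -rc against ECONNREFUSED and friends. A condensed sketch of that mapping, showing only representative cases and a guessed catch-all, not the client's full table:

#include <curl/curl.h>
#include <errno.h>
#include <cstdio>

static int curl_code_to_errno(CURLcode res)
{
  switch (res)
    {
    case CURLE_OK:                        return 0;
    case CURLE_COULDNT_RESOLVE_HOST:      return -EHOSTUNREACH; // no NXDOMAIN errno exists
    case CURLE_URL_MALFORMAT:             return -EINVAL;
    case CURLE_COULDNT_CONNECT:           return -ECONNREFUSED;
    case CURLE_PEER_FAILED_VERIFICATION:  return -ECONNREFUSED; // TLS verification failure
    case CURLE_REMOTE_ACCESS_DENIED:      return -EACCES;
    case CURLE_WRITE_ERROR:               return -EIO;
    case CURLE_OUT_OF_MEMORY:             return -ENOMEM;
    default:                              return -ENETUNREACH;  // illustrative catch-all
    }
}

int main()
{
  std::printf("%d\n", curl_code_to_errno(CURLE_COULDNT_CONNECT)); // prints -ECONNREFUSED
  return 0;
}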
diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c
index 83a43ce4..88a460f8 100644
--- a/debuginfod/debuginfod-find.c
+++ b/debuginfod/debuginfod-find.c
@@ -13,9 +13,9 @@
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.
 
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see <http://www.gnu.org/licenses/>.  */
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
 
 #include "config.h"
 #include "printversion.h"
@@ -138,9 +138,10 @@ main(int argc, char** argv)
         }
 
       if (fd >= 0)
         {
-          elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
+          elf = dwelf_elf_begin (fd);
           if (elf == NULL)
-            fprintf (stderr, "Cannot elf_begin %s: %s\n", build_id, elf_errmsg(-1));
+            fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id,
+                     elf_errmsg (-1));
         }
       if (elf != NULL)
         {
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 76f1fa52..3085f388 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -92,6 +92,14 @@ using namespace std;
 #include <libdwelf.h>
 
 #include <microhttpd.h>
+
+#if MHD_VERSION >= 0x00097002
+// libmicrohttpd 0.9.71 broke API
+#define MHD_RESULT enum MHD_Result
+#else
+#define MHD_RESULT int
+#endif
+
 #include <curl/curl.h>
 #include <archive.h>
 #include <archive_entry.h>
@@ -380,7 +388,9 @@ static string db_path;
 static sqlite3 *db;  // single connection, serialized across all our threads!
 static unsigned verbose;
 static volatile sig_atomic_t interrupted = 0;
+static volatile sig_atomic_t forced_rescan_count = 0;
 static volatile sig_atomic_t sigusr1 = 0;
+static volatile sig_atomic_t forced_groom_count = 0;
 static volatile sig_atomic_t sigusr2 = 0;
 static unsigned http_port = 8002;
 static unsigned rescan_s = 300;
@@ -519,12 +529,12 @@ struct reportable_exception
 
   void report(ostream& o) const; // defined under obatched() class below
 
-  int mhd_send_response(MHD_Connection* c) const {
+  MHD_RESULT mhd_send_response(MHD_Connection* c) const {
     MHD_Response* r = MHD_create_response_from_buffer (message.size(),
                                                        (void*) message.c_str(),
                                                        MHD_RESPMEM_MUST_COPY);
     MHD_add_response_header (r, "Content-Type", "text/plain");
-    int rc = MHD_queue_response (c, code, r);
+    MHD_RESULT rc = MHD_queue_response (c, code, r);
     MHD_destroy_response (r);
     return rc;
   }
@@ -540,23 +550,31 @@ struct sqlite_exception: public reportable_exception
 struct libc_exception: public reportable_exception
 {
   libc_exception(int rc, const string& msg):
-    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+    inc_metric("error_count","libc",strerror(rc));
+  }
 };
 
 
 struct archive_exception: public reportable_exception
 {
   archive_exception(const string& msg):
-    reportable_exception(string("libarchive error: ") + msg) {}
+    reportable_exception(string("libarchive error: ") + msg) {
+      inc_metric("error_count","libarchive",msg);
+  }
   archive_exception(struct archive* a, const string& msg):
-    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+      inc_metric("error_count","libarchive",msg);
+  }
 };
 
 
 struct elfutils_exception: public reportable_exception
 {
   elfutils_exception(int rc, const string& msg):
-    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+      inc_metric("error_count","elfutils",elf_errmsg(rc));
+  }
 };
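The *_exception constructors above gain inc_metric calls, so every error that is thrown also shows up as an error_count sample in the /metrics output, with no extra bookkeeping at the throw sites. A self-contained sketch of that pattern, using a stand-in metric store and a hypothetical counted_exception class rather than debuginfod's real helpers:

#include <map>
#include <mutex>
#include <stdexcept>
#include <string>
#include <iostream>

static std::map<std::string, long long> error_counts;
static std::mutex error_counts_lock;

static void inc_error_count(const std::string& source, const std::string& detail)
{
  std::lock_guard<std::mutex> l(error_counts_lock);
  error_counts["error_count{source=\"" + source + "\",detail=\"" + detail + "\"}"]++;
}

struct counted_exception : std::runtime_error
{
  counted_exception(const std::string& source, const std::string& msg)
    : std::runtime_error(source + " error: " + msg)
  {
    inc_error_count(source, msg);   // side effect: counter bumped at throw time
  }
};

int main()
{
  try { throw counted_exception("libc", "open"); }
  catch (const std::exception& e) { std::cout << e.what() << "\n"; }
  for (const auto& kv : error_counts)
    std::cout << kv.first << " " << kv.second << "\n";
  return 0;
}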
@@ -591,6 +609,14 @@ public:
     cv.notify_all();
   }
 
+  // clear the workqueue, when scanning is interrupted with USR2
+  void clear() {
+    unique_lock<mutex> lock(mtx);
+    q.clear();
+    set_metric("thread_work_pending","role","scan", q.size());
+    cv.notify_all(); // maybe wake up waiting idlers
+  }
+
   // block this scanner thread until there is work to do and no active
   bool wait_front (Payload& p)
   {
@@ -882,18 +908,15 @@ add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
 
 
 static struct MHD_Response*
-handle_buildid_f_match (int64_t b_mtime,
+handle_buildid_f_match (bool internal_req_t,
+                        int64_t b_mtime,
                         const string& b_source0,
                         int *result_fd)
 {
+  (void) internal_req_t; // ignored
   int fd = open(b_source0.c_str(), O_RDONLY);
   if (fd < 0)
-    {
-      if (verbose)
-        obatched(clog) << "cannot open " << b_source0 << endl;
-      // if still missing, a periodic groom pass will delete this buildid record
-      return 0;
-    }
+    throw libc_exception (errno, string("open ") + b_source0);
 
   // NB: use manual close(2) in error case instead of defer_dtor, because
   // in the normal case, we want to hand the fd over to libmicrohttpd for
@@ -903,10 +926,8 @@ handle_buildid_f_match (int64_t b_mtime,
   int rc = fstat(fd, &s);
   if (rc < 0)
     {
-      if (verbose)
-        clog << "cannot fstat " << b_source0 << endl;
       close(fd);
-      return 0;
+      throw libc_exception (errno, string("fstat ") + b_source0);
     }
 
   if ((int64_t) s.st_mtime != b_mtime)
@@ -1075,6 +1096,15 @@ private:
   long max_mbs;
 
 public:
+  void set_metrics()
+  {
+    double total_mb = 0.0;
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      total_mb += i->fd_size_mb;
+    set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+    set_metric("fdcache_count", lru.size());
+  }
+
   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
@@ -1085,19 +1115,27 @@ public:
         {
           unlink (i->fd.c_str());
           lru.erase(i);
+          inc_metric("fdcache_op_count","op","dequeue");
           break; // must not continue iterating
         }
     }
     double mb = (sz+65535)/1048576.0; // round up to 64K block
     fdcache_entry n = { a, b, fd, mb };
     if (front_p)
-      lru.push_front(n);
+      {
+        inc_metric("fdcache_op_count","op","enqueue_front");
+        lru.push_front(n);
+      }
     else
-      lru.push_back(n);
-    if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b
-                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+      {
+        inc_metric("fdcache_op_count","op","enqueue_back");
+        lru.push_back(n);
+      }
+    if (verbose > 3)
+      obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
+    set_metrics();
 
     // NB: we age the cache at lookup time too
     if (front_p)
@@ -1116,7 +1154,7 @@ public:
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
             lru.push_front(n);
-
+            inc_metric("fdcache_op_count","op","requeue_front");
             fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
             break;
           }
@@ -1135,8 +1173,12 @@ public:
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         if (i->archive == a && i->entry == b)
-          return true;
+          {
+            inc_metric("fdcache_op_count","op","probe_hit");
+            return true;
+          }
       }
+    inc_metric("fdcache_op_count","op","probe_miss");
     return false;
   }
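The fdcache hunks above and below instrument every cache operation (enqueue, dequeue, requeue, probe hit/miss, evict) and keep a running total of the cached bytes. A toy LRU sketch, caching nothing real, that shows the same counter placement, including a metric-less limit() so a destructor can clean up after the metric globals are gone; all names here are illustrative, not debuginfod's fdcache:

#include <deque>
#include <map>
#include <string>
#include <cstdio>

struct cache_entry { std::string key; std::string path; double size_mb; };

class lru_cache
{
  std::deque<cache_entry> lru;                 // front = most recently used
  std::map<std::string, long long> metrics;    // stand-in for the global metric map

public:
  void intern(const std::string& key, const std::string& path, double size_mb)
  {
    metrics["op_enqueue"]++;
    lru.push_front(cache_entry{key, path, size_mb});
    set_size_metrics();
  }

  bool probe(const std::string& key)
  {
    for (const auto& e : lru)
      if (e.key == key) { metrics["op_probe_hit"]++; return true; }
    metrics["op_probe_miss"]++;
    return false;
  }

  void limit(double max_mb, bool metrics_p = true)   // metrics_p=false for destructor use
  {
    double total = 0.0;
    auto i = lru.begin();
    for (; i != lru.end(); ++i)
      {
        total += i->size_mb;
        if (total > max_mb)
          break;                                     // evict from here to the end
      }
    if (metrics_p)
      metrics["op_evict"] += (long long)(lru.end() - i);
    lru.erase(i, lru.end());
    if (metrics_p)
      set_size_metrics();
  }

  void set_size_metrics()
  {
    double total = 0.0;
    for (const auto& e : lru) total += e.size_mb;
    metrics["cache_mb_x1000"] = (long long)(total * 1000.0);
    metrics["cache_count"] = (long long) lru.size();
  }

  void dump() const
  {
    for (const auto& kv : metrics)
      std::printf("%s %lld\n", kv.first.c_str(), kv.second);
  }
};

int main()
{
  lru_cache c;
  c.intern("rpm1/a.debug", "/tmp/x1", 2.0);
  c.intern("rpm2/b.debug", "/tmp/x2", 3.0);
  c.probe("rpm1/a.debug");
  c.limit(2.5);      // evicts the least recently used tail
  c.dump();
  return 0;
}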
@@ -1149,13 +1191,15 @@ public:
       {
         if (i->archive == a && i->entry == b)
           { // found it; move it to head of lru
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
+            inc_metric("fdcache_op_count","op","clear");
             unlink (n.fd.c_str());
+            set_metrics();
             return;
           }
       }
   }
 
-  void limit(long maxfds, long maxmbs)
+  void limit(long maxfds, long maxmbs, bool metrics_p = true)
   {
     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
       obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
@@ -1180,19 +1224,23 @@ public:
             if (verbose > 3)
               obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
                              << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+            if (metrics_p)
+              inc_metric("fdcache_op_count","op","evict");
             unlink (j->fd.c_str());
           }
 
         lru.erase(i, lru.end()); // erase the nodes generally
         break;
       }
-  }
+    if (metrics_p) set_metrics();
+  }
 
   ~libarchive_fdcache()
   {
-    limit(0, 0);
+    // unlink any fdcache entries in $TMPDIR
+    // don't update metrics; those globals may be already destroyed
+    limit(0, 0, false);
   }
 };
 static libarchive_fdcache fdcache;
@@ -1218,7 +1266,8 @@ string canonicalized_archive_entry_pathname(struct archive_entry *e)
 
 
 static struct MHD_Response*
-handle_buildid_r_match (int64_t b_mtime,
+handle_buildid_r_match (bool internal_req_p,
+                        int64_t b_mtime,
                         const string& b_source0,
                         const string& b_source1,
                         int *result_fd)
@@ -1322,7 +1371,8 @@ handle_buildid_r_match (int64_t b_mtime,
   // 3) extract some number of prefetched entries (just into fdcache)
   // 4) abort any further processing
   struct MHD_Response* r = 0;                 // will set in stage 2
-  unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+  unsigned prefetch_count =
+    internal_req_p ? 0 : fdcache_prefetch;    // will decrement in stage 3
 
   while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
     {
@@ -1355,7 +1405,8 @@ handle_buildid_r_match (int64_t b_mtime,
             throw libc_exception (errno, "cannot create temporary file");
           // NB: don't unlink (tmppath), as fdcache will take charge of it.
 
-          rc = archive_read_data_into_fd (a, fd);
+          // NB: this can take many uninterruptible seconds for a huge file
+          rc = archive_read_data_into_fd (a, fd);
           if (rc != ARCHIVE_OK) // e.g. ENOSPC!
             {
               close (fd);
@@ -1416,18 +1467,28 @@ handle_buildid_r_match (int64_t b_mtime,
 
 
 static struct MHD_Response*
-handle_buildid_match (int64_t b_mtime,
+handle_buildid_match (bool internal_req_p,
+                      int64_t b_mtime,
                       const string& b_stype,
                       const string& b_source0,
                       const string& b_source1,
                       int *result_fd)
 {
-  if (b_stype == "F")
-    return handle_buildid_f_match(b_mtime, b_source0, result_fd);
-  else if (b_stype == "R")
-    return handle_buildid_r_match(b_mtime, b_source0, b_source1, result_fd);
-  else
-    return 0;
+  try
+    {
+      if (b_stype == "F")
+        return handle_buildid_f_match(internal_req_p, b_mtime, b_source0, result_fd);
+      else if (b_stype == "R")
+        return handle_buildid_r_match(internal_req_p, b_mtime, b_source0, b_source1, result_fd);
+    }
+  catch (const reportable_exception &e)
+    {
+      e.report(clog);
+      // Report but swallow libc etc. errors here; let the caller
+      // iterate to other matches of the content.
+    }
+
+  return 0;
 }
 
@@ -1497,6 +1558,8 @@ handle_buildid (MHD_Connection* conn,
                          "order by sharedprefix(source0,source0ref) desc, mtime desc");
       pp->reset();
       pp->bind(1, buildid);
+      // NB: we don't store the non-canonicalized path names any more, but old databases
+      // might have them (and no canon ones), so we keep searching for both.
      pp->bind(2, suffix);
      pp->bind(3, canon_pathname(suffix));
    }
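The comment added to handle_buildid, and the canonicalization hunks in scan_source_file and archive_classify below, all revolve around storing and looking up one canonical spelling of a source path. A hedged sketch of lexical canonicalization in the spirit of canon_pathname, collapsing "//", "/./" and "/../" purely textually; the real elfutils helper may differ in corner cases, this only illustrates why two spellings of a path should land on the same database row:

#include <string>
#include <vector>
#include <sstream>
#include <iostream>

static std::string canon_path_sketch(const std::string& input)  // absolute paths only
{
  std::vector<std::string> parts;
  std::stringstream ss(input);
  std::string piece;
  while (std::getline(ss, piece, '/'))
    {
      if (piece.empty() || piece == ".")
        continue;                                // squash "//" and "/./"
      if (piece == "..")
        {
          if (!parts.empty()) parts.pop_back();  // squash "dir/.."
          continue;
        }
      parts.push_back(piece);
    }
  std::string out;
  for (const auto& p : parts) out += "/" + p;
  return out.empty() ? "/" : out;
}

int main()
{
  std::cout << canon_path_sketch("/usr/lib/../src/./x.c") << "\n";  // /usr/src/x.c
  return 0;
}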
@@ -1521,7 +1584,8 @@ handle_buildid (MHD_Connection* conn,
 
       // Try accessing the located match.
       // XXX: in case of multiple matches, attempt them in parallel?
-      auto r = handle_buildid_match (b_mtime, b_stype, b_source0, b_source1, result_fd);
+      auto r = handle_buildid_match (conn ? false : true,
+                                     b_mtime, b_stype, b_source0, b_source1, result_fd);
       if (r)
         return r;
     }
@@ -1622,6 +1686,8 @@ handle_buildid (MHD_Connection* conn,
 static map<string,int64_t> metrics; // arbitrary data for /metrics query
 // NB: store int64_t since all our metrics are integers; prometheus accepts double
 static mutex metrics_lock;
+// NB: these objects get released during the process exit via global dtors
+// do not call them from within other global dtors
 
 // utility function for assembling prometheus-compatible
 // name="escaped-value" strings
@@ -1723,7 +1789,7 @@ handle_metrics (off_t* size)
 
 
 /* libmicrohttpd callback */
-static int
+static MHD_RESULT
 handler_cb (void * /*cls*/,
             struct MHD_Connection *connection,
             const char *url,
@@ -1736,7 +1802,11 @@ handler_cb (void * /*cls*/,
   struct MHD_Response *r = NULL;
   string url_copy = url;
 
+#if MHD_VERSION >= 0x00097002
+  enum MHD_Result rc;
+#else
   int rc = MHD_NO; // mhd
+#endif
   int http_code = 500;
   off_t http_size = -1;
   struct timeval tv_start, tv_end;
@@ -2159,7 +2229,9 @@ scan_source_file (const string& rps, const stat_t& st,
         elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
       else
         throw libc_exception(errno, string("open ") + rps);
-      inc_metric ("scanned_total","source","file");
+      add_metric ("scanned_bytes_total","source","file",
+                  st.st_size);
+      inc_metric ("scanned_files_total","source","file");
     }
   // NB: we catch exceptions here too, so that we can
   // cache the corrupt-elf case (!executable_p &&
@@ -2242,41 +2314,27 @@ scan_source_file (const string& rps, const stat_t& st,
             .bind(1, srps)
             .step_ok_done();
 
-          // register the dwarfsrc name in the interning table too
+          // PR25548: store canonicalized dwarfsrc path
+          string dwarfsrc_canon = canon_pathname (dwarfsrc);
+          if (dwarfsrc_canon != dwarfsrc)
+            {
+              if (verbose > 3)
+                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+            }
+
           ps_upsert_files
             .reset()
-            .bind(1, dwarfsrc)
+            .bind(1, dwarfsrc_canon)
             .step_ok_done();
 
           ps_upsert_s
             .reset()
             .bind(1, buildid)
-            .bind(2, dwarfsrc)
+            .bind(2, dwarfsrc_canon)
             .bind(3, srps)
             .bind(4, sfs.st_mtime)
             .step_ok_done();
 
-          // PR25548: also store canonicalized source path
-          string dwarfsrc_canon = canon_pathname (dwarfsrc);
-          if (dwarfsrc_canon != dwarfsrc)
-            {
-              if (verbose > 3)
-                obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
-
-              ps_upsert_files
-                .reset()
-                .bind(1, dwarfsrc_canon)
-                .step_ok_done();
-
-              ps_upsert_s
-                .reset()
-                .bind(1, buildid)
-                .bind(2, dwarfsrc_canon)
-                .bind(3, srps)
-                .bind(4, sfs.st_mtime)
-                .step_ok_done();
-            }
-
           inc_metric("found_sourcerefs_total","source","files");
         }
     }
@@ -2356,9 +2414,12 @@ archive_classify (const string& rps, string& archive_extension,
   if (verbose > 3)
     obatched(clog) << "libarchive scanning " << rps << endl;
 
-  while(1) // parse cpio archive entries
+  while(1) // parse archive entries
     {
-      try
+      if (interrupted)
+        break;
+
+      try
         {
           struct archive_entry *e;
           rc = archive_read_next_header (a, &e);
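Most of the server-side hunks boil down to calls such as inc_metric("scanned_files_total","source","file") or add_metric("scanned_bytes_total","source","file", st.st_size), feeding the mutex-protected metrics map mentioned above. A self-contained sketch, with deliberately simplified signatures (one label instead of debuginfod's variadic helpers), of how such labelled counters can be stored and rendered in the prometheus text format:

#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <iostream>

static std::map<std::string, long long> metrics;   // guarded by metrics_lock
static std::mutex metrics_lock;

static std::string metric_key(const std::string& family,
                              const std::string& label,
                              const std::string& value)
{
  return family + "{" + label + "=\"" + value + "\"}";  // prometheus sample name
}

static void add_metric(const std::string& family, const std::string& label,
                       const std::string& value, long long delta)
{
  std::lock_guard<std::mutex> l(metrics_lock);
  metrics[metric_key(family, label, value)] += delta;
}

static void inc_metric(const std::string& family, const std::string& label,
                       const std::string& value)
{
  add_metric(family, label, value, 1);
}

static std::string render_metrics()                 // body of a /metrics response
{
  std::ostringstream o;
  std::lock_guard<std::mutex> l(metrics_lock);
  for (const auto& kv : metrics)
    o << kv.first << " " << kv.second << "\n";
  return o.str();
}

int main()
{
  inc_metric("scanned_files_total", "source", "file");
  add_metric("scanned_bytes_total", "source", "file", 4096);
  std::cout << render_metrics();
  return 0;
}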
@@ -2427,36 +2488,25 @@ archive_classify (const string& rps, string& archive_extension,
                   continue;
                 }
 
-              ps_upsert_files
-                .reset()
-                .bind(1, s)
-                .step_ok_done();
-
-              ps_upsert_sref
-                .reset()
-                .bind(1, buildid)
-                .bind(2, s)
-                .step_ok_done();
-
-              // PR25548: also store canonicalized source path
+              // PR25548: store canonicalized source path
               const string& dwarfsrc = s;
               string dwarfsrc_canon = canon_pathname (dwarfsrc);
               if (dwarfsrc_canon != dwarfsrc)
                 {
                   if (verbose > 3)
                     obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+                }
 
-                  ps_upsert_files
-                    .reset()
-                    .bind(1, dwarfsrc_canon)
-                    .step_ok_done();
+              ps_upsert_files
+                .reset()
+                .bind(1, dwarfsrc_canon)
+                .step_ok_done();
 
-                  ps_upsert_sref
-                    .reset()
-                    .bind(1, buildid)
-                    .bind(2, dwarfsrc_canon)
-                    .step_ok_done();
-                }
+              ps_upsert_sref
+                .reset()
+                .bind(1, buildid)
+                .bind(2, dwarfsrc_canon)
+                .step_ok_done();
 
               fts_sref ++;
             }
@@ -2558,7 +2608,9 @@ scan_archive_file (const string& rps, const stat_t& st,
                     st.st_mtime,
                     my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
                     my_fts_sref_complete_p);
-  inc_metric ("scanned_total","source",archive_extension + " archive");
+  add_metric ("scanned_bytes_total","source",archive_extension + " archive",
+              st.st_size);
+  inc_metric ("scanned_files_total","source",archive_extension + " archive");
   add_metric("found_debuginfo_total","source",archive_extension + " archive",
              my_fts_debuginfo);
   add_metric("found_executable_total","source",archive_extension + " archive",
@@ -2667,7 +2719,8 @@ thread_main_scanner (void* arg)
       add_metric("thread_busy", "role", "scan", -1);
       bool gotone = scanq.wait_front(p);
       add_metric("thread_busy", "role", "scan", 1);
-      if (! gotone) continue; // or break
+
+      if (! gotone) continue; // go back to waiting
 
       try
         {
@@ -2706,7 +2759,9 @@ thread_main_scanner (void* arg)
           e.report(cerr);
         }
 
-      inc_metric("thread_work_total", "role","scan");
+      // finished a scanning step -- not a "loop", because we just
+      // consume the traversal loop's work, whenever
+      inc_metric("thread_work_total","role","scan");
     }
 
   add_metric("thread_busy", "role", "scan", -1);
@@ -2750,35 +2805,50 @@ scan_source_paths()
     {
       if (interrupted) break;
 
+      if (sigusr2 != forced_groom_count) // stop early if groom triggered
+        {
+          scanq.clear(); // clear previously issued work for scanner threads
+          break;
+        }
+
       fts_scanned ++;
 
       if (verbose > 2)
         obatched(clog) << "fts traversing " << f->fts_path << endl;
 
-      /* Found a file.  Convert it to an absolute path, so
-         the buildid database does not have relative path
-         names that are unresolvable from a subsequent run
-         in a different cwd. */
-      char *rp = realpath(f->fts_path, NULL);
-      if (rp == NULL)
-        continue; // ignore dangling symlink or such
-      string rps = string(rp);
-      free (rp);
-
-      bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
-      bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
-      if (!ri || rx)
-        {
-          if (verbose > 3)
-            obatched(clog) << "fts skipped by regex " << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
-          fts_regex ++;
-          continue;
-        }
-
       switch (f->fts_info)
         {
         case FTS_F:
-          scanq.push_back (make_pair(rps, *f->fts_statp));
+          {
+            /* Found a file.  Convert it to an absolute path, so
+               the buildid database does not have relative path
+               names that are unresolvable from a subsequent run
+               in a different cwd. */
+            char *rp = realpath(f->fts_path, NULL);
+            if (rp == NULL)
+              continue; // ignore dangling symlink or such
+            string rps = string(rp);
+            free (rp);
+
+            bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
+            bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
+            if (!ri || rx)
+              {
+                if (verbose > 3)
+                  obatched(clog) << "fts skipped by regex "
                                 << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
+                fts_regex ++;
+                if (!ri)
+                  inc_metric("traversed_total","type","file-skipped-I");
+                if (rx)
+                  inc_metric("traversed_total","type","file-skipped-X");
+              }
+            else
+              {
+                scanq.push_back (make_pair(rps, *f->fts_statp));
+                inc_metric("traversed_total","type","file");
+              }
+          }
           break;
 
         case FTS_ERR:
@@ -2788,11 +2858,20 @@ scan_source_paths()
             auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
             x.report(cerr);
           }
+          inc_metric("traversed_total","type","error");
+          break;
+
+        case FTS_SL: // ignore, but count because debuginfod -L would traverse these
+          inc_metric("traversed_total","type","symlink");
           break;
 
-        default:
-          ;
-          /* ignore */
+        case FTS_D: // ignore
+          inc_metric("traversed_total","type","directory");
+          break;
+
+        default: // ignore
+          inc_metric("traversed_total","type","other");
+          break;
         }
     }
   gettimeofday (&tv_end, NULL);
@@ -2808,7 +2887,6 @@ thread_main_fts_source_paths (void* arg)
 {
   (void) arg; // ignore; we operate on global data
 
-  sig_atomic_t forced_rescan_count = 0;
   set_metric("thread_tid", "role","traverse", tid());
   add_metric("thread_count", "role", "traverse", 1);
 
@@ -2838,6 +2916,7 @@ thread_main_fts_source_paths (void* arg)
           set_metric("thread_busy", "role","traverse", 1);
           scan_source_paths();
           last_rescan = time(NULL); // NB: now was before scanning
+          // finished a traversal loop
          inc_metric("thread_work_total", "role","traverse");
          set_metric("thread_busy", "role","traverse", 0);
        }
@@ -2889,6 +2968,8 @@ void groom()
   struct timeval tv_start, tv_end;
   gettimeofday (&tv_start, NULL);
 
+  database_stats_report();
+
   // scan for files that have disappeared
   sqlite_ps files (db, "check old files",
                    "select s.mtime, s.file, f.name from " BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
@@ -2916,7 +2997,13 @@ void groom()
           files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
           files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+          inc_metric("groomed_total", "decision", "stale");
         }
+      else
+        inc_metric("groomed_total", "decision", "fresh");
+
+      if (sigusr1 != forced_rescan_count) // stop early if scan triggered
+        break;
     }
   files.reset();
 
@@ -2953,7 +3040,6 @@ void groom()
 static void*
 thread_main_groom (void* /*arg*/)
 {
-  sig_atomic_t forced_groom_count = 0;
   set_metric("thread_tid", "role", "groom", tid());
   add_metric("thread_count", "role", "groom", 1);
 
@@ -2982,6 +3068,7 @@ thread_main_groom (void* /*arg*/)
           set_metric("thread_busy", "role", "groom", 1);
           groom ();
           last_groom = time(NULL); // NB: now was before grooming
+          // finished a grooming loop
           inc_metric("thread_work_total", "role", "groom");
           set_metric("thread_busy", "role", "groom", 0);
         }

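The scan_source_paths rewrite above moves the realpath/regex work inside the FTS_F case and counts every other node type under its own traversed_total label. A stripped-down traversal sketch, with no regex filters, no scan queue and no metrics map, showing that per-type classification; paths and counter names here are purely illustrative:

#include <fts.h>
#include <cstdio>
#include <cstdlib>

int main(int argc, char **argv)
{
  char *default_path[] = { (char *) ".", nullptr };
  char **paths = (argc > 1) ? &argv[1] : default_path;   // argv is NULL-terminated

  FTS *fts = fts_open(paths, FTS_PHYSICAL | FTS_NOCHDIR, nullptr);
  if (fts == nullptr)
    { perror("fts_open"); return 1; }

  long files = 0, dirs = 0, symlinks = 0, errors = 0, other = 0;
  FTSENT *f;
  while ((f = fts_read(fts)) != nullptr)
    switch (f->fts_info)
      {
      case FTS_F:
        {
          // Only regular files pay the realpath (and, in the daemon, regex) cost.
          char *rp = realpath(f->fts_path, nullptr);
          if (rp == nullptr)
            continue;                       // dangling symlink or similar
          files++;                          // the daemon would queue rp for scanning here
          free(rp);
        }
        break;
      case FTS_ERR:
      case FTS_NS:   errors++;   break;     // traversal errors, counted not fatal
      case FTS_SL:   symlinks++; break;
      case FTS_D:    dirs++;     break;
      default:       other++;    break;
      }

  fts_close(fts);
  std::printf("files=%ld dirs=%ld symlinks=%ld errors=%ld other=%ld\n",
              files, dirs, symlinks, errors, other);
  return 0;
}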