author     Frank Ch. Eigler <[email protected]>   2022-10-31 17:40:01 -0400
committer  Frank Ch. Eigler <[email protected]>   2024-06-03 11:22:49 -0400
commit     d47d93b1049ecfad2f9bb9db30dc630c3d6131ca (patch)
tree       f5b2beb0c35c98d0d6d3084ffafe1e6f0371861e /debuginfod
parent     f4d79637f994457b915af5d8a52b922453f6e6fe (diff)
PR29472: debuginfod: add metadata query webapi, C API, client
This patch extends the debuginfod API with a "metadata query"
operation. It allows clients to request an enumeration of file names
known to debuginfod servers, returning a JSON response including the
matching buildids. This lets clients later download debuginfo for a
range of versions of the same named binaries, in case they need to do
prospective work (like systemtap-based live-patching). It also lets
server operators implement prefetch-triggering operations for popular
but slow debuginfo slivers, such as kernel vdso.debug files on Fedora.
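For illustration, the new webapi can be exercised directly over HTTP.
The query string below matches the key=...&value=... form the client
constructs against each server's /metadata endpoint; the host and port
are placeholders for a local debuginfod instance:

% curl 'http://localhost:8002/metadata?key=glob&value=/usr/bin/c*'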
The implementation requires a sufficiently modern json-c library,
namely version 0.11 or later, which dates from 2014. Without it, the
debuginfod client/server components will refuse to build.
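In configure terms this presumably amounts to a pkg-config probe along
the following lines; the jsonc_* variable prefix matches the
Makefile.am changes below, though the exact configure.ac wording is an
assumption:

  PKG_CHECK_MODULES([jsonc], [json-c >= 0.11])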
% debuginfod-find metadata file /bin/ls
% debuginfod-find metadata glob "/usr/local/bin/c*"
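The cached result is a JSON object carrying a "results" array and a
"complete" boolean (false when any server, or the server-side
--metadata-maxtime limit, truncated the search). A sketch of a
plausible response follows; the per-entry field names are illustrative
only, inferred from the columns the server query selects (build-id
hex, file name, containing archive, executable/debuginfo flags):

  {
    "results": [
      { "type": "executable",
        "buildid": "0123abcd",
        "file": "/bin/ls",
        "archive": "coreutils-9.0-1.x86_64.rpm" }
    ],
    "complete": true
  }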
Several functions in debuginfod-client.c were refactored, because the
metadata search logic differs for multiple servers: all server
responses are merged, instead of the first responder winning.
Documentation and testing are included.
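For reference, a minimal sketch of the new C entry point, using only
what this patch declares: debuginfod_find_metadata() returns an open
fd onto the cached JSON file and, via *path, the cache file name;
error handling is abbreviated.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <elfutils/debuginfod.h>

  int
  main (void)
  {
    debuginfod_client *c = debuginfod_begin ();
    if (c == NULL)
      return 1;

    /* Query all $DEBUGINFOD_URLS servers for metadata on /bin/ls.
       On success the return value is a file descriptor onto the
       cached JSON result; on failure it is a negative errno. */
    char *path = NULL;
    int fd = debuginfod_find_metadata (c, "file", "/bin/ls", &path);
    if (fd >= 0)
      {
        printf ("metadata cached at %s\n", path);
        free (path);
        close (fd);
      }
    else
      fprintf (stderr, "query failed: %s\n", strerror (-fd));

    debuginfod_end (c);
    return 0;
  }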
Signed-off-by: Ryan Goldberg <[email protected]>
Signed-off-by: Frank Ch. Eigler <[email protected]>
Diffstat (limited to 'debuginfod')
-rw-r--r--  debuginfod/Makefile.am          |    8
-rw-r--r--  debuginfod/debuginfod-client.c  | 1286
-rw-r--r--  debuginfod/debuginfod-find.c    |  126
-rw-r--r--  debuginfod/debuginfod.cxx       |  370
-rw-r--r--  debuginfod/debuginfod.h.in      |   22
-rw-r--r--  debuginfod/libdebuginfod.map    |    3
6 files changed, 1312 insertions(+), 503 deletions(-)
diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am index 5e4f9669..b74e3673 100644 --- a/debuginfod/Makefile.am +++ b/debuginfod/Makefile.am @@ -33,7 +33,7 @@ include $(top_srcdir)/config/eu.am AM_CPPFLAGS += -I$(srcdir) -I$(srcdir)/../libelf -I$(srcdir)/../libebl \ -I$(srcdir)/../libdw -I$(srcdir)/../libdwelf \ $(libmicrohttpd_CFLAGS) $(libcurl_CFLAGS) $(sqlite3_CFLAGS) \ - $(libarchive_CFLAGS) + $(libarchive_CFLAGS) $(jsonc_CFLAGS) # Disable eu- prefixing for artifacts (binaries & man pages) in this # directory, since they do not conflict with binutils tools. @@ -70,10 +70,10 @@ bin_PROGRAMS += debuginfod-find endif debuginfod_SOURCES = debuginfod.cxx -debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) -lpthread -ldl +debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(rpm_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) -lpthread -ldl debuginfod_find_SOURCES = debuginfod-find.c -debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) +debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(jsonc_LIBS) if LIBDEBUGINFOD noinst_LIBRARIES = libdebuginfod.a @@ -97,7 +97,7 @@ libdebuginfod_so_LIBS = libdebuginfod_pic.a if DUMMY_LIBDEBUGINFOD libdebuginfod_so_LDLIBS = else -libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(crypto_LIBS) +libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(crypto_LIBS) $(jsonc_LIBS) endif $(LIBDEBUGINFOD_SONAME): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS) $(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \ diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index f01d1f0e..3d6f8d8c 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -71,6 +71,8 @@ int debuginfod_find_source (debuginfod_client *c, const unsigned char *b, int debuginfod_find_section (debuginfod_client *c, const unsigned char *b, int s, const char *scn, char **p) { return -ENOSYS; } +int debuginfod_find_metadata (debuginfod_client *c, + const char *k, const char *v, char **p) { return -ENOSYS; } void debuginfod_set_progressfn(debuginfod_client *c, debuginfod_progressfn_t fn) { } void debuginfod_set_verbose_fd(debuginfod_client *c, int fd) { } @@ -104,6 +106,7 @@ void debuginfod_end (debuginfod_client *c) { } #include <sys/utsname.h> #include <curl/curl.h> #include <fnmatch.h> +#include <json-c/json.h> /* If fts.h is included before config.h, its indirect inclusions may not give us the right LFS aliases of these functions, so map them manually. */ @@ -211,6 +214,11 @@ static const char *cache_miss_filename = "cache_miss_s"; static const char *cache_max_unused_age_filename = "max_unused_age_s"; static const long cache_default_max_unused_age_s = 604800; /* 1 week */ +/* The metadata_retention_default_s file within the debuginfod cache + specifies how long metadata query results should be cached. */ +static const long metadata_retention_default_s = 3600; /* 1 hour */ +static const char *metadata_retention_filename = "metadata_retention_s"; + /* Location of the cache of files downloaded from debuginfods. The default parent directory is $HOME, or '/' if $HOME doesn't exist. */ static const char *cache_default_name = ".debuginfod_client_cache"; @@ -249,9 +257,14 @@ struct handle_data to the cache. 
Used to ensure that a file is not downloaded from multiple servers unnecessarily. */ CURL **target_handle; + /* Response http headers for this client handle, sent from the server */ char *response_data; size_t response_data_size; + + /* Response metadata values for this client handle, sent from the server */ + char *metadata; + size_t metadata_size; }; @@ -556,7 +569,8 @@ debuginfod_clean_cache(debuginfod_client *c, return -errno; regex_t re; - const char * pattern = ".*/[a-f0-9]+(/debuginfo|/executable|/source.*|)$"; /* include dirs */ + const char * pattern = ".*/(metadata.*|[a-f0-9]+(/debuginfo|/executable|/source.*|))$"; /* include dirs */ + /* NB: also matches .../section/ subdirs, so extracted section files also get cleaned. */ if (regcomp (&re, pattern, REG_EXTENDED | REG_NOSUB) != 0) return -ENOMEM; @@ -794,18 +808,9 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) } /* Temporary buffer for realloc */ char *temp = NULL; - if (data->response_data == NULL) - { - temp = malloc(numitems); - if (temp == NULL) - return 0; - } - else - { - temp = realloc(data->response_data, data->response_data_size + numitems); - if (temp == NULL) - return 0; - } + temp = realloc(data->response_data, data->response_data_size + numitems); + if (temp == NULL) + return 0; memcpy(temp + data->response_data_size, buffer, numitems-1); data->response_data = temp; @@ -815,6 +820,384 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) return numitems; } + +static size_t +metadata_callback (char * buffer, size_t size, size_t numitems, void * userdata) +{ + if (size != 1) + return 0; + /* Temporary buffer for realloc */ + char *temp = NULL; + struct handle_data *data = (struct handle_data *) userdata; + temp = realloc(data->metadata, data->metadata_size + numitems + 1); + if (temp == NULL) + return 0; + + memcpy(temp + data->metadata_size, buffer, numitems); + data->metadata = temp; + data->metadata_size += numitems; + data->metadata[data->metadata_size] = '\0'; + return numitems; +} + + +/* This function takes a copy of DEBUGINFOD_URLS, server_urls, and + * separates it into an array of urls to query, each with a + * corresponding IMA policy. The url_subdir is either 'buildid' or + * 'metadata', corresponding to the query type. Returns 0 on success + * and -Posix error on failure. + */ +int +init_server_urls(char* url_subdir, const char* type, + char *server_urls, char ***server_url_list, ima_policy_t **url_ima_policies, + int *num_urls, int vfd) +{ + /* Initialize the memory to zero */ + char *strtok_saveptr; + ima_policy_t verification_mode = ignore; // The default mode + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); + /* Count number of URLs. 
*/ + int n = 0; + + while (server_url != NULL) + { + // When we encountered a (well-formed) token off the form + // ima:foo, we update the policy under which results from that + // server will be ima verified + if (startswith(server_url, "ima:")) + { +#ifdef ENABLE_IMA_VERIFICATION + ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:")); + if(m != undefined) + verification_mode = m; + else if (vfd >= 0) + dprintf(vfd, "IMA mode not recognized, skipping %s\n", server_url); +#else + if (vfd >= 0) + dprintf(vfd, "IMA signature verification is not enabled, treating %s as ima:ignore\n", server_url); +#endif + goto continue_next_url; + } + + if (verification_mode==enforcing && + 0==strcmp(url_subdir, "buildid") && + 0==strcmp(type,"section")) // section queries are unsecurable + { + if (vfd >= 0) + dprintf(vfd, "skipping server %s section query in IMA enforcing mode\n", server_url); + goto continue_next_url; + } + + // Construct actual URL for libcurl + int r; + char *tmp_url; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') + r = asprintf(&tmp_url, "%s%s", server_url, url_subdir); + else + r = asprintf(&tmp_url, "%s/%s", server_url, url_subdir); + + if (r == -1) + return -ENOMEM; + + /* PR 27983: If the url is duplicate, skip it */ + int url_index; + for (url_index = 0; url_index < n; ++url_index) + { + if(strcmp(tmp_url, (*server_url_list)[url_index]) == 0) + { + url_index = -1; + break; + } + } + if (url_index == -1) + { + if (vfd >= 0) + dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); + free(tmp_url); + } + else + { + /* Have unique URL, save it, along with its IMA verification tag. */ + n ++; + if (NULL == (*server_url_list = reallocarray(*server_url_list, n, sizeof(char*))) + || NULL == (*url_ima_policies = reallocarray(*url_ima_policies, n, sizeof(ima_policy_t)))) + { + free (tmp_url); + return -ENOMEM; + } + (*server_url_list)[n-1] = tmp_url; + if(NULL != url_ima_policies) (*url_ima_policies)[n-1] = verification_mode; + } + + continue_next_url: + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); + } + *num_urls = n; + return 0; +} + +/* Some boilerplate for checking curl_easy_setopt. */ +#define curl_easy_setopt_ck(H,O,P) do { \ + CURLcode curl_res = curl_easy_setopt (H,O,P); \ + if (curl_res != CURLE_OK) \ + { \ + if (vfd >= 0) \ + dprintf (vfd, \ + "Bad curl_easy_setopt: %s\n", \ + curl_easy_strerror(curl_res)); \ + return -EINVAL; \ + } \ + } while (0) + + +/* + * This function initializes a CURL handle. It takes optional callbacks for the write + * function and the header function, which if defined will use userdata of type struct handle_data*. + * Specifically the data[i] within an array of struct handle_data's. + * Returns 0 on success and -Posix error on failure. + */ +int +init_handle(debuginfod_client *client, + size_t (*w_callback)(char *buffer, size_t size, size_t nitems, void *userdata), + size_t (*h_callback)(char *buffer, size_t size, size_t nitems, void *userdata), + struct handle_data *data, int i, long timeout, + int vfd) +{ + data->handle = curl_easy_init(); + if (data->handle == NULL) + return -ENETUNREACH; + + if (vfd >= 0) + dprintf (vfd, "url %d %s\n", i, data->url); + + /* Only allow http:// + https:// + file:// so we aren't being + redirected to some unsupported protocol. + libcurl will fail if we request a single protocol that is not + available. https missing is the most likely issue */ +#if CURL_AT_LEAST_VERSION(7, 85, 0) + curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS_STR, + curl_has_https ? 
"https,http,file" : "http,file"); +#else + curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS, + ((curl_has_https ? CURLPROTO_HTTPS : 0) | CURLPROTO_HTTP | CURLPROTO_FILE)); +#endif + curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url); + if (vfd >= 0) + curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER, + data->errbuf); + if (w_callback) + { + curl_easy_setopt_ck(data->handle, + CURLOPT_WRITEFUNCTION, w_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data); + } + if (timeout > 0) + { + /* Make sure there is at least some progress, + try to get at least 100K per timeout seconds. */ + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME, + timeout); + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT, + 100 * 1024L); + } + curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1); + if (h_callback) + { + curl_easy_setopt_ck(data->handle, + CURLOPT_HEADERFUNCTION, h_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data); + } + #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ + curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1); + #else + /* On old curl; no big deal, canonicalization here is almost the + same, except perhaps for ? # type decorations at the tail. */ + #endif + curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, ""); + curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers); + + return 0; +} + + +/* + * This function busy-waits on one or more curl queries to complete. This can + * be controlled via only_one, which, if true, will find the first winner and exit + * once found. If positive maxtime and maxsize dictate the maximum allowed wait times + * and download sizes respectively. Returns 0 on success and -Posix error on failure. + */ +int +perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c, + int num_urls, long maxtime, long maxsize, bool only_one, int vfd, int *committed_to) +{ + int still_running = -1; + long loops = 0; + *committed_to = -1; + bool verbose_reported = false; + struct timespec start_time, cur_time; + if (c->winning_headers != NULL) + { + free (c->winning_headers); + c->winning_headers = NULL; + } + if (maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + return -errno; + long delta = 0; + do + { + /* Check to see how long querying is taking. */ + if (maxtime > 0) + { + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) + return -errno; + delta = cur_time.tv_sec - start_time.tv_sec; + if ( delta > maxtime) + { + dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); + return -ETIME; + } + } + /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ + curl_multi_wait(curlm, NULL, 0, 1000, NULL); + CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); + + if (only_one) + { + /* If the target file has been found, abort the other queries. 
*/ + if (target_handle && *target_handle != NULL) + { + for (int i = 0; i < num_urls; i++) + if (data[i].handle != *target_handle) + curl_multi_remove_handle(curlm, data[i].handle); + else + { + *committed_to = i; + if (c->winning_headers == NULL) + { + c->winning_headers = data[*committed_to].response_data; + if (vfd >= 0 && c->winning_headers != NULL) + dprintf(vfd, "\n%s", c->winning_headers); + data[*committed_to].response_data = NULL; + data[*committed_to].response_data_size = 0; + } + } + } + + if (vfd >= 0 && !verbose_reported && *committed_to >= 0) + { + bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); + dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "", + *committed_to); + if (pnl) + c->default_progressfn_printed_p = 0; + verbose_reported = true; + } + } + + if (curlm_res != CURLM_OK) + { + switch (curlm_res) + { + case CURLM_CALL_MULTI_PERFORM: continue; + case CURLM_OUT_OF_MEMORY: return -ENOMEM; + default: return -ENETUNREACH; + } + } + + long dl_size = -1; + if (target_handle && *target_handle && (c->progressfn || maxsize > 0)) + { + /* Get size of file being downloaded. NB: If going through + deflate-compressing proxies, this number is likely to be + unavailable, so -1 may show. */ + CURLcode curl_res; +#if CURL_AT_LEAST_VERSION(7, 55, 0) + curl_off_t cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, + &cl); + if (curl_res == CURLE_OK && cl >= 0) + dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); +#else + double cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD, + &cl); + if (curl_res == CURLE_OK && cl >= 0) + dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl); +#endif + /* If Content-Length is -1, try to get the size from + X-Debuginfod-Size */ + if (dl_size == -1 && c->winning_headers != NULL) + { + long xdl; + char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); + size_t off = strlen("x-debuginfod-size:"); + + if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1) + dl_size = xdl; + } + } + + if (c->progressfn) /* inform/check progress callback */ + { + loops ++; + long pa = loops; /* default param for progress callback */ + if (target_handle && *target_handle) /* we've committed to a server; report its download progress */ + { + /* PR30809: Check actual size of cached file. This same + fd is shared by all the multi-curl handles (but only + one will end up writing to it). Another way could be + to tabulate totals in debuginfod_write_callback(). */ + struct stat cached; + int statrc = fstat(data[*committed_to].fd, &cached); + if (statrc == 0) + pa = (long) cached.st_size; + else + { + /* Otherwise, query libcurl for its tabulated total. + However, that counts http body length, not + decoded/decompressed content length, so does not + measure quite the same thing as dl. */ + CURLcode curl_res; +#if CURL_AT_LEAST_VERSION(7, 55, 0) + curl_off_t dl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_SIZE_DOWNLOAD_T, + &dl); + if (curl_res == 0 && dl >= 0) + pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); +#else + double dl; + curl_res = curl_easy_getinfo(target_handle, + CURLINFO_SIZE_DOWNLOAD, + &dl); + if (curl_res == 0) + pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); +#endif + } + + if ((*c->progressfn) (c, pa, dl_size == -1 ? 
0 : dl_size)) + break; + } + } + /* Check to see if we are downloading something which exceeds maxsize, if set.*/ + if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0) + { + if (vfd >=0) + dprintf(vfd, "Content-Length too large.\n"); + return -EFBIG; + } + } while (still_running); + + return 0; +} + + /* Copy SRC to DEST, s,/,#,g */ static void @@ -1258,6 +1641,84 @@ debuginfod_validate_imasig (debuginfod_client *c, int fd) + +/* Helper function to create client cache directory. + $XDG_CACHE_HOME takes priority over $HOME/.cache. + $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME. + + Return resulting path name or NULL on error. Caller must free resulting string. + */ +static char * +make_cache_path(void) +{ + char* cache_path = NULL; + int rc = 0; + /* Determine location of the cache. The path specified by the debuginfod + cache environment variable takes priority. */ + char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR); + if (cache_var != NULL && strlen (cache_var) > 0) + xalloc_str (cache_path, "%s", cache_var); + else + { + /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use + that. Otherwise use the XDG cache directory naming format. */ + xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name); + + struct stat st; + if (stat (cache_path, &st) < 0) + { + char cachedir[PATH_MAX]; + char *xdg = getenv ("XDG_CACHE_HOME"); + + if (xdg != NULL && strlen (xdg) > 0) + snprintf (cachedir, PATH_MAX, "%s", xdg); + else + snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/"); + + /* Create XDG cache directory if it doesn't exist. */ + if (stat (cachedir, &st) == 0) + { + if (! S_ISDIR (st.st_mode)) + { + rc = -EEXIST; + goto out1; + } + } + else + { + rc = mkdir (cachedir, 0700); + + /* Also check for EEXIST and S_ISDIR in case another client just + happened to create the cache. */ + if (rc < 0 + && (errno != EEXIST + || stat (cachedir, &st) != 0 + || ! S_ISDIR (st.st_mode))) + { + rc = -errno; + goto out1; + } + } + + free (cache_path); + xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name); + } + } + + goto out; + + out1: + (void) rc; + free (cache_path); + cache_path = NULL; + + out: + if (cache_path != NULL) + (void) mkdir (cache_path, 0700); // failures with this mkdir would be caught later too + return cache_path; +} + + /* Query each of the server URLs found in $DEBUGINFOD_URLS for the file with the specified build-id and type (debuginfo, executable, source or section). If type is source, then type_arg should be a filename. If @@ -1266,7 +1727,7 @@ debuginfod_validate_imasig (debuginfod_client *c, int fd) for the target if successful, otherwise return an error code. */ static int -debuginfod_query_server (debuginfod_client *c, +debuginfod_query_server_by_buildid (debuginfod_client *c, const unsigned char *build_id, int build_id_len, const char *type, @@ -1287,7 +1748,7 @@ debuginfod_query_server (debuginfod_client *c, char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */ char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; int vfd = c->verbose_fd; - int rc; + int rc, r; c->progressfn_cancel = false; @@ -1412,70 +1873,22 @@ debuginfod_query_server (debuginfod_client *c, dprintf (vfd, "suffix %s\n", suffix); /* set paths needed to perform the query - - example format + example format: cache_path: $HOME/.cache target_cache_dir: $HOME/.cache/0123abcd target_cache_path: $HOME/.cache/0123abcd/debuginfo target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ? 
- - $XDG_CACHE_HOME takes priority over $HOME/.cache. - $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME. */ - /* Determine location of the cache. The path specified by the debuginfod - cache environment variable takes priority. */ - char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR); - if (cache_var != NULL && strlen (cache_var) > 0) - xalloc_str (cache_path, "%s", cache_var); - else + cache_path = make_cache_path(); + if (!cache_path) { - /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use - that. Otherwise use the XDG cache directory naming format. */ - xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name); - - struct stat st; - if (stat (cache_path, &st) < 0) - { - char cachedir[PATH_MAX]; - char *xdg = getenv ("XDG_CACHE_HOME"); - - if (xdg != NULL && strlen (xdg) > 0) - snprintf (cachedir, PATH_MAX, "%s", xdg); - else - snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/"); - - /* Create XDG cache directory if it doesn't exist. */ - if (stat (cachedir, &st) == 0) - { - if (! S_ISDIR (st.st_mode)) - { - rc = -EEXIST; - goto out; - } - } - else - { - rc = mkdir (cachedir, 0700); - - /* Also check for EEXIST and S_ISDIR in case another client just - happened to create the cache. */ - if (rc < 0 - && (errno != EEXIST - || stat (cachedir, &st) != 0 - || ! S_ISDIR (st.st_mode))) - { - rc = -errno; - goto out; - } - } - - free (cache_path); - xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name); - } + rc = -ENOMEM; + goto out; } - xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); + (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too + if (section != NULL) xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix); else @@ -1594,102 +2007,32 @@ debuginfod_query_server (debuginfod_client *c, /* thereafter, goto out0 on error*/ /* Because of a race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */ - for(int i=0; i<2; i++) { - /* (re)create target directory in cache */ - (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */ - - /* NB: write to a temporary file first, to avoid race condition of - multiple clients checking the cache, while a partially-written or empty - file is in there, being written from libcurl. */ - fd = mkstemp (target_cache_tmppath); - if (fd >= 0) break; - } + for(int i=0; i<2; i++) + { + /* (re)create target directory in cache */ + (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */ + + /* NB: write to a temporary file first, to avoid race condition of + multiple clients checking the cache, while a partially-written or empty + file is in there, being written from libcurl. */ + fd = mkstemp (target_cache_tmppath); + if (fd >= 0) break; + } if (fd < 0) /* Still failed after two iterations. */ { rc = -errno; goto out0; } - /* Initialize the memory to zero */ - char *strtok_saveptr; char **server_url_list = NULL; ima_policy_t* url_ima_policies = NULL; - char* server_url; - /* Count number of URLs. 
*/ - int num_urls = 0; - - ima_policy_t verification_mode = ignore; // The default mode - for(server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); - server_url != NULL; server_url = strtok_r(NULL, url_delim, &strtok_saveptr)) + char *server_url; + int num_urls; + r = init_server_urls("buildid", type, server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd); + if (0 != r) { - // When we encounted a (well-formed) token off the form ima:foo, we update the policy - // under which results from that server will be ima verified - if(startswith(server_url, "ima:")) - { -#ifdef ENABLE_IMA_VERIFICATION - ima_policy_t m = ima_policy_str2enum(server_url + strlen("ima:")); - if(m != undefined) - verification_mode = m; - else if (vfd >= 0) - dprintf(vfd, "IMA mode not recognized, skipping %s\n", server_url); -#else - if (vfd >= 0) - dprintf(vfd, "IMA signature verification is not enabled, skipping %s\n", server_url); -#endif - continue; // Not a url, just a mode change so keep going - } - - if (verification_mode==enforcing && 0==strcmp(type,"section")) - { - if (vfd >= 0) - dprintf(vfd, "skipping server %s section query in IMA enforcing mode\n", server_url); - continue; - } - - /* PR 27983: If the url is already set to be used use, skip it */ - char *slashbuildid; - if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') - slashbuildid = "buildid"; - else - slashbuildid = "/buildid"; - - char *tmp_url; - if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1) - { - rc = -ENOMEM; - goto out1; - } - int url_index; - for (url_index = 0; url_index < num_urls; ++url_index) - { - if(strcmp(tmp_url, server_url_list[url_index]) == 0) - { - url_index = -1; - break; - } - } - if (url_index == -1) - { - if (vfd >= 0) - dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); - free(tmp_url); - } - else - { - num_urls++; - if (NULL == (server_url_list = reallocarray(server_url_list, num_urls, sizeof(char*))) -#ifdef ENABLE_IMA_VERIFICATION - || NULL == (url_ima_policies = reallocarray(url_ima_policies, num_urls, sizeof(ima_policy_t))) -#endif - ) - { - free (tmp_url); - rc = -ENOMEM; - goto out1; - } - server_url_list[num_urls-1] = tmp_url; - if(NULL != url_ima_policies) url_ima_policies[num_urls-1] = verification_mode; - } + rc = r; + goto out1; } /* No URLs survived parsing / filtering? Abort abort abort. */ @@ -1705,7 +2048,6 @@ debuginfod_query_server (debuginfod_client *c, retry_limit = atoi (retry_limit_envvar); CURLM *curlm = c->server_mhandle; - assert (curlm != NULL); /* Tracks which handle should write to fd. Set to the first handle that is ready to write the target file to the cache. */ @@ -1773,13 +2115,6 @@ debuginfod_query_server (debuginfod_client *c, data[i].fd = fd; data[i].target_handle = &target_handle; - data[i].handle = curl_easy_init(); - if (data[i].handle == NULL) - { - if (filename) curl_free (escaped_string); - rc = -ENETUNREACH; - goto out2; - } data[i].client = c; if (filename) /* must start with / */ @@ -1793,242 +2128,30 @@ debuginfod_query_server (debuginfod_client *c, build_id_bytes, type, section); else snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type); - if (vfd >= 0) - dprintf (vfd, "url %d %s\n", i, data[i].url); - /* Some boilerplate for checking curl_easy_setopt. 
*/ -#define curl_easy_setopt_ck(H,O,P) do { \ - CURLcode curl_res = curl_easy_setopt (H,O,P); \ - if (curl_res != CURLE_OK) \ - { \ - if (vfd >= 0) \ - dprintf (vfd, \ - "Bad curl_easy_setopt: %s\n", \ - curl_easy_strerror(curl_res)); \ - rc = -EINVAL; \ - goto out2; \ - } \ - } while (0) - - /* Only allow http:// + https:// + file:// so we aren't being - redirected to some unsupported protocol. - libcurl will fail if we request a single protocol that is not - available. https missing is the most likely issue */ -#if CURL_AT_LEAST_VERSION(7, 85, 0) - curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS_STR, - curl_has_https ? "https,http,file" : "http,file"); -#else - curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS, - ((curl_has_https ? CURLPROTO_HTTPS : 0) | CURLPROTO_HTTP | CURLPROTO_FILE)); -#endif - curl_easy_setopt_ck(data[i].handle, CURLOPT_URL, data[i].url); - if (vfd >= 0) - curl_easy_setopt_ck(data[i].handle, CURLOPT_ERRORBUFFER, - data[i].errbuf); - curl_easy_setopt_ck(data[i].handle, - CURLOPT_WRITEFUNCTION, - debuginfod_write_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]); - if (timeout > 0) - { - /* Make sure there is at least some progress, - try to get at least 100K per timeout seconds. */ - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_TIME, - timeout); - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_LIMIT, - 100 * 1024L); - } - curl_easy_setopt_ck(data[i].handle, CURLOPT_FILETIME, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FAILONERROR, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_NOSIGNAL, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERFUNCTION, - header_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERDATA, - (void *) &(data[i])); -#if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ - curl_easy_setopt_ck(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1); -#else - /* On old curl; no big deal, canonicalization here is almost the - same, except perhaps for ? # type decorations at the tail. */ -#endif - curl_easy_setopt_ck(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HTTPHEADER, c->headers); + r = init_handle(c, debuginfod_write_callback, header_callback, &data[i], i, timeout, vfd); + if (0 != r) + { + rc = r; + if (filename) curl_free (escaped_string); + goto out2; + } curl_multi_add_handle(curlm, data[i].handle); } if (filename) curl_free(escaped_string); + /* Query servers in parallel. */ if (vfd >= 0) dprintf (vfd, "query %d urls in parallel\n", num_urls); - int still_running; - long loops = 0; - int committed_to = -1; - bool verbose_reported = false; - struct timespec start_time, cur_time; - - free (c->winning_headers); - c->winning_headers = NULL; - if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + int committed_to; + r = perform_queries(curlm, &target_handle, data, c, num_urls, maxtime, maxsize, true, vfd, &committed_to); + if (0 != r) { - rc = -errno; + rc = r; goto out2; } - long delta = 0; - do - { - /* Check to see how long querying is taking. 
*/ - if (maxtime > 0) - { - if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) - { - rc = -errno; - goto out2; - } - delta = cur_time.tv_sec - start_time.tv_sec; - if ( delta > maxtime) - { - dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); - rc = -ETIME; - goto out2; - } - } - /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ - curl_multi_wait(curlm, NULL, 0, 1000, NULL); - CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); - - /* If the target file has been found, abort the other queries. */ - if (target_handle != NULL) - { - for (int i = 0; i < num_urls; i++) - if (data[i].handle != target_handle) - curl_multi_remove_handle(curlm, data[i].handle); - else - { - committed_to = i; - if (c->winning_headers == NULL) - { - c->winning_headers = data[committed_to].response_data; - data[committed_to].response_data = NULL; - data[committed_to].response_data_size = 0; - } - - } - } - - if (vfd >= 0 && !verbose_reported && committed_to >= 0) - { - bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); - dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "", - committed_to); - if (pnl) - c->default_progressfn_printed_p = 0; - verbose_reported = true; - } - - if (curlm_res != CURLM_OK) - { - switch (curlm_res) - { - case CURLM_CALL_MULTI_PERFORM: continue; - case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; - default: rc = -ENETUNREACH; break; - } - goto out2; - } - - long dl_size = -1; - if (target_handle && (c->progressfn || maxsize > 0)) - { - /* Get size of file being downloaded. NB: If going through - deflate-compressing proxies, this number is likely to be - unavailable, so -1 may show. */ - CURLcode curl_res; -#if CURL_AT_LEAST_VERSION(7, 55, 0) - curl_off_t cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, - &cl); - if (curl_res == CURLE_OK && cl >= 0) - dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); -#else - double cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD, - &cl); - if (curl_res == CURLE_OK && cl >= 0) - dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl); -#endif - /* If Content-Length is -1, try to get the size from - X-Debuginfod-Size */ - if (dl_size == -1 && c->winning_headers != NULL) - { - long xdl; - char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); - size_t off = strlen("x-debuginfod-size:"); - - if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1) - dl_size = xdl; - } - } - - if (c->progressfn) /* inform/check progress callback */ - { - loops ++; - long pa = loops; /* default param for progress callback */ - if (target_handle) /* we've committed to a server; report its download progress */ - { - /* PR30809: Check actual size of cached file. This same - fd is shared by all the multi-curl handles (but only - one will end up writing to it). Another way could be - to tabulate totals in debuginfod_write_callback(). */ - struct stat cached; - int statrc = fstat(fd, &cached); - if (statrc == 0) - pa = (long) cached.st_size; - else - { - /* Otherwise, query libcurl for its tabulated total. - However, that counts http body length, not - decoded/decompressed content length, so does not - measure quite the same thing as dl. */ - CURLcode curl_res; -#if CURL_AT_LEAST_VERSION(7, 55, 0) - curl_off_t dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD_T, - &dl); - if (curl_res == 0 && dl >= 0) - pa = (dl > LONG_MAX ? 
LONG_MAX : (long)dl); -#else - double dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD, - &dl); - if (curl_res == 0) - pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); -#endif - } - } - - if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size)) - { - c->progressfn_cancel = true; - break; - } - } - - /* Check to see if we are downloading something which exceeds maxsize, if set.*/ - if (target_handle && dl_size > maxsize && maxsize > 0) - { - if (vfd >=0) - dprintf(vfd, "Content-Length too large.\n"); - rc = -EFBIG; - goto out2; - } - } while (still_running); /* Check whether a query was successful. If so, assign its handle to verified_handle. */ @@ -2180,6 +2303,7 @@ debuginfod_query_server (debuginfod_client *c, curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ curl_easy_cleanup (data[i].handle); free(data[i].response_data); + data[i].response_data = NULL; } free(c->winning_headers); c->winning_headers = NULL; @@ -2427,7 +2551,7 @@ debuginfod_find_debuginfo (debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "debuginfo", NULL, path); } @@ -2438,7 +2562,7 @@ debuginfod_find_executable(debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "executable", NULL, path); } @@ -2447,7 +2571,7 @@ int debuginfod_find_source(debuginfod_client *client, const unsigned char *build_id, int build_id_len, const char *filename, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "source", filename, path); } @@ -2456,8 +2580,8 @@ debuginfod_find_section (debuginfod_client *client, const unsigned char *build_id, int build_id_len, const char *section, char **path) { - int rc = debuginfod_query_server(client, build_id, build_id_len, - "section", section, path); + int rc = debuginfod_query_server_by_buildid(client, build_id, build_id_len, + "section", section, path); if (rc != -EINVAL && rc != -ENOSYS) return rc; /* NB: we fall through in case of ima:enforcing-filtered DEBUGINFOD_URLS servers, @@ -2508,6 +2632,380 @@ debuginfod_find_section (debuginfod_client *client, return rc; } + +int debuginfod_find_metadata (debuginfod_client *client, + const char* key, const char* value, char **path) +{ + char *server_urls = NULL; + char *urls_envvar = NULL; + char *cache_path = NULL; + char *target_cache_dir = NULL; + char *target_cache_path = NULL; + char *target_cache_tmppath = NULL; + char *target_file_name = NULL; + char *key_and_value = NULL; + int rc = 0, r; + int vfd = client->verbose_fd; + struct handle_data *data = NULL; + + json_object *json_metadata = json_object_new_object(); + json_bool json_metadata_complete = true; + json_object *json_metadata_arr = json_object_new_array(); + if (NULL == json_metadata) + { + rc = -ENOMEM; + goto out; + } + json_object_object_add(json_metadata, "results", + json_metadata_arr ?: json_object_new_array() /* Empty array */); + + if (NULL == value || NULL == key) + { + rc = -EINVAL; + goto out; + } + + if (vfd >= 0) + dprintf (vfd, "debuginfod_find_metadata %s %s\n", key, value); + + /* Without query-able URL, we can stop here*/ + urls_envvar = 
getenv(DEBUGINFOD_URLS_ENV_VAR); + if (vfd >= 0) + dprintf (vfd, "server urls \"%s\"\n", + urls_envvar != NULL ? urls_envvar : ""); + if (urls_envvar == NULL || urls_envvar[0] == '\0') + { + rc = -ENOSYS; + goto out; + } + + /* set paths needed to perform the query + example format: + cache_path: $HOME/.cache + target_cache_dir: $HOME/.cache/metadata + target_cache_path: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED + target_cache_path: $HOME/.cache/metadata/KEYENCODED_VALUEENCODED.XXXXXX + */ + + // libcurl > 7.62ish has curl_url_set()/etc. to construct these things more properly. + // curl_easy_escape() is older + { + CURL *c = curl_easy_init(); + if (!c) + { + rc = -ENOMEM; + goto out; + } + char *key_escaped = curl_easy_escape(c, key, 0); + char *value_escaped = curl_easy_escape(c, value, 0); + + // fallback to unescaped values in unlikely case of error + xalloc_str (key_and_value, "key=%s&value=%s", key_escaped ?: key, value_escaped ?: value); + xalloc_str (target_file_name, "%s_%s", key_escaped ?: key, value_escaped ?: value); + curl_free(value_escaped); + curl_free(key_escaped); + curl_easy_cleanup(c); + } + + /* Check if we have a recent result already in the cache. */ + cache_path = make_cache_path(); + if (! cache_path) + { + rc = -ENOMEM; + goto out; + } + xalloc_str (target_cache_dir, "%s/metadata", cache_path); + (void) mkdir (target_cache_dir, 0700); + xalloc_str (target_cache_path, "%s/%s", target_cache_dir, target_file_name); + xalloc_str (target_cache_tmppath, "%s/%s.XXXXXX", target_cache_dir, target_file_name); + + int fd = open(target_cache_path, O_RDONLY); + if (fd >= 0) + { + struct stat st; + int metadata_retention = 0; + time_t now = time(NULL); + char *metadata_retention_path = 0; + + xalloc_str (metadata_retention_path, "%s/%s", cache_path, metadata_retention_filename); + if (metadata_retention_path) + { + rc = debuginfod_config_cache(client, metadata_retention_path, + metadata_retention_default_s, &st); + free (metadata_retention_path); + if (rc < 0) + rc = 0; + } + else + rc = 0; + metadata_retention = rc; + + if (fstat(fd, &st) != 0) + { + rc = -errno; + close (fd); + goto out; + } + + if (metadata_retention > 0 && (now - st.st_mtime <= metadata_retention)) + { + if (client && client->verbose_fd >= 0) + dprintf (client->verbose_fd, "cached metadata %s", target_file_name); + + if (path != NULL) + { + *path = target_cache_path; // pass over the pointer + target_cache_path = NULL; // prevent free() in our own cleanup + } + + /* Success!!!! */ + rc = fd; + goto out; + } + + /* We don't have to clear the likely-expired cached object here + by unlinking. We will shortly make a new request and save + results right on top. Erasing here could trigger a TOCTOU + race with another thread just finishing a query and passing + its results back. + */ + // (void) unlink (target_cache_path); + + close (fd); + } + + /* No valid cached metadata found: time to make the queries. 
*/ + + free (client->url); + client->url = NULL; + + long maxtime = 0; + const char *maxtime_envvar; + maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR); + if (maxtime_envvar != NULL) + maxtime = atol (maxtime_envvar); + if (maxtime && vfd >= 0) + dprintf(vfd, "using max time %lds\n", maxtime); + + long timeout = default_timeout; + const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR); + if (timeout_envvar != NULL) + timeout = atoi (timeout_envvar); + if (vfd >= 0) + dprintf (vfd, "using timeout %ld\n", timeout); + + add_default_headers(client); + + /* Make a copy of the envvar so it can be safely modified. */ + server_urls = strdup(urls_envvar); + if (server_urls == NULL) + { + rc = -ENOMEM; + goto out; + } + + /* Thereafter, goto out1 on error*/ + + char **server_url_list = NULL; + ima_policy_t* url_ima_policies = NULL; + char *server_url; + int num_urls = 0; + r = init_server_urls("metadata", NULL, server_urls, &server_url_list, &url_ima_policies, &num_urls, vfd); + if (0 != r) + { + rc = r; + goto out1; + } + + CURLM *curlm = client->server_mhandle; + + CURL *target_handle = NULL; + data = malloc(sizeof(struct handle_data) * num_urls); + if (data == NULL) + { + rc = -ENOMEM; + goto out1; + } + + /* thereafter, goto out2 on error. */ + + /* Initialize handle_data */ + for (int i = 0; i < num_urls; i++) + { + if ((server_url = server_url_list[i]) == NULL) + break; + if (vfd >= 0) + dprintf (vfd, "init server %d %s\n", i, server_url); + + data[i].errbuf[0] = '\0'; + data[i].target_handle = &target_handle; + data[i].client = client; + data[i].metadata = NULL; + data[i].metadata_size = 0; + data[i].response_data = NULL; + data[i].response_data_size = 0; + + snprintf(data[i].url, PATH_MAX, "%s?%s", server_url, key_and_value); + + r = init_handle(client, metadata_callback, header_callback, &data[i], i, timeout, vfd); + if (0 != r) + { + rc = r; + goto out2; + } + curl_multi_add_handle(curlm, data[i].handle); + } + + /* Query servers */ + if (vfd >= 0) + dprintf (vfd, "Starting %d queries\n",num_urls); + int committed_to; + r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd, &committed_to); + if (0 != r) + { + rc = r; + goto out2; + } + + /* NOTE: We don't check the return codes of the curl messages since + a metadata query failing silently is just fine. We want to know what's + available from servers which can be connected with no issues. + If running with additional verbosity, the failure will be noted in stderr */ + + /* Building the new json array from all the upstream data and + cleanup while at it. 
+ */ + for (int i = 0; i < num_urls; i++) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + + if (NULL == data[i].metadata) + { + if (vfd >= 0) + dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n", + data[i].url, data[i].errbuf); + json_metadata_complete = false; + continue; + } + + json_object *upstream_metadata = json_tokener_parse(data[i].metadata); + json_object *upstream_complete; + json_object *upstream_metadata_arr; + if (NULL == upstream_metadata || + !json_object_object_get_ex(upstream_metadata, "results", &upstream_metadata_arr) || + !json_object_object_get_ex(upstream_metadata, "complete", &upstream_complete)) + continue; + json_metadata_complete &= json_object_get_boolean(upstream_complete); + // Combine the upstream metadata into the json array + for (int j = 0, n = json_object_array_length(upstream_metadata_arr); j < n; j++) + { + json_object *entry = json_object_array_get_idx(upstream_metadata_arr, j); + json_object_get(entry); // increment reference count + json_object_array_add(json_metadata_arr, entry); + } + json_object_put(upstream_metadata); + + free (data[i].metadata); + } + + /* Because of race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */ + for (int i=0; i<2; i++) + { + /* (re)create target directory in cache */ + (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */ + + /* NB: write to a temporary file first, to avoid race condition of + multiple clients checking the cache, while a partially-written or empty + file is in there, being written from libcurl. */ + fd = mkstemp (target_cache_tmppath); + if (fd >= 0) break; + } + if (fd < 0) /* Still failed after two iterations. */ + { + rc = -errno; + goto out1; + } + + /* Plop the complete json_metadata object into the cache. */ + json_object_object_add(json_metadata, "complete", json_object_new_boolean(json_metadata_complete)); + const char* json_string = json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY); + if (json_string == NULL) + { + rc = -ENOMEM; + goto out1; + } + ssize_t res = write_retry (fd, json_string, strlen(json_string)); + (void) lseek(fd, 0, SEEK_SET); // rewind file so client can read it from the top + + /* NB: json_string is auto deleted when json_metadata object is nuked */ + if (res < 0 || (size_t) res != strlen(json_string)) + { + rc = -EIO; + goto out1; + } + /* PR27571: make cache files casually unwriteable; dirs are already 0700 */ + (void) fchmod(fd, 0400); + + /* rename tmp->real */ + rc = rename (target_cache_tmppath, target_cache_path); + if (rc < 0) + { + rc = -errno; + goto out1; + /* Perhaps we need not give up right away; could retry or something ... */ + } + + /* don't close fd - we're returning it */ + /* don't unlink the tmppath; it's already been renamed. 
*/ + if (path != NULL) + *path = strdup(target_cache_path); + + rc = fd; + goto out1; + +/* error exits */ +out2: + /* remove all handles from multi */ + for (int i = 0; i < num_urls; i++) + { + if (data[i].handle != NULL) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + free (data[i].metadata); + } + } + +out1: + free(data); + + for (int i = 0; i < num_urls; ++i) + free(server_url_list[i]); + free(server_url_list); + free(url_ima_policies); + +out: + free (server_urls); + json_object_put(json_metadata); + /* Reset sent headers */ + curl_slist_free_all (client->headers); + client->headers = NULL; + client->user_agent_set_p = 0; + + free (target_cache_dir); + free (target_cache_path); + free (target_cache_tmppath); + free (key_and_value); + free (target_file_name); + free (cache_path); + + return rc; +} + + /* Add an outgoing HTTP header. */ int debuginfod_add_http_header (debuginfod_client *client, const char* header) { diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c index 080dd8f2..0ef80377 100644 --- a/debuginfod/debuginfod-find.c +++ b/debuginfod/debuginfod-find.c @@ -1,6 +1,6 @@ /* Command-line frontend for retrieving ELF / DWARF / source files from the debuginfod. - Copyright (C) 2019-2020 Red Hat, Inc. + Copyright (C) 2019-2023 Red Hat, Inc. This file is part of elfutils. This file is free software; you can redistribute it and/or modify @@ -30,7 +30,7 @@ #include <fcntl.h> #include <gelf.h> #include <libdwelf.h> - +#include <json-c/json.h> /* Name and version of program. */ ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; @@ -49,9 +49,10 @@ static const char args_doc[] = N_("debuginfo BUILDID\n" "executable PATH\n" "source BUILDID /FILENAME\n" "source PATH /FILENAME\n" - "section BUILDID SECTION-NAME\n" - "section PATH SECTION-NAME\n"); - + "section BUILDID SECTION-NAME\n" + "section PATH SECTION-NAME\n" + "metadata (glob|file|KEY) (GLOB|FILENAME|VALUE)\n" + ); /* Definitions of arguments for argp functions. */ static const struct argp_option options[] = @@ -145,49 +146,60 @@ main(int argc, char** argv) /* If we were passed an ELF file name in the BUILDID slot, look in there. */ unsigned char* build_id = (unsigned char*) argv[remaining+1]; int build_id_len = 0; /* assume text */ - - int any_non_hex = 0; - int i; - for (i = 0; build_id[i] != '\0'; i++) - if ((build_id[i] >= '0' && build_id[i] <= '9') || - (build_id[i] >= 'a' && build_id[i] <= 'f')) - ; - else - any_non_hex = 1; - - int fd = -1; Elf* elf = NULL; - if (any_non_hex) /* raw build-id */ - { - fd = open ((char*) build_id, O_RDONLY); - if (fd < 0) - fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno)); - } - if (fd >= 0) - { - elf = dwelf_elf_begin (fd); - if (elf == NULL) - fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id, - elf_errmsg (-1)); - } - if (elf != NULL) + + /* Process optional buildid given via ELF file name, for some query types only. 
*/ + if (strcmp(argv[remaining], "debuginfo") == 0 + || strcmp(argv[remaining], "executable") == 0 + || strcmp(argv[remaining], "source") == 0 + || strcmp(argv[remaining], "section") == 0) { - const void *extracted_build_id; - ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id); - if (s > 0) + int any_non_hex = 0; + int i; + for (i = 0; build_id[i] != '\0'; i++) + if ((build_id[i] >= '0' && build_id[i] <= '9') || + (build_id[i] >= 'a' && build_id[i] <= 'f')) + ; + else + any_non_hex = 1; + + int fd = -1; + if (any_non_hex) /* raw build-id */ + { + fd = open ((char*) build_id, O_RDONLY); + if (fd < 0) + fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno)); + } + if (fd >= 0) + { + elf = dwelf_elf_begin (fd); + if (elf == NULL) + fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id, + elf_errmsg (-1)); + } + if (elf != NULL) { - /* Success: replace the build_id pointer/len with the binary blob - that elfutils is keeping for us. It'll remain valid until elf_end(). */ - build_id = (unsigned char*) extracted_build_id; - build_id_len = s; + const void *extracted_build_id; + ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id); + if (s > 0) + { + /* Success: replace the build_id pointer/len with the binary blob + that elfutils is keeping for us. It'll remain valid until elf_end(). */ + build_id = (unsigned char*) extracted_build_id; + build_id_len = s; + } + else + fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1)); } - else - fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1)); } char *cache_name; int rc = 0; + /* By default the stdout output is the path of the cached file. + Some requests (ex. metadata query may instead choose to do a different output, + in that case a stringified json object) */ + bool print_cached_file = true; /* Check whether FILETYPE is valid and call the appropriate debuginfod_find_* function. If FILETYPE is "source" then ensure a FILENAME was also supplied as an argument. */ @@ -221,6 +233,38 @@ main(int argc, char** argv) rc = debuginfod_find_section(client, build_id, build_id_len, argv[remaining+2], &cache_name); } + else if (strcmp(argv[remaining], "metadata") == 0) /* no buildid! 
*/ + { + if (remaining+2 == argc) + { + fprintf(stderr, "Require KEY and VALUE for \"metadata\"\n"); + return 1; + } + + rc = debuginfod_find_metadata (client, argv[remaining+1], argv[remaining+2], + &cache_name); + if (rc >= 0) + { + /* We output a pprinted JSON object, not the regular debuginfod-find cached file path */ + print_cached_file = false; + json_object *metadata = json_object_from_file(cache_name); + if(metadata) + { + printf("%s\n", json_object_to_json_string_ext(metadata, + JSON_C_TO_STRING_PRETTY +#ifdef JSON_C_TO_STRING_NOSLASHESCAPE /* json-c 0.15 */ + | JSON_C_TO_STRING_NOSLASHESCAPE +#endif + )); + json_object_put(metadata); + } + else + { + fprintf(stderr, "%s does not contain a valid JSON format object\n", cache_name); + return 1; + } + } + } else { argp_help (&argp, stderr, ARGP_HELP_USAGE, argv[0]); @@ -240,8 +284,6 @@ main(int argc, char** argv) debuginfod_end (client); if (elf) elf_end(elf); - if (fd >= 0) - close (fd); if (rc < 0) { @@ -251,7 +293,7 @@ main(int argc, char** argv) else close (rc); - printf("%s\n", cache_name); + if(print_cached_file) printf("%s\n", cache_name); free (cache_name); return 0; diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index d9259ad2..305edde8 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -76,6 +76,7 @@ extern "C" { #include <netdb.h> #include <math.h> #include <float.h> +#include <fnmatch.h> /* If fts.h is included before config.h, its indirect inclusions may not @@ -148,6 +149,7 @@ extern "C" { #include "printversion.h" #include "system.h" } +#include <json-c/json.h> inline bool @@ -220,7 +222,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] = " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" " primary key (buildid, file, mtime)\n" " ) " WITHOUT_ROWID ";\n" - // Index for faster delete by file identifier + // Index for faster delete by file identifier and metadata searches "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n" "create table if not exists " BUILDIDS "_f_s (\n" " buildid integer not null,\n" @@ -246,6 +248,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] = " ) " WITHOUT_ROWID ";\n" // Index for faster delete by archive file identifier "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n" + // Index for metadata searches + "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n" "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm " buildid integer not null,\n" " artifactsrc integer not null,\n" @@ -454,6 +458,9 @@ static const struct argp_option options[] = #define ARGP_KEY_KOJI_SIGCACHE 0x100B { "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 }, #endif +#define ARGP_KEY_METADATA_MAXTIME 0x100C + { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0, + "Number of seconds to limit metadata query run time, 0=unlimited.", 0 }, { NULL, 0, NULL, 0, NULL, 0 }, }; @@ -509,6 +516,7 @@ static long scan_checkpoint = 256; #ifdef ENABLE_IMA_VERIFICATION static bool requires_koji_sigcache_mapping = false; #endif +static unsigned metadata_maxtime_s = 5; static void set_metric(const string& key, double value); static void inc_metric(const string& key); @@ -711,7 +719,10 @@ parse_opt (int key, char *arg, case ARGP_SCAN_CHECKPOINT: scan_checkpoint = atol (arg); if (scan_checkpoint < 0) - argp_failure(state, 1, EINVAL, 
"scan checkpoint"); + argp_failure(state, 1, EINVAL, "scan checkpoint"); + break; + case ARGP_KEY_METADATA_MAXTIME: + metadata_maxtime_s = (unsigned) atoi(arg); break; #ifdef ENABLE_IMA_VERIFICATION case ARGP_KEY_KOJI_SIGCACHE: @@ -2382,6 +2393,58 @@ handle_buildid_r_match (bool internal_req_p, return r; } +void +add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){ + // Transcribe incoming User-Agent: + string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; + string ua_complete = string("User-Agent: ") + ua; + debuginfod_add_http_header (client, ua_complete.c_str()); + + // Compute larger XFF:, for avoiding info loss during + // federation, and for future cyclicity detection. + string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; + if (xff != "") + xff += string(", "); // comma separated list + + unsigned int xff_count = 0; + for (auto&& i : xff){ + if (i == ',') xff_count++; + } + + // if X-Forwarded-For: exceeds N hops, + // do not delegate a local lookup miss to upstream debuginfods. + if (xff_count >= forwarded_ttl_limit) + throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ +and will not query the upstream servers"); + + // Compute the client's numeric IP address only - so can't merge with conninfo() + const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + struct sockaddr *so = u ? u->client_addr : 0; + char hostname[256] = ""; // RFC1035 + if (so && so->sa_family == AF_INET) { + (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else if (so && so->sa_family == AF_INET6) { + struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; + if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { + struct sockaddr_in addr4; + memset (&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_port = addr6->sin6_port; + memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); + (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), + hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else { + (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } + } + + string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); + debuginfod_add_http_header (client, xff_complete.c_str()); +} static struct MHD_Response* handle_buildid_match (bool internal_req_p, @@ -2615,58 +2678,8 @@ handle_buildid (MHD_Connection* conn, debuginfod_set_progressfn (client, & debuginfod_find_progress); if (conn) - { - // Transcribe incoming User-Agent: - string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; - string ua_complete = string("User-Agent: ") + ua; - debuginfod_add_http_header (client, ua_complete.c_str()); - - // Compute larger XFF:, for avoiding info loss during - // federation, and for future cyclicity detection. - string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; - if (xff != "") - xff += string(", "); // comma separated list - - unsigned int xff_count = 0; - for (auto&& i : xff){ - if (i == ',') xff_count++; - } + add_client_federation_headers(client, conn); - // if X-Forwarded-For: exceeds N hops, - // do not delegate a local lookup miss to upstream debuginfods. 
- if (xff_count >= forwarded_ttl_limit) - throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ -and will not query the upstream servers"); - - // Compute the client's numeric IP address only - so can't merge with conninfo() - const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, - MHD_CONNECTION_INFO_CLIENT_ADDRESS); - struct sockaddr *so = u ? u->client_addr : 0; - char hostname[256] = ""; // RFC1035 - if (so && so->sa_family == AF_INET) { - (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else if (so && so->sa_family == AF_INET6) { - struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; - if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { - struct sockaddr_in addr4; - memset (&addr4, 0, sizeof(addr4)); - addr4.sin_family = AF_INET; - addr4.sin_port = addr6->sin6_port; - memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); - (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), - hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else { - (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } - } - - string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); - debuginfod_add_http_header (client, xff_complete.c_str()); - } - if (artifacttype == "debuginfo") fd = debuginfod_find_debuginfo (client, (const unsigned char*) buildid.c_str(), @@ -2873,6 +2886,225 @@ handle_metrics (off_t* size) return r; } + +static struct MHD_Response* +handle_metadata (MHD_Connection* conn, + string key, string value, off_t* size) +{ + MHD_Response* r; + sqlite3 *thisdb = dbq; + + // Query locally for matching e, d files + string op; + if (key == "glob") + op = "glob"; + else if (key == "file") + op = "="; + else + throw reportable_exception("/metadata webapi error, unsupported key"); + + // Since PR30378, the file names are segmented into two tables. We + // could do a glob/= search over the _files_v view that combines + // them, but that means that the entire _files_v thing has to be + // materialized & scanned to do the query. Slow! Instead, we can + // segment the incoming file/glob pattern into dirname / basename + // parts, and apply them to the corresponding table. This is done + // by splitting the value at the last "/". If absent, the same + // convention as is used in register_file_name(). + + string dirname, bname; // basename is a "poisoned" identifier on some distros + size_t slash = value.rfind('/'); + if (slash == std::string::npos) { + dirname = ""; + bname = value; + } else { + dirname = value.substr(0, slash); + bname = value.substr(slash+1); + } + + // NB: further optimization is possible: replacing the 'glob' op + // with simple equality, if the corresponding value segment lacks + // metacharacters. sqlite may or may not be smart enough to do so, + // so we help out. + string metacharacters = "[]*?"; + string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op; + string bop = (op == "glob" && bname.find_first_of(metacharacters) == string::npos) ? 
"=" : op; + + string sql = string( + // explicit query r_de and f_de once here, rather than the query_d and query_e + // separately, because they scan the same tables, so we'd double the work + "select d1.executable_p, d1.debuginfo_p, 0 as source_p, " + " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive " + "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, " + BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 " + "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id " + " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id " + "union all \n" + "select d2.executable_p, d2.debuginfo_p, 0, " + " b2.hex, f2d.name || '/' || f2b.name, NULL " + "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, " + BUILDIDS "_buildids b2 " + "where f2.id = d2.file and d2.buildid = b2.id " + " and f2d.name " + dop + " ? and f2b.name " + bop + " ? " + " and f2.dirname = f2d.id and f2.basename = f2b.id"); + + // NB: we could query source file names too, thusly: + // + // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr + // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?" + // UNION ALL something with BUILDIDS "_f_s" + // + // But the first part of this query cannot run fast without the same index temp-created + // during "maxigroom": + // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc); + // and unfortunately this index is HUGE. It's similar to the size of the _r_sref + // table, which is already the largest part of a debuginfod index. Adding that index + // would nearly double the .sqlite db size. + + sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql); + pp->reset(); + pp->bind(1, dirname); + pp->bind(2, bname); + pp->bind(3, dirname); + pp->bind(4, bname); + unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return + + json_object *metadata = json_object_new_object(); + if (!metadata) throw libc_exception(ENOMEM, "json allocation"); + defer_dtor<json_object*,int> metadata_d(metadata, json_object_put); + json_object *metadata_arr = json_object_new_array(); + if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation"); + json_object_object_add(metadata, "results", metadata_arr); + // consume all the rows + struct timespec ts_start; + clock_gettime (CLOCK_MONOTONIC, &ts_start); + + int rc; + bool metadata_complete = true; + while (SQLITE_DONE != (rc = pp->step())) + { + // break out of loop if we have searched too long + struct timespec ts_end; + clock_gettime (CLOCK_MONOTONIC, &ts_end); + double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9; + if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s) + { + metadata_complete = false; + break; + } + + if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step"); + + int m_executable_p = sqlite3_column_int (*pp, 0); + int m_debuginfo_p = sqlite3_column_int (*pp, 1); + int m_source_p = sqlite3_column_int (*pp, 2); + string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null + string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: ""; + string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: ""; + + // Confirm that m_file matches in the fnmatch(FNM_PATHNAME) + // sense, since sqlite's GLOB operator is a looser filter. 
+ if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0) + continue; + + auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) { + json_object* entry = json_object_new_object(); + if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json"); + defer_dtor<json_object*,int> entry_d(entry, json_object_put); + + auto add_entry_metadata = [entry](const char* k, string v) { + json_object* s; + if(v != "") { + s = json_object_new_string(v.c_str()); + if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json"); + json_object_object_add(entry, k, s); + } + }; + + add_entry_metadata("type", type.c_str()); + add_entry_metadata("buildid", m_buildid); + add_entry_metadata("file", m_file); + if (m_archive != "") add_entry_metadata("archive", m_archive); + if (verbose > 3) + obatched(clog) << "metadata found local " + << json_object_to_json_string_ext(entry, + JSON_C_TO_STRING_PRETTY) + << endl; + + // Increase ref count to switch its ownership + json_object_array_add(metadata_arr, json_object_get(entry)); + }; + + if (m_executable_p) add_metadata("executable"); + if (m_debuginfo_p) add_metadata("debuginfo"); + if (m_source_p) add_metadata("source"); + } + pp->reset(); + + unsigned num_local_results = json_object_array_length(metadata_arr); + + // Query upstream as well + debuginfod_client *client = debuginfod_pool_begin(); + if (client != NULL) + { + add_client_federation_headers(client, conn); + + int upstream_metadata_fd; + char *upstream_metadata_file = NULL; + upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(), + &upstream_metadata_file); + if (upstream_metadata_fd >= 0) { + /* json-c >= 0.13 has json_object_from_fd(). */ + json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file); + free (upstream_metadata_file); + json_object *upstream_metadata_json_arr; + json_object *upstream_complete; + if (NULL != upstream_metadata_json && + json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) && + json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete)) + { + metadata_complete &= json_object_get_boolean(upstream_complete); + for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++) + { + json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i); + if (verbose > 3) + obatched(clog) << "metadata found remote " + << json_object_to_json_string_ext(entry, + JSON_C_TO_STRING_PRETTY) + << endl; + + json_object_get(entry); // increment reference count + json_object_array_add(metadata_arr, entry); + } + json_object_put(upstream_metadata_json); + } + close(upstream_metadata_fd); + } + debuginfod_pool_end (client); + } + + unsigned num_total_results = json_object_array_length(metadata_arr); + + if (verbose > 2) + obatched(clog) << "metadata found local=" << num_local_results + << " remote=" << (num_total_results-num_local_results) + << " total=" << num_total_results + << endl; + + json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete)); + const char* metadata_str = json_object_to_json_string(metadata); + if (!metadata_str) + throw libc_exception (ENOMEM, "cannot allocate json"); + r = MHD_create_response_from_buffer (strlen(metadata_str), + (void*) metadata_str, + MHD_RESPMEM_MUST_COPY); + *size = strlen(metadata_str); + if (r) + add_mhd_response_header(r, "Content-Type", "application/json"); + return r; +} + + static struct MHD_Response* 
handle_root (off_t* size)
 {
@@ -2939,6 +3171,7 @@ handler_cb (void * /*cls*/,
   clock_gettime (CLOCK_MONOTONIC, &ts_start);
   double afteryou = 0.0;
   string artifacttype, suffix;
+  string urlargs; // for logging
 
   try
     {
@@ -3007,6 +3240,19 @@ handler_cb (void * /*cls*/,
           inc_metric("http_requests_total", "type", artifacttype);
           r = handle_metrics(& http_size);
         }
+      else if (url1 == "/metadata")
+        {
+          tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+          const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
+          const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
+          if (NULL == value || NULL == key)
+            throw reportable_exception("/metadata webapi error, need key and value");
+
+          urlargs = string("?key=") + string(key) + string("&value=") + string(value); // approx., for logging
+          artifacttype = "metadata";
+          inc_metric("http_requests_total", "type", artifacttype);
+          r = handle_metadata(connection, key, value, &http_size);
+        }
       else if (url1 == "/")
         {
           artifacttype = "/";
@@ -3043,7 +3289,7 @@ handler_cb (void * /*cls*/,
   // afteryou: delay waiting for other client's identical query to complete
   // deltas: total latency, including afteryou waiting
   obatched(clog) << conninfo(connection)
-                 << ' ' << method << ' ' << url
+                 << ' ' << method << ' ' << url << urlargs
                  << ' ' << http_code << ' ' << http_size
                  << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
                  << endl;
@@ -3396,6 +3642,7 @@ register_file_name(sqlite_ps& ps_upsert_fileparts,
       dirname = name.substr(0, slash);
       filename = name.substr(slash+1);
     }
+  // NB: see also handle_metadata()
 
   // intern the two substrings
   ps_upsert_fileparts
@@ -4379,12 +4626,13 @@ void groom()
   if (interrupted) return;
 
   // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
-  sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
-  g1.reset().step_ok_done();
-  sqlite_ps g2 (db, "optimize", "pragma optimize");
-  g2.reset().step_ok_done();
-  sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
-  g3.reset().step_ok_done();
+  { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+  // https://www.sqlite.org/lang_analyze.html#approx
+  { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+  { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+  { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+  { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+  { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
 
   database_stats_report();
 
@@ -4769,6 +5017,8 @@ main (int argc, char *argv[])
   if (maxigroom)
     {
       obatched(clog) << "maxigrooming database, please wait." << endl;
+      // NB: this index alone can nearly double the database size!
+      // NB: this index would be necessary to run source-file metadata searches fast
       extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
       extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
       extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
diff --git a/debuginfod/debuginfod.h.in b/debuginfod/debuginfod.h.in
index 73f633f0..0a6a4a22 100644
--- a/debuginfod/debuginfod.h.in
+++ b/debuginfod/debuginfod.h.in
@@ -63,9 +63,9 @@ debuginfod_client *debuginfod_begin (void);
    it is a binary blob of given length.
 
    If successful, return a file descriptor to the target, otherwise
-   return a posix error code.  If successful, set *path to a
-   strdup'd copy of the name of the same file in the cache.
-   Caller must free() it later. */
+   return a negative POSIX error code.  If successful, set *path to a
+   strdup'd copy of the name of the same file in the cache.  Caller
+   must free() it later. */
 
 int debuginfod_find_debuginfo (debuginfod_client *client,
                                const unsigned char *build_id,
@@ -89,6 +89,22 @@ int debuginfod_find_section (debuginfod_client *client,
                              const char *section,
                              char **path);
 
+/* Query the URLs contained in $DEBUGINFOD_URLS for metadata
+   with given query key/value.
+
+   If successful, return a file descriptor to the JSON document
+   describing matches, otherwise return a negative POSIX error code.  If
+   successful, set *path to a strdup'd copy of the name of the same
+   file in the cache.  Caller must free() it later.
+
+   See the debuginfod-find(1) man page for examples of the supported types
+   of key/value queries and their JSON results.
+ */
+int debuginfod_find_metadata (debuginfod_client *client,
+                              const char *key,
+                              const char *value,
+                              char **path);
+
 typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b);
 void debuginfod_set_progressfn(debuginfod_client *c,
                                debuginfod_progressfn_t fn);
diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map
index 6334373f..9cee91cd 100644
--- a/debuginfod/libdebuginfod.map
+++ b/debuginfod/libdebuginfod.map
@@ -22,3 +22,6 @@ ELFUTILS_0.188 {
   debuginfod_get_headers;
   debuginfod_find_section;
 } ELFUTILS_0.183;
+ELFUTILS_0.192 {
+  debuginfod_find_metadata;
+} ELFUTILS_0.188;
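Editor's note: for illustration, a minimal client-side sketch of the new call, modeled on the
debuginfod-find changes above.  This is not part of the patch: it assumes $DEBUGINFOD_URLS is
set in the environment, that json-c and libdebuginfod are installed, and it abbreviates error
handling.  The glob pattern is an arbitrary example.

/* Sketch: run a metadata query and pretty-print the cached JSON reply. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <json-c/json.h>
#include <elfutils/debuginfod.h>

int
main (void)
{
  debuginfod_client *client = debuginfod_begin ();
  if (client == NULL)
    return 1;

  char *path = NULL;
  /* KEY is "file" (exact match) or "glob" (shell-wildcard match). */
  int fd = debuginfod_find_metadata (client, "glob", "/usr/bin/l*", &path);
  if (fd < 0)
    {
      /* On failure, the return value is a negative POSIX error code. */
      fprintf (stderr, "metadata query failed: %s\n", strerror (-fd));
      debuginfod_end (client);
      return 1;
    }

  /* PATH names a cached JSON document of the shape produced by
     handle_metadata() above, approximately:
       { "results": [ { "type": "...", "buildid": "...",
                        "file": "...", "archive": "..." }, ... ],
         "complete": true }
     where "archive" appears only for archive-resident files and
     "complete" is false if a server hit its --metadata-maxtime limit. */
  json_object *metadata = json_object_from_file (path);
  if (metadata != NULL)
    {
      printf ("%s\n", json_object_to_json_string_ext (metadata,
                                                      JSON_C_TO_STRING_PRETTY));
      json_object_put (metadata);
    }

  free (path);   /* caller owns *path */
  close (fd);    /* and the descriptor */
  debuginfod_end (client);
  return 0;
}

Link with -ldebuginfod -ljson-c.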
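Editor's note: the pattern-splitting optimization in handle_metadata() can be shown standalone.
The hypothetical helper below is not part of the patch; it mirrors the split of the query value
at its last '/', and the downgrade of a "glob" operator to plain '=' for any segment that
contains none of the metacharacters []*? (for key "file" the operator is already '=').

/* Sketch of the dirname/basename segmentation and operator choice. */
#include <stdio.h>
#include <string.h>

static void
split_and_choose (const char *value)
{
  char dirname[4096] = "";
  const char *bname = value;
  const char *slash = strrchr (value, '/');   /* split at last '/' */
  if (slash != NULL)
    {
      snprintf (dirname, sizeof dirname, "%.*s", (int)(slash - value), value);
      bname = slash + 1;
    }
  const char *meta = "[]*?";
  /* A segment without metacharacters can use '=' instead of GLOB. */
  printf ("dirname=\"%s\" op=%s  bname=\"%s\" op=%s\n",
          dirname, strpbrk (dirname, meta) ? "glob" : "=",
          bname, strpbrk (bname, meta) ? "glob" : "=");
}

int
main (void)
{
  split_and_choose ("/usr/bin/c*");  /* dirname uses '=', basename uses glob */
  split_and_choose ("/bin/ls");      /* both segments use '=' */
  return 0;
}

Note that the server still re-checks each candidate row with fnmatch(FNM_PATHNAME), since
sqlite's GLOB operator does not treat '/' specially and is therefore a looser filter.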