diff options
author | Frank Ch. Eigler <[email protected]> | 2024-10-10 16:30:19 -0400 |
---|---|---|
committer | Frank Ch. Eigler <[email protected]> | 2024-10-16 09:49:35 -0400 |
commit | 6814e0edc112583428114bc20f8d78f864c57128 (patch) | |
tree | 5201ec0283a1994c445157c6fc3091c588df381e /debuginfod | |
parent | b68f34725229b08380a1612899b0537f8f597dad (diff) |
PR32218: debuginfod-client: support very long source file names
debuginfod clients & servers may sometimes encounter very long source
file names. Previously, the client would synthesize a path name like
$CACHEDIR/$BUILDID/source-$PATHNAME
where $PATHNAME was a funky ##-encoded version of the entire source
path name. See https://sourceware.org/PR32218 for a horror case.
This can get too long to store as a single component of a file system
pathname (e.g. linux/limits.h NAME_MAX), resulting on client-side
errors even after a successful download.
New code switches encoding of the $PATHNAME part to use less escaping,
and a merciless truncation to the tail part of the filename. (We keep
the tail rather than the head, so that the extension is preserved,
which makes some consumers happier.) To limit collision damage from
truncation, we add also insert a goofy hash (4-byte DJBX33A) of the
name into the path name. The result is a relatively short name:
$CACHEDIR/$BUILDID/source-$HASH-$NAMETAIL
This is a transparent change to clients, who are not to make any
assumptions about cache file naming structure. However, one existing
test did make such assumptions, so is fixed with some globness. A new
test is also added, using a pre-baked tarball with a very long srcfile
name.
Signed-off-by: Frank Ch. Eigler <[email protected]>
Diffstat (limited to 'debuginfod')
-rw-r--r-- | debuginfod/debuginfod-client.c | 108 |
1 files changed, 70 insertions, 38 deletions
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 24ede19a..deff19ff 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -1201,29 +1201,69 @@ perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, de /* Copy SRC to DEST, s,/,#,g */ static void -path_escape (const char *src, char *dest) +path_escape (const char *src, char *dest, size_t dest_len) { - unsigned q = 0; + /* PR32218: Reversibly-escaping src character-by-character, for + large enough strings, risks ENAMETOOLONG errors. For long names, + a simple hash based generated name instead, while still + attempting to preserve the as much of the content as possible. + It's possible that absurd choices of incoming names will collide + or still get truncated, but c'est la vie. + */ + + /* Compute a three-way min() for the actual output string generated. */ + assert (dest_len > 10); /* Space enough for degenerate case of + "HASHHASH-\0". NB: dest_len is not + user-controlled. */ + /* Use only NAME_MAX/2 characters in the output file name. + ENAMETOOLONG has been observed even on 300-ish character names on + some filesystems. */ + const size_t max_dest_len = NAME_MAX/2; + dest_len = dest_len > max_dest_len ? max_dest_len : dest_len; + /* Use only strlen(src)+10 bytes, if that's smaller. Yes, we could + just fit an entire escaped name in there in theory, without the + hash+etc. But then again the hashing protects against #-escape + aliasing collisions: "foo[bar" "foo]bar" both escape to + "foo#bar", thus aliasing, but have different "HASH-foo#bar". + */ + const size_t hash_prefix_destlen = strlen(src)+10; /* DEADBEEF-src\0 */ + dest_len = dest_len > hash_prefix_destlen ? hash_prefix_destlen : dest_len; - for (unsigned fi=0; q < PATH_MAX-2; fi++) /* -2, escape is 2 chars. */ - switch (src[fi]) - { - case '\0': - dest[q] = '\0'; - return; - case '/': /* escape / to prevent dir escape */ - dest[q++]='#'; - dest[q++]='#'; - break; - case '#': /* escape # to prevent /# vs #/ collisions */ - dest[q++]='#'; - dest[q++]='_'; - break; - default: - dest[q++]=src[fi]; - } + char *dest_write = dest + dest_len - 1; + (*dest_write--) = '\0'; /* Ensure a \0 there. */ - dest[q] = '\0'; + /* Copy from back toward front, preferring to keep the .extension. */ + for (int fi=strlen(src)-1; fi >= 0 && dest_write >= dest; fi--) + { + char src_char = src[fi]; + switch (src_char) + { + /* Pass through ordinary identifier chars. */ + case '.': case '-': case '_': + case 'a'...'z': + case 'A'...'Z': + case '0'...'9': + *dest_write-- = src_char; + break; + + /* Replace everything else, esp. security-sensitive /. */ + default: + *dest_write-- = '#'; + break; + } + } + + /* djb2 hash algorithm: DJBX33A */ + unsigned long hash = 5381; + const char *c = src; + while (*c) + hash = ((hash << 5) + hash) + *c++; + char name_hash_str [9]; + /* Truncate to 4 bytes; along with the remaining hundredish bytes of text, + should be ample against accidental collisions. */ + snprintf (name_hash_str, sizeof(name_hash_str), "%08x", (unsigned int) hash); + memcpy (&dest[0], name_hash_str, 8); /* Overwrite the first few characters */ + dest[8] = '-'; /* Add a bit of punctuation to make hash stand out */ } /* Attempt to update the atime */ @@ -1267,7 +1307,6 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path) } int sec_fd = -1; - char *escaped_name = NULL; char *sec_path_tmp = NULL; Elf_Scn *scn = NULL; @@ -1332,16 +1371,10 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path) --i; } - escaped_name = malloc (strlen (section) * 2 + 1); - if (escaped_name == NULL) - { - rc = -ENOMEM; - goto out; - } - path_escape (section, escaped_name); - + char suffix[NAME_MAX]; + path_escape (section, suffix, sizeof(suffix)); rc = asprintf (&sec_path_tmp, "%s/section-%s.XXXXXX", - fd_path, escaped_name); + fd_path, suffix); if (rc == -1) { rc = -ENOMEM; @@ -1396,8 +1429,6 @@ out2: free (sec_path_tmp); out1: - free (escaped_name); - out: elf_end (elf); return rc; @@ -1747,7 +1778,7 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, char *target_cache_dir = NULL; char *target_cache_path = NULL; char *target_cache_tmppath = NULL; - char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */ + char suffix[NAME_MAX]; char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; int vfd = c->verbose_fd; int rc, r; @@ -1861,13 +1892,13 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, goto out; } - path_escape (filename, suffix); + path_escape (filename, suffix, sizeof(suffix)); /* If the DWARF filenames are super long, this could exceed PATH_MAX and truncate/collide. Oh well, that'll teach them! */ } else if (section != NULL) - path_escape (section, suffix); + path_escape (section, suffix, sizeof(suffix)); else suffix[0] = '\0'; @@ -1879,7 +1910,8 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, cache_path: $HOME/.cache target_cache_dir: $HOME/.cache/0123abcd target_cache_path: $HOME/.cache/0123abcd/debuginfo - target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ? + target_cache_path: $HOME/.cache/0123abcd/executable-.debug_info + target_cache_path: $HOME/.cache/0123abcd/source-HASH-#PATH#TO#SOURCE */ cache_path = make_cache_path(); @@ -1891,10 +1923,10 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too - if (section != NULL) + if (suffix[0] != '\0') /* section, source queries */ xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix); else - xalloc_str (target_cache_path, "%s/%s%s", target_cache_dir, type, suffix); + xalloc_str (target_cache_path, "%s/%s", target_cache_dir, type); xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path); /* XXX combine these */ |