diff options
author | Frank Ch. Eigler <[email protected]> | 2024-10-10 16:30:19 -0400 |
---|---|---|
committer | Frank Ch. Eigler <[email protected]> | 2024-10-16 09:49:35 -0400 |
commit | 6814e0edc112583428114bc20f8d78f864c57128 (patch) | |
tree | 5201ec0283a1994c445157c6fc3091c588df381e | |
parent | b68f34725229b08380a1612899b0537f8f597dad (diff) |
PR32218: debuginfod-client: support very long source file names
debuginfod clients & servers may sometimes encounter very long source
file names. Previously, the client would synthesize a path name like
$CACHEDIR/$BUILDID/source-$PATHNAME
where $PATHNAME was a funky ##-encoded version of the entire source
path name. See https://sourceware.org/PR32218 for a horror case.
This can get too long to store as a single component of a file system
pathname (e.g. linux/limits.h NAME_MAX), resulting on client-side
errors even after a successful download.
New code switches encoding of the $PATHNAME part to use less escaping,
and a merciless truncation to the tail part of the filename. (We keep
the tail rather than the head, so that the extension is preserved,
which makes some consumers happier.) To limit collision damage from
truncation, we add also insert a goofy hash (4-byte DJBX33A) of the
name into the path name. The result is a relatively short name:
$CACHEDIR/$BUILDID/source-$HASH-$NAMETAIL
This is a transparent change to clients, who are not to make any
assumptions about cache file naming structure. However, one existing
test did make such assumptions, so is fixed with some globness. A new
test is also added, using a pre-baked tarball with a very long srcfile
name.
Signed-off-by: Frank Ch. Eigler <[email protected]>
-rw-r--r-- | debuginfod/debuginfod-client.c | 108 | ||||
-rw-r--r-- | tests/Makefile.am | 7 | ||||
-rw-r--r-- | tests/debuginfod-tars/bighello-sources/bighello.c | 7 | ||||
-rw-r--r-- | tests/debuginfod-tars/bighello-sources/bighello.h | 1 | ||||
l--------- | tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore | 1 | ||||
-rw-r--r-- | tests/debuginfod-tars/bighello.tar | bin | 0 -> 51200 bytes | |||
-rwxr-xr-x | tests/run-debuginfod-longsource.sh | 69 | ||||
-rwxr-xr-x | tests/run-debuginfod-section.sh | 16 |
8 files changed, 162 insertions, 47 deletions
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 24ede19a..deff19ff 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -1201,29 +1201,69 @@ perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, de /* Copy SRC to DEST, s,/,#,g */ static void -path_escape (const char *src, char *dest) +path_escape (const char *src, char *dest, size_t dest_len) { - unsigned q = 0; + /* PR32218: Reversibly-escaping src character-by-character, for + large enough strings, risks ENAMETOOLONG errors. For long names, + a simple hash based generated name instead, while still + attempting to preserve the as much of the content as possible. + It's possible that absurd choices of incoming names will collide + or still get truncated, but c'est la vie. + */ + + /* Compute a three-way min() for the actual output string generated. */ + assert (dest_len > 10); /* Space enough for degenerate case of + "HASHHASH-\0". NB: dest_len is not + user-controlled. */ + /* Use only NAME_MAX/2 characters in the output file name. + ENAMETOOLONG has been observed even on 300-ish character names on + some filesystems. */ + const size_t max_dest_len = NAME_MAX/2; + dest_len = dest_len > max_dest_len ? max_dest_len : dest_len; + /* Use only strlen(src)+10 bytes, if that's smaller. Yes, we could + just fit an entire escaped name in there in theory, without the + hash+etc. But then again the hashing protects against #-escape + aliasing collisions: "foo[bar" "foo]bar" both escape to + "foo#bar", thus aliasing, but have different "HASH-foo#bar". + */ + const size_t hash_prefix_destlen = strlen(src)+10; /* DEADBEEF-src\0 */ + dest_len = dest_len > hash_prefix_destlen ? hash_prefix_destlen : dest_len; - for (unsigned fi=0; q < PATH_MAX-2; fi++) /* -2, escape is 2 chars. */ - switch (src[fi]) - { - case '\0': - dest[q] = '\0'; - return; - case '/': /* escape / to prevent dir escape */ - dest[q++]='#'; - dest[q++]='#'; - break; - case '#': /* escape # to prevent /# vs #/ collisions */ - dest[q++]='#'; - dest[q++]='_'; - break; - default: - dest[q++]=src[fi]; - } + char *dest_write = dest + dest_len - 1; + (*dest_write--) = '\0'; /* Ensure a \0 there. */ - dest[q] = '\0'; + /* Copy from back toward front, preferring to keep the .extension. */ + for (int fi=strlen(src)-1; fi >= 0 && dest_write >= dest; fi--) + { + char src_char = src[fi]; + switch (src_char) + { + /* Pass through ordinary identifier chars. */ + case '.': case '-': case '_': + case 'a'...'z': + case 'A'...'Z': + case '0'...'9': + *dest_write-- = src_char; + break; + + /* Replace everything else, esp. security-sensitive /. */ + default: + *dest_write-- = '#'; + break; + } + } + + /* djb2 hash algorithm: DJBX33A */ + unsigned long hash = 5381; + const char *c = src; + while (*c) + hash = ((hash << 5) + hash) + *c++; + char name_hash_str [9]; + /* Truncate to 4 bytes; along with the remaining hundredish bytes of text, + should be ample against accidental collisions. */ + snprintf (name_hash_str, sizeof(name_hash_str), "%08x", (unsigned int) hash); + memcpy (&dest[0], name_hash_str, 8); /* Overwrite the first few characters */ + dest[8] = '-'; /* Add a bit of punctuation to make hash stand out */ } /* Attempt to update the atime */ @@ -1267,7 +1307,6 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path) } int sec_fd = -1; - char *escaped_name = NULL; char *sec_path_tmp = NULL; Elf_Scn *scn = NULL; @@ -1332,16 +1371,10 @@ extract_section (int fd, const char *section, char *fd_path, char **usr_path) --i; } - escaped_name = malloc (strlen (section) * 2 + 1); - if (escaped_name == NULL) - { - rc = -ENOMEM; - goto out; - } - path_escape (section, escaped_name); - + char suffix[NAME_MAX]; + path_escape (section, suffix, sizeof(suffix)); rc = asprintf (&sec_path_tmp, "%s/section-%s.XXXXXX", - fd_path, escaped_name); + fd_path, suffix); if (rc == -1) { rc = -ENOMEM; @@ -1396,8 +1429,6 @@ out2: free (sec_path_tmp); out1: - free (escaped_name); - out: elf_end (elf); return rc; @@ -1747,7 +1778,7 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, char *target_cache_dir = NULL; char *target_cache_path = NULL; char *target_cache_tmppath = NULL; - char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */ + char suffix[NAME_MAX]; char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; int vfd = c->verbose_fd; int rc, r; @@ -1861,13 +1892,13 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, goto out; } - path_escape (filename, suffix); + path_escape (filename, suffix, sizeof(suffix)); /* If the DWARF filenames are super long, this could exceed PATH_MAX and truncate/collide. Oh well, that'll teach them! */ } else if (section != NULL) - path_escape (section, suffix); + path_escape (section, suffix, sizeof(suffix)); else suffix[0] = '\0'; @@ -1879,7 +1910,8 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, cache_path: $HOME/.cache target_cache_dir: $HOME/.cache/0123abcd target_cache_path: $HOME/.cache/0123abcd/debuginfo - target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ? + target_cache_path: $HOME/.cache/0123abcd/executable-.debug_info + target_cache_path: $HOME/.cache/0123abcd/source-HASH-#PATH#TO#SOURCE */ cache_path = make_cache_path(); @@ -1891,10 +1923,10 @@ debuginfod_query_server_by_buildid (debuginfod_client *c, xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too - if (section != NULL) + if (suffix[0] != '\0') /* section, source queries */ xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix); else - xalloc_str (target_cache_path, "%s/%s%s", target_cache_dir, type, suffix); + xalloc_str (target_cache_path, "%s/%s", target_cache_dir, type); xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path); /* XXX combine these */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 424c184b..ffccb0cd 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -273,7 +273,8 @@ TESTS += run-srcfiles-self.sh \ run-debuginfod-section.sh \ run-debuginfod-IXr.sh \ run-debuginfod-client-profile.sh \ - run-debuginfod-find-metadata.sh + run-debuginfod-find-metadata.sh \ + run-debuginfod-longsource.sh endif if !OLD_LIBMICROHTTPD # Will crash on too old libmicrohttpd @@ -607,6 +608,7 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ run-debuginfod-IXr.sh \ run-debuginfod-ima-verification.sh \ run-debuginfod-find-metadata.sh \ + run-debuginfod-longsource.sh \ debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \ debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \ debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \ @@ -654,6 +656,9 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ debuginfod-tars/pacman-sources/PKGBUILD \ debuginfod-tars/pacman-sources/README.md \ debuginfod-tars/pacman-sources/hello.c \ + debuginfod-tars/bighello.tar \ + debuginfod-tars/bighello-sources/bighello.c \ + debuginfod-tars/bighello-sources/bighello.h \ run-pt_gnu_prop-tests.sh \ testfile_pt_gnu_prop.bz2 testfile_pt_gnu_prop32.bz2 \ run-getphdrnum.sh testfile-phdrs.elf.bz2 \ diff --git a/tests/debuginfod-tars/bighello-sources/bighello.c b/tests/debuginfod-tars/bighello-sources/bighello.c new file mode 100644 index 00000000..e1145271 --- /dev/null +++ b/tests/debuginfod-tars/bighello-sources/bighello.c @@ -0,0 +1,7 @@ +#include <stdio.h> +#include "moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/bighello.h" + +int main() { + printf("%s\n", HELLO); + return 0; +} diff --git a/tests/debuginfod-tars/bighello-sources/bighello.h b/tests/debuginfod-tars/bighello-sources/bighello.h new file mode 100644 index 00000000..e3de04b8 --- /dev/null +++ b/tests/debuginfod-tars/bighello-sources/bighello.h @@ -0,0 +1 @@ +char *HELLO = "hello"; diff --git a/tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore b/tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore new file mode 120000 index 00000000..945c9b46 --- /dev/null +++ b/tests/debuginfod-tars/bighello-sources/moremoremoremoremoremoremoremore @@ -0,0 +1 @@ +.
\ No newline at end of file diff --git a/tests/debuginfod-tars/bighello.tar b/tests/debuginfod-tars/bighello.tar Binary files differnew file mode 100644 index 00000000..6d5d1d67 --- /dev/null +++ b/tests/debuginfod-tars/bighello.tar diff --git a/tests/run-debuginfod-longsource.sh b/tests/run-debuginfod-longsource.sh new file mode 100755 index 00000000..773af1f1 --- /dev/null +++ b/tests/run-debuginfod-longsource.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2024 Red Hat, Inc. +# This file is part of elfutils. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# elfutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. $srcdir/debuginfod-subr.sh + +# for test case debugging +set -x +unset VALGRIND_CMD + +DB=${PWD}/.debuginfod_tmp.sqlite +tempfiles $DB +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache + +# Set up directories for scanning +mkdir Z +cp -rvp ${abs_srcdir}/debuginfod-tars/bighello.tar Z + +# This variable is essential and ensures no time-race for claiming ports occurs +# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test +base=14200 +get_ports + +# We use -t0 and -g0 here to turn off time-based scanning & grooming. +# For testing purposes, we just sic SIGUSR1 at the process. + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE \ + -Ztar -p $PORT1 -d $DB -t0 -g0 -v ./Z > vlog$PORT1 2>&1 & +PID1=$! +tempfiles vlog$PORT1 +errfiles vlog$PORT1 +# Server must become ready +wait_ready $PORT1 'ready' 1 +# And initial scan should be done +wait_ready $PORT1 'thread_work_total{role="traverse"}' 1 + +kill -USR1 $PID1 # run another index pass to make sure the srcdef/srcref stuff is fully located + +# Wait till both files are in the index. +wait_ready $PORT1 'thread_work_total{role="traverse"}' 2 +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0 +wait_ready $PORT1 'thread_busy{role="scan"}' 0 + +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1 + +######################################################################## + +# Build-id for a.out in said tarball +BUILDID=7fc69cb0e8fb9d4b57e594271b9941b67410aaaa + +# Download short & long files +testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv source $BUILDID /tmp/bighello-sources/bighello.c +testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv source $BUILDID /tmp/bighello-sources/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/moremoremoremoremoremoremoremore/bighello.h + +exit 0 diff --git a/tests/run-debuginfod-section.sh b/tests/run-debuginfod-section.sh index 6ac59688..1746d7ef 100755 --- a/tests/run-debuginfod-section.sh +++ b/tests/run-debuginfod-section.sh @@ -95,11 +95,11 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv section $RPM_BUILDID # Verify that the downloaded files match the contents of the original sections tempfiles ${BUILDID}.debug_info objcopy F/prog.debug -O binary --only-section=.debug_info --set-section-flags .debug_info=alloc $BUILDID.debug_info -cmp ${BUILDID}.debug_info ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.debug_info +cmp ${BUILDID}.debug_info ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.debug_info tempfiles ${BUILDID}.text objcopy F/prog -O binary --only-section=.text ${BUILDID}.text -cmp ${BUILDID}.text ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.text +cmp ${BUILDID}.text ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.text # Download the original debuginfo/executable files. DEBUGFILE=`env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo $RPM_BUILDID` @@ -110,11 +110,11 @@ testrun ${abs_top_builddir}/debuginfod/debuginfod-find -vvv executable $BUILDID if test "$(arch)" == "x86_64"; then tempfiles DEBUGFILE.debug_info objcopy $DEBUGFILE -O binary --only-section=.debug_info --set-section-flags .debug_info=alloc DEBUGFILE.debug_info - testrun diff -u DEBUGFILE.debug_info ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.debug_info + testrun diff -u DEBUGFILE.debug_info ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.debug_info tempfiles EXECFILE.text objcopy $EXECFILE -O binary --only-section=.text EXECFILE.text - testrun diff -u EXECFILE.text ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.text + testrun diff -u EXECFILE.text ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.text fi # Kill the server. @@ -123,10 +123,10 @@ wait $PID1 PID1=0 # Delete the section files from the cache. -rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.text -rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-.debug_info -rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.text -rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-.debug_info +rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.text +rm -f ${DEBUGINFOD_CACHE_PATH}/${RPM_BUILDID}/section-*.debug_info +rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.text +rm -f ${DEBUGINFOD_CACHE_PATH}/${BUILDID}/section-*.debug_info # Verify that the client can extract sections from the debuginfo or executable # if they're already in the cache. |