Skip to content

Commit a7c4d03

Browse files
committed
workbench: log and add cpu model and kernel version as reproducibility constraints
1 parent 65e7241 commit a7c4d03

File tree

4 files changed

+86
-27
lines changed

4 files changed

+86
-27
lines changed

nix/workbench/backend/nomad-job.nix

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,10 @@ let
7676
supervisor = containerSpecs.containerPkgs.supervisor.nix-store-path;
7777
in escapeTemplate
7878
''
79-
# Store the entrypoint's envars in a file for debugging purposes.
80-
${coreutils}/bin/env > /local/entrypoint.env
79+
# Store the entrypoint's envars, "uname" output and CPU info in files for debugging purposes.
80+
${coreutils}/bin/env > /local/entrypoint.env
81+
${coreutils}/bin/uname -a > /local/entrypoint.uname
82+
${coreutils}/bin/cat /proc/cpuinfo > /local/entrypoint.cpuinfo
8183
8284
# Only needed for "exec" ?
8385
if test "''${TASK_DRIVER}" = "exec"

nix/workbench/backend/nomad.sh

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2400,6 +2400,14 @@ backend_nomad() {
24002400
backend_nomad task-file-contents "${dir}" "${node}" \
24012401
/local/entrypoint.env \
24022402
> "${dir}"/nomad/"${node}"/entrypoint.env
2403+
# Dynamically generated file with system info!
2404+
backend_nomad task-file-contents "${dir}" "${node}" \
2405+
/local/entrypoint.uname \
2406+
> "${dir}"/nomad/"${node}"/entrypoint.uname
2407+
# Dynamically generated file with cpu info!
2408+
backend_nomad task-file-contents "${dir}" "${node}" \
2409+
/local/entrypoint.cpuinfo \
2410+
> "${dir}"/nomad/"${node}"/entrypoint.cpuinfo
24032411
# Dynamically generated file with all the services/addresses found!
24042412
backend_nomad task-file-contents "${dir}" "${node}" \
24052413
/local/networking.json \
@@ -2448,6 +2456,14 @@ backend_nomad() {
24482456
backend_nomad task-file-contents "${dir}" "tracer" \
24492457
/local/entrypoint.env \
24502458
> "${dir}"/nomad/tracer/entrypoint.env
2459+
# Dynamically generated file with system info!
2460+
backend_nomad task-file-contents "${dir}" "tracer" \
2461+
/local/entrypoint.uname \
2462+
> "${dir}"/nomad/tracer/entrypoint.uname
2463+
# Dynamically generated file with cpu info!
2464+
backend_nomad task-file-contents "${dir}" "tracer" \
2465+
/local/entrypoint.cpuinfo \
2466+
> "${dir}"/nomad/tracer/entrypoint.cpuinfo
24512467
# Dynamically generated file with all the services/addresses found!
24522468
backend_nomad task-file-contents "${dir}" "tracer" \
24532469
/local/networking.json \
@@ -2667,6 +2683,7 @@ backend_nomad() {
26672683
"$@"
26682684
;;
26692685

2686+
# Generic function, tries all known runtime log file names.
26702687
task-exec-program-run-files-tar-zstd )
26712688
local usage="USAGE: wb backend pass $op RUN-DIR TASK-NAME"
26722689
local dir=${1:?$usage}; shift
@@ -2684,27 +2701,28 @@ backend_nomad() {
26842701
# tar (child): xz: Cannot exec: No such file or directory
26852702
# tar (child): Error is not recoverable: exiting now
26862703
# Code example of the files needed: https://github.com/input-output-hk/cardano-ops/blob/bench-master/bench/bench.sh#L646-L670
2687-
backend_nomad task-exec "${dir}" "${task}" \
2688-
"${bash_path}" -c \
2689-
" \
2690-
\"${find_path}\" \"${prog_dir}\" \
2691-
-mindepth 1 -maxdepth 1 -type f \
2692-
\( \
2693-
-name "exit_code" \
2694-
-o -name "stdout" \
2695-
-o -name "stderr" \
2696-
-o -name "*.prof" \
2697-
-o -name "*.eventlog" \
2698-
-o -name "*.gcstats" \
2699-
-o -name "*.log" \
2700-
-o -name "start.sh.debug" \
2701-
\) \
2702-
-printf \"%P\\n\" \
2703-
| \
2704-
\"${tar_path}\" --create \
2705-
--directory=\"${prog_dir}\" --files-from=- \
2706-
| \
2707-
\"${cat_path}\" \
2704+
backend_nomad task-exec "${dir}" "${task}" \
2705+
"${bash_path}" -c \
2706+
" \
2707+
\"${find_path}\" \"${prog_dir}\" \
2708+
-mindepth 1 -maxdepth 1 -type f \
2709+
\( \
2710+
-name "exit_code" \
2711+
-o -name "stdout" \
2712+
-o -name "stderr" \
2713+
-o -name "*.prof" \
2714+
-o -name "*.eventlog" \
2715+
-o -name "*.gcstats" \
2716+
-o -name "*.log" \
2717+
-o -name "protocol-parameters-queried.json" \
2718+
-o -name "start.sh.debug" \
2719+
\) \
2720+
-printf \"%P\\n\" \
2721+
| \
2722+
\"${tar_path}\" --create \
2723+
--directory=\"${prog_dir}\" --files-from=- \
2724+
| \
2725+
\"${cat_path}\" \
27082726
"
27092727
;;
27102728

nix/workbench/backend/nomad/cloud.sh

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,9 @@ allocate-run-nomadcloud() {
545545
# ".datacenter", ".attributes.platform.aws["instance-type"]",
546546
# ".attributes.platform.aws.placement["availability-zone"]",
547547
# ".attributes.unique.platform.aws["instance-id"]",
548-
# ".attributes.unique.platform.aws.["public-ipv4"]" and
549-
# ".attributes.unique.platform.aws.mac".
548+
# ".attributes.unique.platform.aws.["public-ipv4"]",
549+
# ".attributes.unique.platform.aws.mac", ".attributes.cpu.modelname" and
550+
# ".attributes.kernel.version".
550551
if test -z "${NOMAD_CLIENTS_FILE:-}" || ! test -f "${NOMAD_CLIENTS_FILE}"
551552
then
552553
fatal "No \"\$NOMAD_CLIENTS_FILE\". For reproducible builds provide this file that ensures cluster nodes are always placed on the same machines, or create a new one with 'wb nomad nodes' if Nomad Clients have suffered changes and runs fail with \"placement errors\""
@@ -614,7 +615,7 @@ allocate-run-nomadcloud() {
614615
" \
615616
"${NOMAD_CLIENTS_FILE}" \
616617
)
617-
local instance_id availability_zone public_ipv4 mac_address
618+
local instance_id availability_zone public_ipv4 mac_address cpu_model kernel_version
618619
instance_id="$( \
619620
echo "${actual_client}" \
620621
| \
@@ -639,6 +640,18 @@ allocate-run-nomadcloud() {
639640
jq -r \
640641
'.attributes.unique.platform.aws.mac' \
641642
)"
643+
cpu_model="$( \
644+
echo "${actual_client}" \
645+
| \
646+
jq -r \
647+
'.attributes.cpu.modelname' \
648+
)"
649+
kernel_version="$( \
650+
echo "${actual_client}" \
651+
| \
652+
jq -r \
653+
'.attributes.kernel.version' \
654+
)"
642655
# Pin the actual node to a specific Nomad Client / AWS instance
643656
# by appending below constraints to the already there group
644657
# constraints.
@@ -672,6 +685,16 @@ allocate-run-nomadcloud() {
672685
\"attribute\": \"\${attr.unique.platform.aws.mac}\" \
673686
, \"value\": \"${mac_address}\" \
674687
} \
688+
,
689+
{ \
690+
\"attribute\": \"\${attr.cpu.modelname}\" \
691+
, \"value\": \"${cpu_model}\" \
692+
} \
693+
,
694+
{ \
695+
\"attribute\": \"\${attr.kernel.version}\" \
696+
, \"value\": \"${kernel_version}\" \
697+
} \
675698
] \
676699
"
677700
jq \

nix/workbench/nomad.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,23 @@ EOL
446446
.attributes \
447447
|= \
448448
{ \
449-
\"os\": { \
449+
\"cpu\": { \
450+
\"arch\": \$attrs[\"cpu.arch\"] \
451+
, \"frequency\": \$attrs[\"cpu.frequency\"] \
452+
, \"modelname\": \$attrs[\"cpu.modelname\"] \
453+
, \"numcores\": \$attrs[\"cpu.numcores\"] \
454+
, \"reservablecores\": \$attrs[\"cpu.reservablecores\"] \
455+
, \"totalcompute\": \$attrs[\"cpu.totalcompute\"] \
456+
} \
457+
, \"kernel\": { \
458+
\"arch\": \$attrs[\"kernel.arch\"] \
459+
, \"name\": \$attrs[\"kernel.name\"] \
460+
, \"version\": \$attrs[\"kernel.version\"] \
461+
} \
462+
, \"memory\": { \
463+
\"totalbytes\": \$attrs[\"memory.totalbytes\"] \
464+
} \
465+
, \"os\": { \
450466
\"name\": \$attrs[\"os.name\"] \
451467
, \"version\": \$attrs[\"os.version\"] \
452468
} \

0 commit comments

Comments
 (0)