From a1d14bd2e1eec52853896a5ca477be0134c14b55 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH] docs: Add developer notes on code smells and design tradeoffs --- lib/structure.sh | 9 +++++++++ scripts/add-genome.sh | 9 +++++++++ skills/ingest/scripts/run-ingest.sh | 12 ++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/structure.sh b/lib/structure.sh index f94bba1..e1d983a 100644 --- a/lib/structure.sh +++ b/lib/structure.sh @@ -9,6 +9,14 @@ # structure check can never drift apart. # ============================================================================= +# NOTE — Return-code smell +# Several functions in this file (and in lint.sh) use the return code as a +# numeric counter (e.g. return $missing). This is a known smell: exit codes +# wrap at 256 and conflate "count of problems" with "exit status". At the +# current scale (<10 problems per run) the wrap-around risk is zero, so we +# accept it pragmatically. If counts ever grow, switch to stdout counters +# or dedicated global variables. + # Canonical directories every genome must have. # raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned, # contract-bound layout the lint, the index sections and the ingest skill depend on. @@ -43,6 +51,7 @@ structure_report() { info "extra (not in canon): ${d}" done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null) + # NOTE: return $missing is a smell — see header. Kept for compatibility. return $missing } diff --git a/scripts/add-genome.sh b/scripts/add-genome.sh index 53cab5d..dc4dd6c 100644 --- a/scripts/add-genome.sh +++ b/scripts/add-genome.sh @@ -24,6 +24,15 @@ step "Adding New Genome: ${GENOME_NAME}" # Build a 3-field registry entry (linked_repo may be empty) GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}") +# NOTE — Maintenance smell +# We source setup-genomes.sh as a library/orchestrator hybrid. This works because: +# - registry.sh is guarded against double-source (idempotent guard) +# - setup-genomes.sh checks WORK_DIR before re-sourcing registry.sh +# - GENOMES is built locally just before the source, so it is not clobbered +# However, sourcing an orchestration script as a library makes the control flow +# harder to trace. If this grows, refactor into a shared function (e.g. setup_one_genome) +# called by both add-genome.sh and setup-genomes.sh. + source "scripts/setup-genomes.sh" success "Genome '${GENOME_NAME}' added and linked successfully!" diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh index bc68bcf..2ffe95d 100644 --- a/skills/ingest/scripts/run-ingest.sh +++ b/skills/ingest/scripts/run-ingest.sh @@ -59,10 +59,14 @@ all_paths=( "${created_paths[@]}" "${modified_paths[@]}" ) conflict_label="" -# NOTE: no rollback. Steps below mutate the working tree in order (index → log → commit). -# All are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway, -# nothing is committed (open-pr is the only committer) — the operator re-runs, or inspects -# wiki/ if log-append already wrote a line. The manifest is removed only on full success. +# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit). +# All steps are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway, +# nothing is committed (open-pr is the only committer) — the operator re-runs, or checks +# wiki/ if log-append has already written a line. The manifest is removed only upon full success. +# log-append is not idempotent: a re-run after a post-log failure produces +# duplicate lines. This is accepted by design (append-only ledger, no rollback). If this +# becomes a nuisance tomorrow, add a dedup check on run_id in log-append.sh +# (grep for run_id before appending). Manual recovery: grep for run_id in wiki/log.md. # --- 1. index entries (created pages only), inserted in order --- while IFS=$'\t' read -r path summary maturity; do