From 3c9b24c3b228f14bf72425867e4b229faadbdb39 Mon Sep 17 00:00:00 2001
From: Matteo Cherubini
Date: Sat, 27 Jun 2026 17:17:36 +0200
Subject: [PATCH] feature: Make ingest log entries idempotent with stable run_id

log-append.sh accepts an optional --run-id and skips the append (exit 0)
when that run_id is already present in the log file. run-ingest.sh derives
a deterministic run_id from the raw source path and its SHA-256 and passes
it through, so re-running the wrapper on the same input no longer
duplicates the log entry.

---
 skills/ingest/scripts/log-append.sh |  9 ++++++++-
 skills/ingest/scripts/run-ingest.sh |  6 +++++-
 tests/scripts.bats                  | 14 ++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/skills/ingest/scripts/log-append.sh b/skills/ingest/scripts/log-append.sh
index b3108a2..8c6e40a 100755
--- a/skills/ingest/scripts/log-append.sh
+++ b/skills/ingest/scripts/log-append.sh
@@ -21,6 +21,7 @@ while [[ $# -gt 0 ]]; do
     --context) context="$2"; shift 2 ;;
     --output) output="$2"; shift 2 ;;
     --reasoning) reasoning="$2"; shift 2 ;;
+    --run-id) run_id_arg="$2"; shift 2 ;;
     *) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
   esac
 done
@@ -35,9 +36,15 @@ esac
 
 [[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
 
-run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')"
+run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
 today="$(date +%Y-%m-%d)"
 
+if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
+  echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
+  echo "run_id=${run_id}"
+  exit 0
+fi
+
 {
   printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
   printf -- '- run_id: `%s`\n' "$run_id"
diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh
index 07197b2..1b90882 100755
--- a/skills/ingest/scripts/run-ingest.sh
+++ b/skills/ingest/scripts/run-ingest.sh
@@ -107,8 +107,12 @@ done < <(jq -r '.pages[] | select(.status=="created")
   | [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
 
 # --- 2. log entry ---
+# Stable run_id: deterministic from the input (raw path + content hash; "unknown" if hashing fails).
+# Survives wrapper re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
+src_sha="$( { sha256sum "$raw_source" 2>/dev/null || echo unknown; } | cut -d' ' -f1 )"
+run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
 out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
-bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \
+bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
   --context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
   || fail "log" "log-append failed"
 
diff --git a/tests/scripts.bats b/tests/scripts.bats
index 19f758e..cef17b7 100644
--- a/tests/scripts.bats
+++ b/tests/scripts.bats
@@ -86,3 +86,17 @@ EOF
   python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
   grep -q "^last_updated: $(date +%F)$" wiki/index.md
 }
+
+@test "log-append: dedup on stable run_id prevents duplicate entries" {
+  G="$(make_fixture_genome)"; cd "$G"
+  stable_id="test-stable-run-id-001"
+  run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
+    --context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
+  [ "$status" -eq 0 ]
+  run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
+    --context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"already present"* ]]
+  count="$(grep -cF "run_id: \`${stable_id}\`" wiki/log.md || true)"
+  [ "$count" -eq 1 ]
+}