feature: Make ingest log entries idempotent with stable run_id

This commit is contained in:
Matteo Cherubini 2026-06-27 17:17:36 +02:00
parent 13d08866ef
commit 3c9b24c3b2
3 changed files with 27 additions and 2 deletions

View file

@ -21,6 +21,7 @@ while [[ $# -gt 0 ]]; do
--context) context="$2"; shift 2 ;;
--output) output="$2"; shift 2 ;;
--reasoning) reasoning="$2"; shift 2 ;;
--run-id) run_id_arg="$2"; shift 2 ;;
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
esac
done
@ -35,9 +36,15 @@ esac
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')"
run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
today="$(date +%Y-%m-%d)"
# Idempotency guard: search the log for this run_id as a literal string (-F), in the
# same "run_id: `<id>`" form that the entry writer below prints. -q keeps grep silent
# and 2>/dev/null hides its error messages.
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
# Already logged: report the skip on stderr, still print run_id=<id> on stdout,
# then exit 0 (success) without appending anything.
echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
echo "run_id=${run_id}"
exit 0
fi
{
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
printf -- '- run_id: `%s`\n' "$run_id"

View file

@ -107,8 +107,12 @@ done < <(jq -r '.pages[] | select(.status=="created")
| [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
# --- 2. log entry ---
# Stable run_id: deterministic from the input (raw path + content hash). Survives wrapper
# re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
#
# sha256sum's exit status is tested directly rather than through a pipeline: when it is
# piped into cut, the pipeline reports cut's status unless pipefail is on, so the
# "unknown" fallback would never fire and an unreadable source would silently hash as
# an empty string.
if src_sha="$(sha256sum -- "$raw_source" 2>/dev/null)" && [[ -n "$src_sha" ]]; then
  # sha256sum prints "<digest>  <file>"; keep only the digest (text before the first space).
  src_sha="${src_sha%% *}"
else
  src_sha="unknown"
fi
# run_id = first 16 hex characters of sha256("<raw path>:<content hash>").
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \
bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|| fail "log" "log-append failed"

View file

@ -86,3 +86,17 @@ EOF
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
grep -q "^last_updated: $(date +%F)$" wiki/index.md
}
# Calling log-append.sh twice with the same --run-id must succeed both times while
# leaving exactly one matching entry in wiki/log.md.
@test "log-append: dedup on stable run_id prevents duplicate entries" {
# make_fixture_genome is defined outside this view — presumably it creates a fixture
# tree (including wiki/log.md) and prints its path; confirm against the helper.
G="$(make_fixture_genome)"; cd "$G"
stable_id="test-stable-run-id-001"
# First call with this run_id: must exit 0.
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
[ "$status" -eq 0 ]
# Second call with identical arguments: must also exit 0 and report the skip.
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
[ "$status" -eq 0 ]
# The skip notice is written to stderr by log-append.sh; this check relies on `run`
# capturing stderr in $output as well as stdout.
[[ "$output" == *"already present"* ]]
# grep -c exits non-zero when it counts zero lines; `|| true` keeps that from failing
# the substitution so the numeric comparison below is what decides the test.
count="$(grep -cF "run_id: \`${stable_id}\`" wiki/log.md || true)"
[ "$count" -eq 1 ]
}