diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh index 5a9b738..9ccd995 100644 --- a/skills/ingest/scripts/run-ingest.sh +++ b/skills/ingest/scripts/run-ingest.sh @@ -1,13 +1,17 @@ #!/usr/bin/env bash # ============================================================================= # skills/ingest/scripts/run-ingest.sh -# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout. -# Consumes .ingest-manifest.json (written by the ingest skill) and performs every -# deterministic step — index, log, scoped lint, PR — so pi's context stays clean. +# Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome +# checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and +# performs every deterministic step — index, log, scoped lint, PR. # # run-ingest.sh [manifest_path] # # Emits a single JSON result line on stdout for n8n to parse. +# +# New check: every manifest page must exist on disk before we trust the run. +# Everything else is unchanged: the manifest the semantic phase now produces is +# already in this script's expected schema. # ============================================================================= set -euo pipefail @@ -57,6 +61,13 @@ mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .p all_paths=( "${created_paths[@]}" "${modified_paths[@]}" ) [[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported" +# --- The semantic phase (ingest-semantic.py) writes the files; verify +# every manifest page actually exists on disk before trusting the run. Catches any +# drift between what the manifest claims and what was really written. --- +for _p in "${all_paths[@]}"; do + [[ -f "$_p" ]] || fail "pages" "manifest lists a file not present on disk: ${_p}" +done + conflict_label="" # NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).