refactor(ingest): Update run-ingest.sh for new semantic pipeline

This commit is contained in:
Matteo Cherubini 2026-06-18 15:26:53 +02:00
parent fdd7e1e92b
commit e396bc93e2

View file

@ -1,13 +1,17 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-ingest.sh
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout.
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean.
# Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome
# checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and
# performs every deterministic step — index, log, scoped lint, PR.
#
# run-ingest.sh <genome_name> [manifest_path]
#
# Emits a single JSON result line on stdout for n8n to parse.
#
# New in this version: every page listed in the manifest must exist on disk
# before we trust the run.
# Everything else is unchanged: the manifest the semantic phase now produces is
# already in this script's expected schema.
# =============================================================================
# Strict mode: exit on an unhandled command failure (-e), treat unset variables
# as errors (-u), and make a pipeline fail if any stage fails (pipefail).
set -euo pipefail
@ -57,6 +61,13 @@ mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .p
# Combine created and modified pages into a single list; a manifest that
# reports no pages at all is handed to fail under the "manifest" label.
all_paths=( "${created_paths[@]}" )
all_paths+=( "${modified_paths[@]}" )
if (( ${#all_paths[@]} == 0 )); then
  fail "manifest" "no pages reported"
fi

# --- Page-existence check. The semantic phase (ingest-semantic.py) is what
# writes the page files, so confirm that each path the manifest lists is a
# regular file on disk before trusting the run. This catches drift between
# what the manifest claims and what was actually written. ---
for _p in "${all_paths[@]}"; do
  if [[ ! -f "$_p" ]]; then
    fail "pages" "manifest lists a file not present on disk: ${_p}"
  fi
done

conflict_label=""
# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).