refactor(ingest): Update run-ingest.sh for new semantic pipeline

This commit is contained in:
Matteo Cherubini 2026-06-18 15:26:53 +02:00
parent fdd7e1e92b
commit e396bc93e2

View file

@ -1,13 +1,17 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# ============================================================================= # =============================================================================
# skills/ingest/scripts/run-ingest.sh # skills/ingest/scripts/run-ingest.sh
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout. # Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every # checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean. # performs every deterministic step — index, log, scoped lint, PR.
# #
# run-ingest.sh <genome_name> [manifest_path] # run-ingest.sh <genome_name> [manifest_path]
# #
# Emits a single JSON result line on stdout for n8n to parse. # Emits a single JSON result line on stdout for n8n to parse.
#
# Every page listed in the manifest must exist on disk before we trust the run.
# Apart from that existence check, this script is unchanged: the manifest the
# semantic phase now produces is already in this script's expected schema.
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
@ -57,6 +61,13 @@ mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .p
all_paths=( "${created_paths[@]}" "${modified_paths[@]}" ) all_paths=( "${created_paths[@]}" "${modified_paths[@]}" )
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported" [[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"
# --- Manifest/disk consistency check. The page files are written by the
# semantic phase (ingest-semantic.py), not by this script, so confirm that each
# path the manifest reports (created or modified) is a regular file on disk
# before relying on the run. Any path that is missing is reported through
# fail, which surfaces drift between the manifest and what was written. ---
for _p in "${all_paths[@]}"; do
  if [[ ! -f "$_p" ]]; then
    fail "pages" "manifest lists a file not present on disk: ${_p}"
  fi
done
conflict_label="" conflict_label=""
# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit). # NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).