#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-ingest.sh
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout.
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean.
#
#   run-ingest.sh <genome> [manifest_path]
#
#   genome         required; forwarded unchanged as the first argument of
#                  scoped-lint.sh (its meaning is defined by that script)
#   manifest_path  optional; defaults to .ingest-manifest.json (relative to cwd)
#
# Emits a single JSON result line on stdout for n8n to parse.
# =============================================================================
set -euo pipefail

# fail STAGE REASON
# Print a one-line JSON error object {status:"error", stage, reason} on stdout
# (the channel n8n parses) and terminate the script with exit status 1.
fail() {
  jq -nc --arg stage "$1" --arg reason "$2" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# jq is checked first and by hand: fail() itself needs jq to build its JSON.
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","reason":"jq missing"}'; exit 1; }

# --- arguments ---
# A missing genome is reported through fail() so the caller still receives a JSON
# result line instead of only a shell diagnostic on stderr.
genome="${1:-}"
manifest="${2:-.ingest-manifest.json}"
if [[ -z "$genome" ]]; then
  printf '%s\n' "usage: run-ingest.sh <genome> [manifest_path]" >&2
  fail "usage" "usage: run-ingest.sh <genome> [manifest_path]"
fi

# Directory holding this script; the sibling helper scripts are resolved against it.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"
[[ -f "$manifest" ]] || fail "manifest" "manifest not found: ${manifest}"

# Reject an unparsable or non-object manifest up front; otherwise the first jq
# read would abort the script under `set -e` without emitting a result line.
jq -e 'type == "object"' "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "manifest is not a JSON object: ${manifest}"

# --- read manifest scalars ---
raw_source="$(jq -r '.raw_source' "$manifest")"
# model name comes from the orchestrator/wrapper (INGEST_MODEL); the agent cannot know its
# own tag, so we do not trust a self-reported manifest field. Fall back only if unset.
model="${INGEST_MODEL:-$(jq -r '.model // "unknown"' "$manifest")}"
reasoning="$(jq -r '.reasoning // "Ingest."' "$manifest")"
pr_summary="$(jq -r '.pr_summary // "Ingest."' "$manifest")"
contradictions="$(jq -r '.contradictions // "None"' "$manifest")"

[[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"
slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")" \
  || fail "slug" "empty or invalid slug for ${raw_source}"
# Also reject an empty slug that slug.sh returned with a zero exit status.
[[ -n "$slug" ]] || fail "slug" "empty or invalid slug for ${raw_source}"

# --- collect touched paths ---
mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest")
mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .path' "$manifest")
# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on bash
# releases older than 4.4, where expanding an empty array counts as unbound.
all_paths=(
  ${created_paths[@]+"${created_paths[@]}"}
  ${modified_paths[@]+"${modified_paths[@]}"}
)
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"

conflict_label=""

# NOTE: no rollback. Steps below mutate the working tree in order (index → log → commit).
# All are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway,
# nothing is committed (open-pr is the only committer) — the operator re-runs, or inspects
# wiki/ if log-append already wrote a line. The manifest is removed only on full success.

# --- 1. index entries (created pages only), inserted in order ---
# Fields are joined with U+001F (unit separator) rather than a tab. Tab is an IFS
# *whitespace* character, so `read` would collapse the two adjacent tabs around an
# empty summary and shift maturity into the summary slot. A non-whitespace
# delimiter preserves empty fields. Embedded tabs/newlines/separators inside a
# field are flattened to a single space so each page stays on one record.
while IFS=$'\x1f' read -r page_path summary maturity; do
  [[ -n "$page_path" ]] || continue
  link="${page_path#wiki/}"
  link="${link%.md}"                     # e.g. sources/foo
  folder="${link%%/*}"
  case "$folder" in
    entities) section="Entities" ;;
    concepts) section="Concepts" ;;
    queries)
      if [[ "$link" == queries/conflict-* ]]; then
        section="Conflicts"
        conflict_label="CONFLICT"        # later adds a label to the PR
      else
        section="Queries"
      fi
      ;;
    *) section="Sources" ;;              # "sources" and any unrecognized folder
  esac
  if [[ "$section" == "Conflicts" ]]; then
    entry="- [[${link}]]"                # conflicts: slug only
  else
    entry="- [[${link}]] — ${summary} \`maturity: ${maturity}\`"
  fi
  python3 "${SCRIPTS}/index-append.py" --section "$section" --entry "$entry" \
    || fail "index" "index-append failed for ${page_path}"
done < <(jq -r '
  .pages[]
  | select(.status == "created")
  | [(.path // ""), (.summary // ""), (.maturity // "draft")]
  | map(tostring | gsub("[\t\r\n\u001f]+"; " "))
  | join("\u001f")
' "$manifest")

# --- 2. log entry ---
# Every reported page (created or modified) as a comma-separated list of wiki links.
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")" \
  || fail "log" "could not build the page list from the manifest"
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \
  --context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
  || fail "log" "log-append failed"

# --- 3. scoped lint (capture findings for the PR; never aborts the run) ---
lint_rc=0
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" || lint_rc=$?

# --- 4. assemble the PR body (manifest tables + lint results) ---
# The table is rendered before the temp file exists so a jq failure is reported
# through fail() rather than aborting silently inside the redirected group.
pages_table="$(jq -r '.pages[] | "| `\(.path)` | \(.status) | \(.maturity // "draft") |"' "$manifest")" \
  || fail "pr-body" "could not render the pages table from the manifest"
body="$(mktemp)" || fail "pr-body" "mktemp failed"
# Remove the temp file on every exit path, including fail() and `set -e` aborts.
trap 'rm -f -- "$body"' EXIT
{
  # printf instead of echo: manifest text is arbitrary and may start with "-n" etc.
  printf '%s\n' "## Summary" "$pr_summary" ""
  printf '%s\n' "## Pages" "| Path | Status | Maturity |" "|------|--------|----------|"
  printf '%s\n' "$pages_table" ""
  printf '%s\n' "## Contradictions" "$contradictions" ""
  printf '%s\n' "## Scoped Lint (post-ingest)" '```' "$lint_out" '```'
} > "$body"

# --- 5. open the PR ---
pr_args=( --slug "$slug" --title "feat: ingest ${slug}" --body-file "$body" )
if [[ -n "$conflict_label" ]]; then
  pr_args+=( --label "$conflict_label" )
fi
# A failing open-pr.sh must not abort here: its status and output are reported
# in the final result line.
pr_rc=0
pr_out="$( bash "${SCRIPTS}/open-pr.sh" "${pr_args[@]}" 2>&1 )" || pr_rc=$?
# Extract the PR URL: the remainder of the first line of open-pr.sh's output
# that begins with "PR opened: " (empty when no such line exists).
pr_url=""
while IFS= read -r out_line; do
  if [[ "$out_line" == "PR opened: "* ]]; then
    pr_url="${out_line#"PR opened: "}"
    break
  fi
done <<< "$pr_out"
rm -f "$body"

# --- final result line for n8n ---
if [[ $pr_rc -eq 0 ]]; then
  result_status="ok"
else
  result_status="pr_failed"
fi
if [[ $lint_rc -eq 0 ]]; then
  lint_clean_json=true
else
  lint_clean_json=false
fi
if [[ -n "$conflict_label" ]]; then
  conflict_json=true
else
  conflict_json=false
fi

jq -nc \
  --arg status "$result_status" \
  --arg slug "$slug" \
  --arg pr_url "$pr_url" \
  --argjson lint_clean "$lint_clean_json" \
  --argjson conflict "$conflict_json" \
  --arg detail "$pr_out" \
  '{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'

# The manifest is a single file overwritten by each pi run (not accumulating), but on full
# success we remove it so a stale manifest can never be re-processed by mistake.
# A failed PR keeps the manifest in place and exits non-zero.
[[ $pr_rc -eq 0 ]] || exit 1
rm -f "$manifest"