#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-prune.sh
# Symmetric companion to run-ingest: prune source pages whose raw source no
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
# race. Removes ONLY the pages it derived plus their index entries, commits
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
# operator approves the deletion; principle 2). Never deletes of its own accord.
#
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
# prune`) has already taken the per-genome lock and done clean_start, exactly
# like `pi ingest` — so this script does neither.
#
#   run-prune.sh <genome>
#
# Emits a single JSON result line on stdout for n8n to parse:
#   success : {status:"ok", count, pr_url, pruned:[...], detail}
#   no-op   : {status:"ok", count:0, pruned:[], detail}
#   PR fail : {status:"pr_failed", ...}            (exit 1)
#   error   : {status:"error", stage, reason}      (exit 1)
#
# Environment:
#   INGEST_BASE  base branch passed to open-pr.sh (default: main)
# =============================================================================
set -euo pipefail

# The genome argument is mandatory (interface parity with the wrapper); nothing
# below reads it, because the script operates on the current working directory.
# shellcheck disable=SC2034
genome="${1:?usage: run-prune.sh <genome>}"

# Directory holding this script and its sibling helpers
# (index-append.py, open-pr.sh).
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

#######################################
# Emit a JSON error object on stdout and abort the script.
# Arguments: $1 - stage name, $2 - human-readable reason
# Outputs:   {"status":"error","stage":...,"reason":...} on stdout
# Returns:   never; exits 1
#######################################
fail() {
  jq -nc --arg stage "$1" --arg reason "$2" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# jq is needed by fail() itself, so its absence is reported with a hand-written
# JSON line that carries the same keys as every other error object.
if ! command -v jq >/dev/null 2>&1; then
  printf '%s\n' '{"status":"error","stage":"deps","reason":"jq missing"}'
  exit 1
fi
command -v python3 >/dev/null 2>&1 \
  || fail "deps" "python3 missing (needed by index-append.py)"

# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when its source_path (the first line
# starting with `source_path:`) points at a raw file that no longer exists.
# Legacy pages without source_path are ignored.
# NOTE(review): only CR characters are stripped from the value; surrounding
# quotes or trailing blanks would make an existing file look missing — confirm
# that pages never carry them (and that orphan-wiki.sh agrees).
declare -a ORPH=()
for page in wiki/sources/*.md; do
  # Skips the literal pattern when the glob matches nothing, and anything that
  # is not a regular file (sed could not read it anyway).
  [[ -f "$page" ]] || continue

  # sed quits at the first match, so no `| head` is needed and there is no
  # pipeline that could trip pipefail; a read error is reported as JSON instead
  # of killing the script silently under `set -e`.
  sp="$(sed -n '/^source_path:/{s/^source_path:[[:space:]]*//p;q;}' "$page")" \
    || fail "scan" "cannot read ${page}"
  sp="${sp//$'\r'/}"

  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

if (( ${#ORPH[@]} == 0 )); then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi

# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
declare -a PRUNED=()
for page in "${ORPH[@]}"; do
  [[ "$page" == wiki/* ]] \
    || fail "prune" "refusing to remove outside wiki/: ${page}"
  [[ "$page" != *..* ]] \
    || fail "prune" "path traversal in page: ${page}"

  # The page may have disappeared since the scan; nothing to do in that case.
  [[ -f "$page" ]] || continue
  rm -f -- "$page"

  link="${page#wiki/}"
  link="${link%.md}" # e.g. sources/foo
  python3 "${SCRIPTS}/index-append.py" --remove "$link" \
    || fail "index" "index-append --remove failed for ${link}"
  PRUNED+=("$link")
done

# Every orphan vanished between scan and removal: report a no-op instead of
# opening a PR that removes nothing.
if (( ${#PRUNED[@]} == 0 )); then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"orphans vanished before removal"}'
  exit 0
fi

# --- assemble the PR body ---
date_tag="$(date +%F)"
body="$(mktemp)" || fail "body" "mktemp failed"
trap 'rm -f "$body"' EXIT
{
  cat <<'EOF'
## Prune orphaned sources

These source pages reference a `source_path` whose raw file no longer exists
in `raw/`. Removing them keeps the wiki in sync with git (the source of truth).

| Removed page |
|--------------|
EOF
  # One table row per removed page; printf repeats the format for each element.
  printf '| `wiki/%s.md` |\n' "${PRUNED[@]}"
} > "$body"

# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
branch="chore/prune-orphans-${date_tag}"
pr_rc=0
pr_out="$(
  bash "${SCRIPTS}/open-pr.sh" \
    --branch "$branch" \
    --title "chore: prune ${#PRUNED[@]} orphaned source(s)" \
    --body-file "$body" \
    --base "${INGEST_BASE:-main}" 2>&1
)" || pr_rc=$?

# open-pr.sh output is scanned for a "PR opened: <url>" line; pr_url stays
# empty when no such line is present.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- result line for n8n ---
if (( pr_rc == 0 )); then
  status="ok"
else
  status="pr_failed"
fi
pruned_json="$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s .)"

jq -nc \
  --arg status "$status" \
  --argjson count "${#PRUNED[@]}" \
  --arg pr_url "$pr_url" \
  --arg detail "$pr_out" \
  --argjson pruned "$pruned_json" \
  '{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'

(( pr_rc == 0 )) || exit 1