#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-prune.sh
# Symmetric companion to run-ingest: prune source pages whose raw source no
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
# race. Removes ONLY the pages it derived plus their index entries, commits
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
# operator approves the deletion; principle 2). Never deletes of its own accord.
#
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
# prune`) has already taken the per-genome lock and done clean_start, exactly
# like `pi ingest` — so this script does neither.
#
#   run-prune.sh <genome>
#
# Emits a single JSON result line on stdout for n8n to parse.
# =============================================================================

# Strict mode: abort on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail

# The genome name is a required positional argument; ${1:?...} aborts with the
# usage text when it is missing or empty.
# NOTE(review): genome is not read again in this script; it is kept so the
# one-argument calling contract stays enforced.
# shellcheck disable=SC2034
genome="${1:?usage: run-prune.sh <genome>}"

# Absolute directory holding this script; the sibling helpers index-append.py
# and open-pr.sh are invoked from it. CDPATH is cleared for the cd so that an
# inherited CDPATH can neither redirect a relative dirname nor make cd print
# the directory into the captured output; `--` guards a leading dash.
SCRIPTS="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

#######################################
# Emit a machine-readable error result and abort the script.
# Arguments:
#   $1 - stage label identifying where the failure happened
#   $2 - human-readable reason
# Outputs:
#   One compact JSON line on stdout:
#   {"status":"error","stage":<stage>,"reason":<reason>}
# Returns:
#   Never returns; exits the script with status 1.
#######################################
fail() {
  local stage="$1"
  local reason="$2"
  # jq does the JSON escaping, so arbitrary text in the reason stays valid JSON.
  jq -nc --arg stage "$stage" --arg reason "$reason" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# --- dependency checks ---
# jq builds every JSON result, including the one fail() emits, so its absence
# has to be reported with a hand-written JSON line. The line carries the same
# keys (status, stage, reason) as fail() output so the consumer sees one schema.
if ! command -v jq >/dev/null 2>&1; then
  printf '%s\n' '{"status":"error","stage":"deps","reason":"jq missing"}'
  exit 1
fi

# python3 runs index-append.py when index entries are removed.
if ! command -v python3 >/dev/null 2>&1; then
  fail "deps" "python3 missing (needed by index-append.py)"
fi

# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when its frontmatter source_path points at
# a raw file that no longer exists. Legacy pages without source_path are ignored.

#######################################
# Print the value of the first `source_path:` line of a page.
# The value is normalised so that a live page is not mistaken for an orphan:
# carriage returns are dropped, trailing whitespace is trimmed, and one pair of
# matching surrounding quotes is removed.
# NOTE(review): orphan-wiki.sh is not visible here; if it does not normalise
# the same way, it may list pages this script declines to prune. That is the
# safe direction (never deletes more than detection reported).
# Arguments:
#   $1 - path of the page to inspect
# Outputs:
#   The normalised value on stdout (an empty line when there is no such key).
# Returns:
#   0 on success, sed's status if the page cannot be read.
#######################################
read_source_path() {
  local value
  # sed quits at the first match, so no downstream `head` is needed and no
  # pipeline stage can be killed by SIGPIPE under `set -o pipefail`.
  value="$(sed -n '/^source_path:/{s/^source_path:[[:space:]]*//p;q;}' -- "$1")" \
    || return
  value="${value//$'\r'/}"
  # Trim trailing whitespace: strip the suffix that follows the last
  # non-space character.
  value="${value%"${value##*[![:space:]]}"}"
  case "$value" in
    \"*\" | \'*\') value="${value:1:${#value}-2}" ;;
  esac
  printf '%s\n' "$value"
}

declare -a ORPH=()

#######################################
# Append to ORPH every wiki/sources/*.md page whose source_path names a file
# that does not exist (relative to the current directory).
# Globals:
#   ORPH (appended)
# Returns:
#   0; an unreadable page ends the script through fail().
#######################################
collect_orphans() {
  local page sp
  for page in wiki/sources/*.md; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$page" ]] || continue
    # Report an unreadable page as a JSON error instead of letting `set -e`
    # end the run without a result line.
    sp="$(read_source_path "$page")" || fail "scan" "cannot read ${page}"
    # Legacy page without source_path: ignored.
    [[ -n "$sp" ]] || continue
    [[ -f "$sp" ]] || ORPH+=("$page")
  done
}

collect_orphans

# Nothing to prune: report success with an empty result and stop here, before
# any page is removed or any PR is opened.
if (( ${#ORPH[@]} == 0 )); then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi

# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
declare -a PRUNED=()

#######################################
# Delete every page listed in ORPH and drop its index entry.
# Globals:
#   ORPH    (read)     - page paths to remove, e.g. wiki/sources/foo.md
#   SCRIPTS (read)     - directory containing index-append.py
#   PRUNED  (appended) - link names of removed pages, e.g. sources/foo
# Returns:
#   0 when every page was handled; any refusal or failure ends the script
#   through fail(), which prints a JSON error line and exits 1.
#######################################
prune_orphans() {
  local page link
  for page in "${ORPH[@]}"; do
    # Guard rails: only paths under wiki/ and without any ".." are eligible.
    [[ "$page" == wiki/* ]] \
      || fail "prune" "refusing to remove outside wiki/: ${page}"
    [[ "$page" != *..* ]] \
      || fail "prune" "path traversal in page: ${page}"
    # The page may have vanished since it was listed; nothing to do then.
    [[ -f "$page" ]] || continue
    # Report a failed removal as JSON; a bare rm failure under `set -e` would
    # end the run without the result line the caller parses.
    rm -f -- "$page" || fail "prune" "could not remove ${page}"
    link="${page#wiki/}"
    link="${link%.md}" # e.g. sources/foo
    python3 "${SCRIPTS}/index-append.py" --remove "$link" \
      || fail "index" "index-append --remove failed for ${link}"
    PRUNED+=("$link")
  done
}

prune_orphans

# --- assemble the PR body ---
date_tag="$(date +%F)"

# Temp file for the PR description; removed on every exit path. A mktemp
# failure is reported as JSON instead of a silent `set -e` abort.
body="$(mktemp)" || fail "body" "mktemp failed"
trap 'rm -f -- "$body"' EXIT

#######################################
# Print the Markdown PR description.
# Arguments:
#   $@ - link names of the removed pages (e.g. sources/foo)
# Outputs:
#   A heading, a two-line explanation and a one-column table with one
#   `wiki/<link>.md` row per argument, on stdout.
#######################################
render_pr_body() {
  local link
  # Quoted delimiter: the backticks below are literal Markdown, not command
  # substitutions.
  cat <<'EOF'
## Prune orphaned sources

These source pages reference a `source_path` whose raw file no longer exists
in `raw/`. Removing them keeps the wiki in sync with git (the source of truth).

| Removed page |
|--------------|
EOF
  for link in "$@"; do
    printf '| `wiki/%s.md` |\n' "$link"
  done
}

render_pr_body "${PRUNED[@]}" > "$body" \
  || fail "body" "could not write PR body"

# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
branch="chore/prune-orphans-${date_tag}"

#######################################
# Print the URL from the first "PR opened: <url>" line of the given text.
# Arguments:
#   $1 - captured output of open-pr.sh
# Outputs:
#   The URL on stdout, or nothing when no such line exists.
#######################################
extract_pr_url() {
  # sed stops at the first match, so no `head` stage is needed.
  sed -n '/^PR opened: /{s/^PR opened: //p;q;}' <<<"$1"
}

# Run the helper with stdout and stderr captured together. Its exit status is
# recorded rather than allowed to trip `set -e`, so a failed PR can still be
# reported in the JSON result.
pr_rc=0
pr_out="$(
  bash "${SCRIPTS}/open-pr.sh" \
    --branch "$branch" \
    --title "chore: prune ${#PRUNED[@]} orphaned source(s)" \
    --body-file "$body" \
    --base "${INGEST_BASE:-main}" 2>&1
)" || pr_rc=$?

pr_url="$(extract_pr_url "$pr_out")"

# --- result line for n8n ---

#######################################
# Print the final JSON result line.
# Globals (all read):
#   PRUNED - link names of the removed pages
#   pr_rc  - exit status of open-pr.sh
#   pr_url - URL taken from its output (may be empty)
#   pr_out - its full captured output
# Outputs:
#   One compact JSON object on stdout with the keys status ("ok" or
#   "pr_failed"), count, pr_url, pruned and detail.
#######################################
emit_result() {
  local status="pr_failed"
  local pruned_json="[]"
  if (( pr_rc == 0 )); then
    status="ok"
  fi
  # With an empty array `printf '%s\n'` would still emit one blank line and the
  # result would be [""], so only build the list when there is something in it.
  if (( ${#PRUNED[@]} > 0 )); then
    pruned_json="$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s -c .)"
  fi
  jq -nc \
    --arg status "$status" \
    --argjson count "${#PRUNED[@]}" \
    --arg pr_url "$pr_url" \
    --arg detail "$pr_out" \
    --argjson pruned "$pruned_json" \
    '{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'
}

emit_result

# The exit status mirrors the PR outcome after the result line has been printed.
(( pr_rc == 0 )) || exit 1