diff --git a/deploy/vm101/n8n-pi-wrap b/deploy/vm101/n8n-pi-wrap
index 84fccda..fc5321f 100755
--- a/deploy/vm101/n8n-pi-wrap
+++ b/deploy/vm101/n8n-pi-wrap
@@ -11,6 +11,13 @@ case "$cmd" in
     # lines 54/59), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
     exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
     ;;
+  "pi orphan-wiki "*)
+    genome="${cmd#pi orphan-wiki }"
+    case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
+    logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
+    set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
+    exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
+    ;;
   "pi run")
     logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
     prompt=$(cat)
diff --git a/skills/ingest/scripts/orphan-wiki.sh b/skills/ingest/scripts/orphan-wiki.sh
new file mode 100755
index 0000000..686d167
--- /dev/null
+++ b/skills/ingest/scripts/orphan-wiki.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# =============================================================================
+# orphan-wiki.sh — find source pages whose raw source no longer exists.
+# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
+# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
+# Read-only: no lock needed (same policy as pending-raw).
+#
+# Usage: orphan-wiki.sh <genome>
+# Env:   GENOMES_ROOT  parent directory of the genomes (default: ${HOME}/genomes)
+#        KG_LIB_DIR    directory holding clean-start.sh
+#                      (default: ${HOME}/knowledge-genome-orchestrator/lib)
+# Exit:  0 with a status "ok" envelope; non-zero on error (with a status
+#        "error" envelope wherever the script detects the failure itself).
+# =============================================================================
+set -euo pipefail
+
+genome="${1:?usage: orphan-wiki.sh <genome>}"
+base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
+cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
+
+# Clean start on the configured base (single source of truth in lib/clean-start.sh).
+: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
+source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
+  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
+clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
+
+declare -a ORPH=()
+for page in wiki/sources/*.md; do
+  [[ -e "$page" ]] || continue
+  # First source_path line only. sed quits on the match itself, so no `head` is
+  # needed and pipefail cannot abort the script on an early-closed pipe.
+  sp="$(sed -n '/^source_path:/{s/^source_path:[[:space:]]*//;p;q;}' "$page" | tr -d '\r')"
+  # Trim trailing whitespace, then one pair of surrounding quotes, so that
+  # `source_path: "raw/x.md"` resolves to raw/x.md instead of a false orphan.
+  sp="${sp%"${sp##*[![:space:]]}"}"
+  case "$sp" in
+    \"*\" | \'*\') sp="${sp:1:${#sp}-2}" ;;
+  esac
+  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
+  [[ -n "$sp" ]] || continue
+  [[ -f "$sp" ]] || ORPH+=("$page")
+done
+
+if [[ ${#ORPH[@]} -eq 0 ]]; then
+  # jq does the quoting, so the genome name can never break the envelope.
+  jq -cn --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
+else
+  # One path per line in, one envelope out; every orphan carries reason "orphan".
+  printf '%s\n' "${ORPH[@]}" \
+    | jq -Rn --arg g "$genome" '[inputs | {path: ., reason: "orphan"}] | {status:"ok", genome:$g, count:length, files:map(.path), detail:.}'
+fi
diff --git a/tests/orphan-wiki.bats b/tests/orphan-wiki.bats
new file mode 100644
index 0000000..c8fb8b8
--- /dev/null
+++ b/tests/orphan-wiki.bats
@@ -0,0 +1,47 @@
+#!/usr/bin/env bats
+setup() {
+  load 'helpers'
+  export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
+  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
+  export INGEST_BASE="main"
+  export KG_LIB_DIR="${LIB_DIR}"  # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR
+  g_src="$(make_fixture_genome)"
+  export g_name="fixture-genome"
+  mv "$g_src" "${GENOMES_ROOT}/${g_name}"
+  export g="${GENOMES_ROOT}/${g_name}"
+  ( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
+}
+@test "orphan-wiki: no orphans when raw and source page match" {
+  mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
+  hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
+  mkdir -p "${g}/wiki/sources"
+  printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
+  ( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
+  run bash "$ORPHAN" "$g_name"
+  [ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
+}
+@test "orphan-wiki: detects orphaned source page" {
+  mkdir -p "${g}/wiki/sources"
+  printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
+  ( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
+  run bash "$ORPHAN" "$g_name"
+  [ "$status" -eq 0 ]
+  echo "$output" | jq -e '.count == 1'
+  echo "$output" | jq -e '.detail[0].reason == "orphan"'
+}
+@test "orphan-wiki: ignores legacy pages without source_path" {
+  mkdir -p "${g}/wiki/sources"
+  printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
+  ( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
+  run bash "$ORPHAN" "$g_name"
+  [ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
+}
+@test "orphan-wiki: quoted source_path is not a false positive" {
+  mkdir -p "${g}/raw/articles" "${g}/wiki/sources"
+  echo "content" > "${g}/raw/articles/quoted.md"
+  printf -- '---\nsource_path: "raw/articles/quoted.md"\n---\n' > "${g}/wiki/sources/quoted.md"
+  ( cd "$g" && git add . && git commit -q -m "quoted" && git push -q )
+  run bash "$ORPHAN" "$g_name"
+  [ "$status" -eq 0 ]
+  echo "$output" | jq -e '.count == 0'
+}
diff --git a/tests/permissions.bats b/tests/permissions.bats
index b77c30f..5850791 100644
--- a/tests/permissions.bats
+++ b/tests/permissions.bats
@@ -18,6 +18,7 @@ EXECUTABLES=(
   skills/ingest/scripts/log-append.sh
   skills/ingest/scripts/slug.sh
   skills/ingest/scripts/pending-raw.sh
+  skills/ingest/scripts/orphan-wiki.sh
   skills/ingest/scripts/index-append.py
   scripts/add-genome.sh
   scripts/setup.sh