#!/bin/bash
# Command gate for the n8n -> pi bridge.
#
# Reads the requested command line from SSH_ORIGINAL_COMMAND and runs only the
# whitelisted forms below; anything else is logged as denied and refused with
# exit status 1.
# NOTE(review): presumably installed as an sshd forced command -- confirm
# against the authorized_keys entry.
#
#   pi run                                     prompt on stdin -> pi (JSON mode)
#   pi ingest <genome> <raw_path>              semantic + mechanical ingest
#   pi changed-raw <genome> <before> <after>   JSON list of changed raw/ files
#   ollama list | ollama ps                    pass-through
#
# Every accepted or denied request is recorded through logger(1) with the tag
# n8n-pi-wrap. GENOMES_ROOT (and optionally INGEST_BASE) are expected to be
# provided by ~/.config/knowledge-genome.env, which the genome commands source.
set -eu

# Print a one-line JSON error object on stdout and exit with status 1.
#   $1 - reason text; callers pass fixed literals only, so no JSON escaping
#        is performed here.
fail() {
  printf '{"status":"error","reason":"%s"}\n' "$1"
  exit 1
}

cmd="${SSH_ORIGINAL_COMMAND:-}"

case "$cmd" in
  "pi run")
    logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
    # The prompt travels on stdin so it never has to be parsed out of the
    # command line.
    prompt=$(cat)
    exec /usr/local/bin/pi --no-tools --mode json -p "$prompt"
    ;;

  "pi ingest "*)
    # `pi ingest <genome> <raw_path>` (two tokens).
    rest="${cmd#pi ingest }"
    genome="${rest%% *}"
    raw_path="${rest#* }"
    # reject: missing second token, or any extra token (a space left in raw_path)
    if [ "$genome" = "$rest" ] || [ -z "$raw_path" ] \
      || [ "$raw_path" != "${raw_path#* }" ]; then
      fail "usage: pi ingest <genome> <raw_path>"
    fi

    # genome slug: kebab lowercase only
    case "$genome" in
      "" | *[!a-z0-9-]*) fail "invalid genome name" ;;
    esac

    # raw_path whitelist: MUST live under raw/, no traversal, restricted charset.
    #  - must start with "raw/" (which also rules out absolute paths)
    #  - no ".." anywhere, no empty path segment ("//")
    #  - allowed chars: [A-Za-z0-9._/-] (kebab slugs + subdirs like
    #    raw/articles/foo.md)
    case "$raw_path" in
      raw/*) : ;;
      *) fail "raw_path must be under raw/" ;;
    esac
    case "$raw_path" in
      *..* | *//*) fail "raw_path traversal" ;;
    esac
    case "$raw_path" in
      *[!A-Za-z0-9._/-]*) fail "raw_path illegal chars" ;;
    esac

    logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"

    # Export everything the env file defines so the ingest scripts inherit it.
    set -a
    . "${HOME}/.config/knowledge-genome.env"
    set +a

    cd "${GENOMES_ROOT}/${genome}" || fail "unknown genome"

    # The raw file must actually exist under the genome's raw/ dir.
    # NOTE(review): this runs BEFORE the checkout is reset below, so it tests
    # the pre-reset working tree -- confirm that ordering is intended.
    [ -f "$raw_path" ] || fail "raw file not found"

    # Clean start on the configured base branch (INGEST_BASE, default "main"),
    # pinned to the remote. Destroys only this scratch checkout (never a shared
    # branch, never a force-push) -- this is by design.
    # `clean -fd` also removes leftover UNTRACKED files (e.g. wiki/sources/* or
    # a stale .ingest-manifest.json from a half-finished previous run) that
    # `reset --hard` won't touch.
    #
    # The chain is checked explicitly: `set -e` does NOT abort when a non-final
    # member of an && list fails, so without this test a failed fetch/switch
    # would let the ingest run on a stale or dirty tree.
    base_branch="${INGEST_BASE:-main}"
    if ! { git fetch -q origin \
      && git switch -q "$base_branch" 2>/dev/null \
      && git reset -q --hard "origin/${base_branch}" \
      && git clean -q -fd; }; then
      echo '{"status":"error","stage":"git","reason":"cannot reset checkout to base branch"}'
      exit 1
    fi

    # SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
    # (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and
    # write nothing.) Its output is captured in a temp log whose path is
    # reported on failure.
    log="$(mktemp -t pi-ingest.XXXXXX.log)"
    "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
      >"$log" 2>&1 \
      || {
        echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"ingest-semantic failed\",\"log\":\"${log}\"}"
        exit 1
      }

    # MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR
    # -> 1 JSON line
    exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
    ;;

  "pi changed-raw "*)
    # `pi changed-raw <genome> <before> <after>` (three tokens).
    # List raw/ files changed between two commits as a JSON array (the webhook
    # payload does NOT include file lists, so this checkout computes the diff
    # itself).
    rest="${cmd#pi changed-raw }"
    genome="${rest%% *}"
    range="${rest#* }"
    before="${range%% *}"
    after="${range#* }"

    case "$genome" in
      "" | *[!a-z0-9-]*) fail "invalid genome name" ;;
    esac

    # A missing token leaves the expansion unchanged (genome == rest, or
    # before == range); treat that as a malformed range instead of silently
    # reusing the previous token as a commit id.
    if [ "$genome" = "$rest" ] || [ "$before" = "$range" ]; then
      fail "invalid commit range"
    fi
    # Validate each id on its own: lowercase hex, non-empty. An extra token
    # leaves a space inside $after and is rejected here as well.
    case "$before" in
      "" | *[!a-f0-9]*) fail "invalid commit range" ;;
    esac
    case "$after" in
      "" | *[!a-f0-9]*) fail "invalid commit range" ;;
    esac

    logger -t n8n-pi-wrap "ok: pi changed-raw ${genome} ${before}..${after}"

    set -a
    . "${HOME}/.config/knowledge-genome.env"
    set +a

    cd "${GENOMES_ROOT}/${genome}" 2>/dev/null || fail "unknown genome"
    git fetch -q origin

    files=""
    case "$after" in
      *[!0]*)
        # A non-zero `after` must name a commit that exists locally after the
        # fetch; otherwise git's (silenced) failure would be reported as
        # "ok, 0 files".
        git cat-file -e "${after}^{commit}" 2>/dev/null || fail "unknown commit"

        # Resolve the diff base robustly:
        #  - before all-zero (brand-new branch) or unreachable (force-push)
        #    -> fall back to after~1
        #  - if even after~1 is missing (root commit)
        #    -> list all raw files in `after`
        base="$before"
        case "$before" in
          *[!0]*) : ;;
          *) base="" ;; # all-zero -> empty
        esac
        if [ -n "$base" ] && ! git cat-file -e "${base}^{commit}" 2>/dev/null; then
          base=""
        fi
        if [ -z "$base" ] && git cat-file -e "${after}~1^{commit}" 2>/dev/null; then
          base="${after}~1"
        fi

        # `|| true`: grep -v exits 1 when it prints nothing, which is a valid
        # (empty) result here, not an error.
        if [ -n "$base" ]; then
          files="$(git diff --name-only --diff-filter=d "${base}" "${after}" -- raw/ 2>/dev/null \
            | grep -vE '(^|/)\.st(folder|ignore)' || true)"
        else
          # no usable base: enumerate raw files present at `after`
          files="$(git ls-tree -r --name-only "${after}" -- raw/ 2>/dev/null \
            | grep -vE '(^|/)\.st(folder|ignore)' || true)"
        fi
        ;;
      *)
        # all-zero `after`: there is no commit to inspect, so the list stays
        # empty (same "ok, 0 files" answer as before this check existed).
        : ;;
    esac

    # Normalise "only blank lines" to empty so the count is exact.
    if ! printf '%s\n' "$files" | grep -q .; then
      files=""
    fi

    # emit a JSON array via jq (safe escaping)
    if [ -z "$files" ]; then
      echo '{"status":"ok","genome":"'"$genome"'","count":0,"files":[]}'
    else
      printf '%s\n' "$files" | jq -R . | jq -s \
        --arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
    fi
    ;;

  "ollama list")
    logger -t n8n-pi-wrap "ok: ollama list"
    exec /usr/local/bin/ollama list
    ;;

  "ollama ps")
    logger -t n8n-pi-wrap "ok: ollama ps"
    exec /usr/local/bin/ollama ps
    ;;

  *)
    logger -t n8n-pi-wrap "denied: ${cmd:-}"
    echo "unauthorized command" >&2
    exit 1
    ;;
esac