Compare commits
14 commits
4e81f650e2
...
ea5bbe68b0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ea5bbe68b0 | ||
| cb75558724 | |||
| b76e962fd1 | |||
| 7570613289 | |||
| 418ca57dc3 | |||
| 49be5ada89 | |||
| d44851b25a | |||
| 5e8b72a04f | |||
| a23b679a10 | |||
| 726a1e2ed4 | |||
| e1a00d2db7 | |||
| fb96578987 | |||
| 80fa4c8eda | |||
| a1de7ad954 |
5 changed files with 178 additions and 27 deletions
2
Makefile
2
Makefile
|
|
@ -1,5 +1,5 @@
|
|||
# =============================================================================
|
||||
# Knowledge Genome - Makefile v. 1.5.0
|
||||
# Knowledge Genome - Makefile v. 1.6.0
|
||||
# Orchestrates the setup and management of the knowledge base.
|
||||
# =============================================================================
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
#!/bin/bash
|
||||
# genome-raw-commit <genome>
|
||||
#
|
||||
# Commits raw files synchronized by Syncthing into the vault and pushes them to origin/<base>.
|
||||
# - Committer = n8n-bot (robotic identity responsible for pushing)
|
||||
# - Author = deduced from the Syncthing device ID (modifiedBy field), resolved via .authors.json.
|
||||
# Falls back to default values if unknown.
|
||||
# - One commit per author/device to ensure clear attribution.
|
||||
# - No-op if no changes are present. Excludes infrastructure files and private folders.
|
||||
|
||||
# Commit the raw files that Syncthing has placed in the vault and push them to origin/<base>.
|
||||
# - Committer = n8n-bot (sole pusher); Author = the person who wrote it (Syncthing modifiedBy -> .authors.json)
|
||||
# - One commit per author (single-device => one commit). No-op if there is nothing.
|
||||
# - JSON output built with jq (safe escaping), with a `files` array:
|
||||
# for each raw -> file, author, local_path, local_url (file://), remote_url (Forgejo web).
|
||||
set -euo pipefail
|
||||
genome="${1:?usage: genome-raw-commit <genome>}"
|
||||
|
||||
|
|
@ -20,6 +18,7 @@ set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
|
|||
: "${FORGEJO_USER:=n8n-bot}"
|
||||
: "${FORGEJO_HOST:=127.0.0.1:3001}"
|
||||
: "${FORGEJO_OWNER:=Keru}"
|
||||
: "${FORGEJO_WEB_BASE:=https://git.keruhomelab.com}" # human-facing URL for remote links (not the loopback)
|
||||
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
|
||||
: "${COMMITTER_NAME:=n8n-bot}"
|
||||
: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
|
||||
|
|
@ -38,43 +37,42 @@ git config user.name "$COMMITTER_NAME"
|
|||
git config user.email "$COMMITTER_EMAIL"
|
||||
git config commit.gpgsign false
|
||||
|
||||
# Scope restricted to raw/ directory. raw/.stignore is omitted via .git/info/exclude
|
||||
git add -A -- raw/
|
||||
git reset -q -- raw/.stignore 2>/dev/null || true
|
||||
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
|
||||
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"
|
||||
|
||||
git add -A -- raw/
|
||||
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
|
||||
if git diff --cached --quiet; then
|
||||
printf '{"status":"noop","genome":"%s"}\n' "$genome"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Map Syncthing device ID to author information (name, email)
|
||||
resolve_dev() {
|
||||
# $1 = file path relative to the vault root (e.g., raw/file.txt)
|
||||
resolve_dev() { # $1 = path relative to the vault (raw/...) -> prints the short device id, or empty
|
||||
[[ -z "${SYNCTHING_API_KEY:-}" ]] && return 0
|
||||
curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" --get "${SYNCTHING_URL}/rest/db/file" \
|
||||
--data-urlencode "folder=${fid}" --data-urlencode "file=${1#raw/}" 2>/dev/null \
|
||||
| jq -r '.local.modifiedBy // empty' 2>/dev/null || true
|
||||
}
|
||||
|
||||
author_for_dev() {
|
||||
# $1 = device ID
|
||||
author_for_dev() { # $1 = device id -> prints "name\temail"
|
||||
local dev="$1" name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
|
||||
if [[ -n "$dev" && -f "$authors_map" ]] && jq -e --arg d "$dev" '.[$d]' "$authors_map" >/dev/null 2>&1; then
|
||||
name="$(jq -r --arg d "$dev" '.[$d].name' "$authors_map")"
|
||||
email="$(jq -r --arg d "$dev" '.[$d].email' "$authors_map")"
|
||||
fi
|
||||
printf '%s\t%s\t%s' "$name" "$email" "${dev:-unknown}"
|
||||
printf '%s\t%s' "$name" "$email"
|
||||
}
|
||||
|
||||
# Group staged files by author identity
|
||||
declare -A G_FILES G_NAME G_EMAIL G_DEV
|
||||
# Collect per-file (relpath, author) and group by author for committing
|
||||
declare -A G_FILES G_NAME G_EMAIL
|
||||
declare -a ROWS
|
||||
while IFS= read -r f; do
|
||||
[[ -z "$f" ]] && continue
|
||||
dev="$(resolve_dev "$f")"
|
||||
IFS=$'\t' read -r aname aemail adev <<< "$(author_for_dev "$dev")"
|
||||
IFS=$'\t' read -r aname aemail <<< "$(author_for_dev "$dev")"
|
||||
ROWS+=("${f}"$'\t'"${aname}")
|
||||
key="${aname} <${aemail}>"
|
||||
G_FILES["$key"]+="${f}"$'\n'
|
||||
G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"; G_DEV["$key"]="$adev"
|
||||
G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"
|
||||
done < <(git diff --cached --name-only -- raw/)
|
||||
|
||||
ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
|
||||
|
|
@ -82,20 +80,34 @@ commits=0; summary=""
|
|||
for key in "${!G_FILES[@]}"; do
|
||||
mapfile -t files < <(printf '%s' "${G_FILES[$key]}")
|
||||
short="$(printf '%s\n' "${files[@]}" | sed 's#^raw/##' | paste -sd, -)"
|
||||
msg="$(printf 'raw(%s): sync %s\n\nAdded-by-device: %s\nSyncthing-device-id: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
|
||||
"$genome" "$short" "${G_DEV[$key]}" "${G_DEV[$key]}" "$ts")"
|
||||
msg="$(printf 'raw(%s): sync %s\n\nAdded-by: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
|
||||
"$genome" "$short" "${G_NAME[$key]}" "$ts")"
|
||||
git commit -q --author="$key" -m "$msg" -- "${files[@]}"
|
||||
commits=$((commits+1))
|
||||
summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
|
||||
done
|
||||
|
||||
# Fetch updates from origin to merge upstream modifications before pushing
|
||||
# Pull in any remote advances (e.g. a merged wiki PR), then push
|
||||
git fetch -q origin
|
||||
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
|
||||
git rebase -q "origin/${GENOME_BASE}" \
|
||||
|| { git rebase --abort 2>/dev/null || true; printf '{"status":"error","reason":"rebase-conflict","genome":"%s"}\n' "$genome"; exit 1; }
|
||||
fi
|
||||
git push -q "$clone_url" "HEAD:${GENOME_BASE}"
|
||||
head="$(git rev-parse --short HEAD)"
|
||||
|
||||
printf '{"status":"ok","genome":"%s","base":"%s","commits":%d","head":"%s","summary":"%s"}\n' \
|
||||
"$genome" "$GENOME_BASE" $commits "$(git rev-parse --short HEAD)" "$summary"
|
||||
# `files` array: local (file://) and remote (Forgejo web) link for each committed raw
|
||||
files_json="$(
|
||||
for row in "${ROWS[@]}"; do
|
||||
IFS=$'\t' read -r rel aname <<< "$row"
|
||||
jq -n --arg file "$rel" --arg author "$aname" \
|
||||
--arg lpath "${vault}/${rel}" \
|
||||
--arg lurl "file://${vault}/${rel}" \
|
||||
--arg rurl "${FORGEJO_WEB_BASE}/${FORGEJO_OWNER}/${genome}/src/branch/${GENOME_BASE}/${rel}" \
|
||||
'{file:$file, author:$author, local_path:$lpath, local_url:$lurl, remote_url:$rurl}'
|
||||
done | jq -s '.'
|
||||
)"
|
||||
|
||||
jq -n --arg genome "$genome" --arg base "$GENOME_BASE" --argjson commits "$commits" \
|
||||
--arg head "$head" --arg summary "$summary" --argjson files "$files_json" \
|
||||
'{status:"ok", genome:$genome, base:$base, commits:$commits, head:$head, summary:$summary, files:$files}'
|
||||
|
|
|
|||
60
deploy/vm101/README.md
Normal file
60
deploy/vm101/README.md
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# deploy/vm101
|
||||
|
||||
System artifacts deployed to **vm101** (the GPU ingest node). The repo is the
|
||||
source of truth; the live copies live in `/usr/local/bin/`. Edit here, then
|
||||
run `sudo ./install.sh` on vm101 to install the changes.
|
||||
|
||||
## Contents
|
||||
|
||||
- `n8n-pi-wrap` — forced-command wrapper that fronts every n8n→vm101 SSH call.
|
||||
- `install.sh` — installs the wrapper(s) into `/usr/local/bin` (idempotent).
|
||||
|
||||
## n8n-pi-wrap
|
||||
|
||||
The only entry point for the `n8n-runner` identity onto vm101. n8n never gets a
|
||||
shell here: whatever it sends arrives as `SSH_ORIGINAL_COMMAND`, and a `case`
|
||||
whitelist decides what runs. Anything outside the whitelist is denied and logged.
|
||||
|
||||
Allowed commands:
|
||||
|
||||
| Command | What it does |
|
||||
|---|---|
|
||||
| `pi run` | one-shot prompt via stdin (proof-of-life / health) |
|
||||
| `pi ingest <genome> <raw_path>` | the real two-phase ingest (below) |
|
||||
| `ollama list` / `ollama ps` | model introspection |
|
||||
|
||||
### The two-phase ingest
|
||||
|
||||
`pi ingest` performs a clean start, then runs the two phases (semantic, then mechanical), then stops:
|
||||
|
||||
1. **Clean start** — `git fetch && git switch <INGEST_BASE> && git reset --hard origin/<INGEST_BASE>`.
|
||||
Destroys only vm101's *scratch* checkout (never a shared branch, never a
|
||||
force-push) — this determinism is by design.
|
||||
2. **Semantic** — `skills/ingest/scripts/ingest-semantic.py <genome> <raw_path>`
|
||||
drives `pi` to WRITE `wiki/*` pages + `.ingest-manifest.json`.
|
||||
NOTE: this is the script, NOT `pi -p "/skill:ingest ..."` (that form makes the
|
||||
model reply in chat and write nothing — the classic "manifest not found" trap).
|
||||
3. **Mechanical** — `skills/ingest/scripts/run-ingest.sh <genome>` validates the
|
||||
manifest, then index/log/scoped-lint/commit on `feat/ai-ingest-<slug>` and opens
|
||||
a PR onto `<INGEST_BASE>`. Emits one JSON line `{status,slug,pr_url,...}`.
|
||||
|
||||
The PR then waits for the human gate. One raw per session, sequential.
|
||||
|
||||
### Input hardening
|
||||
|
||||
Both inputs come from `SSH_ORIGINAL_COMMAND`, so both are validated:
|
||||
|
||||
- `genome` — kebab lowercase `^[a-z0-9-]+$`.
|
||||
- `raw_path` — must be under `raw/`, no `..` traversal, restricted charset
|
||||
`[A-Za-z0-9._/-]`, and the file must exist. Rejected paths return a JSON error.
|
||||
|
||||
Config (`INGEST_BASE`, `GENOMES_ROOT`, `INGEST_MODEL`, Forgejo token) is sourced
|
||||
from `~/.config/knowledge-genome.env` (0600, owner-only).
|
||||
|
||||
## Install / update
|
||||
|
||||
```bash
|
||||
# on vm101
|
||||
cd ~/knowledge-genome-orchestrator/deploy/vm101
|
||||
sudo ./install.sh
|
||||
```
|
||||
8
deploy/vm101/install.sh
Executable file
8
deploy/vm101/install.sh
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
# deploy/vm101/install.sh — install vm101 wrappers from repo -> /usr/local/bin (idempotent).
# Run ON vm101 with sudo:  sudo ./install.sh
#
# The repo copy is syntax-checked BEFORE it replaces the installed wrapper, so a
# script that does not parse is never copied into /usr/local/bin.
set -euo pipefail

here="$(cd -- "$(dirname -- "$0")" && pwd)"
src="${here}/n8n-pi-wrap"
dest=/usr/local/bin/n8n-pi-wrap

# `bash -n` only parses the file; it executes nothing.
if ! bash -n "$src"; then
  echo "syntax check failed: ${src} (nothing installed)" >&2
  exit 1
fi

install -m 0755 "$src" "$dest"
echo "installed: ${dest}"
# The installed file is a byte-for-byte copy of the source that was just checked.
echo "syntax: ok"
|
||||
71
deploy/vm101/n8n-pi-wrap
Executable file
71
deploy/vm101/n8n-pi-wrap
Executable file
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash
# n8n-pi-wrap — command whitelist driven by SSH_ORIGINAL_COMMAND.
#
# Accepted commands (anything else is logged via logger(1) and refused with exit 1):
#   pi run                          one-shot prompt read from stdin
#   pi ingest <genome> <raw_path>   clean start, then semantic and mechanical ingest
#   ollama list | ollama ps         model introspection
#
# For `pi ingest`, rejected input and the stage failures handled explicitly below
# print one JSON error object on stdout and exit 1.
set -eu

cmd="${SSH_ORIGINAL_COMMAND:-}"

case "$cmd" in
  "pi run")
    logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
    # The prompt arrives on stdin; pi itself is started with stdin closed off.
    prompt=$(cat)
    exec /usr/local/bin/pi --no-tools --mode json -p "$prompt" </dev/null
    ;;

  "pi ingest "*)
    # Strict positional parse: EXACTLY `pi ingest <genome> <raw_path>` (two tokens).
    rest="${cmd#pi ingest }"
    genome="${rest%% *}"
    raw_path="${rest#* }"
    # reject: missing second token, or any extra token (a space left in raw_path)
    if [ "$genome" = "$rest" ] || [ -z "$raw_path" ] || [ "$raw_path" != "${raw_path#* }" ]; then
      echo '{"status":"error","reason":"usage: pi ingest <genome> <raw_path>"}'
      exit 1
    fi

    # genome slug: kebab lowercase only
    case "$genome" in
      ""|*[!a-z0-9-]*)
        echo '{"status":"error","reason":"invalid genome name"}'
        exit 1
        ;;
    esac

    # raw_path whitelist: MUST live under raw/, no traversal, restricted charset.
    #   - must start with "raw/" (which also excludes absolute paths)
    #   - no ".." and no "//" anywhere in the path
    #   - allowed chars: [A-Za-z0-9._/-]  (kebab slugs + subdirs like raw/articles/foo.md)
    case "$raw_path" in
      raw/*) : ;;
      *)
        echo '{"status":"error","reason":"raw_path must be under raw/"}'
        exit 1
        ;;
    esac
    case "$raw_path" in
      *..*|*//*)
        echo '{"status":"error","reason":"raw_path traversal"}'
        exit 1
        ;;
    esac
    case "$raw_path" in
      *[!A-Za-z0-9._/-]*)
        echo '{"status":"error","reason":"raw_path illegal chars"}'
        exit 1
        ;;
    esac

    logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"

    # Export everything the env file defines (set -a) for the child scripts.
    set -a; . "${HOME}/.config/knowledge-genome.env"; set +a

    # Without GENOMES_ROOT the cd below would resolve against "/" (or abort under
    # `set -u` with a non-JSON message); report it in the same JSON shape instead.
    [ -n "${GENOMES_ROOT:-}" ] \
      || { echo '{"status":"error","reason":"GENOMES_ROOT not set"}'; exit 1; }
    cd "${GENOMES_ROOT}/${genome}" \
      || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

    # The raw file must actually exist under the genome's raw/ dir.
    [ -f "$raw_path" ] \
      || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }

    # Clean start on the configured base (INGEST_BASE, default "main"), pinned to the
    # remote. Destroys only vm101's scratch checkout (never a shared branch, never a
    # force-push) — this is by design.
    #
    # Each step is checked on its own: under `set -e` a failure of a non-final
    # command in an `a && b && c` list does NOT abort the script, so a chained form
    # would let the ingest continue on a stale checkout when fetch or switch fails.
    base="${INGEST_BASE:-main}"
    git fetch -q origin \
      || { echo '{"status":"error","stage":"clean-start","reason":"git fetch failed"}'; exit 1; }
    git switch -q "$base" 2>/dev/null \
      || { echo '{"status":"error","stage":"clean-start","reason":"git switch failed"}'; exit 1; }
    git reset -q --hard "origin/${base}" \
      || { echo '{"status":"error","stage":"clean-start","reason":"git reset failed"}'; exit 1; }

    # SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
    # (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
    # Its combined stdout/stderr goes to a temp log whose path is reported on failure.
    # NOTE(review): the log file is also left in place on success — confirm whether
    # it should be pruned.
    log="$(mktemp -t pi-ingest.XXXXXX.log)"
    "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
      >"$log" 2>&1 \
      || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"ingest-semantic failed\",\"log\":\"${log}\"}"; exit 1; }

    # MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
    exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
    ;;

  "ollama list")
    logger -t n8n-pi-wrap "ok: ollama list"
    exec /usr/local/bin/ollama list
    ;;

  "ollama ps")
    logger -t n8n-pi-wrap "ok: ollama ps"
    exec /usr/local/bin/ollama ps
    ;;

  *)
    # Not on the whitelist: record what was attempted and refuse.
    logger -t n8n-pi-wrap "denied: ${cmd:-<empty>}"
    echo "unauthorized command" >&2
    exit 1
    ;;
esac
|
||||
Loading…
Add table
Reference in a new issue