diff --git a/Makefile b/Makefile index 5913288..0e25953 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # ============================================================================= -# Knowledge Genome - Makefile v. 1.4.0 +# Knowledge Genome - Makefile v. 1.5.0 # Orchestrates the setup and management of the knowledge base. # ============================================================================= diff --git a/deploy/nexus/README.md b/deploy/nexus/README.md new file mode 100644 index 0000000..bd8ada0 --- /dev/null +++ b/deploy/nexus/README.md @@ -0,0 +1,81 @@ +# Componenti di Sistema — Gestione Sincronizzazione e Automazione Genoma + +Questo modulo contiene gli script di backend che vengono installati sul server `nexus` per gestire il ciclo di vita dei vault locali (scratch di lavoro), l'integrazione con Syncthing e l'autocommit dei file grezzi (`raw/`) provenienti dai dispositivi mobili o desktop (es. Obsidian). + +## Architettura dei File di Sistema + +Gli script sono progettati per girare in un ambiente multi-utente protetto, dove l'istanza globale di `n8n` (tramite l'utente di sistema `n8n-runner`) pilota le operazioni senza possedere i diritti di lettura/scrittura diretti sui file del genoma o sui segreti di configurazione. + +### 1. Posizionamento e Permessi degli Script + +I file inclusi in questa cartella devono essere installati sul server di produzione nella directory `/usr/local/bin/` con privilegi di esecuzione globali, ma modificabili solo da `root`. + +- **Destinazione:** `/usr/local/bin/` +- **Proprietario (Owner):** `root:root` +- **Permessi (Chmod):** `0755` (`-rwxr-xr-x`) + +#### Elenco degli Script: + +- `ensure-genome-vault`: Script idempotente che inizializza o riallinea il vault locale clonandolo da Forgejo (in loopback) sul branch `develop`, configura gli `.stignore` ed effettua il provisioning automatico della cartella condivisa su Syncthing via API. +- `genome-askpass`: Helper di autenticazione per Git (`GIT_ASKPASS`). 
Intercetta le richieste di credenziali di Git durante i cloni e i push HTTP su Forgejo, iniettando l'utente e il token applicativo senza esporli nei log di sistema o negli argomenti dei processi. +- `genome-raw-commit`: Script di polling periodico invocato da n8n. Isola i file modificati nella cartella `raw/`, interroga Syncthing per capire quale dispositivo (e quindi quale autore umano) ha generato la modifica, crea commit atomici attribuiti al singolo autore e pusha le modifiche su Forgejo (`develop`). + +--- + +## Modello di Sicurezza e Visibilità + +Per garantire l'isolamento del sistema operativo, l'infrastruttura si basa su tre livelli di confinamento: + +### A. Variabili d'Ambiente Protette (`.env`) + +Le credenziali (Token Forgejo, API Key Syncthing) risiedono nella Home dell'utente operativo del servizio (`homelab`) e sono completamente invisibili a n8n e ad altri utenti del sistema. + +- **Path:** `/home/homelab/.config/knowledge-genome.env` +- **Permessi:** `0600` (`-rw-------`), di proprietà esclusiva di `homelab:homelab`. + +#### env + +Contenuto di esempio del file `~/.config/knowledge-genome.env`: + +```text +# knowledge-genome.env Configuration Profile +# Requirements: Must be owned by the service user with 0600 permissions. + +# Vault path and operational branch +GENOME_VAULTS_ROOT=/srv/genome-vaults +GENOME_BASE=develop + +# Forgejo Target Instance +# Replace 127.0.0.1 with vm101 IP if Forgejo is hosted on the virtual machine +FORGEJO_HOST=127.0.0.1:3001 +FORGEJO_OWNER=Keru +FORGEJO_USER=n8n-bot +FORGEJO_TOKEN="............" + +# Git Commit Identity +COMMITTER_NAME=n8n-bot +COMMITTER_EMAIL=n8n-bot@homelab +DEFAULT_AUTHOR_NAME="Matteo Cherubini" +DEFAULT_AUTHOR_EMAIL=matteo@keruhomelab.com + +# Syncthing Target Instance +# Replace 127.0.0.1 with vm101 IP if Syncthing API is hosted on the virtual machine +SYNCTHING_URL=http://127.0.0.1:8384 +SYNCTHING_API_KEY="............" +``` + +### B. 
Confine dei Privilegi in Sudoers
+
+L'utente di automazione `n8n-runner` (usato dall'agente SSH di n8n) non ha accesso alla shell e non può invocare comandi arbitrari. Può unicamente chiamare i due script principali impersonando l'utente `homelab` senza l'inserimento della password.
+
+Configurazione da applicare in `/etc/sudoers.d/n8n-genome` (con permessi rigorosi `0440`):
+
+```text
+n8n-runner ALL=(homelab) NOPASSWD: /usr/local/bin/ensure-genome-vault, /usr/local/bin/genome-raw-commit
+```
+
+### C. Directory dei Vault
+
+I dati veri e propri sincronizzati da Syncthing risiedono isolati in `/srv/genome-vaults/`.
+
+- **Proprietario**: homelab:homelab (UID/GID 1000), permettendo la convivenza nativa e fluida tra il demone Syncthing in esecuzione nel container e gli script Git locali.
diff --git a/deploy/nexus/ensure-genome-vault.sh b/deploy/nexus/ensure-genome-vault.sh
new file mode 100644
index 0000000..42ed105
--- /dev/null
+++ b/deploy/nexus/ensure-genome-vault.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# ensure-genome-vault <genome> [--status-only]
+#
+# Idempotent, unified command for managing genome vaults.
+# Called by n8n during genome creation and as a safety net mechanism.
+#
+# Operation workflow:
+# - Vault absent -> Clone from Forgejo (loopback) + track develop branch
+# - Vault present -> Realign to origin/develop (treated as a rebuildable scratchpad)
+# - Post-clone/fetch -> Write raw/.stignore and register/update the Syncthing folder.
+#
+# Source of truth is Forgejo. Vaults are scratch spaces and not backed up directly.
+# All operations run locally via loopback.
+#
+# Output: a JSON status object on stdout on every handled exit path.
+
+set -euo pipefail
+genome="${1:?usage: ensure-genome-vault <genome> [--status-only]}"
+mode="${2:-}"
+
+# Slug validation inside the script to prevent path/URL traversal:
+# Lowercase kebab-case, no '/', '..', or spaces.
+[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }
+
+set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
+: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
+: "${GENOME_BASE:=develop}"
+: "${FORGEJO_USER:=n8n-bot}"
+: "${FORGEJO_HOST:=127.0.0.1:3001}"
+: "${FORGEJO_OWNER:=Keru}"
+: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
+
+vault="${GENOME_VAULTS_ROOT}/${genome}"
+fid="${genome}-public"
+clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git"
+export GIT_ASKPASS=/usr/local/bin/genome-askpass # Provides the n8n-bot token
+
+mkdir -p "$GENOME_VAULTS_ROOT"
+
+# ── 1. Clone (if missing) or realign (if present) ────────────────────────────
+if [[ ! -d "${vault}/.git" ]]; then
+  [[ "$mode" == "--status-only" ]] && { printf '{"status":"absent","genome":"%s"}\n' "$genome"; exit 0; }
+  git clone -q "$clone_url" "$vault"
+  cd "$vault"
+  if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
+    git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}" 2>/dev/null || git switch -q "$GENOME_BASE"
+  else
+    # develop does not exist on remote yet: create it from current base and publish
+    git switch -q -c "$GENOME_BASE"
+    git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
+  fi
+  state="cloned"
+else
+  cd "$vault"
+  if [[ "$mode" == "--status-only" ]]; then
+    printf '{"status":"present","genome":"%s","head":"%s"}\n' "$genome" "$(git rev-parse --short HEAD)"
+    exit 0
+  fi
+  git fetch -q origin
+  if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
+    git switch -q "$GENOME_BASE" 2>/dev/null || git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}"
+    # GUARD: hard reset is allowed ONLY if the working tree is clean.
+    # If Syncthing has already written uncommitted raw files, DO NOT destroy them: soft fast-forward.
+    if [[ -z "$(git status --porcelain -- raw/ 2>/dev/null)" ]]; then
+      git reset -q --hard "origin/${GENOME_BASE}"
+      state="realigned"
+    else
+      git merge -q --ff-only "origin/${GENOME_BASE}" 2>/dev/null || true
+      state="realigned-kept-dirty"
+    fi
+  else
+    git switch -q -c "$GENOME_BASE" 2>/dev/null || true
+    git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
+    state="base-created"
+  fi
+fi
+
+# ── 2. raw/.stignore + exclusion from git (infrastructure, not content) ────────────
+mkdir -p "${vault}/raw"
+cat > "${vault}/raw/.stignore" <<'EOF'
+// Knowledge Genome — Syncthing exclusions for raw/
+// NEVER unencrypted private data: git-crypt protects INSIDE the repo, not in Syncthing transit
+private
+// Obsidian / editor noise
+.obsidian
+.trash
+*.tmp
+workspace*.json
+// security
+.git
+EOF
+# .stignore must not be included in genome commits
+grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null \
+  || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
+
+# ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ────────
+folder_state="skipped(no api key)"
+if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then
+  if curl -fsS -o /dev/null -H "X-API-Key: ${SYNCTHING_API_KEY}" \
+    "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
+    folder_state="exists"
+  else
+    # Best-effort: the command substitution sits inside the `if` condition so
+    # that a failing curl/jq pipeline (pipefail) cannot trip `set -e` and abort
+    # the script before the final JSON status is printed.
+    if body="$(curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" \
+      "${SYNCTHING_URL}/rest/config/defaults/folder" \
+      | jq --arg id "$fid" --arg label "${genome} (raw public)" --arg path "${vault}/raw" \
+      '.id=$id | .label=$label | .path=$path | .type="sendreceive"
+      | .fsWatcherEnabled=true | .rescanIntervalS=3600')" \
+      && curl -fsS -o /dev/null -X PUT \
+      -H "X-API-Key: ${SYNCTHING_API_KEY}" -H "Content-Type: application/json" \
+      -d "$body" "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
+      folder_state="created"
+    else
+      folder_state="error(check syncthing api)"
+    fi
+  fi
+fi
+
+printf 
'{"status":"ok","genome":"%s","vault":"%s","state":"%s","syncthing_folder":"%s"}\n' \
+  "$genome" "$vault" "$state" "$folder_state"
diff --git a/deploy/nexus/genome-askpass.sh b/deploy/nexus/genome-askpass.sh
new file mode 100644
index 0000000..104b3b7
--- /dev/null
+++ b/deploy/nexus/genome-askpass.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# GIT_ASKPASS helper for Forgejo HTTP authentication.
+# Git invokes this script when it needs a username or password.
+#
+
+set -eu
+
+# Load environment variables
+. "${HOME}/.config/knowledge-genome.env"
+
+case "${1:-}" in
+  *[Uu]sername*)
+    printf '%s\n' "${FORGEJO_USER:-n8n-bot}"
+    ;;
+  *)
+    printf '%s\n' "${FORGEJO_TOKEN:?FORGEJO_TOKEN not set}"
+    ;;
+esac
diff --git a/deploy/nexus/genome-raw-commit.sh b/deploy/nexus/genome-raw-commit.sh
new file mode 100644
index 0000000..3989346
--- /dev/null
+++ b/deploy/nexus/genome-raw-commit.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# genome-raw-commit <genome>
+#
+# Commits raw files synchronized by Syncthing into the vault and pushes them to origin/<GENOME_BASE>.
+# - Committer = n8n-bot (robotic identity responsible for pushing)
+# - Author = deduced from the Syncthing device ID (modifiedBy field), resolved via .authors.json.
+#   Falls back to default values if unknown.
+# - One commit per author/device to ensure clear attribution.
+# - No-op if no changes are present. Excludes infrastructure files and private folders.
+#
+# Output: a JSON status object on stdout on every handled exit path (status is ok, noop or error).
+
+set -euo pipefail
+genome="${1:?usage: genome-raw-commit <genome>}"
+
+# Input validation to prevent path or URL traversal inside the script
+[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }
+
+set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
+: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
+: "${GENOME_BASE:=develop}"
+: "${FORGEJO_USER:=n8n-bot}"
+: "${FORGEJO_HOST:=127.0.0.1:3001}"
+: "${FORGEJO_OWNER:=Keru}"
+: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
+: "${COMMITTER_NAME:=n8n-bot}"
+: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
+: "${DEFAULT_AUTHOR_NAME:=Unknown}"
+: "${DEFAULT_AUTHOR_EMAIL:=unknown@syncthing}"
+
+vault="${GENOME_VAULTS_ROOT}/${genome}"
+fid="${genome}-public"
+authors_map="${GENOME_VAULTS_ROOT}/.authors.json"
+clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git"
+export GIT_ASKPASS=/usr/local/bin/genome-askpass
+
+[[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; }
+cd "$vault"
+git config user.name "$COMMITTER_NAME"
+git config user.email "$COMMITTER_EMAIL"
+git config commit.gpgsign false
+
+# Scope restricted to raw/ directory. raw/.stignore is omitted via .git/info/exclude
+git add -A -- raw/
+git reset -q -- raw/.stignore 2>/dev/null || true
+
+if git diff --cached --quiet; then
+  printf '{"status":"noop","genome":"%s"}\n' "$genome"
+  exit 0
+fi
+
+# Escape a string so it can be embedded inside a JSON double-quoted value
+# (handles backslash, double quote, newline, carriage return and tab).
+json_escape() {
+  local s="$1"
+  s=${s//\\/\\\\}
+  s=${s//\"/\\\"}
+  s=${s//$'\n'/\\n}
+  s=${s//$'\r'/\\r}
+  s=${s//$'\t'/\\t}
+  printf '%s' "$s"
+}
+
+# Map Syncthing device ID to author information (name, email)
+resolve_dev() {
+  # $1 = file path relative to the vault root (e.g., raw/file.txt)
+  [[ -z "${SYNCTHING_API_KEY:-}" ]] && return 0
+  curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" --get "${SYNCTHING_URL}/rest/db/file" \
+    --data-urlencode "folder=${fid}" --data-urlencode "file=${1#raw/}" 2>/dev/null \
+    | jq -r '.local.modifiedBy // empty' 2>/dev/null || true
+}
+
+author_for_dev() {
+  # $1 = device ID
+  local dev="$1" name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
+  if [[ -n "$dev" && -f "$authors_map" ]] && jq -e --arg d "$dev" '.[$d]' "$authors_map" >/dev/null 2>&1; then
+    name="$(jq -r --arg d "$dev" '.[$d].name' "$authors_map")"
+    email="$(jq -r --arg d "$dev" '.[$d].email' "$authors_map")"
+  fi
+  printf '%s\t%s\t%s' "$name" "$email" "${dev:-unknown}"
+}
+
+# Group staged files by author identity.
+# -z emits NUL-terminated, unquoted paths, so non-ASCII names are not C-quoted
+# and can be passed back to `git commit` verbatim. --no-renames lists both sides
+# of a rename, so the old path's deletion is committed instead of left staged.
+declare -A G_FILES G_NAME G_EMAIL G_DEV
+while IFS= read -r -d '' f; do
+  [[ -z "$f" ]] && continue
+  dev="$(resolve_dev "$f")"
+  IFS=$'\t' read -r aname aemail adev <<< "$(author_for_dev "$dev")"
+  key="${aname} <${aemail}>"
+  G_FILES["$key"]+="${f}"$'\n'
+  G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"; G_DEV["$key"]="$adev"
+done < <(git diff --cached --name-only --no-renames -z -- raw/)
+
+ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
+commits=0; summary=""
+for key in "${!G_FILES[@]}"; do
+  mapfile -t files < <(printf '%s' "${G_FILES[$key]}")
+  short="$(printf '%s\n' "${files[@]}" | sed 's#^raw/##' | paste -sd, -)"
+  msg="$(printf 'raw(%s): sync %s\n\nAdded-by-device: %s\nSyncthing-device-id: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
+    "$genome" "$short" "${G_DEV[$key]}" "${G_DEV[$key]}" "$ts")"
+  git commit -q --author="$key" -m "$msg" -- "${files[@]}"
+  commits=$((commits+1))
+  summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
+done
+
+# Fetch updates from origin to merge upstream modifications before pushing
+git fetch -q origin
+if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
+  git rebase -q "origin/${GENOME_BASE}" \
+    || { git rebase --abort 2>/dev/null || true; printf '{"status":"error","reason":"rebase-conflict","genome":"%s"}\n' "$genome"; exit 1; }
+fi
+git push -q "$clone_url" "HEAD:${GENOME_BASE}"
+
+# "commits" is emitted as a bare JSON number; the summary is escaped because
+# author and file names are free-form text.
+printf '{"status":"ok","genome":"%s","base":"%s","commits":%d,"head":"%s","summary":"%s"}\n' \
+  "$genome" "$GENOME_BASE" "$commits" "$(git rev-parse --short HEAD)" "$(json_escape "$summary")"