Compare commits

..

168 commits

Author SHA1 Message Date
06a16f1e81 Merge branch 'release/1.13.0' into main 2026-07-02 17:55:42 +02:00
a1f521d43f Update version 2026-07-02 17:55:32 +02:00
3c29d36656 feat(n8n): Harden all genome workflow JS nodes with defensive coding 2026-07-02 12:54:14 +02:00
0267661de8 Merge branch 'release/1.12.0' into main 2026-07-02 10:17:12 +02:00
15f6d7f4a7 Merge branch 'release/1.12.0' into develop 2026-07-02 10:17:12 +02:00
12fc807147 Update version 2026-07-02 10:17:00 +02:00
5cfdce66b8 Merge branch 'feature/quiet-window-rolling-PR' into develop 2026-07-02 10:16:15 +02:00
f27704f054 test(open-pr): add test for rolling PR branch updates 2026-07-02 10:16:02 +02:00
6c4468cc12 test(raw-commit): add tests for quiet window behavior 2026-07-02 10:16:02 +02:00
eed2251c28 feat(raw-commit): implement raw file quiet window for ingest 2026-07-02 10:16:02 +02:00
bf657c3708 feat(raw-commit): introduce GENOME_PUSH_URL test seam 2026-07-02 10:16:02 +02:00
601277fce4 Merge branch 'release/1.11.1' into main 2026-07-02 10:15:45 +02:00
4f9fbdec8b Merge branch 'release/1.11.1' into develop 2026-07-02 10:15:45 +02:00
fd5c07043c Update version 2026-07-02 10:15:32 +02:00
c4aba8507c feat(n8n): add workflow for genome content pruning on raw/ file deletion 2026-07-01 19:53:57 +02:00
1c6d7a4ecd feat(n8n): add 'pi prune' command to n8n-pi-wrap 2026-07-01 19:37:14 +02:00
066db00e89 test(ingest): add tests for run-prune.sh 2026-07-01 19:37:14 +02:00
c0659d5ce9 feat(ingest): introduce run-prune.sh for orphaned source removal 2026-07-01 19:37:14 +02:00
101eef98aa feat(infra): enhance open-pr.sh for rolling PRs and custom branches 2026-07-01 19:37:14 +02:00
8082bc3003 test(ingest): add tests for index-append.py --remove 2026-07-01 19:37:14 +02:00
990118de71 feat(ingest): allow index-append.py to remove entries 2026-07-01 19:37:14 +02:00
95b3866549 feat(ingest-semantic.py): Add pre-flight context window check 2026-07-01 18:24:25 +02:00
111ffd266a refactor(genome-ingest): Use run-one-ingest and improve filtering 2026-07-01 18:24:25 +02:00
88aa6a0798 refactor: Remove old raw-commit workflow 2026-07-01 18:24:25 +02:00
79c4f6dde2 feat: Implement PR review directive workflow 2026-07-01 18:24:25 +02:00
047330b384 feat: Introduce run-one-ingest sub-workflow 2026-07-01 18:24:25 +02:00
c8b45d537c feat: Add global n8n error handling workflow 2026-07-01 18:24:25 +02:00
799cc1f2e0 Merge branch 'release/1.11.0' into main 2026-06-30 10:33:14 +02:00
e57b811956 Merge branch 'release/1.11.0' into develop 2026-06-30 10:33:14 +02:00
02c3a39101 Update version 2026-06-30 10:33:01 +02:00
19529531cf feat(ingest): Implement 'pi ingest-rework' command 2026-06-30 10:32:18 +02:00
c5f113de52 feat(ingest): Add raw source marker to PR description 2026-06-30 10:32:18 +02:00
06e877ec1d feat(ingest): Allow semantic ingest to accept feedback 2026-06-30 10:32:18 +02:00
3360388f00 doc: Add PR review guidelines to template 2026-06-30 10:32:18 +02:00
5fb2baedef trivial: Fix indentation in n8n-pi-wrap service script 2026-06-30 10:32:18 +02:00
bab4e987c3 Merge branch 'release/1.10.0' into main 2026-06-27 17:20:40 +02:00
896e2adfa4 Merge branch 'release/1.10.0' into develop 2026-06-27 17:20:40 +02:00
8d0e1b91cc Update version 2026-06-27 17:20:20 +02:00
f962a7fb13 fix: Implement retry logic for Ollama model API calls 2026-06-27 17:18:32 +02:00
3c9b24c3b2 feature: Make ingest log entries idempotent with stable run_id 2026-06-27 17:18:22 +02:00
13d08866ef feature: Add pi orphan-wiki command to detect unlinked pages 2026-06-27 17:18:15 +02:00
31340f9d19 refactor: Integrate shared clean_start into ingest preparation 2026-06-27 17:18:09 +02:00
e0a39d8a15 refactor: Extract git clean start logic into shared library 2026-06-27 17:18:02 +02:00
502fdcc3bd Merge branch 'release/1.9.1' into main 2026-06-27 14:44:07 +02:00
aaec7002d7 Merge branch 'release/1.9.1' into develop 2026-06-27 14:44:07 +02:00
bea1b70af4 Update version 2026-06-27 14:43:57 +02:00
f74f755d87 Add Python cache files to .gitignore 2026-06-27 14:42:56 +02:00
cdee98d7fa Merge branch 'release/1.9.0' into main 2026-06-27 12:17:16 +02:00
4349b7f2a2 Merge branch 'release/1.9.0' into develop 2026-06-27 12:17:16 +02:00
1b19a03971 Update version 2026-06-27 12:17:03 +02:00
32c722a6ae chore: Integrate pi pending-raw command into n8n-pi-wrap 2026-06-27 12:16:05 +02:00
918d632b41 feat: Implement pending-raw.sh to identify changed sources 2026-06-27 12:16:05 +02:00
0ff98e1ebd feat: Enhance ingest to track raw source path and SHA256 hash 2026-06-27 12:16:05 +02:00
e62ad0c831 feat: Add slug.sh --raw for deterministic raw file slugging 2026-06-27 12:16:05 +02:00
eeb2c6d48a Merge branch 'release/1.8.1' into main 2026-06-25 17:38:05 +02:00
64125d91b4 Merge branch 'release/1.8.1' into develop 2026-06-25 17:38:05 +02:00
5fb6a09a96 Update version 2026-06-25 17:37:44 +02:00
e33f4653f1 test: improve hermetic git repository setup in helpers 2026-06-25 17:36:56 +02:00
79f0ef9ac6 refactor(tests): improve formatting of private wiki instructions 2026-06-25 17:36:56 +02:00
69c189955b Merge branch 'release/1.8.0' into main 2026-06-25 13:09:21 +02:00
efc86e11a4 Merge branch 'release/1.8.0' into develop 2026-06-25 13:09:21 +02:00
1cb3da41c3 Update version 2026-06-25 13:09:10 +02:00
940eb49a9e refactor: Standardize n8n workflow file naming to kebab-case 2026-06-25 13:08:29 +02:00
13b6d47574 feat(n8n): Add scheduled Genome raw file committer 2026-06-25 12:59:34 +02:00
2e557ad48f feat(n8n): Implement automated Genome ingest workflow 2026-06-25 12:59:34 +02:00
4462d18866 feat(n8n): Add manual Genome ingest workflow (scratch) 2026-06-25 12:59:34 +02:00
c77a2b02b9 deploy: nexus: Add Syncthing folder marker for raw vault 2026-06-25 12:55:45 +02:00
5339a7018b Merge branch 'release/1.7.0' into main 2026-06-21 19:25:20 +02:00
bcfb618869 Merge branch 'release/1.7.0' into develop 2026-06-21 19:25:20 +02:00
52db07c9b1 Update version 2026-06-21 19:25:08 +02:00
23ede343df deploy/n8n: Improve robustness of diff base resolution for ingestion 2026-06-21 19:24:28 +02:00
a3de9f673f deploy/nexus: Refactor raw commit push strategy for robustness 2026-06-21 19:24:28 +02:00
813961544e Merge branch 'release/1.6.3' into main 2026-06-21 15:53:17 +02:00
d26fdc857a Merge branch 'release/1.6.3' into develop 2026-06-21 15:53:17 +02:00
0a98e5f2ba Update version 2026-06-21 15:53:04 +02:00
9ec3d9d785 feat: Add 'pi changed-raw' command to list changed raw files 2026-06-21 15:52:22 +02:00
0912ef457a Merge branch 'release/1.6.2' into main 2026-06-21 15:35:29 +02:00
05d7b99807 Merge branch 'release/1.6.2' into develop 2026-06-21 15:35:29 +02:00
acbfc8a715 Update version 2026-06-21 15:35:19 +02:00
29d9511e13 feat(deploy): Add git clean -fd to n8n-pi-wrap start command 2026-06-21 15:34:41 +02:00
d37c2d51c2 Merge branch 'release/1.6.1' into main 2026-06-21 14:34:30 +02:00
e711413257 Merge branch 'release/1.6.1' into develop 2026-06-21 14:34:30 +02:00
ec851e3caa Update version 2026-06-21 14:34:21 +02:00
9eeb340de4 Merge branch 'feat/vm101-ingest-wrapper' 2026-06-21 14:33:09 +02:00
n8n-bot
ea5bbe68b0 feat: Adds vm101 ingest wrapper and deploy tooling 2026-06-21 14:32:50 +02:00
cb75558724 Merge branch 'release/1.6.0' into main 2026-06-21 00:43:43 +02:00
b76e962fd1 Update version 2026-06-21 00:43:31 +02:00
7570613289 cleanup: Remove redundant comment and blank line 2026-06-21 00:42:42 +02:00
418ca57dc3 refactor: Translate script comments from Italian to English 2026-06-21 00:42:33 +02:00
49be5ada89 fix: Correctly close FORGEJO_WEB_BASE variable definition and clarify purpose 2026-06-21 00:42:27 +02:00
d44851b25a Merge branch 'release/1.5.1' into main 2026-06-21 00:06:05 +02:00
5e8b72a04f Merge branch 'release/1.5.1' into develop 2026-06-21 00:06:05 +02:00
a23b679a10 Update version 2026-06-21 00:05:54 +02:00
726a1e2ed4 feat: Enhance output with detailed JSON including file URLs 2026-06-21 00:05:06 +02:00
e1a00d2db7 feat: Add configurable Forgejo web base URL 2026-06-21 00:04:57 +02:00
fb96578987 refactor: Exclude Syncthing special files and streamline author logic 2026-06-21 00:04:52 +02:00
80fa4c8eda chore: Improve script documentation and initial comments 2026-06-21 00:04:47 +02:00
9d31ba69d8 Merge branch 'release/1.5.0' into main 2026-06-20 22:25:11 +02:00
a1de7ad954 Merge branch 'release/1.5.0' into develop 2026-06-20 22:25:11 +02:00
3d145574f0 Version update 2026-06-20 22:24:55 +02:00
a3acabb88f feat: Implement 'genome-raw-commit' automation script 2026-06-20 22:24:01 +02:00
add1cea732 feat: Implement 'ensure-genome-vault' management script 2026-06-20 22:24:01 +02:00
a9c5d56136 feat: Add genome Git ASKPASS helper 2026-06-20 22:24:01 +02:00
f5b7367f75 docs: Add Nexus deployment README 2026-06-20 22:24:01 +02:00
82c849c541 Merge branch 'release/1.4.0' into main 2026-06-19 11:28:43 +02:00
63a8dacb74 Merge branch 'release/1.4.0' into develop 2026-06-19 11:28:43 +02:00
6345507293 Update version 2026-06-19 11:28:31 +02:00
dfa1862ddf fix(ingest): Ollama call leggibile sugli errori + toggle INGEST_THINK; script eseguibili + permissions.bats 2026-06-19 11:21:39 +02:00
4ae1a3de5f Merge branch 'release/1.3.1' into main 2026-06-19 05:53:48 +02:00
a18a04a38c Merge branch 'release/1.3.1' into develop 2026-06-19 05:53:48 +02:00
b0de520f9d Update version 2026-06-19 05:53:36 +02:00
e08248a15c docs: Add status note for planned cross-genome references skill 2026-06-19 05:49:41 +02:00
1788c40ad1 docs: Detail duplicate-slug advisory in AI ingest linting process 2026-06-19 05:49:41 +02:00
6615d9b1d6 docs: Clarify genome naming convention and registry setup in README 2026-06-19 05:49:41 +02:00
5491e807e0 docs: Clarify ingest pipeline roles and automation 2026-06-19 05:47:26 +02:00
593819451e docs: Update tests README for duplicate slug advisory 2026-06-19 05:45:06 +02:00
8e4f0386c6 test: Add comprehensive tests for duplicate slug advisory and helpers 2026-06-19 05:45:06 +02:00
a843c30a9e feat: Integrate duplicate slug advisory into ingest linting workflow 2026-06-19 05:45:06 +02:00
f703498fd9 feat: Add Levenshtein distance, string similarity, and duplicate slug advisory 2026-06-19 05:45:06 +02:00
b808f0fc8f refactor: Update script header documentation and remove outdated comments 2026-06-19 05:41:53 +02:00
91e443ad16 feat: Enhance AI model prompt and schema for richer content and de-duplication 2026-06-19 05:41:53 +02:00
cdab1e089e feat: Implement shared page generation helpers and frontmatter title support 2026-06-19 05:41:53 +02:00
4b99b0acd2 Merge branch 'release/1.3.0' into main 2026-06-18 15:28:19 +02:00
ed63895fea Merge branch 'release/1.3.0' into develop 2026-06-18 15:28:19 +02:00
b7b5da0c3b Update version 2026-06-18 15:28:05 +02:00
e396bc93e2 refactor(ingest): Update run-ingest.sh for new semantic pipeline 2026-06-18 15:27:02 +02:00
fdd7e1e92b feat(ingest): Implement 'light' semantic ingest with ingest-semantic.py 2026-06-18 15:27:02 +02:00
15fc829e46 Merge branch 'release/1.2.5' into main 2026-06-12 17:39:41 +02:00
d207a0fc91 Merge branch 'release/1.2.5' into develop 2026-06-12 17:39:41 +02:00
2c2318936d Update version 2026-06-12 17:38:57 +02:00
f887319450 Merge branch 'release/1.2.5' into main 2026-06-12 17:37:08 +02:00
80c5cc753f chore: Prevent empty commits when registering submodules 2026-06-12 17:36:41 +02:00
d79177fd3f Merge branch 'release/1.2.4' into main 2026-06-12 16:18:16 +02:00
48c107e59c Merge branch 'release/1.2.4' into develop 2026-06-12 16:18:16 +02:00
5792809333 Update version 2026-06-12 16:18:03 +02:00
a0a12cfa71 refactor: Reorder gcrypt key export and instructions 2026-06-12 16:17:23 +02:00
782c7cebf4 Merge branch 'release/1.2.3' into main 2026-06-12 16:07:38 +02:00
8f2e7da157 Merge branch 'release/1.2.3' into develop 2026-06-12 16:07:38 +02:00
77d16586ab Update version 2026-06-12 16:07:31 +02:00
009bd83977 fix(pre-commit): Enhance git-crypt check and simplify error output 2026-06-12 16:06:55 +02:00
58147ae99c Merge branch 'release/1.2.2' into main 2026-06-12 12:17:21 +02:00
7dc97bd349 Merge branch 'release/1.2.2' into develop 2026-06-12 12:17:21 +02:00
3e8af163c0 Update version 2026-06-12 12:17:11 +02:00
eeafddbf1a feat: Automatically register new genomes as submodules 2026-06-12 12:16:23 +02:00
813924460d feat: Improve pre-commit hook installation robustness 2026-06-12 12:16:23 +02:00
c81b7e4185 Merge branch 'release/1.2.1' into main 2026-06-12 11:47:26 +02:00
ec7d3b89cd Merge branch 'release/1.2.1' into develop 2026-06-12 11:47:26 +02:00
ca1943471f Update version 2026-06-12 11:47:15 +02:00
261fdc56bf feat(setup-genomes): Enable auto_init when creating genome repos 2026-06-12 11:46:35 +02:00
6ef9269df1 feat(forgejo): Add auto_init parameter to provider_create_repo 2026-06-12 11:46:35 +02:00
d5c155b82f Merge branch 'release/1.2.0' into main 2026-06-12 11:04:55 +02:00
e169c09015 Merge branch 'release/1.2.0' into develop 2026-06-12 11:04:54 +02:00
f24436f583 Update version 2026-06-12 11:04:42 +02:00
fa7b3eed82 chore: General cleanup and minor configuration adjustments 2026-06-12 10:49:10 +02:00
1717752ab3 docs: Update cross-genome linking strategy documentation 2026-06-12 10:49:10 +02:00
6558ee486a feat: Add 'cross_source' property to genome registry 2026-06-12 10:49:10 +02:00
b48c99b0ff feat: Standardize WORK_DIR and master repository interactions 2026-06-12 10:49:10 +02:00
d5430928b3 Merge branch 'release/1.1.6' into main 2026-06-11 10:25:21 +02:00
1536262acf Merge branch 'release/1.1.6' into develop 2026-06-11 10:25:21 +02:00
2f3173e178 Merge branch 'release/1.1.5' into main 2026-06-09 19:45:31 +02:00
5ee87f4187 Merge branch 'release/1.1.4' into main 2026-06-09 18:19:43 +02:00
cfd684fded Merge branch 'release/1.1.3' into main 2026-06-09 12:37:58 +02:00
7c5f50243b Merge branch 'release/1.1.2' into main 2026-06-05 15:37:27 +02:00
bcba29fc76 Merge branch 'release/1.1.1' into main 2026-06-05 12:21:20 +02:00
66346f5f4e Merge branch 'release/1.0.0' into main 2026-05-11 18:58:07 +02:00
e1605124dc Merge branch 'release/0.3.0' into main 2026-05-10 22:15:01 +02:00
509ea78d66 Merge branch 'release/0.2.0' into main 2026-05-10 11:29:01 +02:00
fedda12919 Merge branch 'release/0.1.0' into main 2026-05-08 21:11:38 +02:00
58 changed files with 4414 additions and 215 deletions

8
.gitignore vendored
View file

@ -1,4 +1,12 @@
# VS Code — only shared workspace settings # VS Code — only shared workspace settings
.vscode/* .vscode/*
!.vscode/
!.vscode/settings.json !.vscode/settings.json
!.vscode/extensions.json !.vscode/extensions.json
# framework
/master-knowledge-genome/
/keys/
*.key
__pycache__/
*.pyc

View file

@ -1,5 +1,5 @@
# ============================================================================= # =============================================================================
# Knowledge Genome - Makefile v. 1.1.6 # Knowledge Genome - Makefile v. 1.13.0
# Orchestrates the setup and management of the knowledge base. # Orchestrates the setup and management of the knowledge base.
# ============================================================================= # =============================================================================
@ -11,7 +11,7 @@ export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
help: help:
@echo "Available commands:" @echo "Available commands:"
@echo " make setup - Full system initialization" @echo " make setup - Full system initialization"
@echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo]" @echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo] [CROSS=yes|no]"
@echo " make status - Check submodule and encryption status" @echo " make status - Check submodule and encryption status"
@echo " make lint - Verify schema, privacy flags, and metadata" @echo " make lint - Verify schema, privacy flags, and metadata"
@echo " make verify-structure - Report directory drift across all genomes" @echo " make verify-structure - Report directory drift across all genomes"
@ -30,16 +30,17 @@ setup:
add-genome: add-genome:
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \ @if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
echo "Error: NAME and DESC are required."; \ echo "Error: NAME and DESC are required."; \
echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo]"; \ echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo] [CROSS=yes|no]"; \
exit 1; \ exit 1; \
fi fi
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)" @bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)" "$(or $(CROSS),no)"
status: status:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "--- Master Status ---" @echo "--- Master Status ---"
@git submodule status @cd $(MASTER_REPO) && git submodule status
@echo "--- Encryption Status (per genome) ---" @echo "--- Encryption Status (per genome) ---"
@git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true' @cd $(MASTER_REPO) && git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true'
verify-structure: verify-structure:
@bash scripts/verify-genomes.sh @bash scripts/verify-genomes.sh
@ -62,14 +63,16 @@ doctor:
@echo "System ready." @echo "System ready."
sync: sync:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "Syncing submodules..." @echo "Syncing submodules..."
@git submodule update --init --recursive @cd $(MASTER_REPO) && git submodule update --init --recursive
@echo "--- Unpushed commits per genome ---" @echo "--- Unpushed commits per genome ---"
@git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true' @cd $(MASTER_REPO) && git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true'
lock: lock:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "Locking master repository..." @echo "Locking master repository..."
@git-crypt lock 2>/dev/null || true @cd $(MASTER_REPO) && git-crypt lock 2>/dev/null || true
@echo "Locking all submodules..." @echo "Locking all submodules..."
@git submodule foreach 'git-crypt lock 2>/dev/null || true' @cd $(MASTER_REPO) && git submodule foreach 'git-crypt lock 2>/dev/null || true'
@echo "All genomes securely locked." @echo "All genomes securely locked."

View file

@ -77,6 +77,11 @@ master-knowledge-genome/ ← Root orchestrator (submodule registry)
└── AGENTS.md ← Global coordination schema (cross-genome rules) └── AGENTS.md ← Global coordination schema (cross-genome rules)
``` ```
> The genome names above (`genome-dev`, `genome-finance`, `genome-homelab`) are
> **illustrative** — they show the kind of multi-domain layout this orchestrator targets.
> The shipped `registry.sh` defines a single disposable sandbox, **`genome-test`**; you
> create real genomes yourself with `make add-genome` (see the registry examples below).
Each genome is an independent git repository: Each genome is an independent git repository:
```text ```text
@ -175,6 +180,11 @@ knowledge-genome-orchestrator/ ← This repository (setup tooling)
> The `skills/ingest/` directory is version-controlled here but **deployed** to the AI > The `skills/ingest/` directory is version-controlled here but **deployed** to the AI
> node (vm101) under `~/.pi/agent/skills/ingest`. The agent (`pi`) does only semantic work > node (vm101) under `~/.pi/agent/skills/ingest`. The agent (`pi`) does only semantic work
> and writes a manifest; `run-ingest.sh` does the mechanical steps. See [Workflows → Ingest](#ingest). > and writes a manifest; `run-ingest.sh` does the mechanical steps. See [Workflows → Ingest](#ingest).
>
> `ingest-semantic.py` makes one schema-constrained call to the local model and returns JSON; `run-ingest.sh` then performs the index, log, lint, and PR steps.
> Pipeline: semantic JSON extraction → deterministic wiki conformance + manifest.
>
> Deploy with `cp skills/ingest/* ~/.pi/agent/skills/ingest/` after `make setup`. The files are updated via `git pull` on the laptop and pushed to vm101 over SSH by the n8n flow.
--- ---
@ -330,13 +340,14 @@ WORK_DIR="${HOME}/knowledge-genome-orchestrator"
KEYS_DIR="${WORK_DIR}/keys" KEYS_DIR="${WORK_DIR}/keys"
# Genome registry — format: "name|description|linked_repo" # Genome registry — format: "name|description|linked_repo"
# The third field is OPTIONAL: # The third and fourth fields are OPTIONAL:
# - leave it empty → knowledge-only genome (no linked project) # - leave it empty → knowledge-only genome (no linked project)
# - owner/repo → genome is linked to that project repository (rendered into AGENTS.md) # - owner/repo → genome is linked to that project repository (rendered into AGENTS.md)
# - cross_source → yes|no (default no): whether the cross-genome collector may read this genome as a source
GENOMES=( GENOMES=(
"genome-dev|Web development, TUI, Angular, software architecture|myorg/my-app" "genome-dev|Web development, TUI, Angular, software architecture|myorg/my-app|no"
"genome-finance|Personal finance, investments, market analysis|" "genome-finance|Personal finance, investments, market analysis||no"
"genome-homelab|Infrastructure, network configs, architecture logs|" "genome-homelab|Infrastructure, network configs, architecture logs||no"
) )
``` ```
@ -692,7 +703,9 @@ If a key is lost or compromised:
```bash ```bash
# From the knowledge-genome-orchestrator/ directory # From the knowledge-genome-orchestrator/ directory
source lib/git-crypt.sh source lib/git-crypt.sh
cd ~/knowledge-genome-orchestrator/genome-dev # If gcrypt_rotate_key operates on the CWD: cd into .../master-knowledge-genome/genome-dev
# If it navigates by name instead: cd into .../master-knowledge-genome
cd ~/knowledge-genome-orchestrator/master-knowledge-genome
gcrypt_rotate_key "genome-dev" gcrypt_rotate_key "genome-dev"
``` ```
@ -804,7 +817,10 @@ model must not waste context on:
8. Appends the `INGEST | <slug>` entry to `wiki/log.md` (the model name comes from the 8. Appends the `INGEST | <slug>` entry to `wiki/log.md` (the model name comes from the
orchestrator via `INGEST_MODEL` — the agent cannot reliably know its own tag) orchestrator via `INGEST_MODEL` — the agent cannot reliably know its own tag)
9. Runs scoped lint on exactly the pages touched this run (`scoped-lint.sh`, reusing 9. Runs scoped lint on exactly the pages touched this run (`scoped-lint.sh`, reusing
`lib/lint.sh`) `lib/lint.sh`), including a **duplicate-slug advisory**: a slug created this run that is
highly similar to an entity/concept already in `wiki/index.md` is flagged in the PR so a
human can merge them. It is advisory only — it never fails the lint or blocks the PR
(threshold tunable via `KG_DUP_THRESHOLD`, default 70)
10. Commits **only `wiki/`** on `feat/ai-ingest-<slug>` and opens a PR against the integration 10. Commits **only `wiki/`** on `feat/ai-ingest-<slug>` and opens a PR against the integration
base (`INGEST_BASE`, default `main`); the body matches the `templates/pr-description.md` base (`INGEST_BASE`, default `main`); the body matches the `templates/pr-description.md`
structure (Summary / Pages / Contradictions / Scoped Lint) structure (Summary / Pages / Contradictions / Scoped Lint)
@ -957,6 +973,10 @@ The agent proposes re-validation but does not change `maturity` without new sour
### Cross-genome references ### Cross-genome references
> **Status: planned.** The cross-genome collector and **navigation skill** described in this
> section are specified but **not yet implemented** in this release — only the `ingest` skill
> ships today. What follows documents the intended design and the boundary contract it will honour.
Cross-domain knowledge moves by **pull, never push**: the genome you are working in draws Cross-domain knowledge moves by **pull, never push**: the genome you are working in draws
material _in_; nothing is ever written into another genome. There are **no cross-genome material _in_; nothing is ever written into another genome. There are **no cross-genome
wikilinks** — submodule pointers make relative paths brittle. wikilinks** — submodule pointers make relative paths brittle.
@ -1059,7 +1079,7 @@ grep "^## \[" wiki/log.md | grep "CONFLICT" # All conflicts
grep "^## \[2026-05" wiki/log.md # Entries from a specific month grep "^## \[2026-05" wiki/log.md # Entries from a specific month
``` ```
The orchestrator always injects only `tail -n 20 wiki/log.md` into agent context. ingest-semantic.py receives source text + existing entity/concept names (from index) as prompt context.
The LLM never loads the full log. The LLM never loads the full log.
--- ---
@ -1119,6 +1139,8 @@ Note: `.obsidian/` is in `.gitignore`. Workspace and plugin settings are local
### n8n automation ### n8n automation
Call chain: n8n → SSH → `ingest-semantic.py <genome> <raw>` → `run-ingest.sh <genome>`.
n8n (running on the storage node) can automate the ingest pipeline: n8n (running on the storage node) can automate the ingest pipeline:
1. Forgejo webhook fires on push to a genome's `raw/` directory 1. Forgejo webhook fires on push to a genome's `raw/` directory

View file

@ -0,0 +1,773 @@
{
"name": "Genome: PR review",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-pr-review-23319ab8687b16f10e0f278fb920c112",
"options": {}
},
"id": "58df1ca9-e48e-4834-b231-d97c974cd01b",
"name": "Webhook PR Review",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
2272,
1344
],
"webhookId": "61ff3a5baa304571"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// THE only parser of the review side: parse the directive, VALIDATE, prepare the rework payload.\n// Security: only allow-listed maintainers may drive the gate; destructive directives require a\n// feat/ai-ingest-* branch on the expected base; raw_source is recovered from a machine-readable\n// marker that run-ingest.sh writes into the PR body.\nconst ALLOWED_SENDERS = ['Keru']; // <-- maintainers allowed to issue directives\nconst BASE = 'develop';\n\n// n8n Run Once for Each Item: $json is the current webhook payload\nconst j = $json.body || $json;\nif (!j || typeof j !== 'object') {\n return { directive: 'INVALID', reason: 'malformed webhook payload' };\n}\n\nconst review = j.review || null;\nconst comment = j.comment || null;\nconst pr = j.pull_request || j.issue || null;\n\n// Extract directive text from review content or comment body\nconst body = String(\n (review && review.content) ||\n (comment && comment.body) ||\n ''\n);\nconst sender = String((j.sender && j.sender.login) || 'unknown');\n\n// Match directive at the start of the text (case-insensitive)\nconst m = body.match(/^\\s*(REWORK|RESTART|REVERT\\s+\\d+|SPLIT|REJECT|MERGE)\\s*:?/i);\nif (!m) return { directive: 'NONE' };\n\nconst headTok = m[1].toUpperCase().replace(/\\s+/g, ' ');\nconst directive = headTok.startsWith('REVERT') ? 
'REVERT' : headTok;\nconst feedback = body.slice(m[0].length).trim() || '(nessun dettaglio fornito)';\n\n// Extract PR metadata safely\nconst prNumber = (pr && pr.number) || null;\nconst branch = (pr && pr.head && pr.head.ref) || null;\nconst base = (pr && pr.base && pr.base.ref) || null;\nconst repo = (pr && pr.base && pr.base.repo && pr.base.repo.name) ||\n (j.repository && j.repository.name) || null;\nconst owner = (pr && pr.base && pr.base.repo && pr.base.repo.owner && pr.base.repo.owner.login) ||\n (j.repository && j.repository.owner && j.repository.owner.login) || null;\nconst prBody = (pr && pr.body) || (j.issue && j.issue.body) || '';\n\n// Recover raw_source from machine-readable marker: <!-- kg:raw=path -->\n// Restricted to valid path characters, no spaces, no HTML breaking\nconst rawMatch = prBody.match(/<!--\\s*kg:raw=([^\\s>]+)\\s*-->/);\nconst raw = rawMatch ? rawMatch[1] : null;\n\n// REVERT is reserved for future Step 7 implementation\nif (directive === 'REVERT') {\n return { directive: 'NONE', note: 'REVERT reserved for Step 7' };\n}\n\n// Authorization gate\nif (!ALLOWED_SENDERS.includes(sender)) {\n return {\n directive: 'UNAUTHORIZED',\n attempted: directive,\n sender,\n prNumber,\n owner,\n repo\n };\n}\n\n// Validation rules\nconst okGenome = !!repo && /^[a-z0-9][a-z0-9-]{0,63}$/.test(repo);\nconst okPr = !!prNumber && /^[0-9]+$/.test(String(prNumber));\nconst okBranch = !!branch && /^feat\\/ai-ingest-[a-z0-9-]+$/.test(branch);\nconst okBase = base === BASE;\nconst okRaw = (directive === 'MERGE')\n ? 
true\n : (!!raw && raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw));\n\nif (!okGenome || !okPr || !okBase || (directive !== 'MERGE' && !okBranch) || !okRaw) {\n return {\n directive: 'INVALID',\n attempted: directive,\n prNumber,\n owner,\n repo,\n why: { okGenome, okPr, okBranch, okBase, okRaw }\n };\n}\n\n// Encode feedback for safe transport through SSH/scripts\nconst feedback_b64 = Buffer.from(feedback, 'utf8').toString('base64');\n\nreturn {\n directive,\n prNumber,\n branch,\n base,\n repo,\n owner,\n sender,\n raw,\n feedback,\n feedback_b64\n};"
},
"id": "c668f595-0a28-4bd3-9125-22fee9350d78",
"name": "Parse & validate",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2496,
1344
]
},
{
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "MERGE",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "4960f0868bc54687"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REWORK",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "34002fdd92834d38"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "RESTART",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "d412a74e32ac4f0c"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "SPLIT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "c0810b33fa474ca0"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "531039e699c44cea"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "UNAUTHORIZED",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "cfbd691d2e9a4c2a"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "INVALID",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "251f5b7beea6424a"
}
],
"combinator": "and"
}
}
]
},
"options": {
"fallbackOutput": "none"
}
},
"id": "489736cc-bab6-4664-8087-91b6d9ff31ad",
"name": "Switch",
"type": "n8n-nodes-base.switch",
"typeVersion": 3.4,
"position": [
2736,
1344
]
},
{
"parameters": {
"method": "POST",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}/merge",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"Do\": \"merge\"\n}",
"options": {
"timeout": 15000
}
},
"id": "3440cb8d-ae4c-4523-ae13-ee5667d24252",
"name": "Forgejo Merge PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2976,
1104
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "cc369b5fc3d246a4",
"leftValue": "={{ $('Parse & validate').first().json.branch }}",
"rightValue": "feat/ai-ingest-",
"operator": {
"type": "string",
"operation": "startsWith"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "e6d45fce-83d0-44ca-9fa4-86558fec1a0f",
"name": "Guardia feat/",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2976,
1328
]
},
{
"parameters": {
"method": "PATCH",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"state\": \"closed\"\n}",
"options": {
"timeout": 15000
}
},
"id": "1601f705-c758-4df6-a3bd-e3ac2e202c94",
"name": "Forgejo Close PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3200,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"method": "DELETE",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/branches/{{ encodeURIComponent($('Parse & validate').first().json.branch) }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"options": {
"timeout": 15000
}
},
"id": "c2ff2247-efe1-4809-a435-9973188d61bb",
"name": "Forgejo Delete Branch",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3424,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "55cf6c2a6c7d4d79",
"leftValue": "={{ $('Parse & validate').first().json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "a1dbbc06-555d-4a1d-8fbf-ee75f617e98a",
"name": "E' REJECT?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
3648,
1296
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "7fc3e648-4712-4eef-a6f3-12c8805ade1f",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3648,
1168
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $('Parse & validate').first().json.repo }}",
"raw": "={{ $('Parse & validate').first().json.raw }}",
"mode": "rework",
"feedback_b64": "={{ $('Parse & validate').first().json.feedback_b64 }}",
"reason": "={{ $('Parse & validate').first().json.directive }}",
"prevPr": "={{ String($('Parse & validate').first().json.prNumber || '') }}"
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "9704c050-5c63-49fd-a26d-efbae9d92175",
"name": "Run one ingest (rework)",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3856,
1168
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Builds the ntfy notification sent after a PR was merged (MERGE) or closed (REJECT).\n// The preceding HTTP node replaced $json with the API response, so the review context is\n// read back from the 'Parse & validate' node (single review -> .first() is safe).\nconst p = $('Parse & validate').first().json || {};\nconst repo = p.repo || 'unknown';\nconst owner = p.owner || 'unknown';\nconst prNumber = p.prNumber || '?';\nconst base = p.base || 'develop';\nconst branch = p.branch || 'unknown';\nconst sender = p.sender || 'unknown';\nconst directive = p.directive || 'UNKNOWN';\nconst feedback = String(p.feedback || '').trim();\n\n// Links are built from real values only: the 'unknown' placeholders must never reach a URL.\nconst repoUrl = (owner !== 'unknown' && repo !== 'unknown')\n  ? `https://git.keruhomelab.com/${owner}/${repo}`\n  : '';\nconst prUrl = (repoUrl && prNumber !== '?')\n  ? `${repoUrl}/pulls/${prNumber}`\n  : '';\n\nlet n;\nif (directive === 'MERGE') {\n  n = {\n    topic: 'genome-ingest',\n    title: `${repo} · PR #${prNumber} mergiata`,\n    priority: 'default',\n    tags: 'twisted_rightwards_arrows',\n    click: prUrl,\n    // A view action without a URL is malformed, so send no action when there is no PR link.\n    actions: prUrl ? `view, Vedi la PR, ${prUrl}` : '',\n    body: `PR #${prNumber} mergiata su \\`${base}\\` da **${sender}**.`\n  };\n} else {\n  // Quote every feedback line; with no feedback the quote block is left out entirely.\n  const quoted = feedback\n    ? '\\n' + feedback.split('\\n').map((l) => `> ${l}`).join('\\n')\n    : '';\n  n = {\n    topic: 'genome-ingest',\n    title: `${repo} · PR #${prNumber} chiusa`,\n    priority: 'default',\n    tags: 'wastebasket',\n    click: repoUrl,\n    actions: '',\n    body: `**REJECT** di **${sender}**: PR #${prNumber} chiusa e branch \\`${branch}\\` rimosso. Nessun nuovo tentativo.${quoted}`\n  };\n}\n\nreturn n;"
},
"id": "1ce634fd-d402-4a84-9ba1-04673ddffce9",
"name": "Build ntfy action",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3856,
1344
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Notification builder for the outcomes where nothing is executed: unauthorized sender,\n// invalid directive, or a destructive directive stopped by the feat/ guard.\n// The incoming item is still the parser output, so all context is read from $json.\nconst src = $json || {};\nconst directive = src.directive || 'UNKNOWN';\nconst attempted = src.attempted || directive;\nconst sender = src.sender || 'unknown';\nconst prNumber = src.prNumber || '?';\nconst branch = src.branch || 'unknown';\nconst owner = src.owner || '';\nconst repo = src.repo || '';\n\nconst repoUrl = (owner && repo) ? `https://git.keruhomelab.com/${owner}/${repo}` : '';\n\n// Topic, click target and (empty) actions are shared; only these four fields vary.\nconst notify = (title, priority, tags, body) => ({\n  topic: 'genome-ingest',\n  title,\n  priority,\n  tags,\n  click: repoUrl,\n  actions: '',\n  body\n});\n\nswitch (directive) {\n  case 'UNAUTHORIZED':\n    return notify(\n      'Sicurezza · direttiva non autorizzata',\n      'high',\n      'no_entry',\n      `**${sender}** ha tentato \\`${attempted}\\` su PR #${prNumber}, ma non è tra i maintainer autorizzati. **Nessuna azione** eseguita.`\n    );\n  case 'INVALID':\n    return notify(\n      'Direttiva non applicata',\n      'low',\n      'information_source',\n      `\\`${attempted}\\` su PR #${prNumber} ignorata: precondizioni non soddisfatte (branch / base / marker raw).`\n    );\n  default:\n    // False branch of the feat/ guard: destructive action on a non-feat/ai-ingest-* branch.\n    return notify(\n      'Sicurezza · branch protetto',\n      'high',\n      'no_entry',\n      `Rifiutata azione distruttiva (\\`${attempted}\\`) sul branch \\`${branch}\\`: non è un \\`feat/ai-ingest-*\\`. **Nessuna modifica.**`\n    );\n}"
},
"id": "32b16592-5126-4cc2-a3f2-d1bda58ac724",
"name": "Build ntfy sicurezza",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3200,
1536
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "4d45b486-de42-4c7f-be21-b5bfbc05fd44",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
4080,
1424
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook PR Review": {
"main": [
[
{
"node": "Parse & validate",
"type": "main",
"index": 0
}
]
]
},
"Parse & validate": {
"main": [
[
{
"node": "Switch",
"type": "main",
"index": 0
}
]
]
},
"Switch": {
"main": [
[
{
"node": "Forgejo Merge PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Merge PR": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
]
]
},
"Guardia feat/": {
"main": [
[
{
"node": "Forgejo Close PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Close PR": {
"main": [
[
{
"node": "Forgejo Delete Branch",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Delete Branch": {
"main": [
[
{
"node": "E' REJECT?",
"type": "main",
"index": 0
}
]
]
},
"E' REJECT?": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "Run one ingest (rework)",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy action": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy sicurezza": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "22998a54-cd9a-4b57-9c80-df97085a997c",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "iho7kFQsXbGIxG7P",
"tags": []
}

View file

@ -0,0 +1,170 @@
{
"name": "Genome: ingest MANUALE (scratch)",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
0,
0
],
"id": "2101e704-6275-419d-9963-29a142e5811c",
"name": "Esegui manualmente"
},
{
"parameters": {
"authentication": "privateKey",
"command": "ssh vm101 'pi ingest genome-test raw/articles/il-grano-saraceno.md'"
},
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
224,
0
],
"id": "8ade2def-2d53-4860-88a5-2ca734c6e54a",
"name": "SSH: pi ingest (manuale)",
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Picks the LAST stdout line that starts with '{' and parses it as the run-ingest.sh JSON result.\n// (Original note: the output also carries a 'run_id=' prefix - presumably on an earlier line.)\n// Failures are returned as a { status: 'error', ... } item rather than thrown.\n// stdout is coerced to a string and split on LF or CRLF so odd SSH output cannot make this throw.\nconst out = String(($json || {}).stdout || '').trim();\nconst line = out.split(/\\r?\\n/).filter((l) => l.trim().startsWith('{')).pop();\nif (!line) return { status: 'error', reason: 'nessuna riga JSON run-ingest', raw: out };\ntry {\n  return JSON.parse(line);\n} catch (e) {\n  return { status: 'error', reason: 'JSON non parsabile', raw: line };\n}"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
0
],
"id": "d84cdeaf-612a-454c-8b4d-31824ae6d71e",
"name": "Parse ingest"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Maps the parsed run-ingest result ($json) to an ntfy notification (title, priority, tags, body).\n// status 'ok' -> PR opened; 'pr_failed' -> PR not opened; anything else -> error.\n// Missing or non-string fields fall back to placeholders so the text never shows 'undefined'\n// and the string helpers never throw.\nconst d = $json || {};\nconst slug = String(d.slug || 'unknown');\n\nlet n;\nif (d.status === 'ok') {\n  const lint = d.lint_clean ? 'clean' : 'KO';\n  const conflict = d.conflict ? ', CONFLITTO' : '';\n  n = {\n    title: `Ingest ${slug}: PR aperta`,\n    priority: 'default',\n    tags: 'inbox_tray',\n    body: `\\u2705 ${slug}: PR aperta (lint ${lint}${conflict})\\n\\n\\ud83d\\udd17 ${d.pr_url || '(URL non disponibile)'}`\n  };\n} else if (d.status === 'pr_failed') {\n  // Only the first line of the detail is shown.\n  const firstLine = String(d.detail || '').split('\\n')[0];\n  n = {\n    title: `Ingest ${slug}: PR FALLITA`,\n    priority: 'high',\n    tags: 'warning',\n    body: `\\u26a0\\ufe0f ${slug}: semantic/lint ok ma PR non aperta.\\n\\n${firstLine}`\n  };\n} else {\n  n = {\n    title: 'Ingest: ERRORE',\n    priority: 'high',\n    tags: 'rotating_light',\n    body: `\\u274c ${d.reason || 'errore'}\\n\\n${String(d.raw || '').slice(0, 300)}`\n  };\n}\nreturn n;"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
672,
0
],
"id": "eadd9275-b38c-416b-b15e-0999f70a05fb",
"name": "Build ntfy"
},
{
"parameters": {
"method": "POST",
"url": "http://ntfy/homelab-genome",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
880,
0
],
"id": "63ab577b-893a-4b3d-8f13-b377be778099",
"name": "ntfy: send notification",
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Esegui manualmente": {
"main": [
[
{
"node": "SSH: pi ingest (manuale)",
"type": "main",
"index": 0
}
]
]
},
"SSH: pi ingest (manuale)": {
"main": [
[
{
"node": "Parse ingest",
"type": "main",
"index": 0
}
]
]
},
"Parse ingest": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send notification",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "df06ce3b-1ea8-43be-91ff-02c77972cfe2",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "RNoSaRLYG9vcMn6M",
"tags": []
}

View file

@ -0,0 +1,419 @@
{
"name": "Genome: ingest",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push",
"options": {}
},
"id": "8c44b478-1a95-4c3b-8ac1-d7c57e228414",
"name": "Webhook",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1520,
1728
],
"webhookId": "cf215f5d31e04dd2"
},
{
"parameters": {
"jsCode": "// Push gate: let the flow continue ONLY for pushes to develop that touch something under raw/.\n// Returning [] ends the flow here, so wiki-only pushes (e.g. a merged ingest PR) go no further.\n// The actual list of work is not taken from this payload; only a count is passed on.\nconst payload = $input.first().json;\nconst push = payload.body || payload;\n\n// Branch filter\nif (String(push.ref || '') !== 'refs/heads/develop') return [];\n\n// Genome name validation (lowercase alphanumerics + hyphen, 1-64 chars)\nconst repoName = (push.repository && push.repository.name) || '';\nconst genome = String(repoName).toLowerCase().trim();\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Count every added / modified / removed path under raw/, skipping malformed entries.\nconst isRawPath = (p) => typeof p === 'string' && p.startsWith('raw/');\nconst commitList = Array.isArray(push.commits) ? push.commits : [];\nconst touchedCount = commitList\n  .filter((c) => c && typeof c === 'object')\n  .flatMap((c) => ['added', 'modified', 'removed'].map((k) => c[k]))\n  .filter((list) => Array.isArray(list))\n  .reduce((sum, list) => sum + list.filter(isRawPath).length, 0);\n\n// Gate: stop if nothing under raw/ was touched\nif (touchedCount === 0) return [];\n\nreturn [{ json: { genome, touchedCount } }];"
},
"id": "604787c7-4e83-468e-9a98-3ac084203040",
"name": "Gate push",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1744,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "f93073a3-7753-4ce1-9ef1-2a0c16386543",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1952,
1728
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi pending-raw {{ $('Gate push').first().json.genome }}'"
},
"id": "876dbdaf-3620-4c2c-a65b-336f0b11198c",
"name": "SSH: pending-raw",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2176,
1728
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Turns the pending-raw stdout (a JSON object) into one item per pending raw file.\n//  - safe path            -> { _kind: 'ingest', ... } with the fields handed to run-one-ingest\n//  - unsafe / non-string  -> { _kind: 'badname', ... } with a suggested replacement name\n//  - unparsable output    -> a single { _kind: 'error', ... } item\n// An empty file list returns [] and stops the flow.\nconst out = String($input.first().json.stdout || '').trim();\nconst fail = (reason) => [{ json: { _kind: 'error', reason, raw: out.substring(0, 500) } }];\n\nlet d;\ntry {\n  d = JSON.parse(out);\n} catch (e) {\n  return fail('pending-raw non parsabile');\n}\nif (!d || typeof d !== 'object') return fail('pending-raw non è un oggetto JSON');\n\nconst files = Array.isArray(d.files) ? d.files : [];\nif (files.length === 0) return [];\n\n// path -> reason, taken from the optional detail array. A Map (not a plain object) so a path\n// such as 'constructor' can never resolve to an inherited Object.prototype member.\nconst why = new Map();\nfor (const it of (Array.isArray(d.detail) ? d.detail : [])) {\n  if (it && typeof it.path === 'string' && typeof it.reason === 'string') {\n    why.set(it.path, it.reason);\n  }\n}\n\nconst SAFE = /^[A-Za-z0-9._\\/-]+$/;\n// Suggested name: every run of disallowed characters becomes '-', then lowercased.\nconst hintFor = (name) => name.replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid';\n\nconst items = [];\nfor (const entry of files) {\n  const raw = String(entry);\n  if (typeof entry === 'string' && SAFE.test(raw)) {\n    items.push({ json: { _kind: 'ingest', genome: d.genome, raw,\n      mode: 'ingest', feedback_b64: '', reason: why.get(raw) || 'new', prevPr: '' } });\n  } else {\n    items.push({ json: { _kind: 'badname', genome: d.genome, raw, hint: hintFor(raw) } });\n  }\n}\nreturn items;"
},
"id": "f5bbbed3-222e-4129-a764-7cf47d69c5ce",
"name": "Split raw files",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2400,
1728
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "cbacf5d98d594ba5",
"leftValue": "={{ $json._kind }}",
"rightValue": "ingest",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "5398e2c4-c7ca-4ca4-a2d7-e75077453b7c",
"name": "Nome valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2624,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $json.genome }}",
"raw": "={{ $json.raw }}",
"mode": "ingest",
"feedback_b64": "",
"reason": "={{ $json.reason }}",
"prevPr": ""
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "0f274662-62bb-448b-ae4b-47e4bbcfd35a",
"name": "Run one ingest",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
2832,
1616
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Builds the ntfy notification for items rejected by the 'Nome valido?' check.\n// Run Once for Each Item: $json is either a 'badname' item (unsafe file name) or an\n// 'error' item (the pending-raw output could not be parsed).\nconst d = $json || {};\nconst genome = d.genome || 'unknown';\n\nif (d._kind === 'error') {\n  // Not a naming problem: report the failure instead of asking for a rename.\n  return {\n    topic: 'genome-ingest',\n    title: 'Ingest · pending-raw non valido',\n    priority: 'high',\n    tags: 'warning',\n    click: '',\n    actions: '',\n    body: `${d.reason || 'errore sconosciuto'}\\n\\n${String(d.raw || '').slice(0, 300)}`\n  };\n}\n\n// Backslash escapes are not honoured inside a Markdown code span, so a backtick in the value\n// would end the span early; show it as an apostrophe instead.\nconst inCode = (v) => String(v).replace(/`/g, '\\'');\nconst raw = inCode(d.raw || 'unknown');\nconst hint = inCode(d.hint || 'unknown');\n\nreturn {\n  topic: 'genome-ingest',\n  title: `${genome} · file da rinominare`,\n  priority: 'high',\n  tags: 'warning',\n  click: '',\n  actions: '',\n  body: `Il file \\`${raw}\\` ha spazi o caratteri non ammessi e **non** è stato ingerito.\\nRinominalo in: \\`${hint}\\``\n};"
},
"id": "0f785bcd-cdc6-4dac-9ced-1c5cfa3453dc",
"name": "Build ntfy badname",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2832,
1840
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "9cd2bde3-6846-4855-ad01-e3a4cdbce208",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3056,
1840
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook": {
"main": [
[
{
"node": "Gate push",
"type": "main",
"index": 0
}
]
]
},
"Gate push": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: pending-raw",
"type": "main",
"index": 0
}
]
]
},
"SSH: pending-raw": {
"main": [
[
{
"node": "Split raw files",
"type": "main",
"index": 0
}
]
]
},
"Split raw files": {
"main": [
[
{
"node": "Nome valido?",
"type": "main",
"index": 0
}
]
]
},
"Nome valido?": {
"main": [
[
{
"node": "Run one ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy badname",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy badname": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "63863925-606f-4200-824c-52f1919f2bb1",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "mUJUuQxcDiiPWcUE",
"tags": []
}

View file

@ -0,0 +1,128 @@
{
"name": "Genome: on-error",
"nodes": [
{
"parameters": {},
"id": "f715ed51-95e6-475f-8aa5-d0df531cc7cf",
"name": "Error Trigger",
"type": "n8n-nodes-base.errorTrigger",
"typeVersion": 1,
"position": [
688,
-32
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Global error handler: set this workflow as the 'Error Workflow' in each genome workflow's\n// Settings; a node failure there (SSH down, Forgejo 4xx/5xx, etc.) then produces one notification.\n// Run Once for Each Item: $json is the error trigger payload.\nconst payload = $json || {};\nconst e = payload.execution || {};\nconst w = payload.workflow || {};\n\n// The error text may be under either key; fall back to a generic message.\nconst rawMsg = (e.error && (e.error.message || e.error.description)) || 'errore sconosciuto';\nconst msg = String(rawMsg).trim();\n\nconst lastNode = e.lastNodeExecuted ? ` (nodo: ${e.lastNodeExecuted})` : '';\nconst workflowName = w.name || 'n8n';\nconst executionUrl = e.url || '';\n\n// Backslash-escape backticks so the message cannot open a Markdown code span in the body.\nconst msgEsc = msg.replace(/`/g, '\\\\`');\n\nreturn {\n  topic: 'genome-ingest',\n  title: `Workflow KO · ${workflowName}`,\n  priority: 'high',\n  tags: 'rotating_light',\n  click: executionUrl,\n  // Only attach the view action when there is an execution URL to open.\n  actions: executionUrl ? `view, Apri l'esecuzione, ${executionUrl}` : '',\n  body: `**${workflowName}** è fallito${lastNode}.\\n\\n${msgEsc}`\n};"
},
"id": "dd39bc0f-918a-4645-8f04-540ac9089311",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
928,
-32
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "a9ee90f3-d7fe-445d-96af-12caef46473f",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1152,
-32
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Error Trigger": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "036161c9-c934-474e-9b4f-634259f2a866",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "7Vws3gCX3QnjM3oD",
"tags": []
}

View file

@ -0,0 +1,326 @@
{
"name": "Genome: prune",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push-prune",
"options": {}
},
"id": "d31388b9-c6d6-4f28-9a6c-b381922bf5e0",
"name": "Webhook prune",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1232,
-64
],
"webhookId": "d6ac11900058434e"
},
{
"parameters": {
"jsCode": "// Gate: proceed ONLY on develop pushes that REMOVED at least one file under raw/.\n// Additions/modifications are handled by the ingest flow; this flow reacts to deletions only.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository?.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect removed paths safely\nconst removed = [];\nfor (const c of (b.commits || [])) {\n if (!c || !Array.isArray(c.removed)) continue;\n for (const p of c.removed) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n removed.push(p);\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was removed\nif (removed.length === 0) return [];\n\nreturn [{ json: { genome, removedCount: removed.length } }];"
},
"id": "84848a31-d099-459e-bd03-67abc2cf2b77",
"name": "Gate prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1456,
-64
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "175e4191-eb1b-4e5d-8d82-c39205753152",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1680,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi orphan-wiki {{ $('Gate prune').first().json.genome }}'"
},
"id": "598f20f8-d668-48da-90e3-1bfada3ace92",
"name": "SSH: orphan-wiki",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
1904,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Gate: proceed to prune only if orphan-wiki actually found orphans.\n// run-prune re-derives independently anyway (no detected-vs-pruned race);\n// this gate just avoids taking the lock for nothing.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\n\ntry {\n  d = JSON.parse(out);\n} catch (e) {\n  // Malformed JSON from orphan-wiki: fail the node so the run stops here and the workflow's\n  // Error Workflow sends the alert. Returning an item instead would flow on into 'SSH: prune'\n  // with no genome.\n  throw new Error(`orphan-wiki: JSON non parsabile: ${out.substring(0, 500)}`);\n}\n\n// Strict validation: d must be object with numeric count > 0\nif (!d || typeof d !== 'object' || typeof d.count !== 'number' || d.count <= 0) {\n  return []; // 0 orphans or missing count -> stop silently\n}\n\n// Pass on the genome that Gate prune already charset-validated (it is interpolated into the\n// remote shell command by 'SSH: prune'), not the unvalidated value echoed back in stdout.\nreturn [{ json: { genome: $('Gate prune').first().json.genome, count: d.count } }];"
},
"id": "3b644d61-26d8-4024-baed-bcb4ad169a6a",
"name": "Orfani?",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2112,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi prune {{ $json.genome }}'"
},
"id": "a8cae2c2-6f2f-4ef6-add9-287195aa84b5",
"name": "SSH: prune",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2336,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Extract the last JSON line from SSH stdout (the command may print logs before/after).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout' };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream nodes\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n count: r.count,\n pr_url: r.pr_url,\n genome: r.genome,\n _raw: line?.substring(0, 500)\n};"
},
"id": "da1ab42c-32e1-4c4d-82a1-925fcee1a098",
"name": "Parse prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2560,
-64
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Build ntfy notification for genome pruning.\n// Run Once for Each Item: $json is the parsed prune result.\nconst d = $json;\nconst genome = d.genome || 'unknown';\n\nlet n;\nif (d.status === 'ok') {\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 potatura ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: 'default',\n tags: 'broom',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `${d.count} sorgente/i orfane proposte per la rimozione. **Approva la PR** per potare, oppure chiudila da Forgejo per annullare.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 errore potatura`.trim(),\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto durante la potatura'}.`\n };\n}\n\nreturn n;"
},
"id": "ebe99407-6038-4f8f-a73f-7dc7b0a011e0",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2784,
-64
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0bd3654e-a73d-4c3a-83ed-9f57ca4aad24",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2992,
-64
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook prune": {
"main": [
[
{
"node": "Gate prune",
"type": "main",
"index": 0
}
]
]
},
"Gate prune": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: orphan-wiki",
"type": "main",
"index": 0
}
]
]
},
"SSH: orphan-wiki": {
"main": [
[
{
"node": "Orfani?",
"type": "main",
"index": 0
}
]
]
},
"Orfani?": {
"main": [
[
{
"node": "SSH: prune",
"type": "main",
"index": 0
}
]
]
},
"SSH: prune": {
"main": [
[
{
"node": "Parse prune",
"type": "main",
"index": 0
}
]
]
},
"Parse prune": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "999f640c-aae6-42aa-9a95-aba26987e9d0",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "smH5Qrv7CQnTtdAF",
"tags": []
}

View file

@ -0,0 +1,266 @@
{
"name": "Genome: run-one-ingest",
"nodes": [
{
"parameters": {
"inputSource": "passthrough"
},
"id": "b1b7ba8e-1e45-4f76-adc0-089180715975",
"name": "On ingest request",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"typeVersion": 1.1,
"position": [
224,
624
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// SECURITY chokepoint: every ingest to vm101 passes here. Re-validate inputs (defense in depth:\n// callers + the SSH wrapper also validate) and assemble the exact command. Charset-validated\n// fields are safe inside the single-quoted remote command -> no shell injection.\n// Run Once for Each Item: $json is the current ingest request.\nconst d = $json || {};\nconst genome = String(d.genome || '').toLowerCase().trim();\nconst raw = String(d.raw || '');\nconst mode = String(d.mode || 'ingest');\nconst fb = String(d.feedback_b64 || '');\n\nconst okGenome = /^[a-z0-9][a-z0-9-]{0,63}$/.test(genome);\nconst okMode = (mode === 'ingest' || mode === 'rework');\nconst okRaw = raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw);\n// feedback_b64 is required only for rework mode; for ingest it can be empty\nconst okFb = (mode === 'ingest') || /^[A-Za-z0-9+/=]+$/.test(fb);\n\nif (!okGenome || !okMode || !okRaw || !okFb) {\n return {\n _ok: false,\n genome,\n mode,\n _reason: `bad input (genome:${okGenome} mode:${okMode} raw:${okRaw} fb:${okFb})`\n };\n}\n\n// Build SSH command: single-quoted remote command prevents shell injection\nconst ssh_cmd = (mode === 'rework')\n ? `ssh vm101 'pi ingest-rework ${genome} ${raw} ${fb}'`\n : `ssh vm101 'pi ingest ${genome} ${raw}'`;\n\nreturn {\n _ok: true,\n ssh_cmd,\n genome,\n raw,\n mode,\n reason: String(d.reason || ''),\n prevPr: String(d.prevPr || '')\n};"
},
"id": "8e538237-0e0e-4308-b2c8-631a52b31185",
"name": "Guard & build cmd",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
624
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "4507e3a8b9714c7e",
"leftValue": "={{ $json._ok }}",
"rightValue": true,
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "4b249e76-7ab6-4aa3-886d-06b865931cf6",
"name": "Input valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
672,
624
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "={{ $json.ssh_cmd }}"
},
"id": "8740ae9a-4094-48b2-a9a4-d40d501e09f6",
"name": "SSH: ingest",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
880,
544
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// run-ingest.sh prints one JSON line; the wrapper may instead print {status:busy|error,...}.\n// Take the last {...} line from stdout (logs may precede/follow).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout', raw: out.substring(0, 500) };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream Build ntfy\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n pr_url: r.pr_url || '',\n slug: r.slug || '',\n lint_clean: r.lint_clean || false,\n conflict: r.conflict || false,\n stage: r.stage || '',\n detail: r.detail || '',\n log: r.log || '',\n _raw: line?.substring(0, 500)\n};"
},
"id": "928344e3-0712-42e0-b1a8-f5caff489746",
"name": "Parse result",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1104,
544
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// One builder for ingest + rework outcomes. Title is plain ASCII; the icon comes from Tags\n// (ntfy shortcodes); navigation is via Click (tap) + Actions (button) so it works on every\n// client.\n// Run Once for Each Item: $json is the current parsed result.\n// We read the original request context from the Guard node (same execution, no executeWorkflow in between).\nconst g = $('Guard & build cmd').item.json || {};\nconst verb = (g.mode === 'rework') ? 'rework' : 'ingest';\nconst d = $json || {};\nconst genome = g.genome || 'unknown';\n\n// Build notification based on status\nlet n;\n\nif (g._ok === false) {\n // Input validation failed (Guard & build cmd rejected it)\n n = {\n title: `Errore ${verb}: input non valido`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `Richiesta di ${verb} rifiutata.\\n${g._reason || 'motivo sconosciuto'}`\n };\n} else if (d.status === 'ok') {\n // Success: PR opened\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n const lint = d.lint_clean ? 'lint pulito' : 'lint con avvisi';\n const conflict = d.conflict ? ' · ⚠️ conflitto da risolvere' : '';\n const prevPr = g.prevPr ? ` · sostituisce #${g.prevPr}` : '';\n const reason = (g.reason && verb === 'ingest') ? ` (${g.reason})` : '';\n\n n = {\n title: `${genome} · ${verb} ${d.slug || ''} ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: d.conflict ? 'high' : 'default',\n tags: d.conflict ? 'warning' : 'white_check_mark',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `**${d.slug || 'sorgente'}** ${verb === 'rework' ? 
'rilavorata' : 'ingerita'}`\n + reason + prevPr\n + `.\\n${lint}${conflict}.`\n };\n} else if (d.status === 'busy') {\n // Another ingest is already running on this genome\n n = {\n title: `${genome} · ${verb} in coda`,\n priority: 'min',\n tags: 'hourglass_flowing_sand',\n click: '',\n actions: '',\n body: `Un altro ingest era in corso su questo genoma. La fonte resta pendente e verrà ripresa al prossimo campanello.`\n };\n} else if (d.status === 'pr_failed') {\n // Semantic/lint ok but PR could not be opened\n const detailLine = String(d.detail || '').split('\\n')[0] || 'dettaglio non disponibile';\n n = {\n title: `${genome} · ${d.slug || ''}: PR non aperta`,\n priority: 'high',\n tags: 'warning',\n click: '',\n actions: '',\n body: `Semantic e lint ok, ma la PR non si è aperta.\\n${detailLine}`\n };\n} else {\n // Generic error (including parse errors)\n const stage = d.stage ? ` (stage: ${d.stage})` : '';\n const log = d.log ? `\\nLog: ${d.log}` : '';\n n = {\n title: `${genome} · errore ${verb}`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto'}${stage}.${log}`\n };\n}\n\nn.topic = 'genome-ingest';\nreturn n;"
},
"id": "9062dfba-02ba-4abc-8be6-828c0b353114",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1328,
624
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0c2b4d9b-2700-4815-b47c-8523bc4eb2ff",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1552,
624
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"On ingest request": {
"main": [
[
{
"node": "Guard & build cmd",
"type": "main",
"index": 0
}
]
]
},
"Guard & build cmd": {
"main": [
[
{
"node": "Input valido?",
"type": "main",
"index": 0
}
]
]
},
"Input valido?": {
"main": [
[
{
"node": "SSH: ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"SSH: ingest": {
"main": [
[
{
"node": "Parse result",
"type": "main",
"index": 0
}
]
]
},
"Parse result": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "fd8c1cf6-c5df-4074-b777-113349e32a03",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "VIi2ovb5gJxNJLbg",
"tags": []
}

81
deploy/nexus/README.md Normal file
View file

@ -0,0 +1,81 @@
# Componenti di Sistema — Gestione Sincronizzazione e Automazione Genoma
Questo modulo contiene gli script di backend che vengono installati sul server `nexus` per gestire il ciclo di vita dei vault locali (scratch di lavoro), l'integrazione con Syncthing e l'autocommit dei file grezzi (`raw/`) provenienti dai dispositivi mobili o desktop (es. Obsidian).
## Architettura dei File di Sistema
Gli script sono progettati per girare in un ambiente multi-utente protetto, dove l'istanza globale di `n8n` (tramite l'utente di sistema `n8n-runner`) pilota le operazioni senza possedere i diritti di lettura/scrittura diretti sui file del genoma o sui segreti di configurazione.
### 1. Posizionamento e Permessi degli Script
I file inclusi in questa cartella devono essere installati sul server di produzione nella directory `/usr/local/bin/` con privilegi di esecuzione globali, ma modificabili solo da `root`.
- **Destinazione:** `/usr/local/bin/`
- **Proprietario (Owner):** `root:root`
- **Permessi (Chmod):** `0755` (`-rwxr-xr-x`)
#### Elenco degli Script:
- `ensure-genome-vault`: Script idempotente che inizializza o riallinea il vault locale clonandolo da Forgejo (in loopback) sul branch `develop`, configura gli `.stignore` ed effettua il provisioning automatico della cartella condivisa su Syncthing via API.
- `genome-askpass`: Helper di autenticazione per Git (`GIT_ASKPASS`). Intercetta le richieste di credenziali di Git durante i cloni e i push HTTP su Forgejo, iniettando l'utente e il token applicativo senza esporli nei log di sistema o negli argomenti dei processi.
- `genome-raw-commit`: Script di polling periodico invocato da n8n. Isola i file modificati nella cartella `raw/`, interroga Syncthing per capire quale dispositivo (e quindi quale autore umano) ha generato la modifica, crea commit atomici attribuiti al singolo autore e pusha le modifiche su Forgejo (`develop`).
---
## Modello di Sicurezza e Visibilità
Per garantire l'isolamento del sistema operativo, l'infrastruttura si basa su tre livelli di confinamento:
### A. Variabili d'Ambiente Protette (`.env`)
Le credenziali (Token Forgejo, API Key Syncthing) risiedono nella Home dell'utente operativo del servizio (`homelab`) e sono completamente invisibili a n8n e ad altri utenti del sistema.
- **Path:** `/home/homelab/.config/knowledge-genome.env`
- **Permessi:** `0600` (`-rw-------`), di proprietà esclusiva di `homelab:homelab`.
#### env
Nel file `~/.config/knowledge-genome.env`:
```text
# knowledge-genome.env Configuration Profile
# Requirements: Must be owned by the service user with 0600 permissions.
# Vault path and operational branch
GENOME_VAULTS_ROOT=/srv/genome-vaults
GENOME_BASE=develop
# Forgejo Target Instance
# Replace 127.0.0.1 with vm101 IP if Forgejo is hosted on the virtual machine
FORGEJO_HOST=127.0.0.1:3001
FORGEJO_OWNER=Keru
FORGEJO_USER=n8n-bot
FORGEJO_TOKEN="............"
# Git Commit Identity
COMMITTER_NAME=n8n-bot
COMMITTER_EMAIL=n8n-bot@homelab
DEFAULT_AUTHOR_NAME="Matteo Cherubini"
DEFAULT_AUTHOR_EMAIL=matteo@keruhomelab.com
# Syncthing Target Instance
# Replace 127.0.0.1 with vm101 IP if Syncthing API is hosted on the virtual machine
SYNCTHING_URL=http://127.0.0.1:8384
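SYNCTHING_API_KEY="............"
# Optional overrides read by genome-raw-commit (defaults shown)
# FORGEJO_WEB_BASE=https://git.keruhomelab.com
# RAW_QUIET_MINUTES=2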
```
### B. Confine dei Privilegi in Sudoers
L'utente di automazione `n8n-runner` (usato dall'agente SSH di n8n) non ha accesso alla shell e non può invocare comandi arbitrari. Può unicamente chiamare i due script principali impersonando l'utente `homelab` senza l'inserimento della password.
Configurazione da applicare in `/etc/sudoers.d/n8n-genome` (con permessi rigorosi `0440`):
```text
n8n-runner ALL=(homelab) NOPASSWD: /usr/local/bin/ensure-genome-vault, /usr/local/bin/genome-raw-commit
```
### C. Directory dei Vault
I dati veri e propri sincronizzati da Syncthing risiedono isolati in `/srv/genome-vaults/`.
- **Proprietario**: homelab:homelab (UID/GID 1000), permettendo la convivenza nativa e fluida tra il demone Syncthing in esecuzione nel container e gli script Git locali.

View file

@ -0,0 +1,126 @@
#!/bin/bash
# ensure-genome-vault <genome> [--status-only]
#
# Single idempotent entry point for genome vault management.
# n8n calls it when a genome is created and again later as a safety net.
#
# What it does:
#   - no vault on disk  -> clone it from Forgejo (loopback) and track the develop branch
#   - vault on disk     -> bring it back in line with origin/develop (the vault is a
#                          rebuildable scratchpad)
#   - after clone/fetch -> write raw/.stignore and register the Syncthing folder
#
# Forgejo is the source of truth; vaults are scratch space and are not backed up directly.
# Everything runs locally over loopback.
set -euo pipefail

genome="${1:?usage: ensure-genome-vault <genome> [--status-only]}"
mode="${2:-}"

# Validate the slug here as well, so it can never be used for path/URL traversal:
# lowercase kebab-case only, which rules out '/', '..' and spaces.
if ! [[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]]; then
  echo '{"status":"error","reason":"invalid genome name"}'
  exit 1
fi

# Load the service configuration; allexport is on while sourcing so every value it
# defines is exported to child processes.
set -a
. "${HOME}/.config/knowledge-genome.env"
set +a

# Fall back to built-in defaults for anything the env file left unset or empty.
GENOME_VAULTS_ROOT="${GENOME_VAULTS_ROOT:-/srv/genome-vaults}"
GENOME_BASE="${GENOME_BASE:-develop}"
FORGEJO_USER="${FORGEJO_USER:-n8n-bot}"
FORGEJO_HOST="${FORGEJO_HOST:-127.0.0.1:3001}"
FORGEJO_OWNER="${FORGEJO_OWNER:-Keru}"
SYNCTHING_URL="${SYNCTHING_URL:-http://127.0.0.1:8384}"

# Derived locations: vault checkout, Syncthing folder id, Forgejo clone URL.
vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public"
clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git"

# Git asks this helper for credentials (it supplies the n8n-bot token).
export GIT_ASKPASS=/usr/local/bin/genome-askpass

mkdir -p "$GENOME_VAULTS_ROOT"
# ── 1. Realign (vault present) or clone (vault missing) ──────────────────────
base_ref="refs/remotes/origin/${GENOME_BASE}"

if [[ -d "${vault}/.git" ]]; then
  # Vault already on disk.
  cd "$vault"
  if [[ "$mode" == "--status-only" ]]; then
    printf '{"status":"present","genome":"%s","head":"%s"}\n' "$genome" "$(git rev-parse --short HEAD)"
    exit 0
  fi

  git fetch -q origin
  if ! git show-ref --verify --quiet "$base_ref"; then
    # The base branch is not on the remote yet: create it locally (tolerating
    # "already exists") and publish it.
    git switch -q -c "$GENOME_BASE" 2>/dev/null || true
    git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
    state="base-created"
  else
    git switch -q "$GENOME_BASE" 2>/dev/null || git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}"
    # GUARD: the hard reset runs only when `git status` reports nothing pending under raw/.
    # If Syncthing has already dropped uncommitted raw files there, they must survive:
    # try a plain fast-forward instead and keep going even if it cannot be applied.
    if [[ -n "$(git status --porcelain -- raw/ 2>/dev/null)" ]]; then
      git merge -q --ff-only "origin/${GENOME_BASE}" 2>/dev/null || true
      state="realigned-kept-dirty"
    else
      git reset -q --hard "origin/${GENOME_BASE}"
      state="realigned"
    fi
  fi
else
  # No vault yet.
  if [[ "$mode" == "--status-only" ]]; then
    printf '{"status":"absent","genome":"%s"}\n' "$genome"
    exit 0
  fi

  git clone -q "$clone_url" "$vault"
  cd "$vault"
  if git show-ref --verify --quiet "$base_ref"; then
    git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}" 2>/dev/null || git switch -q "$GENOME_BASE"
  else
    # The base branch does not exist on the remote yet: branch off whatever was cloned
    # and publish it.
    git switch -q -c "$GENOME_BASE"
    git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
  fi
  state="cloned"
fi
# ── 2. raw/.stignore + exclusion from git (infrastructure, not content) ────────────

# Add $1 to the vault's local .git/info/exclude unless that exact line is already there.
exclude_from_git() {
  local entry="$1"
  local exclude_file="${vault}/.git/info/exclude"
  grep -qxF "$entry" "$exclude_file" 2>/dev/null \
    || echo "$entry" >> "$exclude_file"
}

mkdir -p "${vault}/raw"
cat > "${vault}/raw/.stignore" <<'EOF'
// Knowledge Genome — Syncthing exclusions for raw/
// NEVER unencrypted private data: git-crypt protects INSIDE the repo, not in Syncthing transit
private
// Obsidian / editor noise
.obsidian
.trash
*.tmp
workspace*.json
// security
.git
EOF
# .stignore is local infrastructure and must stay out of genome commits.
exclude_from_git 'raw/.stignore'

# Syncthing folder marker: it has to exist on disk (locally, NOT in Git).
# Without it Syncthing refuses to scan (“folder marker missing”).
mkdir -p "${vault}/raw/.stfolder"
# .stfolder must stay out of genome commits too.
exclude_from_git 'raw/.stfolder'
# ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ────────
# Outcome is reported in folder_state:
#   skipped(no api key)        SYNCTHING_API_KEY is not set, nothing was attempted
#   exists                     the folder is already configured
#   created                    the folder was registered from Syncthing's folder defaults
#   error(check syncthing api) any API/jq step failed; the vault result is still reported
folder_state="skipped(no api key)"
if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then
  if curl -fsS -o /dev/null -H "X-API-Key: ${SYNCTHING_API_KEY}" \
      "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
    folder_state="exists"
  elif body="$(curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" \
        "${SYNCTHING_URL}/rest/config/defaults/folder" \
        | jq --arg id "$fid" --arg label "${genome} (raw public)" --arg path "${vault}/raw" \
          '.id=$id | .label=$label | .path=$path | .type="sendreceive"
           | .fsWatcherEnabled=true | .rescanIntervalS=3600')" \
      && [[ -n "$body" ]] \
      && curl -fsS -o /dev/null -X PUT \
        -H "X-API-Key: ${SYNCTHING_API_KEY}" -H "Content-Type: application/json" \
        -d "$body" "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
    # Fetching the defaults is part of the condition on purpose: as a bare assignment a
    # failing curl/jq would abort the whole script (errexit + pipefail) before the status
    # line below is printed, which is exactly what "best-effort" must not do.
    folder_state="created"
  else
    folder_state="error(check syncthing api)"
  fi
fi

# Final one-line JSON status consumed by the caller.
printf '{"status":"ok","genome":"%s","vault":"%s","state":"%s","syncthing_folder":"%s"}\n' \
  "$genome" "$vault" "$state" "$folder_state"

View file

@ -0,0 +1,19 @@
#!/bin/bash
#
# GIT_ASKPASS helper for HTTP authentication against Forgejo.
# Git runs this script with its prompt text as $1 whenever it needs a credential;
# whatever is printed on stdout is used as the answer.
#
set -eu

# Bring FORGEJO_USER / FORGEJO_TOKEN into scope.
. "${HOME}/.config/knowledge-genome.env"

prompt="${1:-}"
if [[ "$prompt" == *[Uu]sername* ]]; then
  # Prompt mentions "username": answer with the account name.
  printf '%s\n' "${FORGEJO_USER:-n8n-bot}"
else
  # Any other prompt is answered with the token; abort loudly if it is missing.
  printf '%s\n' "${FORGEJO_TOKEN:?FORGEJO_TOKEN not set}"
fi

View file

@ -0,0 +1,155 @@
#!/bin/bash
# genome-raw-commit <genome>
#
# Commit the raw files that Syncthing has placed in the vault and push them to origin/<base>.
# - Committer = n8n-bot (sole pusher); Author = the person who wrote it (Syncthing modifiedBy -> .authors.json)
# - One commit per author (single-device => one commit). No-op if there is nothing.
# - JSON output built with jq (safe escaping), with a `files` array:
#   for each raw -> file, author, local_path, local_url (file://), remote_url (Forgejo web).
#
# Configuration comes from ~/.config/knowledge-genome.env; every value has a default below.
set -euo pipefail

genome="${1:?usage: genome-raw-commit <genome>}"

# Input validation to prevent path or URL traversal inside the script
[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }

set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
: "${GENOME_BASE:=develop}"
: "${FORGEJO_USER:=n8n-bot}"
: "${FORGEJO_HOST:=127.0.0.1:3001}"
: "${FORGEJO_OWNER:=Keru}"
: "${FORGEJO_WEB_BASE:=https://git.keruhomelab.com}" # human-facing URL for remote links (not the loopback)
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
: "${COMMITTER_NAME:=n8n-bot}"
: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
: "${DEFAULT_AUTHOR_NAME:=Unknown}"
: "${DEFAULT_AUTHOR_EMAIL:=unknown@syncthing}"

vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public"
authors_map="${GENOME_VAULTS_ROOT}/.authors.json"
# GENOME_PUSH_URL is a test seam: defaults to the Forgejo loopback URL in production.
clone_url="${GENOME_PUSH_URL:-http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git}"
export GIT_ASKPASS=/usr/local/bin/genome-askpass

[[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; }
cd "$vault"

# Per-repository git settings for the bot.
git config user.name "$COMMITTER_NAME"
git config user.email "$COMMITTER_EMAIL"
git config commit.gpgsign false
# Make git print paths containing non-ASCII bytes verbatim. With the default (quoted) form a
# note such as raw/perché.md is listed as "raw/perch\303\251.md": that string does not exist
# on disk and is not a valid pathspec, so every later step that consumes
# `git diff --name-only` (existence checks, `git commit -- <paths>`) would mishandle it.
git config core.quotePath false

# Syncthing bookkeeping lives in raw/ but must never be committed.
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"

# Stage everything under raw/, then unstage the bookkeeping entries in case they were tracked.
git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
# --- Quiet window: commit a raw file only once it has STOPPED changing. -------------
# A note that is still being written (Obsidian autosave -> Syncthing -> here) keeps a
# fresh mtime; such a file is taken back out of the index so a half-written note never
# triggers an ingest. It is committed on a later run, once it has been untouched for
# RAW_QUIET_MINUTES. Staged deletions have nothing on disk, so they always pass through.
# Purely mechanical: no model involved.
quiet_min="${RAW_QUIET_MINUTES:-2}"
held=0
while IFS= read -r staged; do
  # Blank line, or a deletion with nothing on disk to settle: nothing to hold back.
  if [[ -z "$staged" || ! -e "$staged" ]]; then
    continue
  fi
  # Modified within the last $quiet_min minutes -> still "hot": unstage and count it.
  if [[ -n "$(find "$staged" -mmin -"$quiet_min" 2>/dev/null)" ]]; then
    git reset -q -- "$staged" 2>/dev/null || true
    held=$((held + 1))
  fi
done < <(git diff --cached --name-only -- raw/)

# Nothing left in the index: report a no-op, saying whether files are merely still settling.
if git diff --cached --quiet; then
  if (( held > 0 )); then
    printf '{"status":"noop","reason":"raw still settling","genome":"%s","held":%d,"quiet_minutes":%d}\n' \
      "$genome" "$held" "$quiet_min"
  else
    printf '{"status":"noop","genome":"%s"}\n' "$genome"
  fi
  exit 0
fi
# resolve_dev <path>
#   <path> is relative to the vault (raw/...). Asks the Syncthing REST API which device
#   last modified the file and prints that value (`.local.modifiedBy`), or prints nothing
#   when no API key is configured or the lookup fails. Always returns 0.
resolve_dev() {
  local in_folder="${1#raw/}" # the Syncthing folder is rooted at raw/, so drop that prefix
  if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then
    curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" --get "${SYNCTHING_URL}/rest/db/file" \
      --data-urlencode "folder=${fid}" --data-urlencode "file=${in_folder}" 2>/dev/null \
      | jq -r '.local.modifiedBy // empty' 2>/dev/null || true
  fi
}
# author_for_dev <device-id>
#   Prints "name<TAB>email" for the given device id, looked up in $authors_map
#   (a JSON object keyed by device id, each value an object with `name` and `email`).
#   Falls back to DEFAULT_AUTHOR_NAME / DEFAULT_AUTHOR_EMAIL when the id is empty, the map
#   file is absent, the id has no usable entry, or an individual field is missing or empty
#   (so a partial entry never produces a literal "null" author).
author_for_dev() {
  local dev="$1"
  local name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
  local mapped_name="" mapped_email=""

  # Only trust entries that are JSON objects; anything else keeps the defaults.
  if [[ -n "$dev" && -f "$authors_map" ]] \
      && jq -e --arg d "$dev" '.[$d] | type == "object"' "$authors_map" >/dev/null 2>&1; then
    mapped_name="$(jq -r --arg d "$dev" '.[$d].name // empty' "$authors_map" 2>/dev/null)" || mapped_name=""
    mapped_email="$(jq -r --arg d "$dev" '.[$d].email // empty' "$authors_map" 2>/dev/null)" || mapped_email=""
    if [[ -n "$mapped_name" ]]; then
      name="$mapped_name"
    fi
    if [[ -n "$mapped_email" ]]; then
      email="$mapped_email"
    fi
  fi

  printf '%s\t%s' "$name" "$email"
}
# Walk the staged raw files, resolve each one's author, and bucket the paths by author.
#   ROWS    : one "relpath<TAB>author-name" entry per staged file (used for the final report)
#   G_FILES : author identity -> newline-terminated list of that author's paths
#   G_NAME / G_EMAIL : author identity -> display name / e-mail
declare -A G_FILES G_NAME G_EMAIL
declare -a ROWS
while IFS= read -r staged_file; do
  if [[ -z "$staged_file" ]]; then
    continue
  fi
  device="$(resolve_dev "$staged_file")"
  IFS=$'\t' read -r who mail <<< "$(author_for_dev "$device")"
  ROWS+=("${staged_file}"$'\t'"${who}")
  ident="${who} <${mail}>"
  G_NAME["$ident"]="$who"
  G_EMAIL["$ident"]="$mail"
  G_FILES["$ident"]+="${staged_file}"$'\n'
done < <(git diff --cached --name-only -- raw/)

# One commit per author, limited to that author's paths.
ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
commits=0
summary=""
for ident in "${!G_FILES[@]}"; do
  mapfile -t files < <(printf '%s' "${G_FILES[$ident]}")

  # Comma-separated file list with the leading raw/ removed, for the subject and summary.
  short=""
  for staged_file in "${files[@]}"; do
    short+="${short:+,}${staged_file#raw/}"
  done

  msg="$(printf 'raw(%s): sync %s\n\nAdded-by: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
    "$genome" "$short" "${G_NAME[$ident]}" "$ts")"
  git commit -q --author="$ident" -m "$msg" -- "${files[@]}"

  commits=$((commits + 1))
  if [[ -n "$summary" ]]; then
    summary+="; "
  fi
  summary+="${G_NAME[$ident]}:${short}"
done
# Push to origin/<base>. The vault is SCRATCH, so we never do an interactive rebase
# (which can conflict when the same raw file is edited repeatedly). Strategy:
# try a fast-forward push; if that push is rejected, re-apply our raw changes on top of a
# fresh origin/<base> and push again (one retry only).
git fetch -q origin
if ! git push -q "$clone_url" "HEAD:${GENOME_BASE}" 2>/dev/null; then
  # Push rejected (assumed: origin advanced). Snapshot the current raw/ tree, realign hard to
  # origin, copy the snapshot back over it, and commit whatever differs.
  # NOTE(review): the snapshot is copied back ON TOP of origin's raw/, so a file we deleted
  # locally but that still exists on origin comes back after the reset and the deletion is
  # not re-applied. Confirm this is acceptable for the realign path.
  # NOTE(review): files held back by the quiet window are part of the snapshot and get staged
  # by the `git add -A` below. Confirm.
  tmp="$(mktemp -d)"
  cp -a raw/. "$tmp"/ 2>/dev/null || true
  git reset -q --hard "origin/${GENOME_BASE}"
  git clean -q -fd
  cp -a "$tmp"/. raw/ 2>/dev/null || true
  rm -rf "$tmp"
  git add -A -- raw/
  git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
  if git diff --cached --quiet; then
    # our content already matches origin -> nothing to push, report ok-noop-after-realign
    printf '{"status":"ok","genome":"%s","base":"%s","commits":0,"head":"%s","summary":"already in sync after realign","files":[]}\n' \
      "$genome" "$GENOME_BASE" "$(git rev-parse --short HEAD)"
    exit 0
  fi
  # Staged changes exist at this point, so a failing commit is a real error. It must not be
  # swallowed: the push below would then send an unchanged HEAD, succeed as a no-op, and the
  # run would be reported as "ok" although nothing was published.
  if ! git commit -q --author="${DEFAULT_AUTHOR_NAME} <${DEFAULT_AUTHOR_EMAIL}>" \
      -m "raw(${genome}): re-apply after realign" -- raw/; then
    printf '{"status":"error","reason":"commit-failed-after-realign","genome":"%s"}\n' "$genome"
    exit 1
  fi
  git push -q "$clone_url" "HEAD:${GENOME_BASE}" \
    || { printf '{"status":"error","reason":"push-failed-after-realign","genome":"%s"}\n' "$genome"; exit 1; }
fi
head="$(git rev-parse --short HEAD)"

# Print one JSON object per committed raw file (taken from ROWS): its path, author, and both
# a local (file://) and a remote (Forgejo web) link.
describe_rows() {
  local entry rel_path author_name
  for entry in "${ROWS[@]}"; do
    IFS=$'\t' read -r rel_path author_name <<< "$entry"
    jq -n --arg file "$rel_path" --arg author "$author_name" \
      --arg lpath "${vault}/${rel_path}" \
      --arg lurl "file://${vault}/${rel_path}" \
      --arg rurl "${FORGEJO_WEB_BASE}/${FORGEJO_OWNER}/${genome}/src/branch/${GENOME_BASE}/${rel_path}" \
      '{file:$file, author:$author, local_path:$lpath, local_url:$lurl, remote_url:$rurl}'
  done
}

# Slurp the per-file objects into a single `files` array.
files_json="$(describe_rows | jq -s '.')"

# Final report for the caller, assembled by jq so every value is escaped correctly.
jq -n \
  --arg genome "$genome" \
  --arg base "$GENOME_BASE" \
  --argjson commits "$commits" \
  --arg head "$head" \
  --arg summary "$summary" \
  --argjson files "$files_json" \
  '{status:"ok", genome:$genome, base:$base, commits:$commits, head:$head, summary:$summary, files:$files}'

60
deploy/vm101/README.md Normal file
View file

@ -0,0 +1,60 @@
# deploy/vm101
System artifacts deployed to **vm101** (the GPU ingest node). The repo is the
source of truth; the live copies live in `/usr/local/bin/`. Edit here, then
`sudo ./install.sh` on vm101 to push changes.
## Contents
- `n8n-pi-wrap` — forced-command wrapper that fronts every n8n→vm101 SSH call.
- `install.sh` — installs the wrapper(s) into `/usr/local/bin` (idempotent).
## n8n-pi-wrap
The only entry point for the `n8n-runner` identity onto vm101. n8n never gets a
shell here: whatever it sends arrives as `SSH_ORIGINAL_COMMAND`, and a `case`
whitelist decides what runs. Anything outside the whitelist is denied and logged.
Allowed commands:
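| Command | What it does |
|---|---|
| `pi run` | one-shot prompt via stdin (proof-of-life / health) |
| `pi ingest <genome> <raw_path>` | the real two-phase ingest (below) |
| `pi ingest-rework <genome> <raw_path> <feedback_b64>` | re-runs the ingest for one raw file, passing base64-encoded review feedback to the semantic step |
| `pi prune <genome>` | clean start, then `run-prune.sh` (shares the per-genome lock with ingest) |
| `pi pending-raw <genome>` | runs `pending-raw.sh` for the genome |
| `pi orphan-wiki <genome>` | runs `orphan-wiki.sh` for the genome |
| `pi changed-raw <genome> <before> <after>` | lists the `raw/` files changed between two commits as one JSON object |
| `ollama list` / `ollama ps` | model introspection |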
### The two-phase ingest
`pi ingest` runs the clean-start + two phases, then stops:
1. **Clean start** — `git fetch && switch <INGEST_BASE> && reset --hard origin/<base>`.
Destroys only vm101's *scratch* checkout (never a shared branch, never a
force-push) — this determinism is by design.
2. **Semantic** — `skills/ingest/scripts/ingest-semantic.py <genome> <raw_path>`
drives `pi` to WRITE `wiki/*` pages + `.ingest-manifest.json`.
NOTE: this is the script, NOT `pi -p "/skill:ingest ..."` (that form makes the
model reply in chat and write nothing — the classic "manifest not found" trap).
3. **Mechanical** — `skills/ingest/scripts/run-ingest.sh <genome>` validates the
manifest, then index/log/scoped-lint/commit on `feat/ai-ingest-<slug>` and opens
a PR onto `<INGEST_BASE>`. Emits one JSON line `{status,slug,pr_url,...}`.
The PR then waits for the human gate. One raw per session, sequential.
### Input hardening
Both inputs come from `SSH_ORIGINAL_COMMAND`, so both are validated:
- `genome` — kebab lowercase `^[a-z0-9-]+$`.
- `raw_path` — must be under `raw/`, no `..` traversal, restricted charset
`[A-Za-z0-9._/-]`, and the file must exist. Rejected paths return a JSON error.
Config (`INGEST_BASE`, `GENOMES_ROOT`, `INGEST_MODEL`, Forgejo token) is sourced
from `~/.config/knowledge-genome.env` (0600, owner-only).
## Install / update
```bash
# on vm101
cd ~/knowledge-genome-orchestrator/deploy/vm101
sudo ./install.sh
```

8
deploy/vm101/install.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/bash
# deploy/vm101/install.sh — install vm101 wrappers from repo -> /usr/local/bin (idempotent).
# Run ON vm101 with sudo: sudo ./install.sh
#
# The wrapper is syntax-checked BEFORE it is copied. n8n-pi-wrap is the forced
# command for every n8n SSH call, so installing a file that does not parse would
# break all of them; with the check first, a bad edit aborts here (set -e) and
# the currently installed copy stays untouched.
set -euo pipefail

here="$(cd "$(dirname -- "$0")" && pwd)"
src="${here}/n8n-pi-wrap"
dest="/usr/local/bin/n8n-pi-wrap"

# Parse-only check of the repo copy; a non-zero status stops the script.
bash -n "$src"
echo "syntax: ok"

install -m 0755 "$src" "$dest"
echo "installed: ${dest}"

196
deploy/vm101/n8n-pi-wrap Executable file
View file

@ -0,0 +1,196 @@
#!/bin/bash
# =============================================================================
# n8n-pi-wrap — SSH forced-command wrapper.
#
# Whatever the client sent arrives in SSH_ORIGINAL_COMMAND; the `case` at the
# bottom is a whitelist. A matching command is validated, logged via logger(1)
# and executed; anything else is logged as denied and rejected.
#
# Output protocol: handled commands answer on stdout with one JSON object
# ({"status":"ok"|"error"|"busy",...}) or with the output of the program they
# exec. Diagnostics of the executed programs go to stderr.
# =============================================================================
set -eu

# Keep bracket ranges such as [a-z] ASCII-only regardless of the session locale,
# so the validation patterns below mean exactly what they say. Older bash
# versions do not know the option; they simply keep their default behaviour.
shopt -s globasciiranges 2>/dev/null || true

cmd="${SSH_ORIGINAL_COMMAND:-}"
wrap_scripts="${HOME}/.pi/agent/skills/ingest/scripts"
wrap_env="${HOME}/.config/knowledge-genome.env"

# json_fail <reason> — print a one-line JSON error on stdout and exit 1.
# <reason> must be a fixed literal (it is interpolated without JSON escaping).
json_fail() {
  printf '{"status":"error","reason":"%s"}\n' "$1"
  exit 1
}

# require_genome <genome> [reason] — genome slug: kebab lowercase only.
require_genome() {
  case "$1" in
    "" | *[!a-z0-9-]*) json_fail "${2:-invalid genome name}" ;;
  esac
}

# require_raw_path <raw_path> — whitelist for the raw file argument:
#   - must start with "raw/" (which also excludes absolute paths)
#   - no ".." and no "//" anywhere
#   - allowed chars: [A-Za-z0-9._/-] (kebab slugs + subdirs like raw/articles/foo.md)
require_raw_path() {
  case "$1" in
    raw/*) : ;;
    *) json_fail "raw_path must be under raw/" ;;
  esac
  case "$1" in
    *..* | *//*) json_fail "raw_path traversal" ;;
  esac
  case "$1" in
    *[!A-Za-z0-9._/-]*) json_fail "raw_path illegal chars" ;;
  esac
}

# acquire_genome_lock <genome> <busy_reason>
# Per-genome lock on fd 9: serializes writers; never two concurrent sessions on
# the same genome. If the lock is already held, reports "busy" and exits 0.
# fd 9 stays open, so the lock is held by this process and by whatever it execs.
acquire_genome_lock() {
  local lock_genome="$1" lock_busy_reason="$2"
  # The braces matter: `exec 9>file 2>/dev/null` (no command) would make BOTH
  # redirections permanent and silence stderr for the rest of the session.
  # Scoping 2>/dev/null to the group hides only a failed open of /run/lock,
  # after which the /tmp location is used instead.
  { exec 9>"/run/lock/kg-ingest-${lock_genome}.lock"; } 2>/dev/null \
    || exec 9>"/tmp/kg-ingest-${lock_genome}.lock"
  if ! flock -n 9; then
    printf '{"status":"busy","reason":"%s","genome":"%s"}\n' "$lock_busy_reason" "$lock_genome"
    exit 0
  fi
}

# prepare_checkout <genome> [raw_path]
# Loads the env file (exporting what it sets), enters the genome checkout,
# optionally requires the raw file to exist there, then runs clean_start from
# lib/clean-start.sh (single source of truth for the pre-session reset).
prepare_checkout() {
  local checkout_genome="$1" checkout_raw="${2:-}"
  set -a; . "$wrap_env"; set +a
  cd "${GENOMES_ROOT}/${checkout_genome}" || json_fail "unknown genome"
  if [ -n "$checkout_raw" ]; then
    # The raw file must actually exist under the genome's raw/ dir.
    [ -f "$checkout_raw" ] || json_fail "raw file not found"
  fi
  : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
  source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || json_fail "clean-start.sh not found"
  clean_start || json_fail "clean-start failed"
}

case "$cmd" in
  "pi pending-raw "*)
    genome="${cmd#pi pending-raw }"
    require_genome "$genome"
    logger -t n8n-pi-wrap "ok: pi pending-raw ${genome}"
    # Env file is optional for this command.
    set -a; . "$wrap_env" 2>/dev/null || true; set +a
    # Run from the DEPLOYED skill dir (the same one the ingest scripts below are
    # executed from), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
    exec "${wrap_scripts}/pending-raw.sh" "$genome"
    ;;

  "pi orphan-wiki "*)
    genome="${cmd#pi orphan-wiki }"
    require_genome "$genome"
    logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
    set -a; . "$wrap_env" 2>/dev/null || true; set +a
    exec "${wrap_scripts}/orphan-wiki.sh" "$genome"
    ;;

  "pi run")
    logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
    prompt=$(cat)
    exec /usr/local/bin/pi --no-tools --mode json -p "$prompt" </dev/null
    ;;

  "pi ingest "*)
    # Strict positional parse: EXACTLY `pi ingest <genome> <raw_path>` (two tokens).
    rest="${cmd#pi ingest }"
    genome="${rest%% *}"
    raw_path="${rest#* }"
    # reject: missing second token, or any extra token (a space left in raw_path)
    if [ "$genome" = "$rest" ] || [ -z "$raw_path" ] || [ "$raw_path" != "${raw_path#* }" ]; then
      json_fail "usage: pi ingest <genome> <raw_path>"
    fi
    require_genome "$genome"
    require_raw_path "$raw_path"
    logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"
    acquire_genome_lock "$genome" "another ingest is running for this genome"
    prepare_checkout "$genome" "$raw_path"
    # SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
    # (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
    log="$(mktemp -t pi-ingest.XXXXXX.log)"
    "${wrap_scripts}/ingest-semantic.py" "${genome}" "${raw_path}" >"$log" 2>&1 \
      || {
        printf '{"status":"error","stage":"semantic","reason":"ingest-semantic failed","log":"%s"}\n' "$log"
        exit 1
      }
    # MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
    exec "${wrap_scripts}/run-ingest.sh" "${genome}"
    ;;

  "pi prune "*)
    # Prune orphaned sources. Same lock as ingest (serializes writes per genome),
    # clean_start, then run-prune.sh (which re-derives the orphans and opens a gated PR).
    genome="${cmd#pi prune }"
    require_genome "$genome"
    logger -t n8n-pi-wrap "ok: pi prune ${genome}"
    acquire_genome_lock "$genome" "another ingest/prune is running for this genome"
    prepare_checkout "$genome"
    exec "${wrap_scripts}/run-prune.sh" "${genome}"
    ;;

  "pi ingest-rework "*)
    # args: <genome> <raw_path> <feedback_base64> (3 tokens).
    # The feedback travels base64-encoded in argv: n8n's SSH node does not pass
    # stdin, and this way the review's metacharacters (quotes, newlines, $(...))
    # are neutralized.
    args="${cmd#pi ingest-rework }"
    genome="${args%% *}"
    tmp="${args#* }"
    raw_path="${tmp%% *}"
    fb_b64="${tmp#* }"
    if [ "$genome" = "$args" ] || [ "$raw_path" = "$tmp" ] || [ -z "$fb_b64" ]; then
      json_fail "usage: pi ingest-rework <genome> <raw_path> <feedback_b64>"
    fi
    require_genome "$genome" "invalid genome"
    require_raw_path "$raw_path"
    # A fourth token would leave a space in fb_b64 and is rejected here too.
    case "$fb_b64" in
      *[!A-Za-z0-9+/=]*) json_fail "feedback not base64" ;;
    esac
    logger -t n8n-pi-wrap "ok: pi ingest-rework ${genome} ${raw_path}"
    # Best effort on purpose: whatever base64 managed to decode is kept even
    # when it reports an error.
    feedback="$(printf '%s' "$fb_b64" | base64 -d 2>/dev/null || true)"
    # per-genome lock: serializes with the normal ingests
    acquire_genome_lock "$genome" "another ingest is running for this genome"
    prepare_checkout "$genome" "$raw_path"
    log="$(mktemp -t pi-rework.XXXXXX.log)"
    INGEST_FEEDBACK="$feedback" \
      "${wrap_scripts}/ingest-semantic.py" "${genome}" "${raw_path}" >"$log" 2>&1 \
      || {
        printf '{"status":"error","stage":"semantic","reason":"rework failed","log":"%s"}\n' "$log"
        exit 1
      }
    exec "${wrap_scripts}/run-ingest.sh" "${genome}"
    ;;

  "pi changed-raw "*)
    # List raw/ files changed between two commits (the webhook payload does NOT
    # include file lists, so vm101's checkout computes the diff itself).
    # Strict positional parse: EXACTLY `pi changed-raw <genome> <before> <after>`.
    rest="${cmd#pi changed-raw }"
    genome="${rest%% *}"
    range="${rest#* }"
    before="${range%% *}"
    after="${range#* }"
    require_genome "$genome"
    # Without a space to split on, the expansions above return their input
    # unchanged; that is how a missing <before> or <after> token is detected.
    if [ "$genome" = "$rest" ] || [ "$before" = "$range" ]; then
      json_fail "invalid commit range"
    fi
    # Validate each id on its own: lowercase hex only, non-empty. (Checking the
    # concatenation would let an empty <after> through; an extra token leaves a
    # space in <after> and is rejected by the charset.)
    case "$before" in
      "" | *[!a-f0-9]*) json_fail "invalid commit range" ;;
    esac
    case "$after" in
      "" | *[!a-f0-9]*) json_fail "invalid commit range" ;;
    esac
    logger -t n8n-pi-wrap "ok: pi changed-raw ${genome} ${before}..${after}"
    set -a; . "$wrap_env"; set +a
    cd "${GENOMES_ROOT}/${genome}" 2>/dev/null || json_fail "unknown genome"
    git fetch -q origin
    # Resolve the diff base robustly:
    #   - before all-zero (brand-new branch) or unreachable (force-push) -> fall back to after~1
    #   - if even after~1 is missing (root commit) -> list all raw files in `after`
    base="$before"
    case "$before" in
      *[!0]*) : ;;
      *) base="" ;;   # all-zero -> empty
    esac
    if [ -n "$base" ] && ! git cat-file -e "${base}^{commit}" 2>/dev/null; then
      base=""
    fi
    if [ -z "$base" ] && git cat-file -e "${after}~1^{commit}" 2>/dev/null; then
      base="${after}~1"
    fi
    if [ -n "$base" ]; then
      # --diff-filter=d: deleted files are not listed.
      files="$(git diff --name-only --diff-filter=d "${base}" "${after}" -- raw/ 2>/dev/null \
        | grep -vE '(^|/)\.st(folder|ignore)' || true)"
    else
      # no usable base: enumerate raw files present at `after`
      files="$(git ls-tree -r --name-only "${after}" -- raw/ 2>/dev/null \
        | grep -vE '(^|/)\.st(folder|ignore)' || true)"
    fi
    # Treat a list without any non-empty line as empty.
    printf '%s\n' "$files" | grep -q . || files=""
    if [ -z "$files" ]; then
      printf '{"status":"ok","genome":"%s","count":0,"files":[]}\n' "$genome"
    else
      # emit a JSON array via jq (safe escaping)
      printf '%s\n' "$files" | jq -R . | jq -s \
        --arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
    fi
    ;;

  "ollama list")
    logger -t n8n-pi-wrap "ok: ollama list"
    exec /usr/local/bin/ollama list
    ;;

  "ollama ps")
    logger -t n8n-pi-wrap "ok: ollama ps"
    exec /usr/local/bin/ollama ps
    ;;

  *)
    logger -t n8n-pi-wrap "denied: ${cmd:-<empty>}"
    echo "unauthorized command" >&2
    exit 1
    ;;
esac

View file

@ -9,7 +9,7 @@ PROVIDER=forgejo
# --- FORGEJO --- # --- FORGEJO ---
FORGEJO_URL=https://git.keruhomelab.com FORGEJO_URL=https://git.keruhomelab.com
FORGEJO_USER=keru FORGEJO_USER=Keru
FORGEJO_SSH_PORT=222 FORGEJO_SSH_PORT=222
# --- GITHUB (used when PROVIDER=github) --- # --- GITHUB (used when PROVIDER=github) ---

18
lib/clean-start.sh Normal file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# =============================================================================
# lib/clean-start.sh — single source of truth for the pre-session reset.
# Caller must already be INSIDE the genome checkout.
# Aligns the working tree to origin/<base>. Never force-pushes a shared branch.
# Tolerates a missing remote branch (first-setup scenario).
# NOTE: sourced library — no `set -euo pipefail` (would leak into the caller).
# =============================================================================
clean_start() {
  # Branch to align with: INGEST_BASE, or "main" when it is unset/empty.
  local target_branch="${INGEST_BASE:-main}"

  # Refresh remote-tracking refs; nothing below is meaningful without them.
  if ! git fetch -q origin; then
    return 1
  fi

  # Move onto the branch; if switching fails, try creating it instead.
  if ! git switch -q "$target_branch" 2>/dev/null; then
    if ! git checkout -q -b "$target_branch"; then
      return 1
    fi
  fi

  # Hard-reset only when the remote actually has the branch; on a first setup
  # it may not exist yet, and that is tolerated.
  if git ls-remote --exit-code --heads origin "$target_branch" >/dev/null 2>&1; then
    if ! git reset -q --hard "origin/${target_branch}"; then
      return 1
    fi
  fi

  # Finally drop untracked files and directories.
  if ! git clean -q -fd; then
    return 1
  fi
  return 0
}

View file

@ -208,3 +208,111 @@ check_broken_links() {
fi fi
done <<< "$links" done <<< "$links"
} }
# ---------------------------------------------------------------------------
# levenshtein <s1> <s2>
# Classic edit distance via a two-row rolling buffer, so every array subscript
# is a single integer. The previous implementation used comma subscripts
# (d[i,j]); in bash arithmetic the comma operator collapses to one dimension,
# so the table aliased onto itself and returned wrong distances — it could not
# even score two identical strings as 0. This form is portable to bash 3.2
# (no associative arrays). Echoes the integer distance.
# ---------------------------------------------------------------------------
levenshtein() {
  local a="$1" b="$2"
  local alen=${#a} blen=${#b}

  # Against an empty string the distance is simply the other string's length.
  if (( alen == 0 )); then
    echo "$blen"
    return
  fi
  if (( blen == 0 )); then
    echo "$alen"
    return
  fi

  # Only two rows of the edit table are ever alive: `above` is the finished
  # row for the previous character of $a, `row` is the one being filled in.
  local -a above=() row=()
  local r c best cand

  # Row 0: turning the empty prefix of $a into b[0..c) takes c insertions.
  for (( c = 0; c <= blen; c++ )); do
    above[c]=$c
  done

  for (( r = 1; r <= alen; r++ )); do
    row=( "$r" )   # column 0: r deletions
    for (( c = 1; c <= blen; c++ )); do
      # Substitution is free when the two characters match.
      if [[ "${a:r-1:1}" == "${b:c-1:1}" ]]; then
        best=${above[c-1]}
      else
        best=$(( above[c-1] + 1 ))
      fi
      cand=$(( above[c] + 1 ))     # deletion
      if (( cand < best )); then best=$cand; fi
      cand=$(( row[c-1] + 1 ))     # insertion
      if (( cand < best )); then best=$cand; fi
      row[c]=$best
    done
    above=( "${row[@]}" )
  done

  echo "${above[blen]}"
}
# ---------------------------------------------------------------------------
# similarity <s1> <s2>
# Percentage similarity from the edit distance: 100 = identical, 0 = entirely
# different. Two empty strings are treated as identical (100), so the divide
# is always guarded.
# ---------------------------------------------------------------------------
similarity() {
  local left="$1" right="$2"

  # The longer of the two lengths is the scale the distance is measured against.
  local longest
  if (( ${#left} >= ${#right} )); then
    longest=${#left}
  else
    longest=${#right}
  fi

  # Two empty strings: identical by definition, and this keeps the division safe.
  if (( longest == 0 )); then
    echo "100"
    return
  fi

  local edits
  edits=$(levenshtein "$left" "$right")
  # Integer arithmetic: the share of edits is truncated before it is subtracted.
  echo $(( 100 - (edits * 100 / longest) ))
}
# ---------------------------------------------------------------------------
# check_duplicates <manifest>
# Advisory only: warns when a page created this run has a slug suspiciously
# close to an entity/concept already listed in wiki/index.md, so a human can
# merge them in the PR rather than grow two near-identical pages. Never fails
# the lint (always returns 0), exactly like check_broken_links.
#
# Arguments: $1 - path to the ingest manifest (JSON). A missing manifest or a
#                 missing `jq` makes the check a silent no-op.
# Reads:     wiki/index.md relative to the current directory.
# Outputs:   one `warn` per (new slug, existing slug) pair whose similarity is
#            strictly greater than the threshold.
#
# The threshold is tunable via KG_DUP_THRESHOLD (default 70). It must be a
# non-negative decimal integer; anything else is reported with a warning and
# replaced by 70, and leading zeros are read as decimal, not octal.
#
# Exact self-matches are skipped: step 1 of run-ingest.sh appends this run's
# new slugs to the index BEFORE the lint runs, so without the skip every new
# slug would match itself at 100%. A page that genuinely collides with a
# pre-existing file is reported by the manifest as 'modified', not 'created',
# so skipping created==existing pairs can never mask a real collision.
# ---------------------------------------------------------------------------
check_duplicates() {
  local manifest="$1"
  [[ -f "$manifest" ]] || return 0
  command -v jq >/dev/null 2>&1 || return 0

  # New leaf slugs (file name without directory and .md) of pages created this run.
  local -a new_slugs=()
  local slug
  while IFS= read -r slug; do
    [[ -n "$slug" ]] && new_slugs+=("$slug")
  done < <(jq -r '.pages[]? | select(.status=="created") | .path
    | split("/")[-1] | sub("\\.md$";"")' "$manifest" 2>/dev/null)

  # Existing entity/concept slugs already catalogued in the index.
  local -a existing_slugs=()
  if [[ -f "wiki/index.md" ]]; then
    local line
    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      if [[ $line =~ \[\[(entities|concepts)/([a-z0-9-]+)\]\] ]]; then
        existing_slugs+=("${BASH_REMATCH[2]}")
      fi
    done < "wiki/index.md"
  fi

  (( ${#new_slugs[@]} && ${#existing_slugs[@]} )) || return 0

  # Validate the threshold before it reaches (( )): inside arithmetic a word
  # is evaluated as a variable name and a leading 0 means octal, either of
  # which would silently change what "too similar" means.
  local threshold="${KG_DUP_THRESHOLD:-70}"
  if [[ "$threshold" =~ ^[0-9]+$ ]]; then
    threshold=$(( 10#$threshold ))
  else
    warn "Ignoring invalid KG_DUP_THRESHOLD='${threshold}' (expected a non-negative integer); using 70"
    threshold=70
  fi

  local new exist sim
  for new in "${new_slugs[@]}"; do
    for exist in "${existing_slugs[@]}"; do
      [[ "$new" == "$exist" ]] && continue   # skip exact self-match (see header)
      sim=$(similarity "$new" "$exist")
      if (( sim > threshold )); then
        warn "Possible duplicate: '${new}' ≈ '${exist}' (${sim}% similar) — review in PR"
      fi
    done
  done
  return 0
}

View file

@ -64,8 +64,11 @@ scaffold_genome() {
install_precommit_hook() { install_precommit_hook() {
local repo_path="$1" local repo_path="$1"
local hook_path="${repo_path}/.git/hooks/pre-commit" local hooks_dir
hooks_dir="$(git -C "$repo_path" rev-parse --git-path hooks)"
local hook_path="${hooks_dir}/pre-commit"
mkdir -p "$hooks_dir"
cp "${TEMPLATES_DIR}/pre-commit.sh" "$hook_path" cp "${TEMPLATES_DIR}/pre-commit.sh" "$hook_path"
chmod +x "$hook_path" chmod +x "$hook_path"
success "Pre-commit security hook installed at: $hook_path" success "Pre-commit security hook installed at: $hook_path"

View file

@ -15,6 +15,7 @@ provider_create_repo() {
local name="$1" local name="$1"
local desc="$2" local desc="$2"
local private="$3" local private="$3"
local auto_init="${4:-false}" # genomi: true (submodule add esige un branch). master: false (git init locale + push).
local http_code local http_code
http_code=$(curl -s -o /dev/null -w "%{http_code}" \ http_code=$(curl -s -o /dev/null -w "%{http_code}" \
@ -25,7 +26,7 @@ provider_create_repo() {
\"name\": \"${name}\", \"name\": \"${name}\",
\"description\": \"${desc}\", \"description\": \"${desc}\",
\"private\": ${private}, \"private\": ${private},
\"auto_init\": false \"auto_init\": ${auto_init}
}") }")
case "$http_code" in case "$http_code" in

View file

@ -12,7 +12,7 @@ _REGISTRY_LOADED=1
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Dynamic paths # Dynamic paths
WORK_DIR="${HOME}/knowledge-genome-orchestrator" WORK_DIR="${PROJECT_ROOT}"
KEYS_DIR="${WORK_DIR}/keys" KEYS_DIR="${WORK_DIR}/keys"
TEMPLATES_DIR="${PROJECT_ROOT}/templates" TEMPLATES_DIR="${PROJECT_ROOT}/templates"
LIB_DIR="${PROJECT_ROOT}/lib" LIB_DIR="${PROJECT_ROOT}/lib"
@ -29,5 +29,7 @@ PROVIDERS_DIR="${PROJECT_ROOT}/providers"
# Example: "genome-work|Work notes and architecture logs||no" # Example: "genome-work|Work notes and architecture logs||no"
# "genome-finance|Personal finance|user/repo-finance|no" # "genome-finance|Personal finance|user/repo-finance|no"
GENOMES=( GENOMES=(
"genome-example|Template genome description for knowledge management||no" # Disposable sandbox: exercise the full pipeline (ingest -> PR) end-to-end.
# Created by `make setup`. Replace with real domains once the circle is validated.
"genome-test|Disposable sandbox for pipeline testing||no"
) )

0
scripts/add-genome.sh Normal file → Executable file
View file

0
scripts/lint-genomes.sh Normal file → Executable file
View file

13
scripts/setup-genomes.sh Normal file → Executable file
View file

@ -19,7 +19,7 @@ source "providers/${PROVIDER}.sh"
step "Processing Genome Registry" step "Processing Genome Registry"
for entry in "${GENOMES[@]}"; do for entry in "${GENOMES[@]}"; do
# 3-field format: name|description|linked_repo (linked_repo optional → may be empty) # 4-field format: name|description|linked_repo|cross_source linked_repo optional (may be empty); cross_source defaults to "no".
IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE <<< "$entry" IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE <<< "$entry"
GENOME_CROSS_SOURCE="${GENOME_CROSS_SOURCE:-no}" GENOME_CROSS_SOURCE="${GENOME_CROSS_SOURCE:-no}"
export GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE export GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE
@ -27,7 +27,7 @@ for entry in "${GENOMES[@]}"; do
info "Processing: ${GENOME_NAME} (cross_source: ${GENOME_CROSS_SOURCE})..." info "Processing: ${GENOME_NAME} (cross_source: ${GENOME_CROSS_SOURCE})..."
# 1. Remote Creation (Idempotent) # 1. Remote Creation (Idempotent)
provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true" provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true" "true"
SSH_URL=$(provider_ssh_url "${GENOME_NAME}") SSH_URL=$(provider_ssh_url "${GENOME_NAME}")
GENOME_PATH="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}" GENOME_PATH="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"
@ -40,6 +40,8 @@ for entry in "${GENOMES[@]}"; do
cd "${GENOME_NAME}" cd "${GENOME_NAME}"
git switch -C main
# IMPORTANT: Initialize git-crypt BEFORE creating any files # IMPORTANT: Initialize git-crypt BEFORE creating any files
gcrypt_init gcrypt_init
@ -50,14 +52,17 @@ for entry in "${GENOMES[@]}"; do
git add . git add .
git commit -m "feat: initial scaffold and git-crypt init for ${GENOME_NAME}" git commit -m "feat: initial scaffold and git-crypt init for ${GENOME_NAME}"
git branch -M main
git push -u origin main git push -u origin main
# Key export and instructions # Key export and instructions
gcrypt_export_key "${GENOME_NAME}" gcrypt_export_key "${GENOME_NAME}"
gcrypt_print_key_instructions "${GENOME_NAME}" gcrypt_print_key_instructions "${GENOME_NAME}"
cd "${WORK_DIR}/${MASTER_REPO}"
git add .gitmodules "${GENOME_NAME}"
git diff --cached --quiet || git commit -m "chore: register submodule ${GENOME_NAME}"
git push
# Commit the submodule reference in the master repo # Commit the submodule reference in the master repo
cd "${WORK_DIR}/${MASTER_REPO}" cd "${WORK_DIR}/${MASTER_REPO}"
git commit -m "feat: add ${GENOME_NAME} as submodule" git commit -m "feat: add ${GENOME_NAME} as submodule"

0
scripts/setup-master.sh Normal file → Executable file
View file

0
scripts/setup.sh Normal file → Executable file
View file

2
scripts/verify-genomes.sh Normal file → Executable file
View file

@ -22,7 +22,7 @@ step "Genome structure: ${MODE}"
TOTAL_MISSING=0 TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do for entry in "${GENOMES[@]}"; do
IFS='|' read -r GENOME_NAME _ _ <<< "$entry" # 3-field registry; ignore desc + linked IFS='|' read -r GENOME_NAME _ <<< "$entry" # 4-field registry; only GENOME_NAME used here
genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}" genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"
if [[ ! -d "$genome_dir" ]]; then if [[ ! -d "$genome_dir" ]]; then

View file

@ -1,93 +1,92 @@
--- ---
name: ingest name: ingest
description: Semantic pass of a single raw source into the current genome's wiki — read the source, write sources/entities/concepts, handle contradictions, then emit a manifest and STOP. Use when a new file lands in raw/. Does NOT do git, log, index, lint, or PRs (a post-processor handles those), and does NOT handle private sources or project repos. description: Semantic pass of a single raw source into the current genome's wiki. The model ONLY extracts structured semantic content (summary, entities, concepts, contradictions) and returns one JSON object — it does not write files, produce frontmatter, slugs, git, index, log or PRs. A deterministic conform script (ingest-semantic.py) turns that JSON into properly-structured wiki pages + a manifest; run-ingest.sh then does index/log/lint/PR.
license: see repository license: see repository
compatibility: Runs inside one genome checkout (cwd = genome root). Tools needed — read, edit only. NO bash, NO git. The deterministic steps (index, log, scoped lint, PR) run AFTER you exit, via run-ingest.sh. PRIVATE_CONTEXT must be disabled. compatibility: Driven by scripts/ingest-semantic.py (one schema-constrained call to a local model via Ollama /api/chat). NO agent tools are used — no read, no edit, no bash. The model never touches the filesystem. PRIVATE_CONTEXT must be disabled.
allowed-tools: read edit
metadata: metadata:
framework: knowledge-genome framework: knowledge-genome
phase: "1-ingest-semantic" phase: "1-ingest-semantic"
mode: structured-json # lightweight agent + deterministic conform
--- ---
# Ingest — semantic pass # Ingest — semantic pass (structured-JSON)
You run inside ONE genome checkout. `AGENTS.md` (already in your context) is the This is the **light** semantic pass. The model's only job is to read one source
authoritative contract. Your job is the **semantic pass only**: read the source, write and return a single JSON object describing what the source contains. It does
the wiki pages, handle contradictions. You do **not** touch git, the log, the index, the **not** write files, choose paths, produce frontmatter, pick slugs, or touch
linter, or PRs — a post-processor (`run-ingest.sh`) does all of that _after you stop_, git / index / log / PRs. All structure is owned by `scripts/ingest-semantic.py`,
from the manifest you leave behind. This keeps your context clean and your turns few, which conforms the model's JSON into wiki pages with enforced kebab-case paths
which matters on a small local model. and frontmatter, and writes `.ingest-manifest.json` in the exact schema
`run-ingest.sh` consumes. This keeps the agent minimal and makes the output
impossible to mis-shape, regardless of how small or quirky the local model is.
**Argument:** the relative path of the single raw source to ingest Pipeline:
(e.g. `raw/articles/foo.md`). Process only this one.
## Pre-flight — stop the session if any check fails cd <genome checkout>
scripts/ingest-semantic.py <genome> raw/articles/<file>.md # phase 1 (this)
scripts/run-ingest.sh <genome> # phase 2 (deterministic)
1. Refuse if the argument path is under any `private/` directory. ## Pre-flight (enforced by ingest-semantic.py, not by the model)
1. Refuse if the source path is under any `private/` directory.
2. Refuse if `PRIVATE_CONTEXT` is not `disabled`. 2. Refuse if `PRIVATE_CONTEXT` is not `disabled`.
3. Confirm the file exists under `raw/`. 3. Confirm the file exists under `raw/` and is non-empty.
## Semantic work (your only job) ## What the model returns (the only contract)
1. Read the source once. A single JSON object, decoding-constrained to this shape via Ollama's `format`:
2. Write `wiki/sources/<kebab-slug>.md` — faithful summary + key points, with the required
frontmatter (`type: source`, `domain: <genome>`, `maturity: draft`,
`last_updated: <today>`, `private: false`, sensible `tags`).
3. For each entity (person, tool, org) → create or update `wiki/entities/<kebab-name>.md`.
4. For each concept (pattern, theory, decision) → create or update
`wiki/concepts/<kebab-name>.md`.
5. On a real contradiction with an existing claim, follow `AGENTS.md` §Conflict: create
`wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`. Never overwrite the existing page.
**Naming — you are the sole author of these names; nothing renames your files.** Use
minimal kebab-case: lowercase letters, digits and hyphens only — no spaces, no underscores,
no capitals. Pick stable names so the same entity is never created twice (always `acme`,
never also `acme-corp`). The path you write a file to MUST be byte-for-byte the path you
list in the manifest.
**Deciding create-vs-update and spotting contradictions — mind the context budget.** Use
`wiki/index.md` to locate existing pages, then read **only** the handful that _this source
actually names_ — the entities and concepts in the source's title and opening paragraphs —
not everything the index lists. When in doubt, read fewer: a missed cross-link is far
cheaper than a saturated context. Never scan whole directories.
## Finish: write the manifest, then STOP
As your **final action**, write `.ingest-manifest.json` at the genome root
(NOT under `wiki/`) describing exactly what you did. Then stop — do not commit, lint,
append to the log/index, or open anything.
```json ```json
{ {
"raw_source": "raw/articles/foo.md", "source_title": "Human title of the source",
"reasoning": "One sentence for the log: what changed and why.", "source_summary": "Faithful, self-contained prose summary of the source.",
"pr_summary": "One or two sentences describing this ingest for the PR.", "key_points": ["Concrete fact or claim worth indexing", "..."],
"contradictions": "None (or: 1 conflict file created — <concept>)", "entities": [
"pages": [ { "name": "Acme", "kind": "org", "description": "Vendor referenced by the source." }
{ ],
"path": "wiki/sources/foo.md", "concepts": [
"summary": "One-line index summary.", { "name": "JWT RS256", "description": "Asymmetric token signing scheme the source uses." }
"maturity": "draft", ],
"status": "created" "contradictions": [
}, { "concept": "auth", "description": "Source claims X, contradicting the existing claim Y." }
{ ],
"path": "wiki/entities/acme.md", "reasoning": "One sentence for the log: what this source adds.",
"summary": "Acme — vendor.", "pr_summary": "One or two sentences describing this ingest for the PR."
"status": "modified"
}
]
} }
``` ```
Manifest rules: Field rules (guidance for the model; the script enforces _structure_):
- List every page you created or modified, with `status` `created` or `modified`. - `source_summary` is faithful and in the source's own language. No markdown
- `summary` is the one-line index description (≈12 words max). For conflict pages the headings inside any description field. No padding.
summary is ignored — the index lists conflicts by slug only. - `entities` = every person, tool, org or product the source names. `kind`
- `maturity` is required only on `created` pages (it seeds the new index entry). It is `person|tool|org|product`. `description` = one or two factual sentences.
ignored for `modified` pages, so omit it there. - `concepts` = every pattern, theory, decision or named idea the source explains.
- Do NOT add a `model` field — the orchestrator records which model produced this run; you - `contradictions` = only a claim that directly contradicts a widely-known fact
cannot know your own model name reliably, so do not guess one. or contradicts the source itself; otherwise an empty list.
- Do not invent a `run_id`, branch, commit, or PR — those belong to the post-processor. - Names are the natural name of the thing. The script normalises them to
kebab-case and guarantees a single stable page per entity/concept.
One source per session. After writing the manifest, stop. ## What the conform script guarantees (so the model cannot break it)
- **Paths:** `wiki/sources/<slug>.md`, `wiki/entities/<slug>.md`,
`wiki/concepts/<slug>.md`, `wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`.
- **Slugs:** minimal kebab-case (lowercase, digits, hyphens; no spaces /
underscores / capitals).
- **Frontmatter:** `type`, `domain: <genome>`, `maturity: draft`,
`last_updated: <today>`, `private: false`, `tags`.
- **Create-vs-update:** existing entity/concept pages are **appended to** (a
section attributed to the new source), never overwritten. The source page is
the canonical summary of that exact source and is (re)written.
- **Manifest:** `.ingest-manifest.json` with `raw_source`, `reasoning`,
`pr_summary`, `contradictions` (string), and `pages[]` (`path`, `summary`,
`status`, plus `maturity` on created pages) — exactly what `run-ingest.sh`
validates.
The model name is recorded by the orchestrator (`INGEST_MODEL`); the model does
not self-report it. No `run_id`, branch, commit or PR is invented here — those
belong to phase 2.
> Interactive use of `pi` (TUI) is unaffected and still available for manual
> exploration. The **automated** ingest path no longer relies on `pi` or on
> native tool-calling: it is the single schema-constrained call above.

205
skills/ingest/scripts/index-append.py Normal file → Executable file
View file

@ -1,16 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# ============================================================================= # =============================================================================
# skills/ingest/scripts/index-append.py # skills/ingest/scripts/index-append.py
# Insert an entry line into the correct section of wiki/index.md and keep that # Insert OR remove an entry line in wiki/index.md, keeping the target section
# section's entries alphabetically ordered. Bumps frontmatter last_updated. # alphabetically ordered. Bumps frontmatter last_updated.
#
# NOTE: agents-genome.md and wiki-index.md claim the pre-commit hook sorts the
# index. The actual pre-commit.sh only runs the plaintext-leak check — it does
# NOT sort. This script owns the ordering instead. (If you later move sorting
# into the hook, reduce this to a plain append.)
# #
# index-append.py --section Sources \ # index-append.py --section Sources \
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`' # --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
# index-append.py --remove 'sources/foo' # delete the entry by wikilink
# ============================================================================= # =============================================================================
import argparse import argparse
import datetime import datetime
@ -22,14 +18,116 @@ LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
HEADER_RE = re.compile(r"^## ") HEADER_RE = re.compile(r"^## ")
def bump_last_updated(lines, today):
    """Set ``last_updated`` to ``today`` inside the first frontmatter block.

    ``lines`` is mutated in place; nothing is returned.

    - The frontmatter is the span between the first two lines that are exactly
      ``---`` after stripping whitespace. Every ``last_updated:`` line inside
      it is rewritten.
    - If the block is closed but has no ``last_updated:`` key, one is inserted
      just before the closing ``---`` (self-heal) and a note goes to stderr.
    - If no ``---`` line exists at all, only a warning is printed to stderr.
    - If the block is opened but never closed, matching keys up to the end of
      the file are rewritten and nothing is inserted.
    """
    stamp = f"last_updated: {today}"
    fences = [idx for idx, text in enumerate(lines) if text.strip() == "---"][:2]
    if not fences:
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
        return
    closing = fences[1] if len(fences) == 2 else None
    stop = len(lines) if closing is None else closing
    touched = False
    for idx in range(fences[0] + 1, stop):
        if lines[idx].startswith("last_updated:"):
            lines[idx] = stamp
            touched = True
    if closing is not None and not touched:
        lines.insert(closing, stamp)
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)
def do_remove(lines, link, today):
    """Remove every entry line whose wikilink equals ``link``.

    Parameters:
        lines: the index file as a list of lines (not mutated).
        link:  wikilink target to delete, e.g. ``sources/foo``.
        today: ISO date written to ``last_updated`` when something is removed.

    Returns the filtered list of lines.

    Idempotent: when the entry is already absent the content comes back
    unchanged. ``last_updated`` is bumped only when a line was actually
    removed, so a repeated call does not produce a spurious diff.
    """
    kept = []
    for ln in lines:
        m = LINK_RE.match(ln)
        if m and m.group(1) == link:
            continue
        kept.append(ln)
    removed = len(lines) - len(kept)
    if removed:
        # Bump after filtering: entry lines are never frontmatter lines, so the
        # result matches bumping first, but a no-op call leaves the date alone.
        bump_last_updated(kept, today)
        print(f"index-append: removed [[{link}]] ({removed} line(s))")
    else:
        # Idempotent: the goal state (entry absent) already holds.
        print(f"index-append: [[{link}]] not present, nothing to remove")
    return kept
def do_append(lines, section, entry, today):
    """Insert or refresh ``entry`` in the ``## <section>`` block of the index.

    Parameters:
        lines:   the index file as a list of lines (not mutated).
        section: section name; matched as a PREFIX of the text after ``## ``.
        entry:   the full index line to insert.
        today:   ISO date written to ``last_updated`` when the index changes.

    Returns:
        - a new list with the section rewritten (entries sorted
          case-insensitively) and ``last_updated`` bumped, when the entry was
          added or replaced;
        - ``lines`` itself, untouched, when the identical entry already exists.
          The date is deliberately not bumped, so a skip is a true no-op;
        - ``None`` when no matching section header exists.

    Entries are deduplicated by wikilink target, not by exact text: a re-ingest
    with a changed summary replaces the existing line instead of adding one.
    """
    # Locate the target section [start, end)
    start = None
    for i, ln in enumerate(lines):
        if HEADER_RE.match(ln) and ln[3:].startswith(section):
            start = i
            break
    if start is None:
        print(f"index-append: section '{section}' not found", file=sys.stderr)
        return None
    end = len(lines)
    for i in range(start + 1, len(lines)):
        if HEADER_RE.match(lines[i]):
            end = i
            break

    # Split the section body into intro (non-entry lines) and entries.
    body = lines[start + 1:end]
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    entries = [ln for ln in body if ENTRY_RE.match(ln)]

    new_m = LINK_RE.match(entry)
    new_link = new_m.group(1) if new_m else None
    if new_link is not None:
        replaced = False
        for idx, ln in enumerate(entries):
            m = LINK_RE.match(ln)
            if m and m.group(1) == new_link:
                if ln == entry:
                    print("index-append: entry already present, skipping")
                    return lines
                entries[idx] = entry  # same page, refreshed text
                replaced = True
                break
        if not replaced:
            entries.append(entry)
    else:
        # No parseable wikilink: fall back to exact-line dedup.
        if entry in entries:
            print("index-append: entry already present, skipping")
            return lines
        entries.append(entry)

    entries.sort(key=str.casefold)
    # Normalise the intro: drop trailing blanks, keep header comment(s).
    while intro and intro[-1].strip() == "":
        intro.pop()
    new_section = intro + [""] + entries + [""]
    result = lines[:start + 1] + new_section + lines[end:]
    # Bump only now that the index really changed.
    bump_last_updated(result, today)
    print(f"index-append: added to {section}")
    return result
def main() -> int: def main() -> int:
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument("--section", required=True, ap.add_argument("--section", help="Section name (required with --entry)")
help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts") ap.add_argument("--entry", help="Full index line to insert")
ap.add_argument("--entry", required=True, help="Full index line to insert") ap.add_argument("--remove", metavar="WIKILINK",
help="Remove the entry with this wikilink, e.g. sources/foo")
ap.add_argument("--file", default="wiki/index.md") ap.add_argument("--file", default="wiki/index.md")
args = ap.parse_args() args = ap.parse_args()
if bool(args.remove) == bool(args.entry):
print("index-append: provide exactly one of --entry or --remove", file=sys.stderr)
return 2
if args.entry and not args.section:
print("index-append: --entry requires --section", file=sys.stderr)
return 2
try: try:
with open(args.file, encoding="utf-8") as fh: with open(args.file, encoding="utf-8") as fh:
lines = fh.read().splitlines() lines = fh.read().splitlines()
@ -38,90 +136,15 @@ def main() -> int:
return 1 return 1
today = datetime.date.today().isoformat() today = datetime.date.today().isoformat()
if args.remove:
# 1. Bump last_updated inside the first frontmatter block out = do_remove(lines, args.remove, today)
fm_open = False
fm_close_idx = None
bumped = False
for i, ln in enumerate(lines):
if ln.strip() == "---":
if not fm_open:
fm_open = True
continue
fm_close_idx = i # the closing ---
break
if fm_open and ln.startswith("last_updated:"):
lines[i] = f"last_updated: {today}"
bumped = True
if not fm_open:
print("index-append: warning: no frontmatter found, last_updated not bumped",
file=sys.stderr)
elif not bumped and fm_close_idx is not None:
# self-heal: frontmatter present but missing the key — insert it before the close
lines.insert(fm_close_idx, f"last_updated: {today}")
print("index-append: last_updated key was missing — inserted", file=sys.stderr)
# 2. Locate the target section [start, end)
start = None
for i, ln in enumerate(lines):
if HEADER_RE.match(ln) and ln[3:].startswith(args.section):
start = i
break
if start is None:
print(f"index-append: section '{args.section}' not found in {args.file}",
file=sys.stderr)
return 1
end = len(lines)
for i in range(start + 1, len(lines)):
if HEADER_RE.match(lines[i]):
end = i
break
# 3. Split the section body into intro (non-entry) and entries
body = lines[start + 1:end]
intro = [ln for ln in body if not ENTRY_RE.match(ln)]
entries = [ln for ln in body if ENTRY_RE.match(ln)]
# Deduplicate by wikilink PATH, not by exact line: a re-ingest with a changed
# summary/maturity should UPDATE the existing entry, not add a duplicate line.
new_m = LINK_RE.match(args.entry)
new_link = new_m.group(1) if new_m else None
if new_link is not None:
replaced = False
for idx, ln in enumerate(entries):
m = LINK_RE.match(ln)
if m and m.group(1) == new_link:
if ln == args.entry:
print("index-append: entry already present, skipping")
return 0
entries[idx] = args.entry # same page, refreshed text
replaced = True
break
if not replaced:
entries.append(args.entry)
else: else:
# No parseable wikilink — fall back to exact-line dedup. out = do_append(lines, args.section, args.entry, today)
if args.entry in entries: if out is None:
print("index-append: entry already present, skipping") return 1
return 0
entries.append(args.entry)
entries.sort(key=str.casefold)
# Normalise intro: drop trailing blanks, keep header + comment(s)
while intro and intro[-1].strip() == "":
intro.pop()
new_section = intro + [""] + entries + [""]
lines = lines[:start + 1] + new_section + lines[end:]
with open(args.file, "w", encoding="utf-8") as fh: with open(args.file, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines) + "\n") fh.write("\n".join(out) + "\n")
print(f"index-append: added to {args.section}")
return 0 return 0

View file

@ -0,0 +1,374 @@
#!/usr/bin/env python3
# =============================================================================
# skills/ingest/scripts/ingest-semantic.py
# Phase 1 (semantic) of the Knowledge Genome ingest — light agent + deterministic conform.
#
# - FIXED: Add 'title:' field to frontmatter (lint was complaining about missing title)
# - NEW: Inject existing index (entity/concept names) into prompt to prevent duplicates
# - NEW: Richer prompt asking for 2-4 sentences per description (not 1-2), with concrete details
# - Enhanced schema to handle longer descriptions naturally
#
# The model does ONLY semantic extraction and returns ONE schema-constrained JSON
# object (no tools, no file writing, no git, no frontmatter, no slugs). This script
# then CONFORMS that output deterministically into wiki pages with enforced
# frontmatter + kebab-case paths, and writes a .ingest-manifest.json in EXACTLY the
# schema run-ingest.sh expects.
#
# cd <genome checkout>
# ingest-semantic.py <genome> raw/articles/<file>.md # phase 1 (this)
# run-ingest.sh <genome> # phase 2 (deterministic)
#
# Emits a single JSON status line on stdout (for n8n / logs).
# =============================================================================
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
# Chat endpoint that call_model() POSTs the request to.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
# Model name sent in the request payload and echoed in the final status line.
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
# Context window sent as options.num_ctx; it also bounds the source-size pre-flight check.
# NOTE: int() raises ValueError here if the variable is set to a non-numeric value.
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
# Timeout passed to urlopen() for each model request.
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
# INGEST_THINK: "false" disables a reasoning model's thinking trace, so models like
# gemma / qwq / qwen3 emit only the structured JSON (no truncation from long thinking).
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
THINK = os.environ.get("INGEST_THINK")
# Date written to frontmatter last_updated and used in conflict-page file names.
TODAY = datetime.date.today().isoformat()
# Optional maintainer feedback; when non-empty it is prepended to the user message.
FEEDBACK = os.environ.get("INGEST_FEEDBACK", "").strip()
def die(stage, reason):
    """Report a failure and stop the script.

    Writes one JSON object (``status`` = "error", plus ``stage`` and ``reason``)
    as a single line on stdout, then exits the process with status 1.
    """
    failure = {"status": "error", "stage": stage, "reason": reason}
    sys.stdout.write(json.dumps(failure) + "\n")
    sys.exit(1)
# --- args + pre-flight (mirror the old skill's guards, enforced in code) ---
if len(sys.argv) < 3:
    die("args", "usage: ingest-semantic.py <genome> <raw/rel/path.md>")
genome = sys.argv[1]
# Strip only literal leading "./" prefixes (and the redundant slashes after them).
# str.lstrip("./") strips any run of '.' and '/' CHARACTERS, which silently turned
# "../raw/x.md" or "/raw/x.md" into "raw/x.md" and let them pass the raw/ guard.
raw_rel = sys.argv[2]
while raw_rel.startswith("./"):
    raw_rel = raw_rel[2:].lstrip("/")
if "private/" in raw_rel or raw_rel.startswith("private"):
    die("preflight", "refusing private source: " + raw_rel)
if os.environ.get("PRIVATE_CONTEXT", "disabled") != "disabled":
    die("preflight", "PRIVATE_CONTEXT must be disabled")
if not raw_rel.startswith("raw/"):
    die("preflight", "source must live under raw/: " + raw_rel)
if not os.path.isfile(raw_rel):
    die("preflight", "source not found in cwd: " + raw_rel)
# An unreadable or non-UTF-8 source must end in the JSON status line, not a traceback.
try:
    with open(raw_rel, "r", encoding="utf-8") as fh:
        source_text = fh.read()
except (OSError, UnicodeDecodeError) as e:
    die("preflight", f"cannot read source {raw_rel}: {e}")
if not source_text.strip():
    die("preflight", "source is empty: " + raw_rel)

# --- pre-flight check: if the prompt exceeds context window, exit cleanly with stage:input ---
# Conservative estimate: ~4 chars/token for mixed IT/EN text
SAFETY_MARGIN = 4096  # room for system prompt + JSON response
MAX_SOURCE_TOKENS = NUM_CTX - SAFETY_MARGIN
MAX_SOURCE_CHARS = MAX_SOURCE_TOKENS * 4
if len(source_text) > MAX_SOURCE_CHARS:
    die("input", f"source too large ({len(source_text)} chars, limit ~{MAX_SOURCE_CHARS}). "
                 f"Use the SPLIT directive or divide the document.")
# --- read existing index to avoid duplicate slugs ---
def read_existing_slugs(index_path="wiki/index.md"):
    """Collect the entity and concept slugs already linked from the index.

    Scans ``index_path`` for ``[[entities/<slug>]]`` and ``[[concepts/<slug>]]``
    wikilinks and returns ``(entities, concepts)`` as two sets of slug strings.

    Best-effort by design: a missing index yields two empty sets, and so does an
    unreadable one. An unreadable index now also prints a warning to stderr
    (stdout stays reserved for the JSON status line), so lost duplicate
    protection is visible instead of silent.
    """
    entities, concepts = set(), set()
    if not os.path.isfile(index_path):
        return entities, concepts
    try:
        with open(index_path, "r", encoding="utf-8") as f:
            idx_text = f.read()
    except Exception as exc:
        print(f"ingest-semantic: warning: cannot read {index_path}: {exc}",
              file=sys.stderr)
        return entities, concepts
    entities.update(re.findall(r"\[\[entities/([a-z0-9\-]+)\]\]", idx_text))
    concepts.update(re.findall(r"\[\[concepts/([a-z0-9\-]+)\]\]", idx_text))
    return entities, concepts


existing_entities, existing_concepts = read_existing_slugs()
def slugify(s):
    """Turn a free-form name into a kebab-case slug.

    The text is stripped and lower-cased; every run of characters outside
    ``a-z0-9`` becomes a single hyphen, with no leading or trailing hyphen.
    ``None``, an empty string, or text with no usable characters gives
    ``"untitled"``.
    """
    lowered = (s or "").strip().lower()
    chunks = [piece for piece in re.split(r"[^a-z0-9]+", lowered) if piece]
    return "-".join(chunks) if chunks else "untitled"
def twords(s, n=20):
    """Collapse whitespace and truncate to at most ``n`` words.

    Used for index entry summaries. Text of ``n`` words or fewer is returned
    whitespace-normalised; longer text is cut after ``n`` words and gets a
    trailing ellipsis so the reader can tell it was shortened. ``None`` is
    treated as an empty string.
    """
    words = (s or "").split()
    if len(words) <= n:
        return " ".join(words)
    # The previous suffix was an empty string, so truncation was invisible.
    return " ".join(words[:n]) + "…"
def yaml_dq(s):
    """Render a value as a single-line YAML double-quoted scalar.

    A bare scalar breaks on many titles: most commonly a colon followed by a
    space ('Conflict: X' would parse as a mapping), but also '#', a leading
    '-' or '?', quotes, and so on. Wrapping the text in double quotes and
    escaping backslashes and double quotes makes any title valid YAML.
    All whitespace runs, newlines included, collapse to one space so the
    scalar stays on one line. ``None`` renders as an empty quoted string.
    """
    flat = " ".join((s or "").split())
    escaped = flat.translate(str.maketrans({"\\": "\\\\", '"': '\\"'}))
    return '"' + escaped + '"'
def frontmatter(ptype, title, tags):
    """Build the YAML frontmatter block for a wiki page.

    Parameters:
        ptype: value for the ``type`` key.
        title: page title, emitted as a double-quoted YAML scalar.
        tags:  iterable of tag strings; empty values are dropped, the rest are
               de-duplicated and sorted into a flow sequence.

    Returns the block as a string, opening and closing ``---`` lines included,
    ending in a newline. ``domain`` comes from the module-level ``genome`` and
    ``last_updated`` from ``TODAY``; maturity is always ``draft`` and private
    is always ``false``.
    """
    unique_tags = sorted({tag for tag in tags if tag})
    fields = [
        f"title: {yaml_dq(title)}",
        f"type: {ptype}",
        f"domain: {genome}",
        "maturity: draft",
        f"last_updated: {TODAY}",
        "private: false",
        "tags: [" + ", ".join(unique_tags) + "]",
    ]
    return "---\n" + "\n".join(fields) + "\n---\n"
def write_new(path, ptype, title, body, tags):
    """Create (or overwrite) a wiki page at ``path``.

    Parent directories are created as needed. The file holds the frontmatter
    built from ``ptype``/``title``/``tags``, a blank line, a ``# <title>``
    heading, a blank line, and then ``body`` followed by a newline.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.writelines([
            frontmatter(ptype, title, tags),
            f"\n# {title}\n\n{body}\n",
        ])
def append_section(path, source_slug, body):
    """Append a source-attributed section to an existing page.

    The page is never overwritten: a ``## From [[sources/<slug>]]`` section
    holding ``body`` is added at the end. Afterwards the first
    ``last_updated:`` line in the file is set to ``TODAY``. That second step is
    best-effort, so any error during it is deliberately ignored.
    """
    addition = f"\n\n## From [[sources/{source_slug}]]\n\n{body}\n"
    with open(path, "a", encoding="utf-8") as page:
        page.write(addition)
    # Best-effort bump of last_updated in the existing frontmatter.
    try:
        with open(path, "r", encoding="utf-8") as page:
            current = page.read()
        stamp_line = re.compile(r"^last_updated:.*$", re.MULTILINE)
        refreshed = stamp_line.sub("last_updated: " + TODAY, current, count=1)
        with open(path, "w", encoding="utf-8") as page:
            page.write(refreshed)
    except Exception:
        pass
# --- the semantic contract ---
# Template for the system message. call_model() fills the {existing_entities} and
# {existing_concepts} placeholders with str.format(), so any other literal brace
# added to this text must be doubled ("{{" / "}}") or format() will fail.
SYSTEM_PROMPT = """You perform the SEMANTIC PASS of a single source into a knowledge wiki.
Read the source and return ONLY structured data describing what it contains.
You do not write files, you do not produce frontmatter, and you do not invent
paths, slugs, branches, commits or PRs a deterministic script does all of that.
Rules:
- source_summary: a faithful, self-contained summary of the source, in the
source's own language. Plain prose, NO markdown headings. 2-4 sentences,
with concrete details. Preserve the essence and nuance of the source.
- key_points: 3-5 concrete facts or claims worth indexing; no padding.
- entities: every person, tool, organisation or product the source names.
kind is one of person|tool|org|product. description is 2-3 factual sentences
with specifics. No markdown headings inside the description.
- concepts: every pattern, theory, decision or named idea the source explains.
description is 2-3 factual sentences with concrete examples or context.
- contradictions: ONLY when the source makes a claim that directly contradicts a
widely-known fact or contradicts itself. Otherwise return an empty list.
- Names must be the natural name of the thing; the script will normalise them.
If the source references an entity or concept already in the wiki (see the list below),
use the EXACT name already present; do not invent a variant. This prevents duplicates.
Existing entities in this genome:
{existing_entities}
Existing concepts in this genome:
{existing_concepts}
Be faithful to the source. Be specific. Do not pad or improvise."""
# --- JSON schema -> constrained decoding (Ollama structured outputs) ---
# Sent as the request's "format" field by call_model(). The conform code below reads
# these keys with .get(): source_title, source_summary, key_points, entities,
# concepts, contradictions, reasoning, pr_summary.
SCHEMA = {
"type": "object",
"properties": {
"source_title": {"type": "string"},
"source_summary": {"type": "string"},
"key_points": {"type": "array", "items": {"type": "string"}},
# Entities: "kind" is constrained to four values but is not in "required".
"entities": {"type": "array", "items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"kind": {"type": "string",
"enum": ["person", "tool", "org", "product"]},
"description": {"type": "string"},
},
"required": ["name", "description"],
}},
"concepts": {"type": "array", "items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"description": {"type": "string"},
},
"required": ["name", "description"],
}},
# Each contradiction becomes one wiki/queries/conflict-<concept>-<date>.md page.
"contradictions": {"type": "array", "items": {
"type": "object",
"properties": {
"concept": {"type": "string"},
"description": {"type": "string"},
},
"required": ["concept", "description"],
}},
# Optional free text; the manifest falls back to a generic sentence when absent.
"reasoning": {"type": "string"},
"pr_summary": {"type": "string"},
},
# key_points, contradictions, reasoning and pr_summary are optional.
"required": ["source_title", "source_summary", "entities", "concepts"],
}
def call_model(max_retries=2, base_delay=2.0):
    """Send the source to Ollama and return the parsed semantic JSON object.

    Parameters:
        max_retries: extra attempts after the first one (total = max_retries + 1).
        base_delay:  seconds before the first retry; doubles on each further retry.

    Returns the model's answer as a dict.

    Retried (with exponential backoff):
        - transport failures: any OSError (URLError/HTTPError, timeouts,
          connection resets) or http.client.HTTPException (e.g. IncompleteRead);
        - an API response body that is not valid UTF-8 JSON;
        - a model answer shorter than 10 characters that is not a JSON object
          (treated as a truncated or empty reply).
    Not retried: a longer answer that is malformed or is not a JSON object.
    That is a model issue, so the loop stops.

    When no attempt succeeds the script ends through die("model", ...), so the
    caller always gets a dict or the process exits with the JSON error line.
    """
    # Function-scope import: http.client is not among the file's top-level imports.
    import http.client

    existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
    existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
    prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
    user_content = (
        ("REVISION REQUESTED BY THE MAINTAINER (address this explicitly):\n"
         + FEEDBACK + "\n\n") if FEEDBACK else ""
    ) + (
        "Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
        + source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."
    )
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_content},
        ],
        "format": SCHEMA,
        "stream": False,
        "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
    }
    # Only send "think" when INGEST_THINK is set; plain instruct models get no flag.
    if THINK is not None:
        payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
    data = json.dumps(payload).encode("utf-8")

    last_error = None
    for attempt in range(max_retries + 1):
        if attempt > 0:
            delay = base_delay * (2 ** (attempt - 1))
            print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
            time.sleep(delay)
        req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                resp = json.loads(r.read().decode("utf-8"))
        except (OSError, http.client.HTTPException) as e:
            # OSError covers URLError/HTTPError/TimeoutError plus errors raised while
            # reading the body, which urllib does not wrap in URLError.
            last_error = f"connection/transport error: {e}"
            continue
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            last_error = f"invalid JSON from Ollama API: {e}"
            continue

        message = resp.get("message") if isinstance(resp, dict) else None
        content = ((message or {}).get("content") or "").strip()
        # Tolerate a fenced answer: drop the backticks, then skip to the first brace.
        if content.startswith("```"):
            content = content.strip("`")
        brace = content.find("{")
        if brace >= 0:
            content = content[brace:]
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            last_error = f"model did not return valid JSON: {e}"
        else:
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: the caller's sem.get() would crash on it.
            last_error = f"model returned JSON {type(parsed).__name__}, expected an object"
        if len(content) < 10:
            continue  # likely truncated -> retry
        break  # long but malformed -> model issue, stop
    die("model", last_error or "model call failed after retries")
# --- run the semantic pass ---
sem = call_model()

# Source of truth: slug from slug.sh --raw (deterministic, path-aware, collision-proof)
# A failing or empty slug must end in the JSON status line: an empty slug would
# otherwise write "wiki/sources/.md", and a non-zero exit would print a traceback.
try:
    source_slug = subprocess.check_output(
        ["bash", os.path.join(os.path.dirname(__file__), "slug.sh"), "--raw", raw_rel],
        text=True
    ).strip()
except (OSError, subprocess.CalledProcessError) as e:
    die("slug", f"slug.sh failed for {raw_rel}: {e}")
if not source_slug:
    die("slug", "empty slug for " + raw_rel)

# Hash of the raw file, stored in the source page frontmatter as source_sha256.
with open(raw_rel, "rb") as f:
    src_sha = hashlib.sha256(f.read()).hexdigest()

pages = []

# 1. source page — canonical summary of THIS source (re)written
src_path = f"wiki/sources/{source_slug}.md"
src_status = "modified" if os.path.exists(src_path) else "created"
kp_lines = "\n".join("- " + p for p in (sem.get("key_points") or []) if p.strip())
src_body = (sem.get("source_summary") or "").strip()
if kp_lines:
    src_body += "\n\n## Key points\n\n" + kp_lines
src_body += f"\n\n## Source\n\n- [[{raw_rel}]]\n"
src_title = sem.get('source_title') or source_slug
# "or []" tolerates an explicit null list from the model; at most 8 tags are kept.
src_tags = ([slugify(e.get("name", "")) for e in (sem.get("entities") or [])]
            + [slugify(c.get("name", "")) for c in (sem.get("concepts") or [])])[:8]
os.makedirs("wiki/sources", exist_ok=True)
with open(src_path, "w", encoding="utf-8") as f:
    fm = frontmatter("source", src_title, src_tags)
    # Inject tracking fields before the closing '---' (first newline-dash-dash-dash-newline)
    fm = fm.replace("\n---\n", f"\nsource_path: {raw_rel}\nsource_sha256: {src_sha}\n---\n", 1)
    f.write(fm)
    f.write(f"\n# {src_title}\n\n{src_body}\n")
pages.append({"path": src_path,
              "summary": twords(src_title),
              "maturity": "draft", "status": src_status})
def handle(kind_dir, ptype, items):
    """Conform extracted entities or concepts into wiki pages.

    Parameters:
        kind_dir: sub-directory under ``wiki/`` ("entities" or "concepts").
        ptype:    page type written to the frontmatter of new pages.
        items:    list of dicts with ``name`` and ``description``; may be None.

    Items without a name are skipped. For each other item the page path comes
    from the slugified name. An existing page gets an appended section
    attributed to the current source and is recorded as "modified". A missing
    page is created with a ``## Sources`` backlink and recorded as "created".
    One record per item is appended to the module-level ``pages`` list.
    """
    for item in (items or []):
        label = (item.get("name") or "").strip()
        if not label:
            continue
        target = f"wiki/{kind_dir}/{slugify(label)}.md"
        text = (item.get("description") or "").strip()
        record = {"path": target, "summary": twords(text)}
        if os.path.exists(target):
            append_section(target, source_slug, text)
            record["status"] = "modified"
        else:
            page_body = text + f"\n\n## Sources\n\n- [[sources/{source_slug}]]\n"
            write_new(target, ptype, label, page_body, [genome, ptype])
            record["maturity"] = "draft"
            record["status"] = "created"
        pages.append(record)
# 2. entities, 3. concepts
handle("entities", "entity", sem.get("entities", []))
handle("concepts", "concept", sem.get("concepts", []))

# 4. contradictions -> conflict pages (run-ingest routes wiki/queries/conflict-*)
# Each contradiction becomes its own query page, named after the slugified
# concept and today's date, with a backlink to the source page.
conflicts = sem.get("contradictions") or []
conf_slugs = []
for conflict in conflicts:
    concept_slug = slugify(conflict.get("concept", "unknown"))
    conf_slugs.append(concept_slug)
    conflict_path = f"wiki/queries/conflict-{concept_slug}-{TODAY}.md"
    conflict_title = f"Conflict: {conflict.get('concept', '')}"
    conflict_body = ((conflict.get("description") or "").strip()
                     + f"\n\n## Source\n\n- [[sources/{source_slug}]]\n")
    write_new(conflict_path, "query", conflict_title, conflict_body, [genome, "conflict"])
    pages.append({"path": conflict_path, "summary": "", "maturity": "draft",
                  "status": "created"})

# The manifest carries contradictions as a single human-readable string.
if conflicts:
    contradictions_str = (f"{len(conflicts)} conflict file(s) created — "
                          + ", ".join(conf_slugs))
else:
    contradictions_str = "None"
# --- write the manifest in EXACTLY run-ingest.sh's schema ---
# reasoning / pr_summary fall back to a generic sentence when the model omitted them.
fallback_reasoning = "Ingest of " + raw_rel
fallback_summary = "Semantic ingest of " + raw_rel
manifest = {
    "raw_source": raw_rel,
    "reasoning": sem.get("reasoning") or fallback_reasoning,
    "pr_summary": sem.get("pr_summary") or fallback_summary,
    "contradictions": contradictions_str,
    "pages": pages,
}
with open(".ingest-manifest.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(manifest, indent=2, ensure_ascii=False))

# Single JSON status line on stdout (for n8n / logs).
status_line = {"status": "ok", "stage": "semantic",
               "pages": len(pages), "model": MODEL,
               "manifest": ".ingest-manifest.json"}
print(json.dumps(status_line))

9
skills/ingest/scripts/log-append.sh Normal file → Executable file
View file

@ -21,6 +21,7 @@ while [[ $# -gt 0 ]]; do
--context) context="$2"; shift 2 ;; --context) context="$2"; shift 2 ;;
--output) output="$2"; shift 2 ;; --output) output="$2"; shift 2 ;;
--reasoning) reasoning="$2"; shift 2 ;; --reasoning) reasoning="$2"; shift 2 ;;
--run-id) run_id_arg="$2"; shift 2 ;;
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;; *) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
esac esac
done done
@ -35,9 +36,15 @@ esac
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; } [[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')" run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
today="$(date +%Y-%m-%d)" today="$(date +%Y-%m-%d)"
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
echo "run_id=${run_id}"
exit 0
fi
{ {
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject" printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
printf -- '- run_id: `%s`\n' "$run_id" printf -- '- run_id: `%s`\n' "$run_id"

21
skills/ingest/scripts/open-pr.sh Normal file → Executable file
View file

@ -16,10 +16,11 @@ set -euo pipefail
: "${FORGEJO_USER:?missing FORGEJO_USER}" : "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}" : "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"
slug="" title="" body_file="" base="main" label="" slug="" title="" body_file="" base="main" label="" branch=""
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
--slug) slug="$2"; shift 2 ;; --slug) slug="$2"; shift 2 ;;
--branch) branch="$2"; shift 2 ;;
--title) title="$2"; shift 2 ;; --title) title="$2"; shift 2 ;;
--body-file) body_file="$2"; shift 2 ;; --body-file) body_file="$2"; shift 2 ;;
--base) base="$2"; shift 2 ;; --base) base="$2"; shift 2 ;;
@ -28,16 +29,23 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
: "${slug:?--slug required}"
: "${title:?--title required}" : "${title:?--title required}"
: "${body_file:?--body-file required}" : "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; } [[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }
branch="feat/ai-ingest-${slug}" # --branch overrides the default; otherwise derive the ingest branch from --slug.
# (run-prune passes its own chore/prune-orphans-* branch; run-ingest passes --slug.)
if [[ -z "$branch" ]]; then
: "${slug:?--slug or --branch required}"
branch="feat/ai-ingest-${slug}"
fi
repo="$(basename -s .git "$(git config --get remote.origin.url)")" repo="$(basename -s .git "$(git config --get remote.origin.url)")"
# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main) # 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
git switch -c "$branch" 2>/dev/null || git switch "$branch" # Rolling PR: -C force-resets the branch label to the current base (we are on it after
# clean_start) and CARRIES the freshly-written wiki/ changes, so a re-ingest of the same
# source rebuilds the branch cleanly instead of hitting a dirty-switch refusal.
git switch -C "$branch"
git add wiki/ git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that # Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.). # happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
@ -46,7 +54,10 @@ if git diff --cached --quiet -- wiki/; then
exit 1 exit 1
fi fi
git commit -m "$title" -- wiki/ git commit -m "$title" -- wiki/
git push -u origin "$branch" # Try a normal push (new branch / fast-forward). If the branch was rebuilt from base and
# diverged, force-with-lease updates the open PR in place — the lease refuses to clobber if
# origin moved unexpectedly since our fetch, so concurrent work is never lost.
git push -u origin "$branch" 2>/dev/null || git push -u --force-with-lease origin "$branch"
# DRY_RUN: local git work done; skip the Forgejo API (offline tests). # DRY_RUN: local git work done; skip the Forgejo API (offline tests).
if [[ -n "${DRY_RUN:-}" ]]; then if [[ -n "${DRY_RUN:-}" ]]; then

View file

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# =============================================================================
# orphan-wiki.sh — find source pages whose raw source no longer exists.
# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
# Read-only: no lock needed (same policy as pending-raw).
#
#   orphan-wiki.sh <genome>
#
# Env: GENOMES_ROOT (default ~/genomes), KG_LIB_DIR (default
#      ~/knowledge-genome-orchestrator/lib, must contain clean-start.sh).
# =============================================================================
set -euo pipefail

genome="${1:?usage: orphan-wiki.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

declare -a ORPH=()
for page in wiki/sources/*.md; do
  [[ -e "$page" ]] || continue   # unmatched glob stays literal: skip it
  # Print the first source_path value and quit. Quitting inside sed (instead of
  # piping to `head -n1`) avoids a SIGPIPE exit that `pipefail` would turn into
  # a script abort.
  sp="$(sed -n -e '/^source_path:/{' -e 's/^source_path:[[:space:]]*//p' -e 'q' -e '}' "$page" \
    | tr -d '\r')"
  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

if [[ ${#ORPH[@]} -eq 0 ]]; then
  # Build the envelope with jq so the genome name is always a valid JSON string.
  jq -cn --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  printf '%s\torphan\n' "${ORPH[@]}" \
    | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" '{status:"ok", genome:$g, count:length, files:[.[].path], detail:.}'
fi

View file

@ -0,0 +1,64 @@
#!/usr/bin/env bash
# =============================================================================
# pending-raw.sh — deterministic "what needs ingesting" calculator.
# Reads the clean base checkout and classifies each raw/articles/*.md as:
#   new      -> no wiki/sources/<slug>.md
#   modified -> page exists but its source_sha256 != current file hash
# Emits the same JSON envelope as changed-raw (drop-in), plus detail[] for ntfy.
#
#   pending-raw.sh <genome>
#
# Env: GENOMES_ROOT (default ~/genomes), KG_LIB_DIR (default
#      ~/knowledge-genome-orchestrator/lib, must contain clean-start.sh).
# =============================================================================
set -euo pipefail

genome="${1:?usage: pending-raw.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SLUG="${SCRIPT_DIR}/slug.sh"

declare -a NEW=()
declare -a MOD=()
declare -A SEEN_SLUG=()   # slug -> raw path that produced it (collision detection)

if [[ -d raw/articles ]]; then
  while IFS= read -r -d '' f; do
    rel="${f#./}"
    case "$rel" in
      */.stfolder/*|*/.stignore|*/.gitkeep) continue ;;
    esac
    slug="$("$SLUG" --raw "$rel")" || continue
    # Residual collision (two distinct raws -> same slug): warn, do not silence.
    if [[ -n "${SEEN_SLUG[$slug]:-}" && "${SEEN_SLUG[$slug]}" != "$rel" ]]; then
      # A missing or failing `logger` must not abort the scan under `set -e`;
      # fall back to stderr so the warning is still visible.
      if ! logger -t pending-raw "warn: slug collision ${slug}: ${SEEN_SLUG[$slug]} <-> ${rel}" 2>/dev/null; then
        printf 'pending-raw: warn: slug collision %s: %s <-> %s\n' \
          "$slug" "${SEEN_SLUG[$slug]}" "$rel" >&2
      fi
    fi
    SEEN_SLUG[$slug]="$rel"
    page="wiki/sources/${slug}.md"
    if [[ ! -f "$page" ]]; then
      NEW+=("$rel")
    else
      cur="$(sha256sum "$rel" | cut -d' ' -f1)"
      # First source_sha256 value; sed quits after it so no stage gets a SIGPIPE
      # that `pipefail` would turn into a script abort.
      rec="$(sed -n -e '/^source_sha256:/{' -e 's/^source_sha256:[[:space:]]*//p' -e 'q' -e '}' "$page" \
        | tr -d '\r')"
      if [[ "$cur" != "$rec" ]]; then
        MOD+=("$rel")
      fi
    fi
  done < <(find raw/articles -type f -name '*.md' -print0 2>/dev/null)
fi

if [[ ${#NEW[@]} -eq 0 && ${#MOD[@]} -eq 0 ]]; then
  # Build the envelope with jq so the genome name is always a valid JSON string.
  jq -cn --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  {
    # Guard each expansion: on bash < 4.4 an empty array expanded under `set -u`
    # is an "unbound variable" error, which hit whenever only one list had items.
    if (( ${#NEW[@]} > 0 )); then printf '%s\tnew\n' "${NEW[@]}"; fi
    if (( ${#MOD[@]} > 0 )); then printf '%s\tmodified\n' "${MOD[@]}"; fi
  } | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" \
      '{status: "ok", genome: $g, count: length, files: [.[].path], detail: .}'
fi

35
skills/ingest/scripts/run-ingest.sh Normal file → Executable file
View file

@ -1,13 +1,17 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# ============================================================================= # =============================================================================
# skills/ingest/scripts/run-ingest.sh # skills/ingest/scripts/run-ingest.sh
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout. # Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every # checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean. # performs every deterministic step — index, log, scoped lint, PR.
# #
# run-ingest.sh <genome_name> [manifest_path] # run-ingest.sh <genome_name> [manifest_path]
# #
# Emits a single JSON result line on stdout for n8n to parse. # Emits a single JSON result line on stdout for n8n to parse.
#
# every page listed in the manifest must exist on disk before we trust the run.
# Everything else is unchanged: the manifest the semantic phase now produces is
# already in this script's expected schema.
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
@ -49,7 +53,7 @@ contradictions="$(jq -r '.contradictions // "None"' "$manifest")"
[[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing" [[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"
slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}" slug="$(bash "${SCRIPTS}/slug.sh" --raw "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"
# --- collect touched paths --- # --- collect touched paths ---
mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest") mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest")
@ -57,6 +61,13 @@ mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .p
all_paths=( "${created_paths[@]}" "${modified_paths[@]}" ) all_paths=( "${created_paths[@]}" "${modified_paths[@]}" )
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported" [[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"
# --- the semantic phase (ingest-semantic.py) writes the files; verify
# every manifest page actually exists on disk before trusting the run. Catches any
# drift between what the manifest claims and what was really written. ---
for _p in "${all_paths[@]}"; do
[[ -f "$_p" ]] || fail "pages" "manifest lists a file not present on disk: ${_p}"
done
conflict_label="" conflict_label=""
# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit). # NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).
@ -96,19 +107,31 @@ done < <(jq -r '.pages[] | select(.status=="created")
| [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest") | [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
# --- 2. log entry --- # --- 2. log entry ---
# Stable run_id: deterministic from the input (raw path + content hash). Survives wrapper
# re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
src_sha="$(sha256sum "$raw_source" 2>/dev/null | cut -d' ' -f1)" || src_sha="unknown"
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")" out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \ bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \ --context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|| fail "log" "log-append failed" || fail "log" "log-append failed"
# --- 3. scoped linter (capture findings for the PR; never aborts the run) --- # --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" && lint_rc=0 || lint_rc=$? # Point scoped-lint at the same manifest we were handed so its duplicate
# advisory reads the right file even when a non-default path arrives as $2.
# (The dedup check lives inside lib/lint.sh and is invoked by scoped-lint —
# there is no separate check-duplicates.sh script.)
export INGEST_MANIFEST="$manifest"
lint_out="$(
bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1
)" && lint_rc=0 || lint_rc=$?
# --- 4. assemble the PR body (manifest tables + lint results) --- # --- 4. assemble the PR body (manifest tables + lint results) ---
body="$(mktemp)" body="$(mktemp)"
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash) trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
{ {
echo "<!-- kg:raw=${raw_source} -->" # marker for the rejection loop (invisible in the render)
echo "## Summary" echo "## Summary"
echo "$pr_summary" echo "$pr_summary"
echo "" echo ""

View file

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-prune.sh
# Symmetric companion to run-ingest: prune source pages whose raw source no
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
# race. Removes ONLY the pages it derived plus their index entries, commits
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
# operator approves the deletion; principle 2). Never deletes of its own accord.
#
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
# prune`) has already taken the per-genome lock and done clean_start, exactly
# like `pi ingest` — so this script does neither.
#
#   run-prune.sh <genome>
#
# Emits a single JSON result line on stdout for n8n to parse:
#   nothing to prune -> {status:"ok", count:0, pruned:[], detail:"no orphans"}
#   pages pruned     -> {status:"ok"|"pr_failed", count, pr_url, pruned[], detail}
#   failure          -> {status:"error", stage, reason}
# Exit status: 0 when status is "ok", 1 otherwise.
# =============================================================================
set -euo pipefail

# <genome> is required by the interface; this script only checks it is present.
genome="${1:?usage: run-prune.sh <genome>}"
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# fail <stage> <reason>: print a one-line JSON error object on stdout, exit 1.
fail() {
  jq -nc --arg stage "$1" --arg reason "$2" '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# jq is checked first with a literal message because fail() itself needs jq.
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","reason":"jq missing"}'; exit 1; }
command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"

# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when its frontmatter source_path points at
# a raw file that no longer exists. Legacy pages without source_path are ignored.
declare -a ORPH=()
for page in wiki/sources/*.md; do
  # -f rather than -e: skips the literal pattern when the glob matches nothing
  # AND anything that is not a regular file (e.g. a directory named foo.md),
  # which would make sed fail and abort the run without a JSON result.
  [[ -f "$page" ]] || continue
  # No `head` in the pipeline: under pipefail an early-exiting head can turn a
  # SIGPIPE into a fatal error. The first match is taken in the shell instead.
  sp="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r')" \
    || fail "scan" "cannot read ${page}"
  sp="${sp%%$'\n'*}"                  # first source_path line wins
  sp="${sp%"${sp##*[![:space:]]}"}"   # drop trailing blanks
  # Accept a quoted YAML scalar. Being lenient here is the conservative choice:
  # a value we fail to normalise would be reported as an orphan and deleted.
  case "$sp" in
    \"*\") sp="${sp#\"}"; sp="${sp%\"}" ;;
    \'*\') sp="${sp#\'}"; sp="${sp%\'}" ;;
  esac
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

if [[ ${#ORPH[@]} -eq 0 ]]; then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi

# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
declare -a PRUNED=()
for page in "${ORPH[@]}"; do
  case "$page" in
    wiki/*) : ;;
    *) fail "prune" "refusing to remove outside wiki/: ${page}" ;;
  esac
  case "$page" in *..*) fail "prune" "path traversal in page: ${page}" ;; esac
  [[ -f "$page" ]] || continue        # vanished since the scan: nothing to do
  rm -f -- "$page"
  link="${page#wiki/}"; link="${link%.md}"   # e.g. sources/foo
  python3 "${SCRIPTS}/index-append.py" --remove "$link" \
    || fail "index" "index-append --remove failed for ${link}"
  PRUNED+=("$link")
done

# Every derived orphan disappeared between scan and removal: report a clean
# no-op instead of opening a "prune 0 orphaned source(s)" PR (and instead of
# emitting pruned:[""] from an empty array further down).
if [[ ${#PRUNED[@]} -eq 0 ]]; then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi

# --- assemble the PR body ---
date_tag="$(date +%F)"
body="$(mktemp)"
trap 'rm -f "$body"' EXIT             # temp file is removed on every exit path
{
  echo "## Prune orphaned sources"
  echo ""
  echo "These source pages reference a \`source_path\` whose raw file no longer exists"
  echo "in \`raw/\`. Removing them keeps the wiki in sync with git (the source of truth)."
  echo ""
  echo "| Removed page |"
  echo "|--------------|"
  for l in "${PRUNED[@]}"; do echo "| \`wiki/${l}.md\` |"; done
} > "$body"

# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
# open-pr's combined stdout+stderr is captured; its exit status is kept in pr_rc
# so a PR failure is reported in the JSON result rather than aborting here.
branch="chore/prune-orphans-${date_tag}"
pr_out="$( bash "${SCRIPTS}/open-pr.sh" \
  --branch "$branch" \
  --title "chore: prune ${#PRUNED[@]} orphaned source(s)" \
  --body-file "$body" --base "${INGEST_BASE:-main}" 2>&1 )" && pr_rc=0 || pr_rc=$?
# The URL is whatever follows the first "PR opened: " line; empty if absent.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- result line for n8n ---
jq -nc \
  --arg status "$([[ $pr_rc -eq 0 ]] && echo ok || echo pr_failed)" \
  --argjson count "${#PRUNED[@]}" \
  --arg pr_url "$pr_url" \
  --arg detail "$pr_out" \
  --argjson pruned "$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s .)" \
  '{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'
[[ $pr_rc -eq 0 ]] || exit 1

7
skills/ingest/scripts/scoped-lint.sh Normal file → Executable file
View file

@ -49,6 +49,13 @@ for f in "$@"; do
check_broken_links "$f" || true # warnings only check_broken_links "$f" || true # warnings only
done done
# Cross-page duplicate advisory: runs ONCE over the whole manifest (not per
# file) — it compares this run's created slugs against the index, so repeating
# it for every file would only print the same warnings N times. Warn-only;
# never affects the exit status. INGEST_MANIFEST lets run-ingest.sh point us at
# a non-default manifest path; falls back to the conventional name otherwise.
check_duplicates "${INGEST_MANIFEST:-.ingest-manifest.json}"
echo "" echo ""
echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)" echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)"

12
skills/ingest/scripts/slug.sh Normal file → Executable file
View file

@ -7,6 +7,18 @@
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
# --raw mode: slug.sh --raw <raw/bucket/rel/path>
# Derives the slug from a raw path: drops the leading "raw/" and the bucket
# directory, strips the file extension, sanitises every remaining path segment
# (runs of non-alphanumerics -> "-", trimmed, lower-cased) and joins the
# segments with "-". Prints the slug and exits 0; exits 1 with a message on
# stderr when nothing is left.
if [[ "${1:-}" == "--raw" ]]; then
  raw="${2:?usage: slug.sh --raw <raw/bucket/rel/path>}"
  rel="${raw#raw/}"; rel="${rel#*/}"   # strip "raw/" and the bucket name
  # Strip the extension only when the FILE NAME has one. A bare ${rel%.*} cuts
  # at the last dot anywhere in the path, so "v1.2/readme" collapsed to "v1"
  # and the file name was lost.
  leaf="${rel##*/}"
  if [[ "$leaf" == *.* ]]; then
    rel="${rel%.*}"
  fi
  slug="$(printf '%s\n' "$rel" | tr '/' '\n' \
    | sed -E 's/[^a-zA-Z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//' \
    | tr '[:upper:]' '[:lower:]' | paste -sd- -)"
  [[ -n "$slug" ]] || { echo "slug: empty result for input '${raw}'" >&2; exit 1; }
  printf '%s\n' "$slug"
  exit 0
fi
input="${1:?usage: slug.sh <path-or-title>}" input="${1:?usage: slug.sh <path-or-title>}"
# Strip directory and extension when given a path # Strip directory and extension when given a path

View file

@ -166,7 +166,7 @@ private: true | false
### Links ### Links
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs. - Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
- Cross-genome: `[[../genome-target/wiki/folder/file]]`. - Cross-genome: NOT via wikilink (submodule pointers make relative paths brittle). A concept owned by another genome is pulled in by the navigation skill as a raw under `raw/articles/crossgen-<topic>-<date>.md`, then ingested here normally. See master `AGENTS.md` §Cross-Genome Pull.
- External: `[text](https://...)`. - External: `[text](https://...)`.
### Index entries ### Index entries

View file

@ -2,6 +2,19 @@
<!-- One sentence: goal of this session and source processed. --> <!-- One sentence: goal of this session and source processed. -->
<!--
REVIEW GUIDELINES (write the guideline as the FIRST word of your review):
REWORK: <what to fix> -> same branch, guided retry
RESTART: <why restart> -> close PR, start over from scratch
SPLIT: <how to split> -> close PR, reopen as separate branches
REJECT: <why not> -> close PR, no retry
MERGE -> approve and merge
Rules: one concern per directive; be specific to lines/pages; name the principle
that was violated; describe the DESIRED STATE; avoid saying “do better.”
-->
## Pages Created ## Pages Created
| Path | Type | Maturity | | Path | Type | Maturity |

View file

@ -11,9 +11,8 @@ set -euo pipefail
FAILED=0 FAILED=0
# Verify git-crypt is initialized # Verify git-crypt is initialized
if [[ ! -d ".git-crypt" ]]; then if ! git-crypt status >/dev/null 2>&1; then
printf "\n\033[0;31m[CRITICAL] git-crypt not initialized.\033[0m\n" printf "\n[CRITICAL] git-crypt not initialized.\n"
printf "Run 'git-crypt init' and 'make setup' before committing.\n"
exit 1 exit 1
fi fi

View file

@ -57,7 +57,7 @@ grep "^## \[2026-05" wiki/log.md
## [{{DATE}}] CONFIG | Genome scaffolded ## [{{DATE}}] CONFIG | Genome scaffolded
- run_id: `system-init` - run_id: `system-init`
- model: `setup-knowledge-genome.sh` - model: `scaffold.sh`
- context_read: _(none — initial scaffold)_ - context_read: _(none — initial scaffold)_
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]` - output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
- reasoning: Initial directory structure and encryption layer initialized by setup script. - reasoning: Initial directory structure and encryption layer initialized by setup script.

View file

@ -9,7 +9,7 @@ they run anywhere (laptop, CI, a git hook). They do **not** belong on vm101 or i
| File | Covers | | File | Covers |
|------|--------| |------|--------|
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) | | `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse | | `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse + duplicate-slug advisory (edit-distance math, self-match skip, once-per-run) |
| `structure.bats` | `lib/structure.sh` report/sync | | `structure.bats` | `lib/structure.sh` report/sync |
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` | | `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |

18
tests/clean-start.bats Normal file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env bats
# Per-test setup: load the shared helpers, then source clean-start.sh so the
# clean_start function is callable in-process. ${LIB_DIR} is tried first (its
# stderr is silenced); on failure the copy under ${REPO_ROOT}/lib is sourced.
# NOTE(review): LIB_DIR / REPO_ROOT are presumably exported by helpers — confirm.
setup() {
load 'helpers'
source "${LIB_DIR}/clean-start.sh" 2>/dev/null || source "${REPO_ROOT}/lib/clean-start.sh"
}
# clean_start must leave the checkout identical to origin/<INGEST_BASE>:
# fast-forwarded to origin, tracked edits reverted, untracked files removed.
@test "clean_start: aligns to origin/base, reverts tracked edits, removes untracked" {
  G="$(make_fixture_genome)"; cd "$G"
  echo "from origin" >> wiki/index.md
  git add -A && git commit -q -m "origin ahead" && git push -q
  git reset --hard HEAD~1                      # local BEHIND origin/main
  echo "local junk" >> wiki/log.md             # tracked edit, uncommitted
  echo "scratch" > scratch.txt                 # genuinely untracked
  INGEST_BASE="main" clean_start
  git diff --quiet origin/main                 # aligned to origin
  grep -q "from origin" wiki/index.md          # forwarded to origin state
  # A bare `! grep …` that is not the last statement can never fail a Bats test
  # (negated commands are exempt from errexit), so the absence check goes
  # through `run` and asserts grep's "no match" status explicitly.
  run grep -q "local junk" wiki/log.md
  [ "$status" -eq 1 ]                          # tracked edit reverted
  [ ! -f scratch.txt ]                         # untracked removed
}

View file

@ -57,8 +57,8 @@ private: false
## Private Synthesis (`wiki/private/`) ## Private Synthesis (`wiki/private/`)
*Restricted access. Requires PRIVATE_CONTEXT: enabled and unlocked repo.* _Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
*List slug names ONLY. Do not append summaries — prevents metadata leakage.* _List slug names ONLY. Do not append summaries — prevents metadata leakage._
EOF EOF
cat > "${g}/wiki/log.md" <<'EOF' cat > "${g}/wiki/log.md" <<'EOF'
@ -88,16 +88,17 @@ EOF
git init -q git init -q
# Hermetic: ignore the user's global git config (signing, global hooks); # Hermetic: ignore the user's global git config (signing, global hooks);
# otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here. # otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here.
git config commit.gpgsign false git config --local user.name "Framework Test"
git config core.hooksPath "${base}/nohooks" git config --local user.email "test@genome.local"
git config user.email t@t git config --local commit.gpgsign false
git config user.name tester git config --local core.hooksPath "${base}/nohooks"
git add .
git commit -qm init
git branch -M main git branch -M main
git remote add origin "${base}/origin.git" git remote add origin "${base}/origin.git"
git add .
git commit -q -m "chore: initial scaffold"
git push -q -u origin main git push -q -u origin main
) >/dev/null )
echo "${g}" echo "${g}"
} }

44
tests/index-remove.bats Normal file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env bats
# tests/index-remove.bats — index-append.py --remove mode.
# Per-test setup: load the shared helpers, point GENOMES_ROOT at the per-test
# temp dir and create a fresh fixture genome; its path is exported as $g for
# the tests below.
setup() {
load 'helpers'
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
g_src="$(make_fixture_genome)"; export g="$g_src"
}
# Two entries are inserted, one is removed: only that one may disappear.
@test "index --remove: deletes the matching entry, keeps the others" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/b]] — B. `maturity: draft`'
  grep -q 'sources/a' wiki/index.md
  grep -q 'sources/b' wiki/index.md
  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]
  # A non-final `! grep …` never fails a Bats test (negated commands are exempt
  # from errexit); assert grep's "no match" status through `run` instead.
  run grep -q '\[\[sources/a\]\]' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q 'sources/b' wiki/index.md
}
# Removing a link that is not in the index must still exit 0 and print a
# message containing "nothing to remove".
@test "index --remove: idempotent when the entry is absent" {
cd "$g"
run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/does-not-exist'
[ "$status" -eq 0 ]
[[ "$output" == *'nothing to remove'* ]]
}
# A successful removal must refresh the index's last_updated to today's date.
@test "index --remove: bumps last_updated" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  # set last_updated to an old date, then remove and check it moved
  sed -i 's/^last_updated:.*/last_updated: 2000-01-01/' wiki/index.md
  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]
  # A non-final `! grep …` never fails a Bats test (negated commands are exempt
  # from errexit); assert grep's "no match" status through `run` instead.
  run grep -q '2000-01-01' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q "last_updated: $(date +%F)" wiki/index.md
}
# --entry and --remove are mutually exclusive: passing both must exit with
# status 2.
@test "index --remove: rejects passing both --entry and --remove" {
cd "$g"
run python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — x' --remove 'sources/a'
[ "$status" -eq 2 ]
}

View file

@ -0,0 +1,29 @@
#!/usr/bin/env bats
# Regression test: a source page whose frontmatter carries the extra
# source_path / source_sha256 keys must still pass lint_markdown_file.

# Per-test setup: load the shared helpers and source the output and lint
# libraries from $LIB_DIR so lint_markdown_file can be called in-process.
setup() {
load 'helpers'
source "$LIB_DIR/output.sh"
source "$LIB_DIR/lint.sh"
}
# Writes one source page into a fresh fixture genome and expects the linter to
# return 0 for it.
@test "lint tolerates source_path/source_sha256 in source frontmatter" {
G="$(make_fixture_genome)"
mkdir -p "$G/wiki/sources"
cat > "$G/wiki/sources/test-source.md" <<'EOFMD'
---
title: "Test Source"
type: source
domain: genome-test
maturity: draft
last_updated: 2026-06-25
private: false
tags: [test]
source_path: raw/articles/test.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Test Source
body
EOFMD
run lint_markdown_file "$G/wiki/sources/test-source.md" genome-test
[ "$status" -eq 0 ]
}

View file

@ -69,3 +69,80 @@ EOF
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
} }
# --- duplicate-slug advisory (check_duplicates + its distance helpers) --------
# These guard the dedup feature: correct edit-distance math, the warn-only
# contract, the exact-self-match skip (run-ingest appends new slugs to the
# index before lint runs), and that the advisory fires once per run, not once
# per file.
# Throughout, a "≈" in the output is what these tests treat as the marker of a
# duplicate warning.

# Distance helper, trivial case: equal inputs print 0.
@test "levenshtein: identical strings have distance 0" {
run levenshtein cat cat
[ "$status" -eq 0 ]
[ "$output" -eq 0 ]
}
# Distance helper, substitutions plus an insertion.
@test "levenshtein: kitten→sitting is 3 (textbook case)" {
run levenshtein kitten sitting
[ "$output" -eq 3 ]
}
# Similarity helper: equal inputs print 100.
@test "similarity: identical strings score 100" {
run similarity gpu-pricing gpu-pricing
[ "$output" -eq 100 ]
}
# Index holds "llm-routing"; the manifest creates "llm-routings". The advisory
# must name the new slug and still return 0 (warn-only).
@test "check_duplicates: warns on a near-duplicate of an indexed concept" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routings.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" == *"≈"* ]]
[[ "$output" == *"llm-routings"* ]]
}
# A created slug with no lookalike in the index must produce no warning.
@test "check_duplicates: silent when the new slug is unlike anything indexed" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/budget-hardware.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
@test "check_duplicates: an exact self-match is not flagged (index already has the slug)" {
G="$(make_fixture_genome)"; cd "$G"
# run-ingest step 1 inserts this run's slug into the index BEFORE lint runs;
# the slug must not be reported as a duplicate of itself.
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routing.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
# Two files are linted in one scoped-lint run, only one of them has a
# lookalike in the index: exactly one output line may contain the marker.
@test "scoped-lint: duplicate advisory fires once across multiple files, not per file" {
G="$(make_fixture_genome)"
write_page "$G/wiki/concepts/data-pipelines.md" concept genome-test
write_page "$G/wiki/concepts/other-topic.md" concept genome-test
printf -- '- [[concepts/data-pipeline]] — x\n' >> "$G/wiki/index.md"
cat > "$G/.ingest-manifest.json" <<'JSON'
{"raw_source":"src","pages":[
{"path":"wiki/concepts/data-pipelines.md","status":"created"},
{"path":"wiki/concepts/other-topic.md","status":"created"}
]}
JSON
cd "$G"
export KG_LIB_DIR="$LIB_DIR"
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test \
wiki/concepts/data-pipelines.md wiki/concepts/other-topic.md
[ "$status" -eq 0 ]
[ "$(grep -c "≈" <<< "$output")" -eq 1 ]
}

View file

@ -0,0 +1,48 @@
#!/usr/bin/env bats
# open-pr-rolling.bats — a re-ingest of the same slug updates the OPEN PR's branch
# (force-with-lease) instead of failing. Uses the local bare remote from make_fixture_genome.
load helpers
# No file-level fixture is needed; every test builds its own genome.
setup_file() { :; }
# Runs open-pr.sh twice for slug "x" with different page content (DRY_RUN=1 and
# dummy Forgejo settings are exported) and checks that the second run rebuilt
# feat/ai-ingest-x from base and that origin carries the second version.
@test "open-pr: re-ingest of the same slug rolls the branch forward (force-with-lease)" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
# NOTE(review): this mktemp file is never removed by the test.
body="$(mktemp)"; echo body > "$body"
# first ingest of slug x (v1)
mkdir -p wiki/sources; printf 'v1\n' > wiki/sources/x.md
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
[ "$status" -eq 0 ]
git rev-parse --verify feat/ai-ingest-x
first="$(git rev-parse feat/ai-ingest-x)"
# simulate clean_start back to base, then an edited re-ingest (v2)
git switch -q main; git reset -q --hard origin/main; git clean -q -fd
printf 'v2-edited\n' > wiki/sources/x.md
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
[ "$status" -eq 0 ]
second="$(git rev-parse feat/ai-ingest-x)"
# the branch was REBUILT from base (diverged), not appended: second is not a descendant of first
run git merge-base --is-ancestor "$first" "$second"
[ "$status" -ne 0 ]
# origin received the v2 content (force-with-lease pushed the rebuilt branch)
git fetch -q origin
run git show "origin/feat/ai-ingest-x:wiki/sources/x.md"
[ "$status" -eq 0 ]
[[ "$output" == *"v2-edited"* ]]
}
# The explicit --branch form (the one run-prune.sh uses) must still succeed and
# create a local branch with exactly the requested name.
@test "open-pr: prune branch override still works after the rolling change" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
body="$(mktemp)"; echo body > "$body"
mkdir -p wiki/sources; printf 'p\n' > wiki/sources/p.md
run bash "$SKILL_SCRIPTS/open-pr.sh" --branch "chore/prune-orphans-2026-06-30" \
--title "chore: prune 1 orphaned source(s)" --body-file "$body" --base main
[ "$status" -eq 0 ]
git rev-parse --verify "chore/prune-orphans-2026-06-30"
}

38
tests/orphan-wiki.bats Normal file
View file

@ -0,0 +1,38 @@
#!/usr/bin/env bats
# Tests for orphan-wiki.sh: a wiki/sources page counts as an orphan when its
# source_path names a raw file that is not there.

# Per-test setup: create a fixture genome, move it to
# ${GENOMES_ROOT}/fixture-genome so the script can be called with the genome
# name, then delete raw/articles/test.md (if present) and push, so every test
# decides for itself which raw files exist.
setup() {
load 'helpers'
export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
export INGEST_BASE="main"
export KG_LIB_DIR="${LIB_DIR}" # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR
g_src="$(make_fixture_genome)"
export g_name="fixture-genome"
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
export g="${GENOMES_ROOT}/${g_name}"
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
}
# The raw file exists and the source page points at it (with its real hash):
# the reported count must be 0.
@test "orphan-wiki: no orphans when raw and source page match" {
mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
mkdir -p "${g}/wiki/sources"
printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}
# The source page points at a raw file that was never created: one entry,
# with reason "orphan".
@test "orphan-wiki: detects orphaned source page" {
mkdir -p "${g}/wiki/sources"
printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "orphan"'
}
# A page with no source_path key at all must not be counted.
@test "orphan-wiki: ignores legacy pages without source_path" {
mkdir -p "${g}/wiki/sources"
printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}

91
tests/pending-raw.bats Normal file
View file

@ -0,0 +1,91 @@
#!/usr/bin/env bats
# Tests for pending-raw.sh: which raw/articles files are reported as pending,
# and with which reason ("new" or "modified").

# Per-test setup: create a fixture genome, move it to
# ${GENOMES_ROOT}/fixture-genome so the script can be called with the genome
# name, and start from an empty pending set (see the FIX note below).
setup() {
load 'helpers'
export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
export INGEST_BASE="main"
export KG_LIB_DIR="${LIB_DIR}"
g_src="$(make_fixture_genome)"
export g_name="fixture-genome"
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
export g="${GENOMES_ROOT}/${g_name}"
# FIX: make_fixture_genome ships raw/articles/test.md with no source page, which would
# otherwise count as a permanent 'new' and break every count assertion. Clear it so each
# test controls exactly what is pending (verified: count base becomes 0).
( cd "$g" && rm -f raw/articles/test.md && git add -A \
&& git commit -q -m "test: clear default raw" && git push -q )
}
# A raw file with no matching source page is reported once, reason "new".
@test "pending-raw: detects a brand new raw file" {
echo "new content" > "${g}/raw/articles/new-file.md"
( cd "$g" && git add . && git commit -q -m "add raw" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].path == "raw/articles/new-file.md"'
echo "$output" | jq -e '.detail[0].reason == "new"'
}
# The source page records the raw file's current sha256: nothing is pending.
@test "pending-raw: skips up-to-date files" {
echo "ok content" > "${g}/raw/articles/ok-file.md"
hash_ok="$(sha256sum "${g}/raw/articles/ok-file.md" | cut -d' ' -f1)"
cat > "${g}/wiki/sources/ok-file.md" <<FM
---
source_sha256: $hash_ok
---
FM
( cd "$g" && git add . && git commit -q -m "add ok" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
# The source page records the hash of v1, then the raw file is changed to v2:
# the file is reported once, reason "modified".
@test "pending-raw: flags modified files" {
echo "content v1" > "${g}/raw/articles/mod-file.md"
hash_v1="$(sha256sum "${g}/raw/articles/mod-file.md" | cut -d' ' -f1)"
cat > "${g}/wiki/sources/mod-file.md" <<FM
---
source_sha256: $hash_v1
---
FM
( cd "$g" && git add . && git commit -q -m "v1" && git push -q )
echo "content v2" > "${g}/raw/articles/mod-file.md"
( cd "$g" && git add . && git commit -q -m "v2" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "modified"'
}
# A raw file inside a subdirectory is found and reported with its full path.
# NOTE(review): the title mentions a slug, but the assertions only check the
# count and the reported path.
@test "pending-raw: nested subdirectory yields prefixed slug" {
mkdir -p "${g}/raw/articles/sub-b"
echo "subdir content" > "${g}/raw/articles/sub-b/file.md"
( cd "$g" && git add . && git commit -q -m "subdir" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.files[0] == "raw/articles/sub-b/file.md"'
}
# Dotfiles and the contents of a .stfolder directory must not be reported.
@test "pending-raw: excludes noise (.stfolder, .gitkeep)" {
touch "${g}/raw/articles/.gitkeep"
mkdir -p "${g}/raw/articles/.stfolder"
touch "${g}/raw/articles/.stfolder/sync.log"
( cd "$g" && git add . && git commit -q -m "noise" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
# cibo-pane.md and cibo/pane.md are two distinct raw files; both must be
# reported (count 2).
@test "pending-raw: reports both files on a slug collision" {
mkdir -p "${g}/raw/articles/cibo"
echo "c1" > "${g}/raw/articles/cibo-pane.md"
echo "c2" > "${g}/raw/articles/cibo/pane.md"
( cd "$g" && git add . && git commit -q -m "collision" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 2'
}

68
tests/permissions.bats Normal file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env bats
# tests/permissions.bats
# Locks down the repo's file permissions, so that a `cp`/deploy preserves the
# executable bit and the "Permission denied" failure (e.g. ingest-semantic.py
# launched directly) does not happen again.
#
# Principle:
# - scripts with a shebang that are launched directly -> executable (git mode 100755)
# - *sourced* libraries (lib/, providers/, registry.sh, globals.env) -> NOT executable (100644)
REPO="${BATS_TEST_DIRNAME}/.."
# Entry points / executable scripts (all have a shebang; some are also launched by hand for debugging)
EXECUTABLES=(
skills/ingest/scripts/ingest-semantic.py
skills/ingest/scripts/run-ingest.sh
skills/ingest/scripts/scoped-lint.sh
skills/ingest/scripts/open-pr.sh
skills/ingest/scripts/log-append.sh
skills/ingest/scripts/slug.sh
skills/ingest/scripts/pending-raw.sh
skills/ingest/scripts/orphan-wiki.sh
skills/ingest/scripts/index-append.py
scripts/add-genome.sh
scripts/setup.sh
scripts/setup-genomes.sh
scripts/setup-master.sh
scripts/lint-genomes.sh
scripts/verify-genomes.sh
)
# Sourced libraries: they must NOT be executable.
LIBRARIES=(
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/clean-start.sh
providers/forgejo.sh providers/github.sh
registry.sh globals.env
)
# git_mode <path>: print the first field (the file mode) of what
# `git ls-files -s` reports for <path> inside $REPO; prints nothing when git
# lists no entry for it.
git_mode() { git -C "$REPO" ls-files -s -- "$1" | awk '{print $1}'; }
# Working-tree check: every listed script has the executable bit set on disk.
@test "executable scripts have the +x bit on disk" {
for f in "${EXECUTABLES[@]}"; do
[ -x "${REPO}/${f}" ] || { echo "NON eseguibile su disco: $f"; return 1; }
done
}
# Index check: every listed script is tracked and recorded with mode 100755.
@test "executable scripts are recorded 100755 in git" {
for f in "${EXECUTABLES[@]}"; do
mode="$(git_mode "$f")"
[ -n "$mode" ] || { echo "non tracciato in git: $f"; return 1; }
[ "$mode" = "100755" ] || { echo "git mode $mode (atteso 100755): $f"; return 1; }
done
}
# Index check: every listed library that git tracks is recorded with mode 100644.
@test "sourced libraries are NOT executable in git (100644)" {
for f in "${LIBRARIES[@]}"; do
mode="$(git_mode "$f")"
[ -z "$mode" ] && continue # untracked/optional -> skip
[ "$mode" = "100644" ] || { echo "git mode $mode (atteso 100644, e' sourced): $f"; return 1; }
done
}
# Syntax check: every listed *.sh script must parse with `bash -n`; the .py
# entries are skipped by the case pattern.
@test "executable shell scripts pass bash -n (syntax)" {
for f in "${EXECUTABLES[@]}"; do
case "$f" in
*.sh) bash -n "${REPO}/${f}" || { echo "syntax error: $f"; return 1; } ;;
esac
done
}

View file

@ -0,0 +1,75 @@
#!/usr/bin/env bats
# raw-commit-quiet.bats — quiet-window behaviour of genome-raw-commit.sh.
# No Syncthing (no API key -> default author); pushes to a local bare repo via GENOME_PUSH_URL.

# Per-test setup, everything under BATS_TEST_TMPDIR:
#   - a private HOME holding .config/knowledge-genome.env (written below),
#   - a bare repository that plays the remote,
#   - a vault checkout "genome-test" on branch main with one seeded commit
#     (raw/articles/.gitkeep) pushed to that bare repository.
# The `2>/dev/null || …` fallbacks make each git step succeed whether or not
# the preceding clone already created the directory, branch or remote.
setup() {
SCRIPT="${BATS_TEST_DIRNAME}/../deploy/nexus/genome-raw-commit.sh"
export HOME="${BATS_TEST_TMPDIR}/home"; mkdir -p "$HOME/.config"
root="${BATS_TEST_TMPDIR}/vaults"; mkdir -p "$root"
bare="${BATS_TEST_TMPDIR}/origin.git"; git init -q --bare "$bare"
cat > "$HOME/.config/knowledge-genome.env" <<EOF
GENOME_VAULTS_ROOT=$root
GENOME_BASE=main
FORGEJO_USER=n8n-bot
FORGEJO_HOST=127.0.0.1:3001
FORGEJO_OWNER=Keru
COMMITTER_NAME=n8n-bot
COMMITTER_EMAIL=n8n-bot@homelab
DEFAULT_AUTHOR_NAME=Tester
DEFAULT_AUTHOR_EMAIL=tester@local
EOF
export g="genome-test"; export vault="$root/$g"
git clone -q "$bare" "$vault" 2>/dev/null || mkdir -p "$vault"
( cd "$vault"
git init -q 2>/dev/null || true
git config user.name n8n-bot; git config user.email n8n-bot@homelab; git config commit.gpgsign false
git checkout -q -b main 2>/dev/null || git switch -q main
mkdir -p raw/articles; echo seed > raw/articles/.gitkeep
git add -A; git commit -q -m init
git remote add origin "$bare" 2>/dev/null || git remote set-url origin "$bare"
git push -q -u origin main )
export GENOME_PUSH_URL="$bare" # test seam -> push to the local bare repo
}
# files: snapshot the paths git currently tracks under raw/ in the vault into
# ${BATS_TEST_TMPDIR}/f.txt, for the grep assertions in the tests.
files() { ( cd "$vault" && git ls-files raw/ ) > "${BATS_TEST_TMPDIR}/f.txt"; }
@test "raw-commit: holds a freshly-written raw, commits it once it settles" {
  # Quiet window: a raw file modified just now ("hot") must be held back,
  # while a file whose mtime is 10 minutes old is committed. Once the hot
  # file's mtime is aged as well, the next run must commit it.
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  echo "still typing" > "$vault/raw/articles/hot.md"            # fresh -> hot
  echo "finished" > "$vault/raw/articles/stable.md"
  touch -d "10 minutes ago" "$vault/raw/articles/stable.md"     # settled
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.status=="ok"'
  files
  grep -q 'raw/articles/stable.md' "${BATS_TEST_TMPDIR}/f.txt"  # committed
  # A bare `! grep ...` in the middle of a bats test can never fail it
  # (errexit ignores negated commands), so check grep's status explicitly:
  # 1 means "no match", i.e. hot.md is not tracked yet.
  run grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt"
  [ "$status" -eq 1 ]                                           # held back
  touch -d "10 minutes ago" "$vault/raw/articles/hot.md"        # now it settles
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  files
  grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt"     # now committed
}
@test "raw-commit: noop with held count while everything is still settling" {
  # With only a just-written raw file present, the script must exit 0 and
  # report status "noop" with exactly one held file.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi
  local wip_raw="$vault/raw/articles/wip.md"
  echo "typing" > "$wip_raw"   # fresh -> hot
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  jq -e '.status=="noop"' <<<"$output"
  jq -e '.held==1' <<<"$output"
}
@test "raw-commit: a deletion is committed immediately (not subject to the quiet window)" {
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi
  local old_raw="$vault/raw/articles/old.md"
  local listing="${BATS_TEST_TMPDIR}/f.txt"

  # Step 1: get a settled file committed.
  echo done > "$old_raw"
  touch -d "10 minutes ago" "$old_raw"
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  files
  grep -q 'raw/articles/old.md' "$listing"

  # Step 2: delete it -> the removal must be committed even though the
  # change happened "just now".
  rm "$old_raw"
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  jq -e '.status=="ok"' <<<"$output"
  files
  # Keep this negated grep as the LAST command: only then does its exit
  # status decide the test result.
  ! grep -q 'raw/articles/old.md' "$listing"
}

View file

@ -171,3 +171,41 @@ EOF
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ "$output" == *"develop"* ]] [[ "$output" == *"develop"* ]]
} }
@test "run-ingest: branch name matches slug.sh --raw for nested raw paths" {
  # For a raw file nested under raw/articles/cibo/, the dry-run output of
  # run-ingest.sh must mention the folder-prefixed slug "cibo-il-pane".
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi
  G="$(make_fixture_genome)"
  cd "$G"

  # Source page produced for the nested raw file.
  mkdir -p wiki/sources
  cat > wiki/sources/cibo-il-pane.md <<'EOFMD'
---
title: "Il Pane"
type: source
domain: genome-test
tags: [cibo]
maturity: draft
last_updated: 2026-06-25
private: false
source_path: raw/articles/cibo/il-pane.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Il Pane
body
EOFMD

  # Manifest pointing run-ingest at that raw source and page.
  cat > .ingest-manifest.json <<'EOFJSON'
{
"raw_source": "raw/articles/cibo/il-pane.md",
"model": "qwen3.5-9b",
"reasoning": "Ingest.",
"pr_summary": "Ingest summary.",
"contradictions": "None",
"pages": [
{"path": "wiki/sources/cibo-il-pane.md", "summary": "Summary.", "maturity": "draft", "status": "created"}
]
}
EOFJSON

  export KG_LIB_DIR="$LIB_DIR"
  export FORGEJO_URL="http://forgejo.local"
  export FORGEJO_USER="u"
  export FORGEJO_TOKEN="t"
  export DRY_RUN=1

  run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
  [ "$status" -eq 0 ]
  [[ "$output" == *"cibo-il-pane"* ]]
}

68
tests/run-prune.bats Normal file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env bats
# tests/run-prune.bats — prune orphaned sources (no LLM, no network; DRY_RUN).
setup() {
  # Load shared helpers, export the environment run-prune.sh is driven with
  # (dry run, dummy Forgejo credentials), then move a fixture genome under
  # GENOMES_ROOT and commit + push the removal of its sample raw file.
  load 'helpers'

  export PRUNE="${SKILL_SCRIPTS}/run-prune.sh"
  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  export INGEST_BASE="main"
  export KG_LIB_DIR="${LIB_DIR}"
  export FORGEJO_URL="http://forgejo.local"
  export FORGEJO_USER="u"
  export FORGEJO_TOKEN="t"
  export DRY_RUN=1

  g_src="$(make_fixture_genome)"
  export g_name="fixture-genome"
  export g="${GENOMES_ROOT}/${g_name}"
  mv "$g_src" "$g"

  (
    cd "$g" \
      && rm -f raw/articles/test.md \
      && git add -A \
      && git commit -q -m clear \
      && git push -q
  )
}
@test "run-prune: removes only the orphaned source + its index entry, opens a dry PR" {
  # Two source pages are set up: "kept" (its raw file exists) and "orphan"
  # (its raw file is missing). Pruning must delete only the orphan page and
  # its index entry, and leave the work on a dated chore/ branch.
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  cd "$g"
  # kept: raw exists. orphan: raw missing.
  echo content > raw/articles/kept.md
  h="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
  printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" > wiki/sources/kept.md
  printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/kept]] — kept. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/orphan]] — orphan. `maturity: draft`'
  git add -A && git commit -q -m setup && git push -q
  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"status":"ok"'* ]]
  [[ "$output" == *'"count":1'* ]]
  # only the orphan page is gone
  [ ! -f wiki/sources/orphan.md ]
  [ -f wiki/sources/kept.md ]
  # index reflects the removal. A bare `! grep ...` that is not the last
  # command can never fail a bats test (errexit ignores negated commands),
  # so assert on grep's exit status instead: 1 means "no match".
  run grep -q 'sources/orphan' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q 'sources/kept' wiki/index.md
  # committed on a chore/ branch (NOT feat/ai-ingest-*)
  git rev-parse --verify "chore/prune-orphans-$(date +%F)"
}
@test "run-prune: no orphans -> count 0 and no PR/branch" {
  # The only source page points at a raw file that exists, so the prune run
  # must report count 0 and must not create today's chore/ branch.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi
  cd "$g"

  echo content > raw/articles/kept.md
  h="$(sha256sum raw/articles/kept.md | awk '{print $1}')"
  printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" \
    > wiki/sources/kept.md
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"count":0'* ]]

  local prune_branch
  prune_branch="chore/prune-orphans-$(date +%F)"
  run git rev-parse --verify "$prune_branch"
  [ "$status" -ne 0 ]
}
@test "run-prune: refuses when an orphan path would escape wiki/ (defense in depth)" {
  # NOTE(review): despite the title, nothing below builds a path that escapes
  # wiki/, and no legacy page without source_path is created. The body only
  # checks that a page whose raw file is missing is counted and deleted.
  # Confirm the intended scenario against run-prune.sh.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi
  cd "$g"

  # The page's source_path points at a raw file that does not exist -> orphan.
  local orphan_page="wiki/sources/orphan.md"
  printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' \
    > "$orphan_page"
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"count":1'* ]]
  [ ! -f "$orphan_page" ]
}

View file

@ -86,3 +86,17 @@ EOF
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`' python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
grep -q "^last_updated: $(date +%F)$" wiki/index.md grep -q "^last_updated: $(date +%F)$" wiki/index.md
} }
@test "log-append: dedup on stable run_id prevents duplicate entries" {
  # Appending the same entry twice with one run_id must succeed both times,
  # report "already present" on the second call, and leave a single entry.
  G="$(make_fixture_genome)"
  cd "$G"
  stable_id="test-stable-run-id-001"

  # Identical arguments for both invocations.
  local -a entry_args=(
    --run-id "$stable_id" --type INGEST --subject "test" --model "m"
    --context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
  )

  run bash "$SKILL_SCRIPTS/log-append.sh" "${entry_args[@]}"
  [ "$status" -eq 0 ]

  run bash "$SKILL_SCRIPTS/log-append.sh" "${entry_args[@]}"
  [ "$status" -eq 0 ]
  [[ "$output" == *"already present"* ]]

  # grep -c exits 1 on zero matches; `|| true` keeps the count readable.
  count="$(grep -cF "run_id: \`${stable_id}\`" wiki/log.md || true)"
  [ "$count" -eq 1 ]
}

30
tests/slug.bats Normal file
View file

@ -0,0 +1,30 @@
#!/usr/bin/env bats
setup() {
  # Load the shared helpers (presumably where SKILL_SCRIPTS is defined --
  # verify against helpers), then point SLUG at the script under test.
  load 'helpers'
  SLUG="$SKILL_SCRIPTS/slug.sh"
}
@test "slug --raw: flat file remains unchanged" {
  # A raw file directly under raw/articles/ slugs to its bare file stem.
  local raw_path="raw/articles/il-pane.md"
  local expected="il-pane"
  run bash "$SLUG" --raw "$raw_path"
  [ "$status" -eq 0 ]
  [ "$output" = "$expected" ]
}
@test "slug --raw: nested file gets folder prefix" {
  # One directory level below raw/articles/ is prepended to the slug.
  local raw_path="raw/articles/cibo/il-pane.md"
  local expected="cibo-il-pane"
  run bash "$SLUG" --raw "$raw_path"
  [ "$status" -eq 0 ]
  [ "$output" = "$expected" ]
}
@test "slug --raw: distinct subdirs avoid collision" {
  # The same file name in two different subfolders must give two slugs.
  local cibo_slug storia_slug
  cibo_slug="$(bash "$SLUG" --raw "raw/articles/cibo/pane.md")"
  storia_slug="$(bash "$SLUG" --raw "raw/articles/storia/pane.md")"
  [ "$cibo_slug" != "$storia_slug" ]
}
@test "slug --raw: Bash and Python-calling-bash agree (single implementation)" {
  # The slug obtained by calling slug.sh directly must equal the slug
  # obtained when Python shells out to the same script.
  local raw_path="raw/articles/cibo/il-pane.md"
  b="$(bash "$SLUG" --raw "$raw_path")"
  # Hand the script path and raw path to Python through argv rather than
  # interpolating them into the Python source: a quote or backslash in
  # $SLUG would otherwise break (or alter) the generated program.
  p="$(python3 -c 'import subprocess, sys; print(subprocess.check_output(["bash", sys.argv[1], "--raw", sys.argv[2]], text=True).strip())' "$SLUG" "$raw_path")"
  [ "$b" = "$p" ]
}