Compare commits

..

No commits in common. "main" and "release/0.3.0" have entirely different histories.

71 changed files with 281 additions and 7195 deletions

View file

@ -1,29 +0,0 @@
# Top-most EditorConfig file: editors stop looking in parent directories.
root = true
# Whitespace / EOL / indent settings for ALL file types — cross-editor, zero dependencies.
# Never touches file CONTENT (so the {{...}} placeholders are safe here).
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
# Markdown: preserve the trailing "two spaces" (hard line break) → do not trim.
[*.md]
trim_trailing_whitespace = false
# Shell scripts: 2-space indent.
[*.{sh,bash}]
indent_style = space
indent_size = 2
# Python sources and stubs: 4-space indent.
[*.{py,pyi}]
indent_style = space
indent_size = 4
# YAML / JSON: 2-space indent.
[*.{yml,yaml,json}]
indent_style = space
indent_size = 2
# Make recipe lines must start with a hard tab.
[Makefile]
indent_style = tab

12
.gitignore vendored
View file

@ -1,12 +0,0 @@
# VS Code — only shared workspace settings are tracked:
# ignore everything inside .vscode/, then re-include the two shared files below.
.vscode/*
!.vscode/
!.vscode/settings.json
!.vscode/extensions.json
# Framework-local artefacts: the master-knowledge-genome directory at the repo root,
# key material (keys/ at the repo root and any *.key file), and Python bytecode caches.
/master-knowledge-genome/
/keys/
*.key
__pycache__/
*.pyc

View file

@ -1,8 +0,0 @@
# Template engine — these files contain {{...}} placeholders: NEVER format them.
templates/
# Agent-owned / generated content (normally kept in separate repos,
# listed here as a safeguard in case a genome is opened in the same workspace).
wiki/
genomes/
raw/

View file

@ -1,5 +0,0 @@
{
"printWidth": 100,
"tabWidth": 2,
"proseWrap": "preserve"
}

View file

@ -1,8 +0,0 @@
{
"recommendations": [
"esbenp.prettier-vscode",
"editorconfig.editorconfig",
"timonwong.shellcheck"
],
"unwantedRecommendations": ["dbaeumer.vscode-eslint", "ms-vscode.vscode-typescript-next"]
}

18
.vscode/settings.json vendored
View file

@ -1,18 +0,0 @@
{
"editor.formatOnSave": true,
"prettier.requireConfig": true,
"files.associations": {
"templates/**/*.md": "plaintext"
},
"[markdown]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[jsonc]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
}

View file

@ -1,22 +1,19 @@
# =============================================================================
# Knowledge Genome - Makefile v. 1.13.0
# Knowledge Genome - Makefile v. 0.3.0
# Orchestrates the setup and management of the knowledge base.
# =============================================================================
include globals.env
export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
.PHONY: setup add-genome status lint lock doctor sync test verify-structure sync-structure help
.PHONY: setup add-genome status lint lock doctor sync help
help:
@echo "Available commands:"
@echo " make setup - Full system initialization"
@echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo] [CROSS=yes|no]"
@echo " make add-genome - Register and scaffold a new genome"
@echo " make status - Check submodule and encryption status"
@echo " make lint - Verify schema, privacy flags, and metadata"
@echo " make verify-structure - Report directory drift across all genomes"
@echo " make sync-structure - Create any missing canonical dirs (safe)"
@echo " make test - Run the bats test suite (no LLM/GPU needed)"
@echo " make lock - Lock all encrypted files across all genomes"
@echo " make doctor - Verify all required tools are installed"
@echo " make sync - Sync submodules and report unpushed commits"
@ -30,27 +27,16 @@ setup:
add-genome:
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
echo "Error: NAME and DESC are required."; \
echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo] [CROSS=yes|no]"; \
echo "Usage: make add-genome NAME=my-genome DESC='My description'"; \
exit 1; \
fi
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)" "$(or $(CROSS),no)"
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)"
status:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "--- Master Status ---"
@cd $(MASTER_REPO) && git submodule status
@echo "--- Encryption Status (per genome) ---"
@cd $(MASTER_REPO) && git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true'
verify-structure:
@bash scripts/verify-genomes.sh
sync-structure:
@bash scripts/verify-genomes.sh --sync
test:
@command -v bats >/dev/null 2>&1 || { echo " MISSING: bats (sudo apt install bats)"; exit 1; }
@bats tests/
@git submodule status
@echo "--- Encryption Status (First 10 files) ---"
@git-crypt status | head -n 10
doctor:
@echo "Checking required tools..."
@ -59,20 +45,17 @@ doctor:
@command -v curl >/dev/null 2>&1 || { echo " MISSING: curl"; exit 1; }
@command -v jq >/dev/null 2>&1 || { echo " MISSING: jq"; exit 1; }
@command -v bw >/dev/null 2>&1 || echo " OPTIONAL: bw (Bitwarden CLI) not found — key injection will be manual."
@command -v python3 >/dev/null 2>&1 || echo " OPTIONAL: python3 not found — needed for 'make test' and the ingest skill (index-append.py), not for setup."
@echo "System ready."
sync:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "Syncing submodules..."
@cd $(MASTER_REPO) && git submodule update --init --recursive
@git submodule update --init --recursive
@echo "--- Unpushed commits per genome ---"
@cd $(MASTER_REPO) && git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true'
@git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true'
lock:
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
@echo "Locking master repository..."
@cd $(MASTER_REPO) && git-crypt lock 2>/dev/null || true
@git-crypt lock 2>/dev/null || true
@echo "Locking all submodules..."
@cd $(MASTER_REPO) && git submodule foreach 'git-crypt lock 2>/dev/null || true'
@git submodule foreach 'git-crypt lock 2>/dev/null || true'
@echo "All genomes securely locked."

1245
README.md

File diff suppressed because it is too large Load diff

View file

@ -1,773 +0,0 @@
{
"name": "Genome: PR review",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-pr-review-23319ab8687b16f10e0f278fb920c112",
"options": {}
},
"id": "58df1ca9-e48e-4834-b231-d97c974cd01b",
"name": "Webhook PR Review",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
2272,
1344
],
"webhookId": "61ff3a5baa304571"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// THE only parser of the review side: parse the directive, VALIDATE, prepare the rework payload.\n// Security: only allow-listed maintainers may drive the gate; destructive directives require a\n// feat/ai-ingest-* branch on the expected base; raw_source is recovered from a machine-readable\n// marker that run-ingest.sh writes into the PR body.\nconst ALLOWED_SENDERS = ['Keru']; // <-- maintainers allowed to issue directives\nconst BASE = 'develop';\n\n// n8n Run Once for Each Item: $json is the current webhook payload\nconst j = $json.body || $json;\nif (!j || typeof j !== 'object') {\n return { directive: 'INVALID', reason: 'malformed webhook payload' };\n}\n\nconst review = j.review || null;\nconst comment = j.comment || null;\nconst pr = j.pull_request || j.issue || null;\n\n// Extract directive text from review content or comment body\nconst body = String(\n (review && review.content) ||\n (comment && comment.body) ||\n ''\n);\nconst sender = String((j.sender && j.sender.login) || 'unknown');\n\n// Match directive at the start of the text (case-insensitive)\nconst m = body.match(/^\\s*(REWORK|RESTART|REVERT\\s+\\d+|SPLIT|REJECT|MERGE)\\s*:?/i);\nif (!m) return { directive: 'NONE' };\n\nconst headTok = m[1].toUpperCase().replace(/\\s+/g, ' ');\nconst directive = headTok.startsWith('REVERT') ? 
'REVERT' : headTok;\nconst feedback = body.slice(m[0].length).trim() || '(nessun dettaglio fornito)';\n\n// Extract PR metadata safely\nconst prNumber = (pr && pr.number) || null;\nconst branch = (pr && pr.head && pr.head.ref) || null;\nconst base = (pr && pr.base && pr.base.ref) || null;\nconst repo = (pr && pr.base && pr.base.repo && pr.base.repo.name) ||\n (j.repository && j.repository.name) || null;\nconst owner = (pr && pr.base && pr.base.repo && pr.base.repo.owner && pr.base.repo.owner.login) ||\n (j.repository && j.repository.owner && j.repository.owner.login) || null;\nconst prBody = (pr && pr.body) || (j.issue && j.issue.body) || '';\n\n// Recover raw_source from machine-readable marker: <!-- kg:raw=path -->\n// Restricted to valid path characters, no spaces, no HTML breaking\nconst rawMatch = prBody.match(/<!--\\s*kg:raw=([^\\s>]+)\\s*-->/);\nconst raw = rawMatch ? rawMatch[1] : null;\n\n// REVERT is reserved for future Step 7 implementation\nif (directive === 'REVERT') {\n return { directive: 'NONE', note: 'REVERT reserved for Step 7' };\n}\n\n// Authorization gate\nif (!ALLOWED_SENDERS.includes(sender)) {\n return {\n directive: 'UNAUTHORIZED',\n attempted: directive,\n sender,\n prNumber,\n owner,\n repo\n };\n}\n\n// Validation rules\nconst okGenome = !!repo && /^[a-z0-9][a-z0-9-]{0,63}$/.test(repo);\nconst okPr = !!prNumber && /^[0-9]+$/.test(String(prNumber));\nconst okBranch = !!branch && /^feat\\/ai-ingest-[a-z0-9-]+$/.test(branch);\nconst okBase = base === BASE;\nconst okRaw = (directive === 'MERGE')\n ? 
true\n : (!!raw && raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw));\n\nif (!okGenome || !okPr || !okBase || (directive !== 'MERGE' && !okBranch) || !okRaw) {\n return {\n directive: 'INVALID',\n attempted: directive,\n prNumber,\n owner,\n repo,\n why: { okGenome, okPr, okBranch, okBase, okRaw }\n };\n}\n\n// Encode feedback for safe transport through SSH/scripts\nconst feedback_b64 = Buffer.from(feedback, 'utf8').toString('base64');\n\nreturn {\n directive,\n prNumber,\n branch,\n base,\n repo,\n owner,\n sender,\n raw,\n feedback,\n feedback_b64\n};"
},
"id": "c668f595-0a28-4bd3-9125-22fee9350d78",
"name": "Parse & validate",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2496,
1344
]
},
{
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "MERGE",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "4960f0868bc54687"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REWORK",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "34002fdd92834d38"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "RESTART",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "d412a74e32ac4f0c"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "SPLIT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "c0810b33fa474ca0"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "531039e699c44cea"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "UNAUTHORIZED",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "cfbd691d2e9a4c2a"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "INVALID",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "251f5b7beea6424a"
}
],
"combinator": "and"
}
}
]
},
"options": {
"fallbackOutput": "none"
}
},
"id": "489736cc-bab6-4664-8087-91b6d9ff31ad",
"name": "Switch",
"type": "n8n-nodes-base.switch",
"typeVersion": 3.4,
"position": [
2736,
1344
]
},
{
"parameters": {
"method": "POST",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}/merge",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"Do\": \"merge\"\n}",
"options": {
"timeout": 15000
}
},
"id": "3440cb8d-ae4c-4523-ae13-ee5667d24252",
"name": "Forgejo Merge PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2976,
1104
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "cc369b5fc3d246a4",
"leftValue": "={{ $('Parse & validate').first().json.branch }}",
"rightValue": "feat/ai-ingest-",
"operator": {
"type": "string",
"operation": "startsWith"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "e6d45fce-83d0-44ca-9fa4-86558fec1a0f",
"name": "Guardia feat/",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2976,
1328
]
},
{
"parameters": {
"method": "PATCH",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"state\": \"closed\"\n}",
"options": {
"timeout": 15000
}
},
"id": "1601f705-c758-4df6-a3bd-e3ac2e202c94",
"name": "Forgejo Close PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3200,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"method": "DELETE",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/branches/{{ encodeURIComponent($('Parse & validate').first().json.branch) }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"options": {
"timeout": 15000
}
},
"id": "c2ff2247-efe1-4809-a435-9973188d61bb",
"name": "Forgejo Delete Branch",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3424,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "55cf6c2a6c7d4d79",
"leftValue": "={{ $('Parse & validate').first().json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "a1dbbc06-555d-4a1d-8fbf-ee75f617e98a",
"name": "E' REJECT?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
3648,
1296
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "7fc3e648-4712-4eef-a6f3-12c8805ade1f",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3648,
1168
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $('Parse & validate').first().json.repo }}",
"raw": "={{ $('Parse & validate').first().json.raw }}",
"mode": "rework",
"feedback_b64": "={{ $('Parse & validate').first().json.feedback_b64 }}",
"reason": "={{ $('Parse & validate').first().json.directive }}",
"prevPr": "={{ String($('Parse & validate').first().json.prNumber || '') }}"
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "9704c050-5c63-49fd-a26d-efbae9d92175",
"name": "Run one ingest (rework)",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3856,
1168
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// merged (MERGE) / closed (REJECT). The HTTP node replaced $json with the API response,\n// so we read context from the parser via node reference (single review -> .first() is safe).\n// Fallback values prevent crashes if the parser node is unreachable.\nconst p = $('Parse & validate').first().json || {};\nconst repo = p.repo || 'unknown';\nconst owner = p.owner || 'unknown';\nconst prNumber = p.prNumber || '?';\nconst base = p.base || 'develop';\nconst branch = p.branch || 'unknown';\nconst sender = p.sender || 'unknown';\nconst directive = p.directive || 'UNKNOWN';\nconst feedback = p.feedback || '';\n\nconst repoUrl = (owner && repo && repo !== 'unknown')\n ? `https://git.keruhomelab.com/${owner}/${repo}`\n : '';\nconst prUrl = (repoUrl && prNumber !== '?')\n ? `${repoUrl}/pulls/${prNumber}`\n : '';\n\nlet n;\nif (directive === 'MERGE') {\n n = {\n topic: 'genome-ingest',\n title: `${repo} · PR #${prNumber} mergiata`,\n priority: 'default',\n tags: 'twisted_rightwards_arrows',\n click: prUrl,\n actions: `view, Vedi la PR, ${prUrl}`,\n body: `PR #${prNumber} mergiata su \\`${base}\\` da **${sender}**.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${repo} · PR #${prNumber} chiusa`,\n priority: 'default',\n tags: 'wastebasket',\n click: repoUrl,\n actions: '',\n body: `**REJECT** di **${sender}**: PR #${prNumber} chiusa e branch \\`${branch}\\` rimosso. Nessun nuovo tentativo.\\n> ${feedback}`\n };\n}\n\nreturn n;"
},
"id": "1ce634fd-d402-4a84-9ba1-04673ddffce9",
"name": "Build ntfy action",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3856,
1344
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Security / near-miss: unauthorized sender, invalid directive, or the feat/ guard.\n// On all three paths Switch/Guardia pass the parser output through, so $json carries the directive + context.\nconst d = $json || {};\nconst directive = d.directive || 'UNKNOWN';\nconst attempted = d.attempted || directive;\nconst sender = d.sender || 'unknown';\nconst prNumber = d.prNumber || '?';\nconst branch = d.branch || 'unknown';\nconst owner = d.owner || '';\nconst repo = d.repo || '';\n\nconst repoUrl = (owner && repo) ? `https://git.keruhomelab.com/${owner}/${repo}` : '';\n\nlet n;\nif (directive === 'UNAUTHORIZED') {\n n = {\n topic: 'genome-ingest',\n title: `Sicurezza · direttiva non autorizzata`,\n priority: 'high',\n tags: 'no_entry',\n click: repoUrl,\n actions: '',\n body: `**${sender}** ha tentato \\`${attempted}\\` su PR #${prNumber}, ma non è tra i maintainer autorizzati. **Nessuna azione** eseguita.`\n };\n} else if (directive === 'INVALID') {\n n = {\n topic: 'genome-ingest',\n title: `Direttiva non applicata`,\n priority: 'low',\n tags: 'information_source',\n click: repoUrl,\n actions: '',\n body: `\\`${attempted}\\` su PR #${prNumber} ignorata: precondizioni non soddisfatte (branch / base / marker raw).`\n };\n} else {\n // Guardia feat/ false branch: destructive action on a non-feat/ai-ingest-* branch\n n = {\n topic: 'genome-ingest',\n title: `Sicurezza · branch protetto`,\n priority: 'high',\n tags: 'no_entry',\n click: repoUrl,\n actions: '',\n body: `Rifiutata azione distruttiva (\\`${attempted || directive}\\`) sul branch \\`${branch}\\`: non è un \\`feat/ai-ingest-*\\`. **Nessuna modifica.**`\n };\n}\n\nreturn n;"
},
"id": "32b16592-5126-4cc2-a3f2-d1bda58ac724",
"name": "Build ntfy sicurezza",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3200,
1536
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "4d45b486-de42-4c7f-be21-b5bfbc05fd44",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
4080,
1424
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook PR Review": {
"main": [
[
{
"node": "Parse & validate",
"type": "main",
"index": 0
}
]
]
},
"Parse & validate": {
"main": [
[
{
"node": "Switch",
"type": "main",
"index": 0
}
]
]
},
"Switch": {
"main": [
[
{
"node": "Forgejo Merge PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Merge PR": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
]
]
},
"Guardia feat/": {
"main": [
[
{
"node": "Forgejo Close PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Close PR": {
"main": [
[
{
"node": "Forgejo Delete Branch",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Delete Branch": {
"main": [
[
{
"node": "E' REJECT?",
"type": "main",
"index": 0
}
]
]
},
"E' REJECT?": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "Run one ingest (rework)",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy action": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy sicurezza": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "22998a54-cd9a-4b57-9c80-df97085a997c",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "iho7kFQsXbGIxG7P",
"tags": []
}

View file

@ -1,170 +0,0 @@
{
"name": "Genome: ingest MANUALE (scratch)",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
0,
0
],
"id": "2101e704-6275-419d-9963-29a142e5811c",
"name": "Esegui manualmente"
},
{
"parameters": {
"authentication": "privateKey",
"command": "ssh vm101 'pi ingest genome-test raw/articles/il-grano-saraceno.md'"
},
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
224,
0
],
"id": "8ade2def-2d53-4860-88a5-2ca734c6e54a",
"name": "SSH: pi ingest (manuale)",
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// ultima riga JSON di run-ingest.sh (ha 'run_id=' davanti)\nconst out = ($json.stdout || '').trim();\nconst line = out.split('\\n').filter(l => l.trim().startsWith('{')).pop();\nif (!line) return { status: 'error', reason: 'nessuna riga JSON run-ingest', raw: out };\ntry { return JSON.parse(line); } catch (e) { return { status: 'error', reason: 'JSON non parsabile', raw: line }; }"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
0
],
"id": "d84cdeaf-612a-454c-8b4d-31824ae6d71e",
"name": "Parse ingest"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "const d=$json;let n;\nif (d.status==='ok'){\n n={title:`Ingest ${d.slug}: PR aperta`,priority:'default',tags:'inbox_tray',\n body:`\\u2705 ${d.slug}: PR aperta (lint ${d.lint_clean?'clean':'KO'}${d.conflict?', CONFLITTO':''})\\n\\n\\ud83d\\udd17 ${d.pr_url}`};\n} else if (d.status==='pr_failed'){\n n={title:`Ingest ${d.slug}: PR FALLITA`,priority:'high',tags:'warning',\n body:`\\u26a0\\ufe0f ${d.slug}: semantic/lint ok ma PR non aperta.\\n\\n${(d.detail||'').split('\\n')[0]}`};\n} else {\n n={title:'Ingest: ERRORE',priority:'high',tags:'rotating_light',\n body:`\\u274c ${d.reason||'errore'}\\n\\n${(d.raw||'').slice(0,300)}`};\n}\nreturn n;"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
672,
0
],
"id": "eadd9275-b38c-416b-b15e-0999f70a05fb",
"name": "Build ntfy"
},
{
"parameters": {
"method": "POST",
"url": "http://ntfy/homelab-genome",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
880,
0
],
"id": "63ab577b-893a-4b3d-8f13-b377be778099",
"name": "ntfy: send notification",
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Esegui manualmente": {
"main": [
[
{
"node": "SSH: pi ingest (manuale)",
"type": "main",
"index": 0
}
]
]
},
"SSH: pi ingest (manuale)": {
"main": [
[
{
"node": "Parse ingest",
"type": "main",
"index": 0
}
]
]
},
"Parse ingest": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send notification",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "df06ce3b-1ea8-43be-91ff-02c77972cfe2",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "RNoSaRLYG9vcMn6M",
"tags": []
}

View file

@ -1,419 +0,0 @@
{
"name": "Genome: ingest",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push",
"options": {}
},
"id": "8c44b478-1a95-4c3b-8ac1-d7c57e228414",
"name": "Webhook",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1520,
1728
],
"webhookId": "cf215f5d31e04dd2"
},
{
"parameters": {
"jsCode": "// Bell filter: proceed ONLY on develop pushes that actually touch raw/.\n// Returning [] stops the flow (no node needed).\n// Performance: never wake vm101 for wiki-only pushes (e.g. an ingest PR merged back to develop).\n// pending-raw remains the source of truth.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository && b.repository.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect all touched paths safely (added, modified, removed)\nconst commits = Array.isArray(b.commits) ? b.commits : [];\nconst touched = [];\nfor (const c of commits) {\n if (!c || typeof c !== 'object') continue;\n for (const key of ['added', 'modified', 'removed']) {\n const list = c[key];\n if (!Array.isArray(list)) continue;\n for (const p of list) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n touched.push(p);\n }\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was touched\nif (touched.length === 0) return [];\n\nreturn [{ json: { genome, touchedCount: touched.length } }];"
},
"id": "604787c7-4e83-468e-9a98-3ac084203040",
"name": "Gate push",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1744,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "f93073a3-7753-4ce1-9ef1-2a0c16386543",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1952,
1728
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi pending-raw {{ $('Gate push').first().json.genome }}'"
},
"id": "876dbdaf-3620-4c2c-a65b-336f0b11198c",
"name": "SSH: pending-raw",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2176,
1728
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Parse pending-raw -> one item per raw, carrying everything run-one-ingest needs.\n// Unsafe filenames (spaces / odd chars) are NOT ingested -> a 'badname' item -> ntfy.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\ntry {\n d = JSON.parse(out);\n} catch (e) {\n return [{ json: { _kind: 'error', reason: 'pending-raw non parsabile', raw: out.substring(0, 500) } }];\n}\n\nif (!d || typeof d !== 'object') {\n return [{ json: { _kind: 'error', reason: 'pending-raw non è un oggetto JSON', raw: out.substring(0, 500) } }];\n}\n\nconst files = Array.isArray(d.files) ? d.files : [];\nif (files.length === 0) return [];\n\n// Build reason map from detail array\nconst why = {};\nfor (const it of (Array.isArray(d.detail) ? d.detail : [])) {\n if (it && typeof it.path === 'string' && typeof it.reason === 'string') {\n why[it.path] = it.reason;\n }\n}\n\nconst SAFE = /^[A-Za-z0-9._\\/-]+$/;\nconst items = [];\nfor (const raw of files) {\n if (typeof raw !== 'string') {\n items.push({ json: { _kind: 'badname', genome: d.genome, raw: String(raw),\n hint: String(raw).replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid' } });\n continue;\n }\n if (SAFE.test(raw)) {\n items.push({ json: { _kind: 'ingest', genome: d.genome, raw,\n mode: 'ingest', feedback_b64: '', reason: why[raw] || 'new', prevPr: '' } });\n } else {\n const hint = raw.replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid';\n items.push({ json: { _kind: 'badname', genome: d.genome, raw, hint } });\n }\n}\nreturn items;"
},
"id": "f5bbbed3-222e-4129-a764-7cf47d69c5ce",
"name": "Split raw files",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2400,
1728
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "cbacf5d98d594ba5",
"leftValue": "={{ $json._kind }}",
"rightValue": "ingest",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "5398e2c4-c7ca-4ca4-a2d7-e75077453b7c",
"name": "Nome valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2624,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $json.genome }}",
"raw": "={{ $json.raw }}",
"mode": "ingest",
"feedback_b64": "",
"reason": "={{ $json.reason }}",
"prevPr": ""
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "0f274662-62bb-448b-ae4b-47e4bbcfd35a",
"name": "Run one ingest",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
2832,
1616
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Build the ntfy notification for every item rejected by 'Nome valido?'.\n// Run Once for Each Item: $json is either a 'badname' item (unsafe filename) or an\n// 'error' item (pending-raw output not parsable) emitted by 'Split raw files'.\nconst d = $json || {};\n\n// 'error' items carry reason/raw instead of a filename: report them as errors,\n// not as files to rename.\nif (d._kind === 'error') {\n  return {\n    topic: 'genome-ingest',\n    title: 'pending-raw · errore',\n    priority: 'high',\n    tags: 'rotating_light',\n    click: '',\n    actions: '',\n    body: `${d.reason || 'errore sconosciuto'}.\\n\\n${String(d.raw || '').slice(0, 300)}`\n  };\n}\n\nconst genome = d.genome || 'unknown';\nconst raw = String(d.raw || 'unknown');\nconst hint = String(d.hint || 'unknown');\n\n// Wrap text in a markdown code span. Backslash escapes are literal inside a code span,\n// so the delimiter must be a backtick run longer than any run inside the text.\nconst codeSpan = (s) => {\n  const longest = (s.match(/`+/g) || []).reduce((m, r) => Math.max(m, r.length), 0);\n  const fence = '`'.repeat(longest + 1);\n  // A space is needed when the text itself starts or ends with a backtick.\n  const pad = (s.startsWith('`') || s.endsWith('`')) ? ' ' : '';\n  return fence + pad + s + pad + fence;\n};\n\nreturn {\n  topic: 'genome-ingest',\n  title: `${genome} · file da rinominare`,\n  priority: 'high',\n  tags: 'warning',\n  click: '',\n  actions: '',\n  body: `Il file ${codeSpan(raw)} ha spazi o caratteri non ammessi e **non** è stato ingerito.\\nRinominalo in: ${codeSpan(hint)}`\n};"
},
"id": "0f785bcd-cdc6-4dac-9ced-1c5cfa3453dc",
"name": "Build ntfy badname",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2832,
1840
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "9cd2bde3-6846-4855-ad01-e3a4cdbce208",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3056,
1840
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook": {
"main": [
[
{
"node": "Gate push",
"type": "main",
"index": 0
}
]
]
},
"Gate push": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: pending-raw",
"type": "main",
"index": 0
}
]
]
},
"SSH: pending-raw": {
"main": [
[
{
"node": "Split raw files",
"type": "main",
"index": 0
}
]
]
},
"Split raw files": {
"main": [
[
{
"node": "Nome valido?",
"type": "main",
"index": 0
}
]
]
},
"Nome valido?": {
"main": [
[
{
"node": "Run one ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy badname",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy badname": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "63863925-606f-4200-824c-52f1919f2bb1",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "mUJUuQxcDiiPWcUE",
"tags": []
}

View file

@ -1,128 +0,0 @@
{
"name": "Genome: on-error",
"nodes": [
{
"parameters": {},
"id": "f715ed51-95e6-475f-8aa5-d0df531cc7cf",
"name": "Error Trigger",
"type": "n8n-nodes-base.errorTrigger",
"typeVersion": 1,
"position": [
688,
-32
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Global error handler: set this workflow as the \"Error Workflow\" in each genome workflow's\n// Settings. Catches ANY node failure (SSH down, Forgejo 4xx/5xx, etc.) and notifies once.\n// Run Once for Each Item: $json is the error trigger payload.\nconst e = $json.execution || {};\nconst w = $json.workflow || {};\n\n// Safely extract error message from various shapes\nconst rawMsg = (e.error && (e.error.message || e.error.description)) || 'errore sconosciuto';\nconst msg = String(rawMsg).trim();\n\nconst lastNode = e.lastNodeExecuted ? ` (nodo: ${e.lastNodeExecuted})` : '';\nconst workflowName = w.name || 'n8n';\nconst executionUrl = e.url || '';\n\n// Backslash-escape backslashes and backticks so the error text cannot open a markdown\n// code span in the notification body.\nconst msgEsc = msg.replace(/[\\\\`]/g, '\\\\$&');\n\nreturn {\n  topic: 'genome-ingest',\n  title: `Workflow KO · ${workflowName}`,\n  priority: 'high',\n  tags: 'rotating_light',\n  click: executionUrl,\n  actions: executionUrl ? `view, Apri l'esecuzione, ${executionUrl}` : '',\n  body: `**${workflowName}** è fallito${lastNode}.\\n\\n${msgEsc}`\n};"
},
"id": "dd39bc0f-918a-4645-8f04-540ac9089311",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
928,
-32
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "a9ee90f3-d7fe-445d-96af-12caef46473f",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1152,
-32
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Error Trigger": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "036161c9-c934-474e-9b4f-634259f2a866",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "7Vws3gCX3QnjM3oD",
"tags": []
}

View file

@ -1,326 +0,0 @@
{
"name": "Genome: prune",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push-prune",
"options": {}
},
"id": "d31388b9-c6d6-4f28-9a6c-b381922bf5e0",
"name": "Webhook prune",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1232,
-64
],
"webhookId": "d6ac11900058434e"
},
{
"parameters": {
"jsCode": "// Gate: proceed ONLY on develop pushes that REMOVED at least one file under raw/.\n// Additions/modifications are handled by the ingest flow; this flow reacts to deletions only.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository?.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect removed paths safely\nconst removed = [];\nfor (const c of (b.commits || [])) {\n if (!c || !Array.isArray(c.removed)) continue;\n for (const p of c.removed) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n removed.push(p);\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was removed\nif (removed.length === 0) return [];\n\nreturn [{ json: { genome, removedCount: removed.length } }];"
},
"id": "84848a31-d099-459e-bd03-67abc2cf2b77",
"name": "Gate prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1456,
-64
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "175e4191-eb1b-4e5d-8d82-c39205753152",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1680,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi orphan-wiki {{ $('Gate prune').first().json.genome }}'"
},
"id": "598f20f8-d668-48da-90e3-1bfada3ace92",
"name": "SSH: orphan-wiki",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
1904,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Gate: proceed to prune only if orphan-wiki actually found orphans.\n// run-prune re-derives independently anyway (no detected-vs-pruned race);\n// this gate just avoids taking the lock for nothing.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\n\ntry {\n  d = JSON.parse(out);\n} catch (e) {\n  // Malformed JSON from orphan-wiki: fail the execution so the workflow stops here and the\n  // configured error workflow notifies. Returning an item instead would let the flow\n  // continue into 'SSH: prune' with no genome.\n  throw new Error(`orphan-wiki: output non parsabile: ${out.substring(0, 500)}`);\n}\n\n// Strict validation: d must be object with numeric count > 0\nif (!d || typeof d !== 'object' || typeof d.count !== 'number' || d.count <= 0) {\n  return []; // 0 orphans or missing count -> stop silently\n}\n\n// Forward the genome already validated by 'Gate prune' (it is interpolated into the\n// 'SSH: prune' shell command), not the value echoed back in the command output.\nconst genome = $('Gate prune').first().json.genome;\nreturn [{ json: { genome, count: d.count } }];"
},
"id": "3b644d61-26d8-4024-baed-bcb4ad169a6a",
"name": "Orfani?",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2112,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi prune {{ $json.genome }}'"
},
"id": "a8cae2c2-6f2f-4ef6-add9-287195aa84b5",
"name": "SSH: prune",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2336,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Extract the last JSON line from SSH stdout (the command may print logs before/after).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout' };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream nodes\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n count: r.count,\n pr_url: r.pr_url,\n genome: r.genome,\n _raw: line?.substring(0, 500)\n};"
},
"id": "da1ab42c-32e1-4c4d-82a1-925fcee1a098",
"name": "Parse prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2560,
-64
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Build ntfy notification for genome pruning.\n// Run Once for Each Item: $json is the parsed prune result.\nconst d = $json;\nconst genome = d.genome || 'unknown';\n\nlet n;\nif (d.status === 'ok') {\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 potatura ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: 'default',\n tags: 'broom',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `${d.count} sorgente/i orfane proposte per la rimozione. **Approva la PR** per potare, oppure chiudila da Forgejo per annullare.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 errore potatura`.trim(),\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto durante la potatura'}.`\n };\n}\n\nreturn n;"
},
"id": "ebe99407-6038-4f8f-a73f-7dc7b0a011e0",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2784,
-64
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0bd3654e-a73d-4c3a-83ed-9f57ca4aad24",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2992,
-64
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook prune": {
"main": [
[
{
"node": "Gate prune",
"type": "main",
"index": 0
}
]
]
},
"Gate prune": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: orphan-wiki",
"type": "main",
"index": 0
}
]
]
},
"SSH: orphan-wiki": {
"main": [
[
{
"node": "Orfani?",
"type": "main",
"index": 0
}
]
]
},
"Orfani?": {
"main": [
[
{
"node": "SSH: prune",
"type": "main",
"index": 0
}
]
]
},
"SSH: prune": {
"main": [
[
{
"node": "Parse prune",
"type": "main",
"index": 0
}
]
]
},
"Parse prune": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "999f640c-aae6-42aa-9a95-aba26987e9d0",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "smH5Qrv7CQnTtdAF",
"tags": []
}

View file

@ -1,266 +0,0 @@
{
"name": "Genome: run-one-ingest",
"nodes": [
{
"parameters": {
"inputSource": "passthrough"
},
"id": "b1b7ba8e-1e45-4f76-adc0-089180715975",
"name": "On ingest request",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"typeVersion": 1.1,
"position": [
224,
624
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// SECURITY chokepoint: every ingest to vm101 passes here. Re-validate inputs (defense in depth:\n// callers + the SSH wrapper also validate) and assemble the exact command. Charset-validated\n// fields are safe inside the single-quoted remote command -> no shell injection.\n// Run Once for Each Item: $json is the current ingest request.\nconst d = $json || {};\nconst genome = String(d.genome || '').toLowerCase().trim();\nconst raw = String(d.raw || '');\nconst mode = String(d.mode || 'ingest');\nconst fb = String(d.feedback_b64 || '');\n\nconst okGenome = /^[a-z0-9][a-z0-9-]{0,63}$/.test(genome);\nconst okMode = (mode === 'ingest' || mode === 'rework');\nconst okRaw = raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw);\n// feedback_b64 is required only for rework mode; for ingest it can be empty\nconst okFb = (mode === 'ingest') || /^[A-Za-z0-9+/=]+$/.test(fb);\n\nif (!okGenome || !okMode || !okRaw || !okFb) {\n return {\n _ok: false,\n genome,\n mode,\n _reason: `bad input (genome:${okGenome} mode:${okMode} raw:${okRaw} fb:${okFb})`\n };\n}\n\n// Build SSH command: single-quoted remote command prevents shell injection\nconst ssh_cmd = (mode === 'rework')\n ? `ssh vm101 'pi ingest-rework ${genome} ${raw} ${fb}'`\n : `ssh vm101 'pi ingest ${genome} ${raw}'`;\n\nreturn {\n _ok: true,\n ssh_cmd,\n genome,\n raw,\n mode,\n reason: String(d.reason || ''),\n prevPr: String(d.prevPr || '')\n};"
},
"id": "8e538237-0e0e-4308-b2c8-631a52b31185",
"name": "Guard & build cmd",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
624
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "4507e3a8b9714c7e",
"leftValue": "={{ $json._ok }}",
"rightValue": true,
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "4b249e76-7ab6-4aa3-886d-06b865931cf6",
"name": "Input valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
672,
624
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "={{ $json.ssh_cmd }}"
},
"id": "8740ae9a-4094-48b2-a9a4-d40d501e09f6",
"name": "SSH: ingest",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
880,
544
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// run-ingest.sh prints one JSON line; the wrapper may instead print {status:busy|error,...}.\n// Take the last {...} line from stdout (logs may precede/follow).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout', raw: out.substring(0, 500) };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream Build ntfy\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n pr_url: r.pr_url || '',\n slug: r.slug || '',\n lint_clean: r.lint_clean || false,\n conflict: r.conflict || false,\n stage: r.stage || '',\n detail: r.detail || '',\n log: r.log || '',\n _raw: line?.substring(0, 500)\n};"
},
"id": "928344e3-0712-42e0-b1a8-f5caff489746",
"name": "Parse result",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1104,
544
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// One builder for ingest + rework outcomes. Title is plain ASCII; the icon comes from Tags\n// (ntfy shortcodes); navigation is via Click (tap) + Actions (button) so it works on every\n// client.\n// Run Once for Each Item: $json is the current parsed result.\n// We read the original request context from the Guard node (same execution, no executeWorkflow in between).\nconst g = $('Guard & build cmd').item.json || {};\nconst verb = (g.mode === 'rework') ? 'rework' : 'ingest';\nconst d = $json || {};\nconst genome = g.genome || 'unknown';\n\n// Build notification based on status\nlet n;\n\nif (g._ok === false) {\n // Input validation failed (Guard & build cmd rejected it)\n n = {\n title: `Errore ${verb}: input non valido`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `Richiesta di ${verb} rifiutata.\\n${g._reason || 'motivo sconosciuto'}`\n };\n} else if (d.status === 'ok') {\n // Success: PR opened\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n const lint = d.lint_clean ? 'lint pulito' : 'lint con avvisi';\n const conflict = d.conflict ? ' · ⚠️ conflitto da risolvere' : '';\n const prevPr = g.prevPr ? ` · sostituisce #${g.prevPr}` : '';\n const reason = (g.reason && verb === 'ingest') ? ` (${g.reason})` : '';\n\n n = {\n title: `${genome} · ${verb} ${d.slug || ''} ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: d.conflict ? 'high' : 'default',\n tags: d.conflict ? 'warning' : 'white_check_mark',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `**${d.slug || 'sorgente'}** ${verb === 'rework' ? 
'rilavorata' : 'ingerita'}`\n + reason + prevPr\n + `.\\n${lint}${conflict}.`\n };\n} else if (d.status === 'busy') {\n // Another ingest is already running on this genome\n n = {\n title: `${genome} · ${verb} in coda`,\n priority: 'min',\n tags: 'hourglass_flowing_sand',\n click: '',\n actions: '',\n body: `Un altro ingest era in corso su questo genoma. La fonte resta pendente e verrà ripresa al prossimo campanello.`\n };\n} else if (d.status === 'pr_failed') {\n // Semantic/lint ok but PR could not be opened\n const detailLine = String(d.detail || '').split('\\n')[0] || 'dettaglio non disponibile';\n n = {\n title: `${genome} · ${d.slug || ''}: PR non aperta`,\n priority: 'high',\n tags: 'warning',\n click: '',\n actions: '',\n body: `Semantic e lint ok, ma la PR non si è aperta.\\n${detailLine}`\n };\n} else {\n // Generic error (including parse errors)\n const stage = d.stage ? ` (stage: ${d.stage})` : '';\n const log = d.log ? `\\nLog: ${d.log}` : '';\n n = {\n title: `${genome} · errore ${verb}`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto'}${stage}.${log}`\n };\n}\n\nn.topic = 'genome-ingest';\nreturn n;"
},
"id": "9062dfba-02ba-4abc-8be6-828c0b353114",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1328,
624
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0c2b4d9b-2700-4815-b47c-8523bc4eb2ff",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1552,
624
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"On ingest request": {
"main": [
[
{
"node": "Guard & build cmd",
"type": "main",
"index": 0
}
]
]
},
"Guard & build cmd": {
"main": [
[
{
"node": "Input valido?",
"type": "main",
"index": 0
}
]
]
},
"Input valido?": {
"main": [
[
{
"node": "SSH: ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"SSH: ingest": {
"main": [
[
{
"node": "Parse result",
"type": "main",
"index": 0
}
]
]
},
"Parse result": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "fd8c1cf6-c5df-4074-b777-113349e32a03",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "VIi2ovb5gJxNJLbg",
"tags": []
}

View file

@ -1,81 +0,0 @@
# Componenti di Sistema — Gestione Sincronizzazione e Automazione Genoma
Questo modulo contiene gli script di backend che vengono installati sul server `nexus` per gestire il ciclo di vita dei vault locali (scratch di lavoro), l'integrazione con Syncthing e l'autocommit dei file grezzi (`raw/`) provenienti dai dispositivi mobili o desktop (es. Obsidian).
## Architettura dei File di Sistema
Gli script sono progettati per girare in un ambiente multi-utente protetto, dove l'istanza globale di `n8n` (tramite l'utente di sistema `n8n-runner`) pilota le operazioni senza possedere i diritti di lettura/scrittura diretti sui file del genoma o sui segreti di configurazione.
### 1. Posizionamento e Permessi degli Script
I file inclusi in questa cartella devono essere installati sul server di produzione nella directory `/usr/local/bin/` con privilegi di esecuzione globali, ma modificabili solo da `root`.
- **Destinazione:** `/usr/local/bin/`
- **Proprietario (Owner):** `root:root`
- **Permessi (Chmod):** `0755` (`-rwxr-xr-x`)
#### Elenco degli Script:
- `ensure-genome-vault`: Script idempotente che inizializza o riallinea il vault locale clonandolo da Forgejo (in loopback) sul branch `develop`, configura gli `.stignore` ed effettua il provisioning automatico della cartella condivisa su Syncthing via API.
- `genome-askpass`: Helper di autenticazione per Git (`GIT_ASKPASS`). Intercetta le richieste di credenziali di Git durante i cloni e i push HTTP su Forgejo, iniettando l'utente e il token applicativo senza esporli nei log di sistema o negli argomenti dei processi.
- `genome-raw-commit`: Script di polling periodico invocato da n8n. Isola i file modificati nella cartella `raw/`, interroga Syncthing per capire quale dispositivo (e quindi quale autore umano) ha generato la modifica, crea commit atomici attribuiti al singolo autore e pusha le modifiche su Forgejo (`develop`).
---
## Modello di Sicurezza e Visibilità
Per garantire l'isolamento del sistema operativo, l'infrastruttura si basa su tre livelli di confinamento:
### A. Variabili d'Ambiente Protette (`.env`)
Le credenziali (Token Forgejo, API Key Syncthing) risiedono nella Home dell'utente operativo del servizio (`homelab`) e sono completamente invisibili a n8n e ad altri utenti del sistema.
- **Path:** `/home/homelab/.config/knowledge-genome.env`
- **Permessi:** `0600` (`-rw-------`), di proprietà esclusiva di `homelab:homelab`.
#### env
Contenuto di esempio del file `~/.config/knowledge-genome.env`:
```text
# knowledge-genome.env Configuration Profile
# Requirements: Must be owned by the service user with 0600 permissions.
# Vault path and operational branch
GENOME_VAULTS_ROOT=/srv/genome-vaults
GENOME_BASE=develop
# Forgejo Target Instance
# Replace 127.0.0.1 with vm101 IP if Forgejo is hosted on the virtual machine
FORGEJO_HOST=127.0.0.1:3001
FORGEJO_OWNER=Keru
FORGEJO_USER=n8n-bot
FORGEJO_TOKEN="............"
# Git Commit Identity
COMMITTER_NAME=n8n-bot
COMMITTER_EMAIL=n8n-bot@homelab
DEFAULT_AUTHOR_NAME="Matteo Cherubini"
DEFAULT_AUTHOR_EMAIL=matteo@keruhomelab.com
# Syncthing Target Instance
# Replace 127.0.0.1 with vm101 IP if Syncthing API is hosted on the virtual machine
SYNCTHING_URL=http://127.0.0.1:8384
SYNCTHING_API_KEY="............"
```
### B. Confine dei Privilegi in Sudoers
L'utente di automazione `n8n-runner` (usato dall'agente SSH di n8n) non ha accesso alla shell e non può invocare comandi arbitrari. Può unicamente chiamare i due script principali impersonando l'utente `homelab` senza l'inserimento della password.
Configurazione da applicare in `/etc/sudoers.d/n8n-genome` (con permessi rigorosi `0440`):
```text
n8n-runner ALL=(homelab) NOPASSWD: /usr/local/bin/ensure-genome-vault, /usr/local/bin/genome-raw-commit
```
### C. Directory dei Vault
I dati veri e propri sincronizzati da Syncthing risiedono isolati in `/srv/genome-vaults/`.
- **Proprietario**: homelab:homelab (UID/GID 1000), permettendo la convivenza nativa e fluida tra il demone Syncthing in esecuzione nel container e gli script Git locali.

View file

@ -1,126 +0,0 @@
#!/bin/bash
# ensure-genome-vault <genome> [--status-only]
#
# One idempotent command that brings a genome vault into a known state.
# n8n calls it when a genome is created, and again later as a safety net.
#
# What a run does:
#   - no vault on disk     -> clone it from Forgejo (loopback) and track the base branch
#   - vault already there  -> realign it to origin/<base> (the vault is a rebuildable scratchpad)
#   - after clone/fetch    -> write raw/.stignore and register/update the Syncthing folder
#
# Forgejo is the source of truth; vaults are scratch space and are not backed up directly.
# Every operation runs locally over loopback.
set -euo pipefail

genome="${1:?usage: ensure-genome-vault <genome> [--status-only]}"
mode="${2:-}"

# The slug is validated here, inside the script, so it can never be used for
# path or URL traversal: lowercase kebab-case only, no '/', '..' or spaces.
if ! [[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]]; then
  echo '{"status":"error","reason":"invalid genome name"}'
  exit 1
fi

# Load the service configuration with auto-export switched on, so every
# variable the env file defines is exported to child processes.
set -a
. "${HOME}/.config/knowledge-genome.env"
set +a

# Fallbacks for anything the env file left unset or empty.
GENOME_VAULTS_ROOT="${GENOME_VAULTS_ROOT:-/srv/genome-vaults}"
GENOME_BASE="${GENOME_BASE:-develop}"
FORGEJO_USER="${FORGEJO_USER:-n8n-bot}"
FORGEJO_HOST="${FORGEJO_HOST:-127.0.0.1:3001}"
FORGEJO_OWNER="${FORGEJO_OWNER:-Keru}"
SYNCTHING_URL="${SYNCTHING_URL:-http://127.0.0.1:8384}"

# Derived locations: vault directory, Syncthing folder id, Forgejo clone URL.
vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public"
clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git"

# Git asks this helper for credentials (it supplies the bot token).
export GIT_ASKPASS=/usr/local/bin/genome-askpass

mkdir -p "$GENOME_VAULTS_ROOT"
# ── 1. Clone (if missing) or realign (if present) ────────────────────────────
# A vault counts as present only when it already contains a .git directory.
# Every path through this section that does not exit sets $state, which is
# reported in the final JSON line.
if [[ ! -d "${vault}/.git" ]]; then
# --status-only never clones: report the absence and stop.
[[ "$mode" == "--status-only" ]] && { printf '{"status":"absent","genome":"%s"}\n' "$genome"; exit 0; }
git clone -q "$clone_url" "$vault"
cd "$vault"
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
# The base branch exists on the remote: create a local tracking branch,
# or simply switch to it when the clone already checked it out.
git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}" 2>/dev/null || git switch -q "$GENOME_BASE"
else
# The base branch does not exist on the remote yet: create it from the
# currently checked-out commit and publish it.
git switch -q -c "$GENOME_BASE"
git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
fi
state="cloned"
else
cd "$vault"
# --status-only on an existing vault: report the short HEAD and stop
# without fetching or touching the working tree.
if [[ "$mode" == "--status-only" ]]; then
printf '{"status":"present","genome":"%s","head":"%s"}\n' "$genome" "$(git rev-parse --short HEAD)"
exit 0
fi
git fetch -q origin
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
git switch -q "$GENOME_BASE" 2>/dev/null || git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}"
# GUARD: the hard reset runs ONLY when `git status` reports nothing under raw/.
# NOTE(review): only raw/ is inspected, so uncommitted changes outside raw/
# are discarded by the reset — confirm this is intended.
# If Syncthing has already written uncommitted raw files, do NOT destroy them:
# attempt a fast-forward merge instead and ignore its failure.
if [[ -z "$(git status --porcelain -- raw/ 2>/dev/null)" ]]; then
git reset -q --hard "origin/${GENOME_BASE}"
state="realigned"
else
git merge -q --ff-only "origin/${GENOME_BASE}" 2>/dev/null || true
state="realigned-kept-dirty"
fi
else
# The vault exists but the remote has no base branch: create it locally
# (tolerating "already exists") and publish it.
git switch -q -c "$GENOME_BASE" 2>/dev/null || true
git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
state="base-created"
fi
fi
# ── 2. Syncthing housekeeping files in raw/ (infrastructure, never genome content) ──
raw_dir="${vault}/raw"
exclude_file="${vault}/.git/info/exclude"

mkdir -p "$raw_dir"

# Rewritten on every run so the exclusion list always matches this script.
cat > "${raw_dir}/.stignore" <<'EOF'
// Knowledge Genome — Syncthing exclusions for raw/
// NEVER unencrypted private data: git-crypt protects INSIDE the repo, not in Syncthing transit
private
// Obsidian / editor noise
.obsidian
.trash
*.tmp
workspace*.json
// security
.git
EOF

# Syncthing's folder marker has to exist on disk (locally, not in Git);
# without it Syncthing refuses to scan the folder ("folder marker missing").
mkdir -p "${raw_dir}/.stfolder"

# Keep both housekeeping paths out of genome commits through the repo-local
# exclude file, appending each entry only when it is not already listed.
for entry in 'raw/.stignore' 'raw/.stfolder'; do
  if ! grep -qxF "$entry" "$exclude_file" 2>/dev/null; then
    echo "$entry" >> "$exclude_file"
  fi
done
# ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ────────
# folder_state ends up as one of:
#   skipped(no api key) | exists | created | error(check syncthing api)
# No failure in this section may abort the script: the vault itself is already
# in place and the final JSON line must always be printed.
folder_state="skipped(no api key)"
if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then
  if curl -fsS -o /dev/null -H "X-API-Key: ${SYNCTHING_API_KEY}" \
       "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
    folder_state="exists"
  else
    # Start from Syncthing's own folder defaults and override only what we need.
    # The `|| body=""` matters: under `set -e` + `pipefail` a failing curl/jq
    # inside this assignment would otherwise terminate the script before any
    # JSON is emitted (e.g. when Syncthing is down or the API key is wrong).
    body="$(curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" \
              "${SYNCTHING_URL}/rest/config/defaults/folder" \
            | jq --arg id "$fid" --arg label "${genome} (raw public)" --arg path "${vault}/raw" \
                '.id=$id | .label=$label | .path=$path | .type="sendreceive"
                 | .fsWatcherEnabled=true | .rescanIntervalS=3600')" || body=""
    if [[ -n "$body" ]] && curl -fsS -o /dev/null -X PUT \
         -H "X-API-Key: ${SYNCTHING_API_KEY}" -H "Content-Type: application/json" \
         -d "$body" "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
      folder_state="created"
    else
      folder_state="error(check syncthing api)"
    fi
  fi
fi

# Single JSON result line consumed by the caller.
printf '{"status":"ok","genome":"%s","vault":"%s","state":"%s","syncthing_folder":"%s"}\n' \
  "$genome" "$vault" "$state" "$folder_state"

View file

@ -1,19 +0,0 @@
#!/bin/bash
#
# genome-askpass — GIT_ASKPASS helper for Forgejo HTTP authentication.
#
# Git runs this program whenever it needs a credential and passes the prompt
# text as the first argument. A prompt mentioning "username"/"Username" gets
# the Forgejo user; every other prompt gets the token.
#
set -eu

# Pull FORGEJO_USER / FORGEJO_TOKEN from the service configuration.
. "${HOME}/.config/knowledge-genome.env"

prompt="${1:-}"

if [[ "$prompt" == *[Uu]sername* ]]; then
  printf '%s\n' "${FORGEJO_USER:-n8n-bot}"
else
  # Abort with a message instead of handing git an empty password.
  printf '%s\n' "${FORGEJO_TOKEN:?FORGEJO_TOKEN not set}"
fi

View file

@ -1,155 +0,0 @@
#!/bin/bash
# genome-raw-commit <genome>
#
# Commit the raw files that Syncthing has placed in the vault and push them to origin/<base>.
# - Committer = n8n-bot (sole pusher); Author = the person who wrote it (Syncthing modifiedBy -> .authors.json)
# - One commit per author (single-device => one commit). No-op if there is nothing.
# - JSON output built with jq (safe escaping), with a `files` array:
#   for each raw -> file, author, local_path, local_url (file://), remote_url (Forgejo web).
set -euo pipefail

genome="${1:?usage: genome-raw-commit <genome>}"

# Input validation to prevent path or URL traversal inside the script
[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }

# Load the service configuration; auto-export makes it visible to child processes.
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a

# Defaults for anything the env file left unset or empty.
: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
: "${GENOME_BASE:=develop}"
: "${FORGEJO_USER:=n8n-bot}"
: "${FORGEJO_HOST:=127.0.0.1:3001}"
: "${FORGEJO_OWNER:=Keru}"
: "${FORGEJO_WEB_BASE:=https://git.keruhomelab.com}" # human-facing URL for remote links (not the loopback)
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
: "${COMMITTER_NAME:=n8n-bot}"
: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
: "${DEFAULT_AUTHOR_NAME:=Unknown}"
: "${DEFAULT_AUTHOR_EMAIL:=unknown@syncthing}"

vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public"
authors_map="${GENOME_VAULTS_ROOT}/.authors.json"
# GENOME_PUSH_URL is a test seam: defaults to the Forgejo loopback URL in production.
clone_url="${GENOME_PUSH_URL:-http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git}"
export GIT_ASKPASS=/usr/local/bin/genome-askpass

[[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; }
cd "$vault"

# Repo-local identity and behaviour for the bot's commits.
git config user.name "$COMMITTER_NAME"
git config user.email "$COMMITTER_EMAIL"
git config commit.gpgsign false
# By default git C-quotes path names containing non-ASCII bytes in
# `git diff --name-only` output (e.g. "raw/caf\303\251.md", quotes included).
# This script feeds those names back into `-e` tests and `git commit -- <path>`,
# so a note with an accented name would skip the quiet window and then fail the
# commit. Ask git to print such paths verbatim. (Names containing double quotes,
# backslashes or control characters are still quoted by git.)
git config core.quotePath false

# Syncthing housekeeping files must never end up in a genome commit.
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"

# Stage everything under raw/, then unstage the housekeeping paths in case
# they were tracked before the exclude entries existed.
git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
# --- Quiet window: only commit raw files that have STOPPED changing. ----------------
# While a note is being written (Obsidian autosave -> Syncthing -> here) its mtime stays
# fresh; we leave it UNSTAGED so a half-written note never triggers an ingest. A file is
# committed only after it has been still for RAW_QUIET_MINUTES. Deletions (nothing on disk)
# are stable by definition and pass straight through. Deterministic — no model in the loop.
quiet_min="${RAW_QUIET_MINUTES:-2}"
# RAW_QUIET_MINUTES must be a non-negative integer. Anything else would make
# `find -mmin` fail (its error is discarded below, which would silently turn
# the quiet window OFF) and would break the `%d` in the noop report. Fall back
# to the default instead; 10# strips leading zeros so "08" is not read as octal.
if [[ "$quiet_min" =~ ^[0-9]+$ ]]; then
  quiet_min=$((10#$quiet_min))
else
  quiet_min=2
fi

held=0
while IFS= read -r f; do
  [[ -z "$f" ]] && continue
  # Only an existing file can be "hot"; a staged deletion has nothing on disk to settle.
  if [[ -e "$f" && -n "$(find "$f" -mmin -"$quiet_min" 2>/dev/null)" ]]; then
    # Modified within the window: unstage it and count it as held back.
    git reset -q -- "$f" 2>/dev/null || true
    held=$((held+1))
  fi
done < <(git diff --cached --name-only -- raw/)

# Nothing left staged: report a no-op, saying whether files are merely settling.
if git diff --cached --quiet; then
  if [[ "$held" -gt 0 ]]; then
    printf '{"status":"noop","reason":"raw still settling","genome":"%s","held":%d,"quiet_minutes":%d}\n' \
      "$genome" "$held" "$quiet_min"
  else
    printf '{"status":"noop","genome":"%s"}\n' "$genome"
  fi
  exit 0
fi
# resolve_dev <path>
#   <path> is relative to the vault (raw/...). Asks Syncthing's /rest/db/file
#   endpoint about that file in folder $fid and prints `.local.modifiedBy`.
#   Prints nothing when no API key is configured, the request fails, or the
#   field is missing; it never fails the caller.
resolve_dev() {
  if [[ -z "${SYNCTHING_API_KEY:-}" ]]; then
    return 0
  fi
  # Syncthing paths are relative to the shared folder, which is raw/ itself.
  local rel_path="${1#raw/}"
  curl -fsS --get \
       -H "X-API-Key: ${SYNCTHING_API_KEY}" \
       --data-urlencode "folder=${fid}" \
       --data-urlencode "file=${rel_path}" \
       "${SYNCTHING_URL}/rest/db/file" 2>/dev/null \
    | jq -r '.local.modifiedBy // empty' 2>/dev/null \
    || true
}
# author_for_dev <device-id>
#   Prints "name<TAB>email" (no trailing newline) for the given Syncthing device id.
#   The id is looked up in $authors_map, a JSON object keyed by device id whose
#   values carry `name` and `email`. Each field falls back independently to
#   DEFAULT_AUTHOR_NAME / DEFAULT_AUTHOR_EMAIL when the id is empty, the map is
#   missing or unreadable, the entry is absent, or that field is null/missing —
#   so the literal string "null" can never become a commit author.
author_for_dev() {
  local dev="$1" name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
  local mapped_name="" mapped_email=""
  if [[ -n "$dev" && -f "$authors_map" ]]; then
    # `?` tolerates an entry that is not an object; `// empty` maps null/missing to "".
    mapped_name="$(jq -r --arg d "$dev" '.[$d].name? // empty' "$authors_map" 2>/dev/null || true)"
    mapped_email="$(jq -r --arg d "$dev" '.[$d].email? // empty' "$authors_map" 2>/dev/null || true)"
  fi
  name="${mapped_name:-$name}"
  email="${mapped_email:-$email}"
  printf '%s\t%s' "$name" "$email"
}
# Collect per-file (relpath, author) and group by author for committing.
#   G_FILES[key]  newline-terminated list of staged paths for that author
#   G_NAME[key]   author name     G_EMAIL[key]  author email
#   ROWS          "path<TAB>author name" per staged file, reused for the final JSON
# key is the "Name <email>" string later handed to `git commit --author`.
declare -A G_FILES G_NAME G_EMAIL
declare -a ROWS
while IFS= read -r f; do
[[ -z "$f" ]] && continue
# Device that last modified the file according to Syncthing (may be empty).
dev="$(resolve_dev "$f")"
# author_for_dev prints "name<TAB>email"; split it on the tab.
IFS=$'\t' read -r aname aemail <<< "$(author_for_dev "$dev")"
ROWS+=("${f}"$'\t'"${aname}")
key="${aname} <${aemail}>"
G_FILES["$key"]+="${f}"$'\n'
G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"
done < <(git diff --cached --name-only -- raw/)
# One timestamp shared by every commit of this run.
ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
commits=0; summary=""
# One commit per author, restricted to that author's paths.
for key in "${!G_FILES[@]}"; do
mapfile -t files < <(printf '%s' "${G_FILES[$key]}")
# Comma-separated file list without the raw/ prefix, used in the subject and summary.
short="$(printf '%s\n' "${files[@]}" | sed 's#^raw/##' | paste -sd, -)"
msg="$(printf 'raw(%s): sync %s\n\nAdded-by: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
"$genome" "$short" "${G_NAME[$key]}" "$ts")"
# The explicit pathspec limits the commit to this author's files.
git commit -q --author="$key" -m "$msg" -- "${files[@]}"
commits=$((commits+1))
# summary accumulates "Name:file1,file2" entries separated by "; ".
summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
done
# Push to origin/<base>. The vault is SCRATCH, so we never do an interactive rebase
# (which can conflict when the same raw file is edited repeatedly). Strategy:
# try a fast-forward push; if origin moved, re-apply our raw changes on top of a
# fresh origin/<base> and push again. Deterministic, conflict-free.
git fetch -q origin
# NOTE(review): ANY push failure takes the realign path below, not only
# "origin advanced" (git's stderr is discarded) — confirm that is acceptable.
if ! git push -q "$clone_url" "HEAD:${GENOME_BASE}" 2>/dev/null; then
# origin advanced: capture our just-made tree for raw/, realign hard, re-apply, retry once.
# Snapshot the whole raw/ directory (dotfiles included) into a temp dir.
tmp="$(mktemp -d)"
cp -a raw/. "$tmp"/ 2>/dev/null || true
# Drop the local commits made above and any untracked files, then overlay
# the snapshot on top of the fresh origin state.
git reset -q --hard "origin/${GENOME_BASE}"
git clean -q -fd
cp -a "$tmp"/. raw/ 2>/dev/null || true
rm -rf "$tmp"
git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
if git diff --cached --quiet; then
# our content already matches origin -> nothing to push, report ok-noop-after-realign
printf '{"status":"ok","genome":"%s","base":"%s","commits":0,"head":"%s","summary":"already in sync after realign","files":[]}\n' \
"$genome" "$GENOME_BASE" "$(git rev-parse --short HEAD)"
exit 0
fi
# The re-applied changes go into ONE commit under the default author:
# the per-author split made earlier is not reproduced on this path.
git commit -q --author="${DEFAULT_AUTHOR_NAME} <${DEFAULT_AUTHOR_EMAIL}>" \
-m "raw(${genome}): re-apply after realign" -- raw/ || true
# Second and last attempt; a failure here is reported as an error.
git push -q "$clone_url" "HEAD:${GENOME_BASE}" \
|| { printf '{"status":"error","reason":"push-failed-after-realign","genome":"%s"}\n' "$genome"; exit 1; }
fi
head="$(git rev-parse --short HEAD)"

# Print one JSON object per committed raw file: the path, its author, and a
# local (file://) plus a remote (Forgejo web) link to it.
emit_file_rows() {
  local entry rel_path author_name
  for entry in "${ROWS[@]}"; do
    IFS=$'\t' read -r rel_path author_name <<< "$entry"
    jq -n \
      --arg file "$rel_path" \
      --arg author "$author_name" \
      --arg lpath "${vault}/${rel_path}" \
      --arg lurl "file://${vault}/${rel_path}" \
      --arg rurl "${FORGEJO_WEB_BASE}/${FORGEJO_OWNER}/${genome}/src/branch/${GENOME_BASE}/${rel_path}" \
      '{file:$file, author:$author, local_path:$lpath, local_url:$lurl, remote_url:$rurl}'
  done
}

# Slurp the individual objects into the `files` array.
files_json="$(emit_file_rows | jq -s '.')"

# Final result document, assembled by jq so every value is escaped correctly.
jq -n \
  --arg genome "$genome" \
  --arg base "$GENOME_BASE" \
  --argjson commits "$commits" \
  --arg head "$head" \
  --arg summary "$summary" \
  --argjson files "$files_json" \
  '{status:"ok", genome:$genome, base:$base, commits:$commits, head:$head, summary:$summary, files:$files}'

View file

@ -1,60 +0,0 @@
# deploy/vm101
System artifacts deployed to **vm101** (the GPU ingest node). The repo is the
source of truth; the live copies live in `/usr/local/bin/`. Edit here, then
`sudo ./install.sh` on vm101 to push changes.
## Contents
- `n8n-pi-wrap` — forced-command wrapper that fronts every n8n→vm101 SSH call.
- `install.sh` — installs the wrapper(s) into `/usr/local/bin` (idempotent).
## n8n-pi-wrap
The only entry point for the `n8n-runner` identity onto vm101. n8n never gets a
shell here: whatever it sends arrives as `SSH_ORIGINAL_COMMAND`, and a `case`
whitelist decides what runs. Anything outside the whitelist is denied and logged.
Allowed commands:
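| Command | What it does |
|---|---|
| `pi run` | one-shot prompt via stdin (proof-of-life / health) |
| `pi ingest <genome> <raw_path>` | the real two-phase ingest (below) |
| `pi ingest-rework <genome> <raw_path> <feedback_b64>` | re-runs the ingest for a raw file, passing base64-encoded review feedback |
| `pi prune <genome>` | clean start, then `run-prune.sh` to prune orphaned sources through a gated PR |
| `pi pending-raw <genome>` | runs the ingest skill's `pending-raw.sh` for the genome |
| `pi orphan-wiki <genome>` | runs the ingest skill's `orphan-wiki.sh` for the genome |
| `pi changed-raw <genome> <before> <after>` | lists the `raw/` files changed between two commits (JSON) |
| `ollama list` / `ollama ps` | model introspection |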
### The two-phase ingest
`pi ingest` runs the clean-start + two phases, then stops:
1. **Clean start** — `git fetch && switch <INGEST_BASE> && reset --hard origin/<base>`.
Destroys only vm101's *scratch* checkout (never a shared branch, never a
force-push) — this determinism is by design.
2. **Semantic** — `skills/ingest/scripts/ingest-semantic.py <genome> <raw_path>`
drives `pi` to WRITE `wiki/*` pages + `.ingest-manifest.json`.
NOTE: this is the script, NOT `pi -p "/skill:ingest ..."` (that form makes the
model reply in chat and write nothing — the classic "manifest not found" trap).
3. **Mechanical** — `skills/ingest/scripts/run-ingest.sh <genome>` validates the
manifest, then index/log/scoped-lint/commit on `feat/ai-ingest-<slug>` and opens
a PR onto `<INGEST_BASE>`. Emits one JSON line `{status,slug,pr_url,...}`.
The PR then waits for the human gate. One raw per session, sequential.
### Input hardening
Both inputs come from `SSH_ORIGINAL_COMMAND`, so both are validated:
- `genome` — kebab lowercase `^[a-z0-9-]+$`.
- `raw_path` — must be under `raw/`, no `..` traversal, restricted charset
`[A-Za-z0-9._/-]`, and the file must exist. Rejected paths return a JSON error.
Config (`INGEST_BASE`, `GENOMES_ROOT`, `INGEST_MODEL`, Forgejo token) is sourced
from `~/.config/knowledge-genome.env` (0600, owner-only).
## Install / update
```bash
# on vm101
cd ~/knowledge-genome-orchestrator/deploy/vm101
sudo ./install.sh
```

View file

@ -1,8 +0,0 @@
#!/bin/bash
# deploy/vm101/install.sh — install vm101 wrappers from repo -> /usr/local/bin (idempotent).
# Run ON vm101 with sudo: sudo ./install.sh
set -euo pipefail

# Directory containing this script, so it works from any cwd.
here="$(cd "$(dirname "$0")" && pwd)"
src="${here}/n8n-pi-wrap"
dst=/usr/local/bin/n8n-pi-wrap

# Validate the source BEFORE touching the live copy: this script installs it as
# the wrapper in /usr/local/bin, and a wrapper with a syntax error must not
# replace a working one.
bash -n "$src" || { echo "syntax error in ${src}: nothing installed" >&2; exit 1; }

install -m 0755 "$src" "$dst"
echo "installed: /usr/local/bin/n8n-pi-wrap"

# Re-check the installed copy as a final confirmation.
bash -n "$dst" && echo "syntax: ok"

View file

@ -1,196 +0,0 @@
#!/bin/bash
# n8n-pi-wrap — command whitelist for SSH_ORIGINAL_COMMAND.
# The requested command line is matched against the `case` arms below; only
# those exact forms run, everything else is logged as denied and exits 1.
# Each accepted command is logged through `logger -t n8n-pi-wrap`.
set -eu
cmd="${SSH_ORIGINAL_COMMAND:-}"
case "$cmd" in
# pi pending-raw <genome>: run the ingest skill's pending-raw.sh for the genome.
"pi pending-raw "*)
genome="${cmd#pi pending-raw }"
# Genome slug: lowercase letters, digits and '-' only, and not empty.
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi pending-raw ${genome}"
# The env file is optional for this command: a missing file is tolerated.
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
# Run from the DEPLOYED skill dir (the same place the `pi ingest` arm takes
# ingest-semantic.py / run-ingest.sh from), so pending-raw.sh resolves its
# sibling slug.sh via BASH_SOURCE.
exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
;;
# pi orphan-wiki <genome>: run the ingest skill's orphan-wiki.sh for the genome.
"pi orphan-wiki "*)
genome="${cmd#pi orphan-wiki }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
;;
# pi run: one-shot prompt. The prompt text is read from stdin and handed to pi
# as a single argument; pi itself gets no stdin and runs with --no-tools.
"pi run")
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
prompt=$(cat)
exec /usr/local/bin/pi --no-tools --mode json -p "$prompt" </dev/null
;;
"pi ingest "*)
  # pi ingest <genome> <raw_path>
  # Validates both arguments, takes the per-genome lock, resets the checkout
  # (clean_start), runs the semantic step and finally exec's the mechanical
  # step, whose single JSON line becomes this command's output.
  #
  # Strict positional parse: EXACTLY `pi ingest <genome> <raw_path>` (two tokens).
  rest="${cmd#pi ingest }"
  genome="${rest%% *}"
  raw_path="${rest#* }"
  # reject: missing second token, or any extra token (a space left in raw_path)
  if [ "$genome" = "$rest" ] || [ -z "$raw_path" ] || [ "$raw_path" != "${raw_path#* }" ]; then
    echo '{"status":"error","reason":"usage: pi ingest <genome> <raw_path>"}'; exit 1
  fi
  # genome slug: kebab lowercase only
  case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
  # raw_path whitelist: MUST live under raw/, no traversal, restricted charset.
  #   - must start with "raw/"   - no ".." segment   - no absolute path / leading slash
  #   - allowed chars: [A-Za-z0-9._/-] (kebab slugs + subdirs like raw/articles/foo.md)
  case "$raw_path" in
    raw/*) : ;;
    *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;;
  esac
  case "$raw_path" in
    *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;;
  esac
  case "$raw_path" in
    *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;;
  esac
  logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"
  # Per-genome lock: serialize writes; never two concurrent ingests on the same genome.
  # The stderr redirect sits on the brace group, NOT on `exec` itself:
  # redirections given to an argument-less `exec` are permanent, so
  # `exec 9>file 2>/dev/null` would send stderr to /dev/null for the rest of
  # this script (and for the exec'd run-ingest.sh) whenever the open succeeds.
  { exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null \
    || exec 9>"/tmp/kg-ingest-${genome}.lock"
  if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'
    exit 0
  fi
  set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
  cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
  # The raw file must actually exist under the genome's raw/ dir.
  [ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
  # Clean start on the configured base (single source of truth in lib/clean-start.sh).
  : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
  source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
  clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
  # SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
  # (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
  # Its stdout+stderr go to a temp log whose path is reported on failure.
  log="$(mktemp -t pi-ingest.XXXXXX.log)"
  "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
    >"$log" 2>&1 \
    || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"ingest-semantic failed\",\"log\":\"${log}\"}"; exit 1; }
  # MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
  exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
  ;;
"pi prune "*)
  # pi prune <genome>
  # Prunes orphaned sources. Takes the same lock as the ingest (writes are
  # serialized per genome), runs clean_start, then exec's run-prune.sh (which
  # re-derives the orphans and opens a gated PR).
  genome="${cmd#pi prune }"
  case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
  logger -t n8n-pi-wrap "ok: pi prune ${genome}"
  # The stderr redirect sits on the brace group, NOT on `exec` itself:
  # redirections given to an argument-less `exec` are permanent, so
  # `exec 9>file 2>/dev/null` would silence stderr for the rest of this script
  # (and for the exec'd run-prune.sh) whenever the open succeeds.
  { exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null \
    || exec 9>"/tmp/kg-ingest-${genome}.lock"
  if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest/prune is running for this genome","genome":"'"$genome"'"}'
    exit 0
  fi
  set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
  cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
  # Reset the scratch checkout to the configured base before pruning.
  : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
  source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
  clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
  exec "${HOME}/.pi/agent/skills/ingest/scripts/run-prune.sh" "${genome}"
  ;;
"pi ingest-rework "*)
  # pi ingest-rework <genome> <raw_path> <feedback_base64>   (3 tokens)
  # The feedback travels base64-encoded in argv: n8n's SSH node does not pass
  # stdin, and this way the review's metacharacters (quotes, newlines, $(...))
  # are neutralized. The decoded text reaches ingest-semantic.py through the
  # INGEST_FEEDBACK environment variable.
  args="${cmd#pi ingest-rework }"
  genome="${args%% *}"; tmp="${args#* }"
  raw_path="${tmp%% *}"; fb_b64="${tmp#* }"
  # reject: fewer than three tokens, or an empty feedback token
  if [ "$genome" = "$args" ] || [ "$raw_path" = "$tmp" ] || [ -z "$fb_b64" ]; then
    echo '{"status":"error","reason":"usage: pi ingest-rework <genome> <raw_path> <feedback_b64>"}'; exit 1
  fi
  # Same argument whitelists as `pi ingest`, plus a base64-alphabet check on
  # the feedback (which also rejects any extra, space-separated token).
  case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome"}'; exit 1;; esac
  case "$raw_path" in raw/*) : ;; *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;; esac
  case "$raw_path" in *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;; esac
  case "$raw_path" in *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;; esac
  case "$fb_b64" in *[!A-Za-z0-9+/=]*) echo '{"status":"error","reason":"feedback not base64"}'; exit 1;; esac
  logger -t n8n-pi-wrap "ok: pi ingest-rework ${genome} ${raw_path}"
  # Lenient decode: a decoding error leaves whatever was decoded (possibly nothing).
  feedback="$(printf '%s' "$fb_b64" | base64 -d 2>/dev/null || true)"
  # Per-genome lock: serializes with the regular ingests.
  # The stderr redirect sits on the brace group, NOT on `exec` itself:
  # redirections given to an argument-less `exec` are permanent, so
  # `exec 9>file 2>/dev/null` would silence stderr for the rest of this script
  # (and for the exec'd run-ingest.sh) whenever the open succeeds.
  { exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null \
    || exec 9>"/tmp/kg-ingest-${genome}.lock"
  if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'; exit 0
  fi
  set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
  cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
  [ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
  # Reset the scratch checkout to the configured base before re-running.
  : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
  source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
  clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
  # Semantic step with the feedback in its environment; output goes to a temp
  # log whose path is reported on failure.
  log="$(mktemp -t pi-rework.XXXXXX.log)"
  INGEST_FEEDBACK="$feedback" \
    "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
    >"$log" 2>&1 \
    || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"rework failed\",\"log\":\"${log}\"}"; exit 1; }
  # Mechanical step: emits the final JSON line.
  exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
  ;;
"pi changed-raw "*)
  # pi changed-raw <genome> <before> <after>
  # List raw/ files changed between two commits as a JSON array (the webhook
  # payload does NOT include file lists, so vm101's checkout computes the diff itself).
  rest="${cmd#pi changed-raw }"
  genome="${rest%% *}"
  range="${rest#* }"
  before="${range%% *}"
  after="${range#* }"
  case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
  # Exactly three tokens are required. When a separator is missing, the
  # parameter expansions above fall back to their input, which would otherwise
  # let `pi changed-raw abc` or `pi changed-raw g abc` through with before/after
  # silently duplicated.
  if [ "$range" = "$rest" ] || [ "$after" = "$range" ]; then
    echo '{"status":"error","reason":"invalid commit range"}'; exit 1
  fi
  # Validate each commit id on its own (lowercase hex, not empty): checking only
  # the concatenation would accept an empty <after> and report a misleading
  # "ok, count 0". An <after> with a space (a fourth token) is rejected here too.
  case "$before" in ""|*[!a-f0-9]*) echo '{"status":"error","reason":"invalid commit range"}'; exit 1;; esac
  case "$after" in ""|*[!a-f0-9]*) echo '{"status":"error","reason":"invalid commit range"}'; exit 1;; esac
  logger -t n8n-pi-wrap "ok: pi changed-raw ${genome} ${before}..${after}"
  set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
  cd "${GENOMES_ROOT}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
  git fetch -q origin
  # Resolve the diff base robustly:
  #  - before all-zero (brand-new branch) or unreachable (force-push) -> fall back to after~1
  #  - if even after~1 is missing (root commit) -> list all raw files in `after`
  base="$before"
  case "$before" in *[!0]*) : ;; *) base="" ;; esac   # all-zero -> empty
  if [ -n "$base" ] && ! git cat-file -e "${base}^{commit}" 2>/dev/null; then base=""; fi
  if [ -z "$base" ]; then
    if git cat-file -e "${after}~1^{commit}" 2>/dev/null; then base="${after}~1"; else base=""; fi
  fi
  if [ -n "$base" ]; then
    # --diff-filter=d drops deleted files; Syncthing housekeeping paths are filtered out.
    files="$(git diff --name-only --diff-filter=d "${base}" "${after}" -- raw/ 2>/dev/null \
              | grep -vE '(^|/)\.st(folder|ignore)' || true)"
  else
    # no usable base: enumerate raw files present at `after`
    files="$(git ls-tree -r --name-only "${after}" -- raw/ 2>/dev/null \
              | grep -vE '(^|/)\.st(folder|ignore)' || true)"
  fi
  # emit a JSON array via jq (safe escaping)
  # Normalize "only blank lines" to the empty string first.
  printf '%s\n' "$files" | grep -c . >/dev/null 2>&1 || files=""
  if [ -z "$files" ]; then
    echo '{"status":"ok","genome":"'"$genome"'","count":0,"files":[]}'
  else
    printf '%s\n' "$files" | jq -R . | jq -s \
      --arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
  fi
  ;;
"ollama list"|"ollama ps")
  # Model introspection: only these two exact sub-commands are accepted.
  # $cmd is one of the two literals above, so the log line and the argument
  # handed to ollama are derived from it.
  logger -t n8n-pi-wrap "ok: ${cmd}"
  exec /usr/local/bin/ollama "${cmd#ollama }"
  ;;
*)
  # Anything not whitelisted above: log the attempt and refuse.
  logger -t n8n-pi-wrap "denied: ${cmd:-<empty>}"
  echo "unauthorized command" >&2
  exit 1
  ;;
esac

View file

@ -1,130 +0,0 @@
#!/usr/bin/env bash
# diagnose-run-ingest.sh
# Run from the repo root: bash diagnose-run-ingest.sh
# Builds the same fixture the bats test uses and runs run-ingest under `bash -x`
# so we can see exactly which command makes it exit non-zero.
# No `-e` in the options below: a failing step does not stop the script.
set -uo pipefail
# The repo root is taken from the current directory, hence "run from the repo root".
REPO="$(pwd)"
RI="${REPO}/skills/ingest/scripts/run-ingest.sh"
# Tool versions; jq and python3 are reported as MISSING instead of failing.
echo "==================== ENV ===================="
echo "bash: $(bash --version | head -1)"
echo "git : $(git --version)"
echo "jq : $(jq --version 2>/dev/null || echo MISSING)"
echo "py : $(python3 --version 2>/dev/null || echo MISSING)"
echo
# Show how the on-disk run-ingest.sh invokes its helpers and emits its result.
echo "============ run-ingest.sh on disk ============"
if [[ ! -f "$RI" ]]; then echo "NOT FOUND: $RI (run me from the repo root)"; exit 1; fi
echo "-- helper invocations (want 'bash ...'): --"
grep -nE 'log-append\.sh|scoped-lint\.sh|open-pr\.sh' "$RI"
echo "-- result emitter (want 'jq -nc'): --"
grep -nE 'jq -nc?|jq -n ' "$RI"
echo
# Everything below lives in a fresh temp dir: a bare repo used as origin
# ($T/origin.git), an empty hooks dir ($T/nohooks) and the genome checkout ($g).
# The temp dir is not removed when the script ends.
echo "============ build hermetic fixture ============"
T="$(mktemp -d)"
mkdir -p "$T/nohooks"
git init --bare -q "$T/origin.git"
g="$T/g"
mkdir -p "$g"/{raw/articles,wiki/sources,wiki/entities,wiki/concepts,wiki/queries,wiki/private}
# Minimal wiki index with one section per page type.
cat > "$g/wiki/index.md" <<'EOF'
---
title: "Index"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---
# Index
---
## Sources (`wiki/sources/`)
*x*
## Entities (`wiki/entities/`)
*x*
## Concepts (`wiki/concepts/`)
*x*
## Queries (`wiki/queries/`)
*x*
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*x*
EOF
# Minimal wiki log with a single initial entry.
cat > "$g/wiki/log.md" <<'EOF'
---
title: "Log"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---
# Log
---
## [2026-01-01] CONFIG | init
- run_id: `init`
EOF
# The raw source that the manifest below refers to.
echo raw > "$g/raw/articles/test.md"
# Initial commit and push, in a subshell so the cwd of this script is unchanged.
# Signing is disabled and hooks are pointed at the empty hooks dir.
(
cd "$g"
git init -q
git config commit.gpgsign false
git config core.hooksPath "$T/nohooks"
git config user.email t@t
git config user.name t
git add .
git commit -qm init
git branch -M main
git remote add origin "$T/origin.git"
git push -q -u origin main
) && echo "fixture commit+push OK" || echo "FIXTURE SETUP FAILED (look above)"
# Written AFTER the initial commit, so these two files are uncommitted when
# run-ingest starts: a wiki page plus the manifest that lists it.
cat > "$g/wiki/sources/test-source.md" <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-04
private: false
---
body
EOF
cat > "$g/.ingest-manifest.json" <<'EOF'
{ "raw_source":"raw/articles/test.md","model":"m","reasoning":"r","pr_summary":"s","contradictions":"None",
"pages":[{"path":"wiki/sources/test-source.md","summary":"a source","maturity":"draft","status":"created"}] }
EOF
echo
echo "============ run-ingest (bash -x) ============"
# This script runs without `set -e`, so an unchecked `cd` failure would let
# run-ingest execute in whatever directory we happen to be in (the real repo
# root) instead of the throw-away fixture. Stop instead.
cd "$g" || { echo "cannot cd into fixture: $g"; exit 1; }
# Library dir from this repo, dummy Forgejo settings, and DRY_RUN=1.
export KG_LIB_DIR="${REPO}/lib" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
# stdout (the final JSON) and the xtrace on stderr are captured separately.
bash -x "$RI" genome-test >"$T/out.txt" 2>"$T/trace.txt"
rc=$?
echo "EXIT=$rc"
echo "-- run-ingest stdout (final JSON should be here): --"
cat "$T/out.txt"
echo "-- last 25 lines of the trace (the failing command is near the end): --"
tail -n 25 "$T/trace.txt"

View file

@ -9,7 +9,7 @@ PROVIDER=forgejo
# --- FORGEJO ---
FORGEJO_URL=https://git.keruhomelab.com
FORGEJO_USER=Keru
FORGEJO_USER=keru
FORGEJO_SSH_PORT=222
# --- GITHUB (used when PROVIDER=github) ---

View file

@ -1,18 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# lib/clean-start.sh — single source of truth for the pre-session reset.
# Caller must already be INSIDE the genome checkout.
# Aligns the working tree to origin/<base>. Never force-pushes a shared branch.
# Tolerates a missing remote branch (first-setup scenario).
# NOTE: sourced library — no `set -euo pipefail` (would leak into the caller).
# =============================================================================
clean_start() {
  # Pre-session reset of the checkout the caller is already inside.
  # Base branch comes from INGEST_BASE (default: main). Returns 1 as soon as a
  # required git step fails, 0 otherwise.
  local target_branch="${INGEST_BASE:-main}"

  if ! git fetch -q origin; then
    return 1
  fi

  # Use the existing local branch when possible; create it only if the switch fails.
  if ! git switch -q "$target_branch" 2>/dev/null; then
    git checkout -q -b "$target_branch" || return 1
  fi

  # The hard reset only happens when origin actually has the branch; a missing
  # remote branch is tolerated and simply skips this step.
  if git ls-remote --exit-code --heads origin "$target_branch" >/dev/null 2>&1; then
    if ! git reset -q --hard "origin/${target_branch}"; then
      return 1
    fi
  fi

  # Drop untracked files and directories.
  if ! git clean -q -fd; then
    return 1
  fi
  return 0
}

View file

@ -27,10 +27,6 @@ check_deps() {
if ! command -v bw &>/dev/null; then
warn "Optional tool 'bw' (Bitwarden CLI) not found. Vaultwarden integration will be manual."
fi
if ! command -v python3 &>/dev/null; then
warn "Optional tool 'python3' not found. Needed for 'make test' and the ingest skill (index-append.py), not for setup."
fi
}
check_git_identity() {

View file

@ -21,29 +21,18 @@ gcrypt_export_key() {
gcrypt_verify() {
local genome_name="$1"
local key_path="${KEYS_DIR}/${genome_name}.key"
info "Verifying git-crypt configuration for ${genome_name}..."
info "Verifying git-crypt status for ${genome_name}..."
git-crypt lock
# `git-crypt status` reports the CONFIGURED status (from `.gitattributes`), not the
# lock/unlock status of the working tree. Encrypted lines have their labels right-aligned
# (with leading whitespace), so you CANNOT anchor on `^encrypted`.
# We filter by private/ and distinguish “encrypted” from “not encrypted” without
# relying on exact spacing.
local status_out encrypted_count not_encrypted_count
status_out=$(git-crypt status 2>/dev/null || true)
encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^[[:space:]]*encrypted:' || true)
not_encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^not encrypted:' || true)
if [[ "$encrypted_count" -gt 0 ]]; then
success "Encryption configured: ${encrypted_count} private file(s) under git-crypt."
if [[ "$not_encrypted_count" -gt 0 ]]; then
warn "${not_encrypted_count} file(s) under private/ are NOT covered by the git-crypt filter — check .gitattributes (leak risk)."
fi
elif [[ "$not_encrypted_count" -gt 0 ]]; then
warn "private/ files exist but none are covered by the git-crypt filter — check the .gitattributes filter (leak risk)."
if file "raw/private/.gitkeep" 2>/dev/null | grep -q "data"; then
success "Encryption verified: private/ directory is protected."
else
info "No private/ files present yet — nothing to verify."
warn "Encryption check inconclusive. Run 'git-crypt status' manually."
fi
[[ -f "$key_path" ]] && git-crypt unlock "$key_path"
}
# ---------------------------------------------------------------------------
@ -66,7 +55,7 @@ gcrypt_verify() {
#
# USAGE:
# source lib/git-crypt.sh
# cd ~/knowledge-genome-orchestrator/genome-dev
# cd ~/knowledge-genome-setup/genome-dev
# gcrypt_rotate_key "genome-dev"
#
# REQUIRES:
@ -118,8 +107,6 @@ gcrypt_rotate_key() {
# 5. Re-stage private files so they are committed encrypted with the new key
local staged=0
# compgen -G requires bash 4+ for reliable glob expansion. macOS stock
# bash is 3.2; use Homebrew bash (already recommended in README) for rotation.
if compgen -G "raw/private/*" > /dev/null 2>&1; then
git add raw/private/
staged=1

View file

@ -23,7 +23,7 @@ lint_markdown_file() {
# 1. Check frontmatter delimiters
if [[ $(head -n 1 "$file") != "---" ]]; then
error "Missing frontmatter start (---) in: $file"
warn "Missing frontmatter start (---) in: $file"
errors=$((errors + 1))
fi
@ -31,14 +31,14 @@ lint_markdown_file() {
local mandatory_fields=("title:" "type:" "domain:" "maturity:" "last_updated:")
for field in "${mandatory_fields[@]}"; do
if ! grep -q "^${field}" "$file"; then
error "Missing mandatory field '${field}' in: $file"
warn "Missing mandatory field '${field}' in: $file"
errors=$((errors + 1))
fi
done
# 3. Check domain matches genome name
if grep -q "^domain:" "$file" && ! grep -q "^domain: ${genome_name}" "$file"; then
error "Domain mismatch in $file (expected '${genome_name}')"
warn "Domain mismatch in $file (expected '${genome_name}')"
errors=$((errors + 1))
fi
@ -70,8 +70,8 @@ check_valid_type() {
done
if [[ $valid -eq 0 ]]; then
error "Invalid type value '${type_value}' in: $file"
error " Valid types: ${VALID_TYPES[*]}"
warn "Invalid type value '${type_value}' in: $file"
warn " Valid types: ${VALID_TYPES[*]}"
return 1
fi
@ -144,8 +144,8 @@ check_knowledge_decay() {
esac
if [[ $days_old -gt $threshold ]]; then
error "STALE: $file"
error " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})"
warn "STALE: $file"
warn " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})"
return 1
fi
@ -190,129 +190,12 @@ check_broken_links() {
local links
links=$(grep -oE '\[\[[^\]]+' "$file" 2>/dev/null | sed 's/^\[\[//' | cut -d'|' -f1)
# Cross-genome links (../other-genome/…) are not resolvable from a single
# genome checkout and are skipped — they would always fall
# through the two-level lookup and produce non-actionable warnings.
while IFS= read -r link; do
[[ -z "$link" ]] && continue
if [[ "$link" == ../* ]]; then
continue
fi
for link in $links; do
local target="$link"
[[ "$target" != *.md ]] && target="${target}.md"
if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then
warn "Potential broken link: [[$link]] in $file"
fi
done <<< "$links"
}
# ---------------------------------------------------------------------------
# levenshtein <s1> <s2>
# Classic edit distance via a two-row rolling buffer, so every array subscript
# is a single integer. The previous implementation used comma subscripts
# (d[i,j]); in bash arithmetic the comma operator collapses to one dimension,
# so the table aliased onto itself and returned wrong distances — it could not
# even score two identical strings as 0. This form is portable to bash 3.2
# (no associative arrays). Echoes the integer distance.
# ---------------------------------------------------------------------------
levenshtein() {
  # Echo the edit distance between $1 and $2. Only two rows of the DP table
  # are kept (the row above and the row being filled), so every array
  # subscript is a single integer.
  local left="$1" right="$2"
  local rows=${#left} cols=${#right}

  # An empty operand costs exactly the length of the other string.
  if (( rows == 0 )); then
    echo "$cols"
    return
  fi
  if (( cols == 0 )); then
    echo "$rows"
    return
  fi

  local -a above=() row=()
  local r c best candidate

  # Row 0: distance from the empty prefix of $left to each prefix of $right.
  for (( c = 0; c <= cols; c++ )); do
    above[c]=$c
  done

  for (( r = 1; r <= rows; r++ )); do
    row[0]=$r
    for (( c = 1; c <= cols; c++ )); do
      # Start from the deletion cost, then try insertion and substitution.
      best=$(( above[c] + 1 ))

      candidate=$(( row[c-1] + 1 ))
      if (( candidate < best )); then
        best=$candidate
      fi

      # Substitution is free when the two characters already match.
      if [[ "${left:r-1:1}" == "${right:c-1:1}" ]]; then
        candidate=${above[c-1]}
      else
        candidate=$(( above[c-1] + 1 ))
      fi
      if (( candidate < best )); then
        best=$candidate
      fi

      row[c]=$best
    done
    # The finished row becomes the "row above" for the next iteration.
    above=( "${row[@]}" )
  done

  echo "${above[cols]}"
}
# ---------------------------------------------------------------------------
# similarity <s1> <s2>
# Percentage similarity from the edit distance: 100 = identical, 0 = entirely
# different. Two empty strings are treated as identical (100), so the divide
# is always guarded.
# ---------------------------------------------------------------------------
similarity() {
  # Echo an integer percentage derived from the edit distance of $1 and $2,
  # relative to the longer of the two strings (integer division).
  local left="$1" right="$2"

  local longest=${#left}
  if (( ${#right} > longest )); then
    longest=${#right}
  fi

  # Two empty strings: report 100 and avoid dividing by zero.
  if (( longest == 0 )); then
    echo "100"
    return
  fi

  local edits
  edits=$(levenshtein "$left" "$right")
  echo $(( 100 - edits * 100 / longest ))
}
# ---------------------------------------------------------------------------
# check_duplicates <manifest>
# Advisory only: warns when a page created this run has a slug suspiciously
# close to an entity/concept already listed in wiki/index.md, so a human can
# merge them in the PR rather than grow two near-identical pages. Never fails
# the lint (always returns 0), exactly like check_broken_links.
#
# The threshold is tunable via KG_DUP_THRESHOLD (default 70). Exact self-matches
# are skipped: step 1 of run-ingest.sh appends this run's new slugs to the index
# BEFORE the lint runs, so without the skip every new slug would match itself at
# 100%. A page that genuinely collides with a pre-existing file is reported by
# the manifest as 'modified', not 'created', so skipping created==existing pairs
# can never mask a real collision.
# ---------------------------------------------------------------------------
check_duplicates() {
  # Advisory duplicate detector: compares slugs of pages the manifest marks
  # "created" against entity/concept slugs linked from wiki/index.md and warns
  # on near matches. Always returns 0.
  local manifest="$1"

  # Nothing to do without a manifest or without jq.
  if [[ ! -f "$manifest" ]]; then
    return 0
  fi
  if ! command -v jq >/dev/null 2>&1; then
    return 0
  fi

  # Leaf slug (file name without .md) of every page with status "created".
  local -a created=()
  local leaf
  while IFS= read -r leaf; do
    if [[ -n "$leaf" ]]; then
      created+=("$leaf")
    fi
  done < <(jq -r '.pages[]? | select(.status=="created") | .path
| split("/")[-1] | sub("\\.md$";"")' "$manifest" 2>/dev/null)

  # Slugs of [[entities/...]] and [[concepts/...]] links found in the index
  # (the first such link on each line).
  local -a catalogued=()
  local row
  if [[ -f "wiki/index.md" ]]; then
    while IFS= read -r row; do
      if [[ $row =~ \[\[(entities|concepts)/([a-z0-9-]+)\]\] ]]; then
        catalogued+=("${BASH_REMATCH[2]}")
      fi
    done < "wiki/index.md"
  fi

  # Either side empty: there is nothing to compare.
  if (( ${#created[@]} == 0 || ${#catalogued[@]} == 0 )); then
    return 0
  fi

  local limit="${KG_DUP_THRESHOLD:-70}"
  local candidate known score
  for candidate in "${created[@]}"; do
    for known in "${catalogued[@]}"; do
      # Identical slugs are skipped so a slug never reports itself.
      if [[ "$candidate" != "$known" ]]; then
        score=$(similarity "$candidate" "$known")
        if (( score > limit )); then
          warn "Possible duplicate: '${candidate}' ≈ '${known}' (${score}% similar) — review in PR"
        fi
      fi
    done
  done

  return 0
}

View file

@ -4,9 +4,6 @@
# Directory structure creation and template rendering engine.
# =============================================================================
# Canonical directory layout lives in one place (lib/structure.sh).
source "$(dirname "${BASH_SOURCE[0]}")/structure.sh"
render_template() {
local template_file="$1"
local output_file="$2"
@ -16,27 +13,17 @@ render_template() {
local content
content=$(<"$template_file")
# HARDENING: collapse any “spaced” placeholders from a formatter
# { { KEY } } -> {{KEY}} (KEY = UPPERCASE/underscore)
# Defense in depth: if Prettier or a copy-paste breaks the syntax again,
# the scaffold fixes itself. sed is a core utility (like tr/date already used here).
content=$(sed -E 's/\{[[:space:]]*\{[[:space:]]*([A-Z_]+)[[:space:]]*\}[[:space:]]*\}/{{\1}}/g' <<<"$content")
# Defaults (:-) so master-repo templates render even when GENOME_* are unset
# (scaffold_master runs before any genome; set -u would otherwise abort here).
local genome_name_upper
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME:-}")
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME}")
# Placeholder replacement
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME:-}}"
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME}}"
content="${content//\{\{GENOME_NAME_UPPER\}\}/${genome_name_upper}}"
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC:-}}"
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL:-}}"
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER:-}}"
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL:-}}"
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO:-}}"
# linked project reference (optional) — empty registry field renders as 'none'
content="${content//\{\{LINKED_PROJECT\}\}/${GENOME_LINKED:-none}}"
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC}}"
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL}}"
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER}}"
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL}}"
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO}}"
content="${content//\{\{DATE\}\}/$(date +%Y-%m-%d)}"
mkdir -p "$(dirname "$output_file")"
@ -45,9 +32,13 @@ render_template() {
scaffold_genome() {
local base="$1"
local dirs=(
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
)
info "Building directory structure in ${base}..."
for dir in "${GENOME_DIRS[@]}"; do
for dir in "${dirs[@]}"; do
mkdir -p "${base}/${dir}"
touch "${base}/${dir}/.gitkeep"
done
@ -64,11 +55,8 @@ scaffold_genome() {
install_precommit_hook() {
local repo_path="$1"
local hooks_dir
hooks_dir="$(git -C "$repo_path" rev-parse --git-path hooks)"
local hook_path="${hooks_dir}/pre-commit"
local hook_path="${repo_path}/.git/hooks/pre-commit"
mkdir -p "$hooks_dir"
cp "${TEMPLATES_DIR}/pre-commit.sh" "$hook_path"
chmod +x "$hook_path"
success "Pre-commit security hook installed at: $hook_path"

View file

@ -1,79 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# lib/structure.sh
# Single source of truth for the canonical genome directory layout, plus the
# verify/sync helpers used by scripts/verify-genomes.sh.
#
# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources
# this file and builds new genomes from GENOME_DIRS, so scaffolding and the
# structure check can never drift apart.
# =============================================================================
# NOTE — Return-code smell
# Several functions in this file (and in lint.sh) use the return code as a
# numeric counter (e.g. return $missing). This is a known smell: exit codes
# wrap at 256 and conflate "count of problems" with "exit status". At the
# current scale (<10 problems per run) the wrap-around risk is zero, so we
# accept it pragmatically. If counts ever grow, switch to stdout counters
# or dedicated global variables.
# Canonical directories every genome must have.
# raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned,
# contract-bound layout the lint, the index sections and the ingest skill depend on.
GENOME_DIRS=(
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
)
# ---------------------------------------------------------------------------
# structure_report <base>
# Reports drift of <base> against GENOME_DIRS.
# - missing canonical dir → counted as drift (returns non-zero)
# - extra dir under raw/ or wiki/ → warning only (does not fail)
# Returns the number of MISSING canonical directories.
# ---------------------------------------------------------------------------
structure_report() {
  # Report drift of <base> ($1) against GENOME_DIRS.
  #   - a missing canonical directory is warned about and counted
  #   - any other directory found under raw/ or wiki/ is reported via info only
  # Returns the number of missing canonical directories as the exit status,
  # clamped to 255 so a large count can never wrap around to 0 ("no drift").
  local base="$1"
  local missing=0
  local d   # keep the loop variable out of the caller's scope

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      warn "missing: ${d}"
      missing=$((missing + 1))
    fi
  done

  # Extra directories (drift the other way) — informational only.
  # The canon is flattened into one space-padded string so membership is a
  # simple substring test on " <dir> ".
  local canon=" ${GENOME_DIRS[*]} "
  while IFS= read -r d; do
    d="${d#"${base}/"}"
    [[ "$canon" == *" ${d} "* ]] && continue
    info "extra (not in canon): ${d}"
  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null)

  # Exit statuses are taken modulo 256; clamp instead of wrapping.
  if (( missing > 255 )); then
    missing=255
  fi
  return "$missing"
}
# ---------------------------------------------------------------------------
# structure_sync <base>
# Creates any MISSING canonical directories (idempotent). Never deletes —
# retiring a bucket is a deliberate, contract-aware change to GENOME_DIRS +
# the templates, not an automatic prune.
# ---------------------------------------------------------------------------
structure_sync() {
  # Create every canonical directory from GENOME_DIRS that is missing under
  # <base> ($1), dropping a .gitkeep into each new one. Never deletes anything.
  # Returns 0 on success, 1 if a directory or its .gitkeep could not be created.
  local base="$1"
  local added=0
  local d   # keep the loop variable out of the caller's scope

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      # Only claim success when both steps actually worked; previously a failed
      # mkdir/touch was still reported as "created" when errexit was off.
      if ! mkdir -p "${base}/${d}" || ! touch "${base}/${d}/.gitkeep"; then
        warn "could not create: ${d}"
        return 1
      fi
      success "created: ${d}"
      added=$((added + 1))
    fi
  done

  if [[ $added -eq 0 ]]; then
    info "already in sync: ${base}"
  fi
  return 0
}

View file

@ -15,7 +15,6 @@ provider_create_repo() {
local name="$1"
local desc="$2"
local private="$3"
local auto_init="${4:-false}" # genomi: true (submodule add esige un branch). master: false (git init locale + push).
local http_code
http_code=$(curl -s -o /dev/null -w "%{http_code}" \
@ -26,7 +25,7 @@ provider_create_repo() {
\"name\": \"${name}\",
\"description\": \"${desc}\",
\"private\": ${private},
\"auto_init\": ${auto_init}
\"auto_init\": false
}")
case "$http_code" in

View file

@ -12,24 +12,16 @@ _REGISTRY_LOADED=1
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Dynamic paths
WORK_DIR="${PROJECT_ROOT}"
WORK_DIR="${HOME}/knowledge-genome-setup"
KEYS_DIR="${WORK_DIR}/keys"
TEMPLATES_DIR="${PROJECT_ROOT}/templates"
LIB_DIR="${PROJECT_ROOT}/lib"
PROVIDERS_DIR="${PROJECT_ROOT}/providers"
# --- GENOME REGISTRY ---
# Format: "name|description|linked_repo|cross_source"
# - linked_repo: OPTIONAL. Leave empty for knowledge-only genomes.
# - cross_source: "yes" or "no" (default: no). Controls whether the collector
# may read this genome as a source during cross-genome pulls.
#
# HOW TO CUSTOMIZE:
# Replace the placeholder below with your actual genome domains.
# Example: "genome-work|Work notes and architecture logs||no"
# "genome-finance|Personal finance|user/repo-finance|no"
# Format: "name|description"
GENOMES=(
# Disposable sandbox: exercise the full pipeline (ingest -> PR) end-to-end.
# Created by `make setup`. Replace with real domains once the circle is validated.
"genome-test|Disposable sandbox for pipeline testing||no"
"genome-dev|Web development, TUI, Angular, software architecture"
"genome-finance|Personal finance, investments, market analysis"
"genome-homelab|Keru infrastructure, network configs, architecture logs"
)

25
scripts/add-genome.sh Executable file → Normal file
View file

@ -11,37 +11,16 @@ source "registry.sh"
GENOME_NAME="${1:-}"
GENOME_DESC="${2:-}"
GENOME_LINKED="${3:-}" # optional: linked project repo reference
GENOME_CROSS_SOURCE="${4:-no}" # optional: cross_source flag (default: no)
# 1. Check mandatory arguments first
if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
error "Missing arguments."
echo "Usage: $0 <genome-name> <description> [linked-repo] [cross_source]"
echo " cross_source: yes|no (default: no)"
exit 1
fi
# 2. Then validate the flag if a non-default value was passed
if [[ "$GENOME_CROSS_SOURCE" != "yes" && "$GENOME_CROSS_SOURCE" != "no" ]]; then
error "Invalid cross_source value: $GENOME_CROSS_SOURCE"
echo "cross_source must be 'yes' or 'no'"
echo "Usage: $0 <genome-name> <description>"
exit 1
fi
step "Adding New Genome: ${GENOME_NAME}"
# Build a 4-field registry entry (linked_repo may be empty, cross_source defaults to no)
GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}|${GENOME_CROSS_SOURCE}")
# NOTE — Maintenance smell
# We source setup-genomes.sh as a library/orchestrator hybrid. This works because:
# - registry.sh is guarded against double-source (idempotent guard)
# - setup-genomes.sh checks WORK_DIR before re-sourcing registry.sh
# - GENOMES is built locally just before the source, so it is not clobbered
# However, sourcing an orchestration script as a library makes the control flow
# harder to trace. If this grows, refactor into a shared function (e.g. setup_one_genome)
# called by both add-genome.sh and setup-genomes.sh.
GENOMES=("${GENOME_NAME}|${GENOME_DESC}")
source "scripts/setup-genomes.sh"

0
scripts/lint-genomes.sh Executable file → Normal file
View file

18
scripts/setup-genomes.sh Executable file → Normal file
View file

@ -19,15 +19,13 @@ source "providers/${PROVIDER}.sh"
step "Processing Genome Registry"
for entry in "${GENOMES[@]}"; do
# 4-field format: name|description|linked_repo|cross_source linked_repo optional (may be empty); cross_source defaults to "no".
IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE <<< "$entry"
GENOME_CROSS_SOURCE="${GENOME_CROSS_SOURCE:-no}"
export GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE
IFS='|' read -r GENOME_NAME GENOME_DESC <<< "$entry"
export GENOME_NAME GENOME_DESC
info "Processing: ${GENOME_NAME} (cross_source: ${GENOME_CROSS_SOURCE})..."
info "Processing: ${GENOME_NAME}..."
# 1. Remote Creation (Idempotent)
provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true" "true"
provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true"
SSH_URL=$(provider_ssh_url "${GENOME_NAME}")
GENOME_PATH="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"
@ -40,8 +38,6 @@ for entry in "${GENOMES[@]}"; do
cd "${GENOME_NAME}"
git switch -C main
# IMPORTANT: Initialize git-crypt BEFORE creating any files
gcrypt_init
@ -51,18 +47,12 @@ for entry in "${GENOMES[@]}"; do
# Initial genome push
git add .
git commit -m "feat: initial scaffold and git-crypt init for ${GENOME_NAME}"
git push -u origin main
# Key export and instructions
gcrypt_export_key "${GENOME_NAME}"
gcrypt_print_key_instructions "${GENOME_NAME}"
cd "${WORK_DIR}/${MASTER_REPO}"
git add .gitmodules "${GENOME_NAME}"
git diff --cached --quiet || git commit -m "chore: register submodule ${GENOME_NAME}"
git push
# Commit the submodule reference in the master repo
cd "${WORK_DIR}/${MASTER_REPO}"
git commit -m "feat: add ${GENOME_NAME} as submodule"

2
scripts/setup-master.sh Executable file → Normal file
View file

@ -37,7 +37,5 @@ scaffold_master "."
git add .
git commit -m "chore: initialize master scaffold" || info "No changes to commit in master."
git branch -M main
# 3. Initial Push
git push -u origin main

0
scripts/setup.sh Executable file → Normal file
View file

View file

@ -1,50 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# scripts/verify-genomes.sh
# Check (default) or --sync the directory structure of every registered genome
# against the canonical layout in lib/structure.sh.
#
# bash scripts/verify-genomes.sh # report drift, non-zero exit on drift
# bash scripts/verify-genomes.sh --sync # create missing dirs everywhere (safe)
#
# No hardware/LLM involved — pure structure check. Run anywhere.
# =============================================================================
set -euo pipefail

source "lib/output.sh"
source "globals.env"
source "registry.sh"
source "lib/structure.sh"

# Default is a read-only report; passing --sync as the first argument switches
# to creating the missing directories instead.
MODE="verify"
if [[ "${1:-}" == "--sync" ]]; then
  MODE="sync"
fi

step "Genome structure: ${MODE}"

TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do
  # Only the first |-separated registry field (the genome name) is used here.
  IFS='|' read -r GENOME_NAME _ <<< "$entry"
  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  if [[ ! -d "$genome_dir" ]]; then
    warn "not found locally, skipping: ${GENOME_NAME}"
    continue
  fi

  info "Genome: ${GENOME_NAME}"
  case "$MODE" in
    sync)
      structure_sync "$genome_dir"
      ;;
    *)
      # structure_report signals the missing count through its exit status;
      # the `||` keeps a non-zero status from tripping `set -e`.
      missing_here=0
      structure_report "$genome_dir" || missing_here=$?
      TOTAL_MISSING=$((TOTAL_MISSING + missing_here))
      ;;
  esac
done

echo ""
# Drift is only fatal in verify mode.
if [[ "$MODE" != "sync" && $TOTAL_MISSING -ne 0 ]]; then
  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
  exit 1
fi

if [[ "$MODE" == "sync" ]]; then
  success "Structure sync complete."
else
  success "Structure verified: all genomes match the canonical layout."
fi

View file

@ -1,92 +0,0 @@
---
name: ingest
description: Semantic pass of a single raw source into the current genome's wiki. The model ONLY extracts structured semantic content (summary, entities, concepts, contradictions) and returns one JSON object — it does not write files, produce frontmatter, slugs, git, index, log or PRs. A deterministic conform script (ingest-semantic.py) turns that JSON into properly-structured wiki pages + a manifest; run-ingest.sh then does index/log/lint/PR.
license: see repository
compatibility: Driven by scripts/ingest-semantic.py (one schema-constrained call to a local model via Ollama /api/chat). NO agent tools are used — no read, no edit, no bash. The model never touches the filesystem. PRIVATE_CONTEXT must be disabled.
metadata:
framework: knowledge-genome
phase: "1-ingest-semantic"
mode: structured-json # lightweight agent + deterministic conform
---
# Ingest — semantic pass (structured-JSON)
This is the **light** semantic pass. The model's only job is to read one source
and return a single JSON object describing what the source contains. It does
**not** write files, choose paths, produce frontmatter, pick slugs, or touch
git / index / log / PRs. All structure is owned by `scripts/ingest-semantic.py`,
which conforms the model's JSON into wiki pages with enforced kebab-case paths
and frontmatter, and writes `.ingest-manifest.json` in the exact schema
`run-ingest.sh` consumes. This keeps the agent minimal and makes the output
impossible to mis-shape, regardless of how small or quirky the local model is.
Pipeline:
cd <genome checkout>
scripts/ingest-semantic.py <genome> raw/articles/<file>.md # phase 1 (this)
scripts/run-ingest.sh <genome> # phase 2 (deterministic)
## Pre-flight (enforced by ingest-semantic.py, not by the model)
1. Refuse if the source path is under any `private/` directory.
2. Refuse if `PRIVATE_CONTEXT` is not `disabled`.
3. Confirm the file exists under `raw/` and is non-empty.
## What the model returns (the only contract)
A single JSON object, decoding-constrained to this shape via Ollama's `format`:
```json
{
"source_title": "Human title of the source",
"source_summary": "Faithful, self-contained prose summary of the source.",
"key_points": ["Concrete fact or claim worth indexing", "..."],
"entities": [
{ "name": "Acme", "kind": "org", "description": "Vendor referenced by the source." }
],
"concepts": [
{ "name": "JWT RS256", "description": "Asymmetric token signing scheme the source uses." }
],
"contradictions": [
{ "concept": "auth", "description": "Source claims X, contradicting the existing claim Y." }
],
"reasoning": "One sentence for the log: what this source adds.",
"pr_summary": "One or two sentences describing this ingest for the PR."
}
```
Field rules (guidance for the model; the script enforces _structure_):
- `source_summary` is faithful and in the source's own language. No markdown
headings inside any description field. No padding.
- `entities` = every person, tool, org or product the source names. `kind` is one
of `person|tool|org|product`. `description` = one or two factual sentences.
- `concepts` = every pattern, theory, decision or named idea the source explains.
- `contradictions` = only a claim that directly contradicts a widely-known fact
or contradicts the source itself; otherwise an empty list.
- Names are the natural name of the thing. The script normalises them to
kebab-case and guarantees a single stable page per entity/concept.
## What the conform script guarantees (so the model cannot break it)
- **Paths:** `wiki/sources/<slug>.md`, `wiki/entities/<slug>.md`,
`wiki/concepts/<slug>.md`, `wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`.
- **Slugs:** minimal kebab-case (lowercase, digits, hyphens; no spaces /
underscores / capitals).
- **Frontmatter:** `type`, `domain: <genome>`, `maturity: draft`,
`last_updated: <today>`, `private: false`, `tags`.
- **Create-vs-update:** existing entity/concept pages are **appended to** (a
section attributed to the new source), never overwritten. The source page is
the canonical summary of that exact source and is (re)written.
- **Manifest:** `.ingest-manifest.json` with `raw_source`, `reasoning`,
`pr_summary`, `contradictions` (string), and `pages[]` (`path`, `summary`,
`status`, plus `maturity` on created pages) — exactly what `run-ingest.sh`
validates.
The model name is recorded by the orchestrator (`INGEST_MODEL`); the model does
not self-report it. No `run_id`, branch, commit or PR is invented here — those
belong to phase 2.
> Interactive use of `pi` (TUI) is unaffected and still available for manual
> exploration. The **automated** ingest path no longer relies on `pi` or on
> native tool-calling: it is the single schema-constrained call above.

View file

@ -1,152 +0,0 @@
#!/usr/bin/env python3
# =============================================================================
# skills/ingest/scripts/index-append.py
# Insert OR remove an entry line in wiki/index.md, keeping the target section
# alphabetically ordered. Bumps frontmatter last_updated.
#
# index-append.py --section Sources \
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
# index-append.py --remove 'sources/foo' # delete the entry by wikilink
# =============================================================================
import argparse
import datetime
import re
import sys
ENTRY_RE = re.compile(r"^- \[\[")
LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
HEADER_RE = re.compile(r"^## ")
def bump_last_updated(lines, today):
    """Set ``last_updated`` to ``today`` inside the first ``---`` fenced block.

    ``lines`` is modified in place. The first line that is exactly ``---``
    (after stripping) opens the block and the next one closes it. Every
    ``last_updated:`` line in between is rewritten. If the block closes without
    one, the key is inserted just before the closing fence. A warning goes to
    stderr when no opening fence exists at all. Returns ``None``.
    """
    stamp = f"last_updated: {today}"
    opened = False
    close_at = None
    replaced = False

    for pos, text in enumerate(lines):
        is_fence = text.strip() == "---"
        if is_fence and opened:
            # Second fence: the block is complete, stop scanning.
            close_at = pos
            break
        if is_fence:
            opened = True
        elif opened and text.startswith("last_updated:"):
            lines[pos] = stamp
            replaced = True

    if not opened:
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
        return

    # Self-heal only when the block was properly closed and had no key.
    if close_at is not None and not replaced:
        lines.insert(close_at, stamp)
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)
def do_remove(lines, link, today):
    """Return ``lines`` without any entry line whose wikilink equals ``link``.

    ``last_updated`` is bumped first (in place, on ``lines``). Removing a link
    that is not present is not an error: the list comes back unchanged apart
    from the bump, and a message says so.
    """
    bump_last_updated(lines, today)

    def points_at_link(text):
        # True only for "- [[<link>]]..." lines that target exactly `link`.
        hit = LINK_RE.match(text)
        return bool(hit) and hit.group(1) == link

    survivors = [text for text in lines if not points_at_link(text)]
    dropped = len(lines) - len(survivors)

    if dropped:
        print(f"index-append: removed [[{link}]] ({dropped} line(s))")
    else:
        # Idempotent: the goal state (entry absent) already holds.
        print(f"index-append: [[{link}]] not present, nothing to remove")
    return survivors
def do_append(lines, section, entry, today):
    """Insert ``entry`` into the ``## <section>`` block, keeping entries sorted.

    ``last_updated`` is bumped first (in place, on ``lines``). Returns the new
    list of lines, the unchanged ``lines`` when the exact entry is already
    there, or ``None`` when no header starting with ``section`` exists. An
    existing entry with the same wikilink but different text is replaced.
    """
    bump_last_updated(lines, today)

    def link_of(text):
        # Wikilink target of an entry line, or None if it has no closed link.
        found = LINK_RE.match(text)
        return found.group(1) if found else None

    # Section bounds: [head] is the header line, [tail] the next header or EOF.
    head = next(
        (pos for pos, text in enumerate(lines)
         if HEADER_RE.match(text) and text[3:].startswith(section)),
        None,
    )
    if head is None:
        print(f"index-append: section '{section}' not found", file=sys.stderr)
        return None
    tail = next(
        (pos for pos in range(head + 1, len(lines)) if HEADER_RE.match(lines[pos])),
        len(lines),
    )

    # Split the section body into free-form intro lines and entry lines.
    intro, entries = [], []
    for text in lines[head + 1:tail]:
        (entries if ENTRY_RE.match(text) else intro).append(text)

    wanted = link_of(entry)
    if wanted is None:
        # No wikilink to key on: fall back to exact-line comparison.
        if entry in entries:
            print("index-append: entry already present, skipping")
            return lines
        entries.append(entry)
    else:
        slot = next(
            (k for k, text in enumerate(entries) if link_of(text) == wanted),
            None,
        )
        if slot is None:
            entries.append(entry)
        elif entries[slot] == entry:
            print("index-append: entry already present, skipping")
            return lines
        else:
            entries[slot] = entry

    entries.sort(key=str.casefold)

    # Collapse trailing blank intro lines so exactly one blank precedes entries.
    while intro and not intro[-1].strip():
        intro.pop()

    print(f"index-append: added to {section}")
    return lines[:head + 1] + intro + [""] + entries + [""] + lines[tail:]
def main() -> int:
    """CLI entry point: apply one insert or one removal to the index file.

    Returns 0 on success, 1 when the file or the target section is missing,
    and 2 on invalid argument combinations.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--section", help="Section name (required with --entry)")
    parser.add_argument("--entry", help="Full index line to insert")
    parser.add_argument("--remove", metavar="WIKILINK",
                        help="Remove the entry with this wikilink, e.g. sources/foo")
    parser.add_argument("--file", default="wiki/index.md")
    opts = parser.parse_args()

    removing = bool(opts.remove)
    appending = bool(opts.entry)

    # Exactly one of the two operations must be requested.
    if removing == appending:
        print("index-append: provide exactly one of --entry or --remove", file=sys.stderr)
        return 2
    if appending and not opts.section:
        print("index-append: --entry requires --section", file=sys.stderr)
        return 2

    try:
        with open(opts.file, encoding="utf-8") as src:
            lines = src.read().splitlines()
    except FileNotFoundError:
        print(f"index-append: not found: {opts.file}", file=sys.stderr)
        return 1

    today = datetime.date.today().isoformat()
    if removing:
        result = do_remove(lines, opts.remove, today)
    else:
        result = do_append(lines, opts.section, opts.entry, today)

    # None means the target section was not found; leave the file untouched.
    if result is None:
        return 1

    with open(opts.file, "w", encoding="utf-8") as dst:
        dst.write("\n".join(result) + "\n")
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -1,374 +0,0 @@
#!/usr/bin/env python3
# =============================================================================
# skills/ingest/scripts/ingest-semantic.py
# Phase 1 (semantic) of the Knowledge Genome ingest — light agent + deterministic conform.
#
# - FIXED: Add 'title:' field to frontmatter (lint was complaining about missing title)
# - NEW: Inject existing index (entity/concept names) into prompt to prevent duplicates
# - NEW: Richer prompt asking for 2-4 sentences per description (not 1-2), with concrete details
# - Enhanced schema to handle longer descriptions naturally
#
# The model does ONLY semantic extraction and returns ONE schema-constrained JSON
# object (no tools, no file writing, no git, no frontmatter, no slugs). This script
# then CONFORMS that output deterministically into wiki pages with enforced
# frontmatter + kebab-case paths, and writes a .ingest-manifest.json in EXACTLY the
# schema run-ingest.sh expects.
#
# cd <genome checkout>
# ingest-semantic.py <genome> raw/articles/<file>.md # phase 1 (this)
# run-ingest.sh <genome> # phase 2 (deterministic)
#
# Emits a single JSON status line on stdout (for n8n / logs).
# =============================================================================
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
# Chat endpoint the request is POSTed to.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
# Model tag sent in the payload and echoed in the final status line.
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
# Context window: sent as the num_ctx option and used by the source-size pre-flight check.
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
# Timeout passed to urlopen() for each request attempt.
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
# INGEST_THINK: "false" disables a reasoning model's thinking trace, so models like
# gemma / qwq / qwen3 emit only the structured JSON (no truncation from long thinking).
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
THINK = os.environ.get("INGEST_THINK")
# ISO date used for last_updated in frontmatter and in conflict page file names.
TODAY = datetime.date.today().isoformat()
# Optional maintainer feedback; when non-empty it is prepended to the user message.
FEEDBACK = os.environ.get("INGEST_FEEDBACK", "").strip()
def die(stage, reason):
    """Print a one-line JSON error record on stdout and exit with status 1.

    Args:
        stage: short name of the phase that failed (e.g. "preflight", "model").
        reason: human-readable explanation.
    """
    failure = {"status": "error", "stage": stage, "reason": reason}
    print(json.dumps(failure))
    sys.exit(1)
# --- args + pre-flight (mirror the old skill's guards, enforced in code) ---
if len(sys.argv) < 3:
    die("args", "usage: ingest-semantic.py <genome> <raw/rel/path.md>")
genome = sys.argv[1]
# Normalise the path rather than calling str.lstrip("./"): lstrip treats its argument
# as a SET of characters, so it also ate the dots of "../" and any leading "/",
# silently turning "../raw/x.md" or "/raw/x.md" into "raw/x.md" (a different file).
# normpath drops a leading "./", collapses "//" and resolves "a/../b", so the guards
# below judge the path that is actually opened ("raw/../private/x.md" is refused,
# "raw/../wiki/x.md" no longer passes the raw/ check).
raw_rel = os.path.normpath(sys.argv[2])
if "private/" in raw_rel or raw_rel.startswith("private"):
    die("preflight", "refusing private source: " + raw_rel)
if os.environ.get("PRIVATE_CONTEXT", "disabled") != "disabled":
    die("preflight", "PRIVATE_CONTEXT must be disabled")
if not raw_rel.startswith("raw/"):
    die("preflight", "source must live under raw/: " + raw_rel)
if not os.path.isfile(raw_rel):
    die("preflight", "source not found in cwd: " + raw_rel)
try:
    with open(raw_rel, "r", encoding="utf-8") as fh:
        source_text = fh.read()
except (OSError, UnicodeDecodeError) as exc:
    # Keep the "single JSON status line" contract instead of dumping a traceback.
    die("preflight", f"cannot read source as UTF-8: {raw_rel}: {exc}")
if not source_text.strip():
    die("preflight", "source is empty: " + raw_rel)
# --- pre-flight check: if the prompt exceeds context window, exit cleanly with stage:input ---
# Conservative estimate: ~4 chars/token for mixed IT/EN text
SAFETY_MARGIN = 4096  # room for system prompt + JSON response
MAX_SOURCE_TOKENS = NUM_CTX - SAFETY_MARGIN
MAX_SOURCE_CHARS = MAX_SOURCE_TOKENS * 4
if len(source_text) > MAX_SOURCE_CHARS:
    die("input", f"source too large ({len(source_text)} chars, limit ~{MAX_SOURCE_CHARS}). "
                 f"Use the SPLIT directive or divide the document.")
# --- read existing index to avoid duplicate slugs ---
# Slugs are harvested from [[entities/<slug>]] and [[concepts/<slug>]] wikilinks in
# wiki/index.md. A missing or unreadable index simply leaves both sets empty.
existing_entities = set()
existing_concepts = set()
if os.path.isfile("wiki/index.md"):
    try:
        with open("wiki/index.md", "r", encoding="utf-8") as index_fh:
            index_body = index_fh.read()
    except Exception:
        index_body = ""  # index not readable; that's OK
    existing_entities.update(re.findall(r"\[\[entities/([a-z0-9\-]+)\]\]", index_body))
    existing_concepts.update(re.findall(r"\[\[concepts/([a-z0-9\-]+)\]\]", index_body))
def slugify(s):
    """Turn a free-form name into a kebab-case slug.

    The input is trimmed and lower-cased, every run of characters outside
    ``a-z0-9`` becomes a single hyphen, and leading/trailing hyphens are removed.
    ``None`` or a name with no usable characters yields ``"untitled"``.
    """
    lowered = (s or "").strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    collapsed = re.sub(r"-+", "-", hyphenated)
    slug = collapsed.strip("-")
    return slug if slug else "untitled"
def twords(s, n=20):
    """Collapse whitespace and truncate to at most ``n`` words.

    Used for index entry summaries. Text that fits is returned unchanged (after
    whitespace normalisation); longer text is cut after ``n`` words and marked
    with a trailing ellipsis so a reader can tell the summary was shortened.
    The previous code appended an empty string there, i.e. no marker at all.

    Args:
        s: the text to shorten; ``None`` is treated as empty.
        n: maximum number of words to keep.

    Returns:
        str: the normalised, possibly truncated text.
    """
    s = " ".join((s or "").split())
    w = s.split(" ")
    return s if len(w) <= n else " ".join(w[:n]) + "…"
def yaml_dq(s):
    """Return ``s`` rendered as a single-line YAML double-quoted scalar.

    A bare scalar breaks on many title characters (a colon followed by a space
    reads as a mapping; a hash sign, a leading dash or question mark, and quotes
    cause trouble too). Wrapping the text in double quotes, with backslashes and
    double quotes escaped, makes any title valid YAML. All whitespace runs,
    newlines included, are collapsed to single spaces so the value stays on one
    line. ``None`` renders as an empty quoted scalar.
    """
    single_line = " ".join((s or "").split())
    # One pass: every backslash and every double quote gets a backslash in front.
    escapes = str.maketrans({"\\": "\\\\", '"': '\\"'})
    return '"' + single_line.translate(escapes) + '"'
def frontmatter(ptype, title, tags):
    """Build the YAML frontmatter block for a wiki page.

    Args:
        ptype: page type written to the ``type`` field.
        title: page title; emitted as a double-quoted YAML scalar.
        tags: iterable of tag strings; empty values are dropped and the rest
            are de-duplicated and sorted.

    Returns:
        str: the block from the opening ``---`` to the closing ``---`` line,
        newline-terminated. ``domain`` comes from the module-level ``genome``
        and ``last_updated`` from ``TODAY``; maturity is always ``draft`` and
        private is always ``false``.
    """
    unique_tags = sorted({tag for tag in tags if tag})
    fields = [
        "---",
        f"title: {yaml_dq(title)}",
        f"type: {ptype}",
        f"domain: {genome}",
        "maturity: draft",
        f"last_updated: {TODAY}",
        "private: false",
        "tags: [" + ", ".join(unique_tags) + "]",
        "---",
    ]
    return "\n".join(fields) + "\n"
def write_new(path, ptype, title, body, tags):
    """Write a wiki page at ``path``, creating parent directories as needed.

    The file is opened in write mode, so an existing file at ``path`` is
    replaced. Content is the frontmatter, a blank line, an H1 with the title,
    a blank line and the body, ending with a newline.
    """
    parent_dir = os.path.dirname(path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(path, "w", encoding="utf-8") as page:
        page.write(frontmatter(ptype, title, tags) + f"\n# {title}\n\n{body}\n")
def append_section(path, source_slug, body):
    """Append a section attributed to ``source_slug`` to an existing page.

    Existing content is never overwritten: the new text is added at the end
    under a "From [[sources/<slug>]]" heading. Afterwards the first line in the
    file that starts with ``last_updated:`` is rewritten to today's date; that
    bump is best-effort and any error during it is ignored.
    """
    section = f"\n\n## From [[sources/{source_slug}]]\n\n{body}\n"
    with open(path, "a", encoding="utf-8") as page:
        page.write(section)
    try:  # best-effort bump of last_updated in the existing frontmatter
        with open(path, "r", encoding="utf-8") as page:
            current = page.read()
        bumped = re.sub(r"(?m)^last_updated:.*$", "last_updated: " + TODAY, current, count=1)
        with open(path, "w", encoding="utf-8") as page:
            page.write(bumped)
    except Exception:
        pass
# --- the semantic contract ---
# System message for the model. This is a str.format() template: call_model() fills
# the {existing_entities} and {existing_concepts} placeholders, so any literal brace
# added to this text must be doubled ({{ / }}) or .format() will raise.
SYSTEM_PROMPT = """You perform the SEMANTIC PASS of a single source into a knowledge wiki.
Read the source and return ONLY structured data describing what it contains.
You do not write files, you do not produce frontmatter, and you do not invent
paths, slugs, branches, commits or PRs a deterministic script does all of that.
Rules:
- source_summary: a faithful, self-contained summary of the source, in the
source's own language. Plain prose, NO markdown headings. 2-4 sentences,
with concrete details. Preserve the essence and nuance of the source.
- key_points: 3-5 concrete facts or claims worth indexing; no padding.
- entities: every person, tool, organisation or product the source names.
kind is one of person|tool|org|product. description is 2-3 factual sentences
with specifics. No markdown headings inside the description.
- concepts: every pattern, theory, decision or named idea the source explains.
description is 2-3 factual sentences with concrete examples or context.
- contradictions: ONLY when the source makes a claim that directly contradicts a
widely-known fact or contradicts itself. Otherwise return an empty list.
- Names must be the natural name of the thing; the script will normalise them.
If the source references an entity or concept already in the wiki (see the list below),
use the EXACT name already present; do not invent a variant. This prevents duplicates.
Existing entities in this genome:
{existing_entities}
Existing concepts in this genome:
{existing_concepts}
Be faithful to the source. Be specific. Do not pad or improvise."""
# --- JSON schema -> constrained decoding (Ollama structured outputs) ---
def _object_array(properties, required):
    """Schema fragment: an array of objects with the given properties/required keys."""
    return {
        "type": "array",
        "items": {"type": "object", "properties": properties, "required": required},
    }


# Sent as the "format" field of the chat request. Only the title, summary, entities
# and concepts are mandatory; the remaining fields are optional.
SCHEMA = {
    "type": "object",
    "properties": {
        "source_title": {"type": "string"},
        "source_summary": {"type": "string"},
        "key_points": {"type": "array", "items": {"type": "string"}},
        "entities": _object_array(
            {
                "name": {"type": "string"},
                "kind": {"type": "string",
                         "enum": ["person", "tool", "org", "product"]},
                "description": {"type": "string"},
            },
            ["name", "description"],
        ),
        "concepts": _object_array(
            {
                "name": {"type": "string"},
                "description": {"type": "string"},
            },
            ["name", "description"],
        ),
        "contradictions": _object_array(
            {
                "concept": {"type": "string"},
                "description": {"type": "string"},
            },
            ["concept", "description"],
        ),
        "reasoning": {"type": "string"},
        "pr_summary": {"type": "string"},
    },
    "required": ["source_title", "source_summary", "entities", "concepts"],
}
def call_model(max_retries=2, base_delay=2.0):
    """Run the semantic pass against Ollama and return the parsed JSON object.

    Builds the chat payload (system prompt filled with the existing entity and
    concept names; user message with optional maintainer feedback plus the
    source text) and POSTs it to OLLAMA_URL with SCHEMA as the output format.

    Retried up to ``max_retries`` times with exponential backoff starting at
    ``base_delay`` seconds: transport errors, an unparseable API envelope, and
    a near-empty model answer (under 10 characters, treated as truncation).
    NOT retried: a longer answer that is not valid JSON, or valid JSON whose
    top-level value is not an object; those are model issues.

    Args:
        max_retries: number of retries after the first attempt.
        base_delay: delay in seconds before the first retry; doubles each time.

    Returns:
        dict: the decoded model output.

    Exits via ``die("model", ...)`` when no attempt produced a usable object.
    """
    existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
    existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
    prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
    user_content = (
        ("REVISION REQUESTED BY THE MAINTAINER (address this explicitly):\n"
         + FEEDBACK + "\n\n") if FEEDBACK else ""
    ) + (
        "Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
        + source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."
    )
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_content},
        ],
        "format": SCHEMA,
        "stream": False,
        "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
    }
    if THINK is not None:
        payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
    data = json.dumps(payload).encode("utf-8")
    last_error = None
    for attempt in range(max_retries + 1):
        if attempt > 0:
            delay = base_delay * (2 ** (attempt - 1))
            print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
            time.sleep(delay)
        req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                resp = json.loads(r.read().decode("utf-8"))
        except OSError as e:
            # OSError is the common base of URLError/HTTPError, TimeoutError, the
            # pre-3.10 socket.timeout raised while reading the body, and connection
            # resets. Catching only URLError/TimeoutError let those escape as tracebacks.
            last_error = f"connection/transport error: {e}"
            continue
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            last_error = f"invalid JSON from Ollama API: {e}"
            continue
        message = resp.get("message") if isinstance(resp, dict) else None
        content = ((message or {}).get("content") or "").strip()
        if content.startswith("```"):
            content = content.strip("`")
        # Drop anything before the first "{" (a fence language tag such as "json",
        # or a short preamble) so the decoder starts at the object.
        brace = content.find("{")
        if brace > 0:
            content = content[brace:]
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            last_error = f"model did not return valid JSON: {e}"
            if len(content) < 10:
                continue  # likely truncated -> retry
            break  # long but malformed -> model issue, stop
        if isinstance(parsed, dict):
            return parsed
        # Callers use .get() on the result; a list or scalar would crash them later.
        last_error = "model did not return valid JSON: top-level value is not an object"
        break
    die("model", last_error or "model call failed after retries")
# --- run the semantic pass ---
sem = call_model()
# Source of truth: slug from slug.sh --raw (deterministic, path-aware, collision-proof)
# A failing or missing slug.sh must surface as this script's JSON error line, like every
# other failure here, not as an uncaught traceback; an empty slug would otherwise
# produce the page "wiki/sources/.md".
try:
    source_slug = subprocess.check_output(
        ["bash", os.path.join(os.path.dirname(__file__), "slug.sh"), "--raw", raw_rel],
        text=True
    ).strip()
except (OSError, subprocess.CalledProcessError) as slug_err:
    die("slug", f"slug.sh failed for {raw_rel}: {slug_err}")
if not source_slug:
    die("slug", "empty or invalid slug for " + raw_rel)
# Hash of the raw bytes; stored in the source page as source_sha256.
with open(raw_rel, "rb") as f:
    src_sha = hashlib.sha256(f.read()).hexdigest()
pages = []
# 1. source page — canonical summary of THIS source (re)written
src_path = f"wiki/sources/{source_slug}.md"
src_status = "modified" if os.path.exists(src_path) else "created"
kp_lines = "\n".join("- " + p for p in (sem.get("key_points") or []) if p.strip())
src_body = (sem.get("source_summary") or "").strip()
if kp_lines:
    src_body += "\n\n## Key points\n\n" + kp_lines
src_body += f"\n\n## Source\n\n- [[{raw_rel}]]\n"
src_title = sem.get('source_title') or source_slug
# "or []" tolerates an explicit null for either list (handle() does the same).
src_tags = ([slugify(e.get("name", "")) for e in (sem.get("entities") or [])]
            + [slugify(c.get("name", "")) for c in (sem.get("concepts") or [])])[:8]
os.makedirs("wiki/sources", exist_ok=True)
with open(src_path, "w", encoding="utf-8") as f:
    fm = frontmatter("source", src_title, src_tags)
    # Inject tracking fields before the closing '---'. The opening '---' has no newline
    # before it, so the first "\n---\n" in the block is the closing delimiter.
    fm = fm.replace("\n---\n", f"\nsource_path: {raw_rel}\nsource_sha256: {src_sha}\n---\n", 1)
    f.write(fm)
    f.write(f"\n# {src_title}\n\n{src_body}\n")
pages.append({"path": src_path,
              "summary": twords(src_title),
              "maturity": "draft", "status": src_status})
def handle(kind_dir, ptype, items):
    """Create or extend one wiki page per extracted item and record it in ``pages``.

    Args:
        kind_dir: wiki sub-directory ("entities" or "concepts").
        ptype: page type written to the frontmatter of new pages.
        items: list of dicts with "name" and "description"; ``None`` is allowed.

    Items without a name are skipped. If the page for the slugified name already
    exists, the description is appended as a section attributed to the current
    source (status "modified"); otherwise a new draft page is written with a
    Sources list (status "created").
    """
    for item in items or []:
        name = (item.get("name") or "").strip()
        if not name:
            continue
        desc = (item.get("description") or "").strip()
        path = f"wiki/{kind_dir}/{slugify(name)}.md"
        record = {"path": path, "summary": twords(desc)}
        if os.path.exists(path):
            append_section(path, source_slug, desc)
            record["status"] = "modified"
        else:
            page_body = desc + f"\n\n## Sources\n\n- [[sources/{source_slug}]]\n"
            write_new(path, ptype, name, page_body, [genome, ptype])
            record["maturity"] = "draft"
            record["status"] = "created"
        pages.append(record)
# 2. entities, 3. concepts
handle("entities", "entity", sem.get("entities", []))
handle("concepts", "concept", sem.get("concepts", []))
# 4. contradictions -> conflict pages (run-ingest routes wiki/queries/conflict-*)
# One page per reported contradiction, named after the slugified concept and today's date.
conflicts = sem.get("contradictions") or []
conf_slugs = []
for conflict in conflicts:
    concept_slug = slugify(conflict.get("concept", "unknown"))
    conf_slugs.append(concept_slug)
    path = f"wiki/queries/conflict-{concept_slug}-{TODAY}.md"
    conflict_body = ((conflict.get("description") or "").strip()
                     + f"\n\n## Source\n\n- [[sources/{source_slug}]]\n")
    write_new(path, "query", f"Conflict: {conflict.get('concept', '')}",
              conflict_body, [genome, "conflict"])
    pages.append({"path": path, "summary": "", "maturity": "draft",
                  "status": "created"})
if conflicts:
    contradictions_str = (f"{len(conflicts)} conflict file(s) created — "
                          + ", ".join(conf_slugs))
else:
    contradictions_str = "None"
# --- write the manifest in EXACTLY run-ingest.sh's schema ---
# reasoning / pr_summary fall back to a generic sentence when the model omitted them.
manifest = {
    "raw_source": raw_rel,
    "reasoning": sem.get("reasoning") or ("Ingest of " + raw_rel),
    "pr_summary": sem.get("pr_summary") or ("Semantic ingest of " + raw_rel),
    "contradictions": contradictions_str,
    "pages": pages,
}
with open(".ingest-manifest.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(manifest, indent=2, ensure_ascii=False))
# Final status line on stdout.
print(json.dumps({"status": "ok", "stage": "semantic",
                  "pages": len(pages), "model": MODEL,
                  "manifest": ".ingest-manifest.json"}))

View file

@ -1,57 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/log-append.sh
# Append one entry to the append-only ledger wiki/log.md, in the exact format
# defined by AGENTS.md / wiki-log.md. Never edits prior entries.
# A run_id is generated unless --run-id is given. If the ledger already contains
# that run_id the entry is NOT appended again, so a re-run with the same
# --run-id is a no-op.
#
# log-append.sh --type INGEST --subject "<slug>" --model "<model>" \
# --context "[[raw/x]]" --output "[[sources/x]]" \
# --reasoning "One sentence." [--run-id <id>]
#
# Env: LOG_FILE overrides the ledger path (default wiki/log.md).
# Stdout: one "run_id=<id>" line; diagnostics go to stderr.
# =============================================================================
set -euo pipefail
LOG_FILE="${LOG_FILE:-wiki/log.md}"
type="" subject="" model="" context="" output="" reasoning=""
# Every option takes exactly one value; anything unrecognised is an error.
while [[ $# -gt 0 ]]; do
case "$1" in
--type) type="$2"; shift 2 ;;
--subject) subject="$2"; shift 2 ;;
--model) model="$2"; shift 2 ;;
--context) context="$2"; shift 2 ;;
--output) output="$2"; shift 2 ;;
--reasoning) reasoning="$2"; shift 2 ;;
--run-id) run_id_arg="$2"; shift 2 ;;
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
esac
done
# --type and --subject are mandatory; the other fields get placeholder text below.
: "${type:?--type required}"
: "${subject:?--subject required}"
# Only these entry types are accepted.
case "$type" in
INGEST|LINT|QUERY|CONFLICT|CONFIG|SECURITY) ;;
*) echo "log-append: invalid TYPE '${type}'" >&2; exit 1 ;;
esac
# The ledger must already exist; this script never creates it.
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
# Use the caller's run_id if given, else the first UUID source that works:
# uuidgen, the kernel's random UUID file, then Python's uuid module.
run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
today="$(date +%Y-%m-%d)"
# Idempotency guard: fixed-string match on the exact "run_id: `<id>`" field.
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
echo "run_id=${run_id}"
exit 0
fi
# Entry layout: "## [date] TYPE | subject" heading followed by five bullet fields.
{
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
printf -- '- run_id: `%s`\n' "$run_id"
printf -- '- model: `%s`\n' "${model:-unknown}"
printf -- '- context_read: %s\n' "${context:-*(none)*}"
printf -- '- output_written: %s\n' "${output:-*(none)*}"
printf -- '- reasoning: %s\n' "${reasoning:-No reasoning provided.}"
} >> "$LOG_FILE"
echo "run_id=${run_id}"

View file

@ -1,129 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/open-pr.sh
# Branch, commit (conventional), push, and open a Forgejo PR for the wiki/ changes.
# Mirrors the API conventions of providers/forgejo.sh (token auth + http_code).
# Runs inside the genome checkout (cwd = genome root). Never touches main.
#
# open-pr.sh (--slug <slug> | --branch <branch>) --title "feat: ingest <slug>" \
# --body-file <path> [--base main] [--label CONFLICT]
#
# With --slug the branch is feat/ai-ingest-<slug>; --branch names it explicitly.
# On success a line "PR opened: <url>" is printed on stdout.
#
# Requires env: FORGEJO_URL, FORGEJO_USER, FORGEJO_TOKEN.
# Optional env: DRY_RUN (non-empty = do the local git work, skip the Forgejo API).
# =============================================================================
set -euo pipefail
# Abort early with a clear message if any credential/endpoint variable is missing.
: "${FORGEJO_URL:?missing FORGEJO_URL}"
: "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"
slug="" title="" body_file="" base="main" label="" branch=""
# Every option takes exactly one value; anything unrecognised is an error.
while [[ $# -gt 0 ]]; do
case "$1" in
--slug) slug="$2"; shift 2 ;;
--branch) branch="$2"; shift 2 ;;
--title) title="$2"; shift 2 ;;
--body-file) body_file="$2"; shift 2 ;;
--base) base="$2"; shift 2 ;;
--label) label="$2"; shift 2 ;;
*) echo "open-pr: unknown arg: $1" >&2; exit 1 ;;
esac
done
: "${title:?--title required}"
: "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }
# --branch overrides the default; otherwise derive the ingest branch from --slug.
# (run-prune passes its own chore/prune-orphans-* branch; run-ingest passes --slug.)
if [[ -z "$branch" ]]; then
: "${slug:?--slug or --branch required}"
branch="feat/ai-ingest-${slug}"
fi
# Repository name = last path component of the origin URL, without a ".git" suffix.
repo="$(basename -s .git "$(git config --get remote.origin.url)")"
# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
# Rolling PR: -C force-resets the branch label to the current base (we are on it after
# clean_start) and CARRIES the freshly-written wiki/ changes, so a re-ingest of the same
# source rebuilds the branch cleanly instead of hitting a dirty-switch refusal.
git switch -C "$branch"
git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
if git diff --cached --quiet -- wiki/; then
echo "open-pr: nothing staged under wiki/ — aborting" >&2
exit 1
fi
git commit -m "$title" -- wiki/
# Try a normal push (new branch / fast-forward). If the branch was rebuilt from base and
# diverged, force-with-lease updates the open PR in place — the lease refuses to clobber if
# origin moved unexpectedly since our fetch, so concurrent work is never lost.
# stderr of the first attempt is discarded; only the fallback push reports errors.
git push -u origin "$branch" 2>/dev/null || git push -u --force-with-lease origin "$branch"
# DRY_RUN: local git work done; skip the Forgejo API (offline tests).
# The line keeps the "PR opened: " prefix used by the real success path.
if [[ -n "${DRY_RUN:-}" ]]; then
echo "PR opened: DRY-RUN ${branch} -> ${base}"
exit 0
fi
# 2. Open the PR via Forgejo API (jq builds the JSON safely)
# TODO: Forgejo-only. When registry.sh/globals.env sets PROVIDER=github, branch on
# $PROVIDER here and delegate to providers/github.sh (same token + http_code contract).
body="$(cat "$body_file")"
payload="$(jq -n --arg head "$branch" --arg base "$base" \
  --arg title "$title" --arg body "$body" \
  '{head:$head, base:$base, title:$title, body:$body}')"
# Without -f curl exits 0 on HTTP errors, so a non-zero exit here means the request
# itself failed (DNS, connect, timeout). Under `set -e` a bare assignment would end
# the script silently; report it so the caller's captured output says what went wrong.
resp="$(curl --max-time 30 -s -w '\n%{http_code}' \
  -H "Authorization: token ${FORGEJO_TOKEN}" \
  -H "Content-Type: application/json" \
  -X POST "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/pulls" \
  -d "$payload")" \
  || { echo "open-pr: Forgejo API request failed (curl exit $?)" >&2; exit 1; }
# curl -w appends '\n<code>' AFTER the body, so the code is always the final line and the
# body is everything before it. Parameter expansion (no subshells), robust to multi-line JSON.
code="${resp##*$'\n'}"
json="${resp%$'\n'*}"
case "$code" in
  201)
    url="$(printf '%s' "$json" | jq -r '.html_url')"
    number="$(printf '%s' "$json" | jq -r '.number')"
    echo "PR opened: ${url}"
    ;;
  409)
    # PR already exists — fetch it so the orchestrator still gets the URL.
    # The lookup is best-effort: with pipefail a failing curl/jq would otherwise abort
    # the script here, before the fallback message and the successful exit below.
    existing="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
      "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/pulls?state=open" \
      | jq -r --arg b "$branch" '.[] | select(.head.ref==$b) | .html_url' | head -n1)" \
      || existing=""
    if [[ -n "$existing" && "$existing" != "null" ]]; then
      echo "PR opened: ${existing}"
    else
      echo "open-pr: a PR for '${branch}' already exists (push updated the branch)." >&2
    fi
    exit 0
    ;;
  401)
    echo "open-pr: unauthorized — check FORGEJO_TOKEN (n8n-bot)." >&2
    exit 1
    ;;
  *)
    echo "open-pr: Forgejo API HTTP ${code}: ${json}" >&2
    exit 1
    ;;
esac
# 3. Optional label (e.g. CONFLICT). Best-effort; non-fatal.
# Only reached after a 201: "number" is set in that branch alone.
if [[ -n "$label" && -n "${number:-}" ]]; then
# Resolve the label name to its numeric id; the first exact name match wins.
label_id="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
"${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/labels" \
| jq -r --arg n "$label" '.[] | select(.name==$n) | .id' | head -n1)"
if [[ -n "$label_id" && "$label_id" != "null" ]]; then
# The PR is addressed through the issues endpoint using its number.
curl --max-time 15 -s -o /dev/null \
-H "Authorization: token ${FORGEJO_TOKEN}" -H "Content-Type: application/json" \
-X POST "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/issues/${number}/labels" \
-d "{\"labels\":[${label_id}]}" \
&& echo "label '${label}' applied" >&2
else
echo "open-pr: label '${label}' not found in repo — skipped." >&2
fi
fi

View file

@ -1,35 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# orphan-wiki.sh — find source pages whose raw source no longer exists.
# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
# Read-only: no lock needed (same policy as pending-raw).
#
# Usage: orphan-wiki.sh <genome>
# Env:   GENOMES_ROOT (default ~/genomes), KG_LIB_DIR (dir holding clean-start.sh).
# =============================================================================
set -euo pipefail

genome="${1:?usage: orphan-wiki.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

declare -a ORPH=()
for page in wiki/sources/*.md; do
  [[ -e "$page" ]] || continue   # unmatched glob stays literal: nothing to scan
  # First source_path: line of the page, with any CR removed.
  sp="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

if [[ ${#ORPH[@]} -eq 0 ]]; then
  # Built with jq --arg (compact, same shape as before) so the genome name is always
  # escaped correctly; string concatenation produced invalid JSON for a name
  # containing a quote or backslash.
  jq -nc --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  for x in "${ORPH[@]}"; do printf '%s\torphan\n' "$x"; done \
    | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" '{status:"ok", genome:$g, count:length, files:[.[].path], detail:.}'
fi

View file

@ -1,64 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# pending-raw.sh — deterministic "what needs ingesting" calculator.
# Reads the clean base checkout and classifies each raw/articles/*.md as:
# new -> no wiki/sources/<slug>.md
# modified -> page exists but its source_sha256 != current file hash
# Emits the same JSON envelope as changed-raw (drop-in), plus detail[] for ntfy.
#
# Usage: pending-raw.sh <genome>
# Env:   GENOMES_ROOT (default ~/genomes), KG_LIB_DIR (dir holding clean-start.sh).
# =============================================================================
set -euo pipefail

genome="${1:?usage: pending-raw.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"

# Resolve our own directory BEFORE changing into the genome: when the script is
# invoked through a relative path, BASH_SOURCE[0] is relative to the original cwd
# and can no longer be resolved after the cd below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SLUG="${SCRIPT_DIR}/slug.sh"

cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

declare -a NEW=()
declare -a MOD=()
declare -A SEEN_SLUG=()

if [[ -d raw/articles ]]; then
  while IFS= read -r -d '' f; do
    rel="${f#./}"
    case "$rel" in
      */.stfolder/*|*/.stignore|*/.gitkeep) continue ;;
    esac
    # Run slug.sh through bash, as run-ingest.sh does, so a missing executable bit
    # cannot make every file look "not pending". A failure is logged, not swallowed.
    slug="$(bash "$SLUG" --raw "$rel")" || {
      logger -t pending-raw "warn: slug.sh failed for ${rel}, file skipped" || true
      continue
    }
    # Residual collision (two distinct raws -> same slug): warn, do not silence.
    if [[ -n "${SEEN_SLUG[$slug]:-}" && "${SEEN_SLUG[$slug]}" != "$rel" ]]; then
      logger -t pending-raw "warn: slug collision ${slug}: ${SEEN_SLUG[$slug]} <-> ${rel}"
    fi
    SEEN_SLUG[$slug]="$rel"
    page="wiki/sources/${slug}.md"
    if [[ ! -f "$page" ]]; then
      NEW+=("$rel")
    else
      # Compare the current content hash with the one recorded in the page frontmatter.
      cur="$(sha256sum "$rel" | cut -d' ' -f1)"
      rec="$(sed -n 's/^source_sha256:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
      if [[ "$cur" != "$rec" ]]; then
        MOD+=("$rel")
      fi
    fi
  done < <(find raw/articles -type f -name '*.md' -print0 2>/dev/null)
fi

if [[ ${#NEW[@]} -eq 0 && ${#MOD[@]} -eq 0 ]]; then
  # Built with jq --arg (compact, same shape as before) so the genome name is always
  # escaped correctly instead of being spliced into a hand-written JSON string.
  jq -nc --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  {
    # The ${arr[@]+...} form keeps `set -u` quiet on bash < 4.4 when one list is empty.
    for x in ${NEW[@]+"${NEW[@]}"}; do printf '%s\tnew\n' "$x"; done
    for x in ${MOD[@]+"${MOD[@]}"}; do printf '%s\tmodified\n' "$x"; done
  } | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" \
      '{status: "ok", genome: $g, count: length, files: [.[].path], detail: .}'
fi

View file

@ -1,174 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-ingest.sh
# Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome
# checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and
# performs every deterministic step — index, log, scoped lint, PR.
#
# run-ingest.sh <genome_name> [manifest_path]
#
# Emits a single JSON result line on stdout for n8n to parse.
#
# every page listed in the manifest must exist on disk before we trust the run.
# Everything else is unchanged: the manifest the semantic phase now produces is
# already in this script's expected schema.
# =============================================================================
set -euo pipefail
genome="${1:?usage: run-ingest.sh <genome> [manifest]}"
manifest="${2:-.ingest-manifest.json}"
# Absolute directory of this script; the sibling helpers are invoked from it.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# fail <stage> <reason>: print a one-line JSON error object on stdout and exit 1.
fail() {
jq -nc --arg stage "$1" --arg reason "$2" \
'{status:"error", stage:$stage, reason:$reason}'
exit 1
}
# fail() needs jq, so the "jq missing" error line is written by hand.
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","reason":"jq missing"}'; exit 1; }
command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"
[[ -f "$manifest" ]] || fail "manifest" "manifest not found: ${manifest}"
# --- validate the manifest BEFORE trusting any field (LLM output is stochastic) ---
# 1) well-formed JSON object with a string raw_source and an array of pages
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
"$manifest" >/dev/null 2>&1 \
|| fail "manifest" "invalid manifest: need object with string raw_source and array pages"
# 2) every page.path must be a string, live under wiki/, and contain no '..' (no traversal)
# The filter collects the offending paths; a non-empty list makes jq -e succeed.
if jq -e '[.pages[].path
| select((type!="string") or (startswith("wiki/")|not) or contains(".."))]
| length > 0' "$manifest" >/dev/null 2>&1; then
fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
fi
# --- read manifest scalars ---
raw_source="$(jq -r '.raw_source' "$manifest")"
# model name comes from the orchestrator/wrapper (INGEST_MODEL); the agent cannot know its
# own tag, so we do not trust a self-reported manifest field. Fall back only if unset.
model="${INGEST_MODEL:-$(jq -r '.model // "unknown"' "$manifest")}"
reasoning="$(jq -r '.reasoning // "Ingest."' "$manifest")"
pr_summary="$(jq -r '.pr_summary // "Ingest."' "$manifest")"
contradictions="$(jq -r '.contradictions // "None"' "$manifest")"
[[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"
slug="$(bash "${SCRIPTS}/slug.sh" --raw "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"
# --- collect touched paths ---
# Pages whose status is neither "created" nor "modified" are not collected.
mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest")
mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .path' "$manifest")
all_paths=( "${created_paths[@]}" "${modified_paths[@]}" )
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"
# --- the semantic phase (ingest-semantic.py) writes the files; verify
# every manifest page actually exists on disk before trusting the run. Catches any
# drift between what the manifest claims and what was really written. ---
for _p in "${all_paths[@]}"; do
[[ -f "$_p" ]] || fail "pages" "manifest lists a file not present on disk: ${_p}"
done
conflict_label=""
# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).
# Every step is safe to re-run: index-append.py skips or replaces an entry that is already
# present, and the log entry carries a run_id derived from the input, which log-append.sh
# refuses to append twice. If a step fails midway nothing is committed (open-pr is the
# only committer), so the operator simply re-runs. The manifest is removed only upon
# full success.
# --- 1. index entries (created pages only), inserted in order ---
# Fields are joined with the ASCII unit separator (0x1f) instead of @tsv. Tab is IFS
# whitespace, so `read` collapses consecutive tabs: with @tsv an empty summary shifted
# "maturity" into the summary slot and left maturity empty. A non-whitespace delimiter
# keeps empty fields in place. Control characters inside a field become a space so one
# page is always exactly one line.
while IFS=$'\x1f' read -r path summary maturity; do
  [[ -z "$path" ]] && continue
  link="${path#wiki/}"; link="${link%.md}"   # e.g. sources/foo
  folder="${link%%/*}"
  case "$folder" in
    sources)  section="Sources" ;;
    entities) section="Entities" ;;
    concepts) section="Concepts" ;;
    queries)
      if [[ "$link" == queries/conflict-* ]]; then section="Conflicts"; conflict_label="CONFLICT"
      else section="Queries"; fi ;;
    # private/ is not routed here — ingest is public-only. Add when private ingest is built.
    *) section="Sources" ;;
  esac
  if [[ "$section" == "Conflicts" ]]; then
    entry="- [[${link}]]"   # conflicts: slug only
  else
    entry="- [[${link}]] — ${summary} \`maturity: ${maturity}\`"
  fi
  python3 "${SCRIPTS}/index-append.py" --section "$section" --entry "$entry" \
    || fail "index" "index-append failed for ${path}"
done < <(jq -r '.pages[] | select(.status=="created")
  | [.path, (.summary // ""), (.maturity // "draft")]
  | map(tostring | gsub("[\n\r\t\u001f]+"; " "))
  | join("\u001f")' "$manifest")
# --- 2. log entry ---
# Stable run_id: deterministic from the input (raw path + content hash). Survives wrapper
# re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
# With pipefail a failing sha256sum fails the whole pipeline, which selects "unknown".
src_sha="$(sha256sum "$raw_source" 2>/dev/null | cut -d' ' -f1)" || src_sha="unknown"
# run_id = first 16 hex characters of sha256("<raw path>:<content hash>").
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
# Comma-separated wikilinks for every manifest page ("wiki/" prefix and ".md" suffix removed).
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|| fail "log" "log-append failed"
# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
# Point scoped-lint at the same manifest we were handed so its duplicate
# advisory reads the right file even when a non-default path arrives as $2.
# (The dedup check lives inside lib/lint.sh and is invoked by scoped-lint —
# there is no separate check-duplicates.sh script.)
export INGEST_MANIFEST="$manifest"
# The "&& rc=0 || rc=$?" form records the exit status without tripping `set -e`;
# stdout and stderr are both captured for the PR body.
lint_out="$(
bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1
)" && lint_rc=0 || lint_rc=$?
# --- 4. assemble the PR body (manifest tables + lint results) ---
body="$(mktemp)"
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
{
echo "<!-- kg:raw=${raw_source} -->" # marker for the rejection loop (invisible in the render)
echo "## Summary"
echo "$pr_summary"
echo ""
echo "## Pages"
echo "| Path | Status | Maturity |"
echo "|------|--------|----------|"
jq -r '.pages[] | "| `\(.path)` | \(.status) | \(.maturity // "draft") |"' "$manifest"
echo ""
echo "## Contradictions"
echo "$contradictions"
echo ""
echo "## Scoped Lint (post-ingest)"
echo '```'
echo "$lint_out"
echo '```'
} > "$body"
# --- 5. open the PR ---
pr_args=( --slug "$slug" --title "feat: ingest ${slug}" --body-file "$body" --base "${INGEST_BASE:-main}" )
[[ -n "$conflict_label" ]] && pr_args+=( --label "$conflict_label" )
pr_out="$( bash "${SCRIPTS}/open-pr.sh" "${pr_args[@]}" 2>&1 )" && pr_rc=0 || pr_rc=$?
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"
# --- final result line for n8n ---
jq -nc \
--arg status "$([[ $pr_rc -eq 0 ]] && echo ok || echo pr_failed)" \
--arg slug "$slug" \
--arg pr_url "$pr_url" \
--argjson lint_clean "$([[ $lint_rc -eq 0 ]] && echo true || echo false)" \
--argjson conflict "$([[ -n "$conflict_label" ]] && echo true || echo false)" \
--arg detail "$pr_out" \
'{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'
# The manifest is a single file that is overwritten with each run, but if the process is
# completely successful, we remove it to prevent an outdated manifest from being reprocessed by mistake.
if [[ $pr_rc -eq 0 ]]; then
rm -f "$manifest"
else
exit 1
fi

View file

@ -1,96 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-prune.sh
# Symmetric companion to run-ingest: prune source pages whose raw source no
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
# race. Removes ONLY the pages it derived plus their index entries, commits
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
# operator approves the deletion; principle 2). Never deletes of its own accord.
#
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
# prune`) has already taken the per-genome lock and done clean_start, exactly
# like `pi ingest` — so this script does neither.
#
# run-prune.sh <genome>
#
# Emits a single JSON result line on stdout for n8n to parse.
# =============================================================================
set -euo pipefail
genome="${1:?usage: run-prune.sh <genome>}"
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Report a fatal error and abort the script.
# Prints one compact JSON object on stdout, then exits with status 1.
#   $1  stage in which the failure happened (e.g. "deps", "prune", "index")
#   $2  free-text reason
fail() {
  local stage="$1" reason="$2"
  jq --null-input --compact-output \
    --arg stage "$stage" \
    --arg reason "$reason" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}
# Dependency checks. jq is probed first with a hand-written JSON line because
# fail() itself needs jq to build its output.
if ! command -v jq >/dev/null 2>&1; then
  echo '{"status":"error","reason":"jq missing"}'
  exit 1
fi
if ! command -v python3 >/dev/null 2>&1; then
  fail "deps" "python3 missing (needed by index-append.py)"
fi
# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when its frontmatter source_path points at
# a raw file that no longer exists. Legacy pages without source_path are ignored.
#
# The extracted value is normalised before the existence test: trailing
# whitespace is trimmed and one pair of matching surrounding quotes is removed.
# Without this, `source_path: "raw/x.md"` or a padded value would never match a
# file on disk and a live page would be proposed for deletion.
# NOTE(review): orphan-wiki.sh is not visible here — if it does not normalise the
# value the same way, detection and prune can disagree for quoted/padded values
# (prune being the more conservative of the two). Confirm and align.
declare -a ORPH=()
for page in wiki/sources/*.md; do
  # Regular files only: skips the unexpanded glob when nothing matches, and any
  # non-file entry named *.md that sed could not read (which would abort the
  # run under `set -e` + pipefail without emitting a result line).
  [[ -f "$page" ]] || continue
  # First `source_path:` line of the page, leading whitespace and CR removed.
  sp="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
  # Trim trailing whitespace.
  sp="${sp%"${sp##*[![:space:]]}"}"
  # Drop one pair of matching surrounding quotes (YAML quoted scalar).
  case "$sp" in
    \"*\") sp=${sp#\"}; sp=${sp%\"} ;;
    \'*\') sp=${sp#\'}; sp=${sp%\'} ;;
  esac
  [[ -n "$sp" ]] || continue          # no source_path → legacy page, ignore
  [[ -f "$sp" ]] || ORPH+=("$page")   # raw file gone → orphan
done
# Nothing to prune: report success with an empty result and stop here.
if [[ ${#ORPH[@]} -eq 0 ]]; then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi
# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
# PRUNED collects the index link (path without "wiki/" prefix and ".md" suffix)
# of every page that was actually removed.
declare -a PRUNED=()
for page in "${ORPH[@]}"; do
  # Safety guards: only paths under wiki/ and without ".." are ever removed.
  if [[ "$page" != wiki/* ]]; then
    fail "prune" "refusing to remove outside wiki/: ${page}"
  fi
  if [[ "$page" == *..* ]]; then
    fail "prune" "path traversal in page: ${page}"
  fi
  # Page vanished since it was derived — nothing to do for it.
  if [[ ! -f "$page" ]]; then
    continue
  fi
  rm -f "$page"
  idx_link="${page#wiki/}"         # e.g. sources/foo.md
  idx_link="${idx_link%.md}"       # e.g. sources/foo
  if ! python3 "${SCRIPTS}/index-append.py" --remove "$idx_link"; then
    fail "index" "index-append --remove failed for ${idx_link}"
  fi
  PRUNED+=("$idx_link")
done
# --- assemble the PR body ---
# The body is written to a temp file that is removed on any exit via the trap.
date_tag="$(date +%F)"
body="$(mktemp)"
trap 'rm -f "$body"' EXIT
{
  # Static header and table head (quoted delimiter: no expansion inside).
  cat <<'MD'
## Prune orphaned sources

These source pages reference a `source_path` whose raw file no longer exists
in `raw/`. Removing them keeps the wiki in sync with git (the source of truth).

| Removed page |
|--------------|
MD
  # One table row per removed page.
  for removed in "${PRUNED[@]}"; do
    printf '| `wiki/%s.md` |\n' "$removed"
  done
} > "$body"
# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
# open-pr.sh output (stdout + stderr) is captured in pr_out; its exit status is
# kept in pr_rc so a failure is reported in the result line instead of aborting.
branch="chore/prune-orphans-${date_tag}"
pr_title="chore: prune ${#PRUNED[@]} orphaned source(s)"
pr_rc=0
pr_out="$( bash "${SCRIPTS}/open-pr.sh" \
  --branch "$branch" \
  --title "$pr_title" \
  --body-file "$body" --base "${INGEST_BASE:-main}" 2>&1 )" || pr_rc=$?
# The PR URL is whatever follows the first "PR opened: " line (empty if none).
pr_url="$(sed -n 's/^PR opened: //p' <<<"$pr_out" | head -n1)"
# --- result line for n8n ---
if [[ $pr_rc -eq 0 ]]; then
  status="ok"
else
  status="pr_failed"
fi
jq -nc \
  --arg status "$status" \
  --argjson count "${#PRUNED[@]}" \
  --arg pr_url "$pr_url" \
  --arg detail "$pr_out" \
  --argjson pruned "$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s .)" \
  '{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'
# Propagate a PR failure through the exit status as well.
if [[ $pr_rc -ne 0 ]]; then
  exit 1
fi

View file

@ -1,62 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/scoped-lint.sh
# Run the framework's validation on ONLY the files touched this session.
# Reuses lib/lint.sh + lib/output.sh — same checks as `make lint`, scoped.
#
# KG_LIB_DIR=/opt/knowledge-genome-orchestrator/lib \
# scoped-lint.sh <genome_name> wiki/sources/x.md wiki/entities/y.md
#
# Exits non-zero if any hard error is found, so the agent notices.
# Findings are printed (stderr from the lint functions + a summary on stdout).
# =============================================================================
set -euo pipefail
: "${KG_LIB_DIR:?set KG_LIB_DIR to the framework lib/ dir (e.g. /opt/knowledge-genome-orchestrator/lib)}"
# Check that both library files exist before sourcing either one, so a missing
# file yields a clear message rather than a raw `source: No such file`.
for _lib in output.sh lint.sh; do
  if [[ ! -f "${KG_LIB_DIR}/${_lib}" ]]; then
    echo "scoped-lint: missing ${KG_LIB_DIR}/${_lib}" >&2
    exit 1
  fi
done
# Load them in the same order: output.sh first, then lint.sh.
for _lib in output.sh lint.sh; do
  # shellcheck source=/dev/null
  source "${KG_LIB_DIR}/${_lib}"
done
# Arguments: <genome> followed by one or more files to lint.
genome="${1:?usage: scoped-lint.sh <genome> <file...>}"
shift
[[ $# -gt 0 ]] || { echo "scoped-lint: no files given" >&2; exit 1; }
errors=0   # running sum of the return codes of the hard checks
stale=0    # running sum of check_knowledge_decay return codes
count=$#   # number of files requested (includes any skipped as missing below)
for f in "$@"; do
  if [[ ! -f "$f" ]]; then
    warn "scoped-lint: missing file (skipped): $f"
    continue
  fi
  # Each check runs as `cmd && rc=0 || rc=$?` so a non-zero return is recorded
  # instead of tripping `set -e`. The return codes are added up as-is
  # (presumably each is a finding count — confirm against lib/lint.sh).
  lint_markdown_file "$f" "$genome" && fe=0 || fe=$?
  check_privacy_consistency "$f" && pce=0 || pce=$?
  check_page_size "$f" && pse=0 || pse=$?
  errors=$(( errors + fe + pce + pse ))
  check_knowledge_decay "$f" && st=0 || st=$?
  stale=$(( stale + st ))
  check_broken_links "$f" || true # warnings only
done
# Cross-page duplicate advisory: runs ONCE over the whole manifest (not per
# file) — it compares this run's created slugs against the index, so repeating
# it for every file would only print the same warnings N times. Warn-only;
# never affects the exit status. INGEST_MANIFEST lets run-ingest.sh point us at
# a non-default manifest path; falls back to the conventional name otherwise.
# `|| true` enforces the warn-only contract: without it a non-zero return would
# abort the script under `set -e` before the summary line is printed.
check_duplicates "${INGEST_MANIFEST:-.ingest-manifest.json}" || true
echo ""
echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)"
# Exit status of the script: success only when no hard error was recorded.
[[ $errors -eq 0 ]]

View file

@ -1,35 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/slug.sh
# Derive a wiki slug from a path, filename, or title string.
# slug.sh "raw/articles/My Source.md" -> my-source
# slug.sh "Some Concept Name" -> some-concept-name
# =============================================================================
set -euo pipefail

# --- mode 1: slug.sh --raw <raw/bucket/rel/path> -----------------------------
# Builds the slug from the path RELATIVE to the bucket: every path component is
# sanitised on its own and the components are joined with "-".
#   slug.sh --raw "raw/articles/sub/My Source.md"  -> sub-my-source
if [[ "${1:-}" == "--raw" ]]; then
  raw="${2:?usage: slug.sh --raw <raw/bucket/rel/path>}"
  rel="${raw#raw/}"; rel="${rel#*/}"     # strip "raw/" and the bucket name
  # Strip the extension from the LAST component only. Applying `%.*` to the
  # whole relative path would cut at a dot inside a directory name when the
  # file itself has no extension (e.g. "v1.2/notes" -> "v1").
  leaf="${rel##*/}"
  parent="${rel%"$leaf"}"                # "" or "dir/.../" (trailing slash kept)
  rel="${parent}${leaf%.*}"
  # One component per line -> sanitise -> drop components that sanitised to
  # nothing (they would otherwise produce "--" or a leading/trailing "-" after
  # the join) -> lowercase -> join with "-".
  slug="$(printf '%s\n' "$rel" | tr '/' '\n' \
    | sed -E 's/[^a-zA-Z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//; /^$/d' \
    | tr '[:upper:]' '[:lower:]' | paste -sd- -)"
  [[ -n "$slug" ]] || { echo "slug: empty result for input '${raw}'" >&2; exit 1; }
  printf '%s\n' "$slug"
  exit 0
fi

# --- mode 2: slug.sh <path-or-title> -----------------------------------------
input="${1:?usage: slug.sh <path-or-title>}"
# Strip directory and extension when given a path.
# NOTE: a title is handled the same way, so everything from its last "." onward
# is dropped as if it were an extension (e.g. "Node.js Tips" -> "node").
base="${input##*/}"
base="${base%.*}"
# Lowercase, collapse every run of non-alphanumerics to a single "-", then trim
# leading/trailing "-".
slug="$(printf '%s\n' "$base" \
  | tr '[:upper:]' '[:lower:]' \
  | sed -E 's/[^a-z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//')"
# An all-symbols input (e.g. "!!!.md") collapses to "" — refuse rather than emit a
# broken/empty slug that would produce an invalid branch name downstream.
[[ -n "$slug" ]] || { echo "slug: empty result for input '${input}'" >&2; exit 1; }
printf '%s\n' "$slug"

View file

@ -3,7 +3,7 @@
## Identity
| Field | Value |
| ------ | -------------------------------------------------- |
|--------|-------|
| Genome | `{{GENOME_NAME}}` |
| Domain | `{{GENOME_DESC}}` |
| Owner | `{{FORGEJO_USER}}` |
@ -14,26 +14,12 @@
---
## Linked Project
| Field | Value |
| --------------- | --------------------- |
| Project repo | `{{LINKED_PROJECT}}` |
| Branch | `main` |
| Allowed tasks | `readme, tests, code` |
| Preferred model | `auto` |
If `Project repo` is `none`, this genome is knowledge-only — phase-2 project work
does not apply. When set, after a wiki PR is **merged**, the orchestrator may trigger
work on this repo within _Allowed tasks_. The agent never touches the project repo
during ingest.
## PRIVATE_CONTEXT
**Default: `disabled`** — never infer; require explicit operator declaration per session.
| State | Behavior |
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|-------|----------|
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
@ -47,15 +33,13 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
1. `raw/` is read-only. Never create, modify, or delete files in `raw/`.
2. `wiki/` is agent-owned. Create, update, and maintain all wiki pages here.
3. Every operation → one log entry appended to `wiki/log.md` (§Log) (automated via manifest during Ingest).
4. Every new page → one entry appended to `wiki/index.md` (§Index) (automated via manifest during Ingest).
3. Every operation → one log entry appended to `wiki/log.md` (§Log).
4. Every new page → one entry appended to `wiki/index.md` (§Index).
5. Never commit to `main`. Branch per task; PR required; no self-merge.
6. Contradict, don't overwrite. New evidence contradicts existing claim → §Conflict.
7. Never commit plaintext to any path marked for encryption in `.gitattributes`.
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary (automated via run-ingest.sh during Ingest).
### NEVER
- Load `wiki/log.md` in full — read only the tail injected by the orchestrator.
- Rewrite `wiki/index.md` to reorder entries — append only; sorting is automated.
- Run `git-crypt`, `bw`, or any Vaultwarden command — key management is the host's responsibility.
@ -63,7 +47,6 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
- Merge PRs — human approval required.
### ASK FIRST
- Deleting any wiki page.
- Changing `maturity` from `stable` to `deprecated`.
- Writing to `wiki/private/` when PRIVATE_CONTEXT state is ambiguous.
@ -75,68 +58,53 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
Execute in this order before any file operation:
1. **One source per session.** If multiple sources are queued in `raw/`, process only the first. Commit, close session. The orchestrator starts a new session for the next source.
2. Read `wiki/index.md` — full catalog of all pages and their maturity.
3. Read the last 20 log entries injected by orchestrator — do not open `wiki/log.md` directly.
4. For any task involving related pages: `qmd search "<query>"` before opening files.
5. Operate on individual target files. Never scan entire directories.
1. Read `wiki/index.md` — full catalog of all pages and their maturity.
2. Read the last 20 log entries injected by orchestrator — do not open `wiki/log.md` directly.
3. For any task involving related pages: `qmd search "<query>"` before opening files.
4. Operate on individual target files. Never scan entire directories.
---
## Workflows
### Ingest
*Triggered by new file in `raw/`.*
_Triggered by new file in `raw/`._
**Phase 1 — Semantic Pass (Agent Skill)**
1. Read source once.
2. Create `wiki/sources/<slug>.md` — summary + key points.
3. Per entity (person, tool, org): create or update `wiki/entities/<name>.md`.
4. Per concept (pattern, theory, decision): create or update `wiki/concepts/<name>.md`.
5. Check each touched page for contradictions → apply §Conflict if found.
6. **Final action:** Write `.ingest-manifest.json` at the genome root.
7. **STOP.** Do not proceed to index, log, lint, commit, or PR — these are Phase 2.
6. Append entry to `wiki/index.md` (bottom of relevant section).
7. Append log entry: `INGEST | <slug>`.
8. Commit on `feat/ai-ingest-<slug>`. Open PR.
**Phase 2 — Deterministic Post-Processing (`run-ingest.sh`)**
_Executed automatically by the orchestrator after Phase 1._
8. Append entry to `wiki/index.md` (bottom of relevant section).
9. Append log entry: `INGEST | <slug>`.
10. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
11. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
_Private source_ (`PRIVATE_CONTEXT: enabled` required):
*Private source* (`PRIVATE_CONTEXT: enabled` required):
- All output → `wiki/private/<slug>.md` only.
- PR title: `[PRIVATE] ingest: <slug>`.
### Query
_Triggered by operator question._
*Triggered by operator question.*
1. `qmd search "<query>"` → identify candidate pages.
2. Read candidate pages directly.
2. Read relevant pages via `wiki/index.md` catalog.
3. Synthesize answer with `[[wikilink]]` citations.
4. If answer is non-trivial: save as `wiki/queries/<slug>.md`.
5. Append entry to `wiki/index.md` under Queries.
6. Append log entry: `QUERY | <subject>`.
_For general orientation without a specific query: read `wiki/index.md` directly._
### Lint
*Triggered by operator or schedule.*
_Triggered by operator with bash pre-scan output._
Find and report — do not auto-fix without operator approval:
Pre-requisite: operator runs `bash scripts/lint-genomes.sh` and provides output to this session.
The script handles deterministically: broken links, knowledge decay, page size, frontmatter validation.
1. Orphan pages — no inbound `[[wikilink]]`.
2. Duplicate concepts — two pages covering same topic → propose merge.
3. Implicit concepts — term in 3+ pages with no dedicated page.
4. `maturity: draft` with 2+ sources → propose promote to `stable`.
5. Broken internal links.
6. Knowledge decay violations (§Decay).
Agent tasks — apply semantic judgment to bash findings + independent semantic checks:
1. **Orphan pages** (list from bash): for each orphan, identify 1-3 existing pages that should link to it. Propose specific link additions.
2. **Implicit concepts** (term list from bash): for each candidate term, determine if a dedicated page is warranted. If yes, draft stub.
3. **Duplicate concepts**: `qmd search "<concept>"` for suspected duplicates → propose merge if confirmed.
4. **`maturity: draft`** pages with 2+ sources cited → propose promote to `stable`.
Report all findings as structured list. Do not modify files without operator approval.
Append log entry: `LINT | <summary of findings>`.
---
@ -144,7 +112,6 @@ Append log entry: `LINT | <summary of findings>`.
## File Conventions
### Frontmatter
Required on every wiki page:
```yaml
@ -164,29 +131,19 @@ private: true | false
- `deprecated` — superseded. Add `> **DEPRECATED:** <reason>` callout at top of body.
### Links
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
- Cross-genome: NOT via wikilink (submodule pointers make relative paths brittle). A concept owned by another genome is pulled in by the navigation skill as a raw under `raw/articles/crossgen-<topic>-<date>.md`, then ingested here normally. See master `AGENTS.md` §Cross-Genome Pull.
- Cross-genome: `[[../genome-target/wiki/folder/file]]`.
- External: `[text](https://...)`.
### Index entries
> **Skill mode:** auto-generated by `run-ingest.sh` from manifest. Below applies to manual workflows only.
Append at bottom of relevant section in `wiki/index.md`:
```
- [[folder/slug]] — One-line summary. `maturity: draft`
```
Never reorder. Alphabetical sorting is handled by the post-processor (index-append.py); the pre-commit hook only enforces the security policy.
Never reorder. Alphabetical sort is handled by the pre-commit hook.
### Log entries
> **Skill mode:** auto-generated by `run-ingest.sh` from manifest. Below applies to manual workflows only.
Append one entry per operation to `wiki/log.md`:
```markdown
## [YYYY-MM-DD] TYPE | Subject
@ -196,7 +153,6 @@ Append one entry per operation to `wiki/log.md`:
- output_written: `[[path/C]]`
- reasoning: One sentence — what changed and why.
```
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
Parse: `grep "^## \[" wiki/log.md | tail -5`
@ -220,20 +176,16 @@ last_updated: YYYY-MM-DD
private: false
---
```
```markdown
## Conflict: <concept>
**Claim A (existing):** [[path/to/existing-page]]
> Summary of current wiki position.
**Claim B (new):** [[path/to/new-source]]
> Summary of contradicting evidence.
**Assessment:**
- Confidence A: high | medium | low — <reason>
- Confidence B: high | medium | low — <reason>
- Recommendation: `accept_b` | `keep_a` | `requires_human_review`
@ -253,11 +205,9 @@ private: false
- `maturity: draft` not updated in **90 days** → flag during lint.
Flagged pages: prepend to body:
```markdown
> **⚠️ STALE:** Last validated {{last_updated}}. Re-validation required.
```
Propose re-validation task. Do not change `maturity` without new source evidence.
---
@ -265,7 +215,7 @@ Propose re-validation task. Do not change `maturity` without new source evidence
## Collaboration
| Role | Access | Permitted |
| -------------- | ----------------- | ------------------------------------------------------------------------------------ |
|------|--------|-----------|
| Owner | Full — key holder | Read/write everywhere |
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |

View file

@ -3,13 +3,13 @@
## Identity
| Field | Value |
| ------ | -------------------------------------------------- |
|--------|-------|
| Repo | `{{MASTER_REPO}}` |
| Owner | `{{FORGEJO_USER}}` |
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
**Role:** Cross-genome coordinator for the Knowledge Genome network.
**Metrics:** no cross-genome boundary violations · submodule pointers current · cross-genome discoveries routed to target raw/ · zero stale submodule-relative wikilinks.
**Metrics:** no cross-genome boundary violations · submodule pointers current · cross-genome wikilinks valid · no private data outside local network.
---
@ -18,8 +18,10 @@
```text
{{MASTER_REPO}}/
├── core-karpathy/ ← Reference pattern — read-only, never modify
├── genome-example/ ← Submodule placeholder (replace with your domain)
└── AGENTS.md
├── genome-dev/ ← Submodule: web development, Angular, TUI
├── genome-finance/ ← Submodule: personal finance (git-crypt on private/)
├── genome-homelab/ ← Submodule: Keru infrastructure and network
└── AGENTS.md ← This file (update diagram when adding a genome)
```
Each genome has its own `AGENTS.md` with domain-specific rules.
@ -30,17 +32,14 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
## Global Security Rules
### PRIVATE_CONTEXT scope
- Toggle is **per-genome and per-session**. Enabling for `genome-finance` does NOT enable for `genome-dev`.
- Cloud LLM models: `PRIVATE_CONTEXT` must be `disabled` for all genomes. Private data never leaves the local network.
### Log sanitization
- Never print decrypted secrets, session tokens, or key contents to stdout or log files.
- Document only `run_id` and genome name — never the key value.
### Key management
- Key injection is the host's responsibility — executed before this session starts.
- Never write, suggest, or generate scripts that save `.key` files to disk.
@ -50,20 +49,17 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
1. Operate within ONE genome at a time. No atomic commits across multiple genomes.
2. `core-karpathy` is read-only. Never commit to it.
3. Cross-genome references are NEVER expressed as wikilinks. When a concept belongs to another genome, use the navigation skill to emit a raw stub into that genome's `raw/articles/` and let its own ingest pipeline handle it asynchronously.
3. Cross-genome references use relative wikilinks only: `[[../genome-target/wiki/folder/page]]`.
4. Never commit to `main` in any genome. PRs required; no self-merge.
5. Per-genome `AGENTS.md` governs all wiki operations within that genome. This file governs boundaries only.
### NEVER
- Load multiple `wiki/index.md` files simultaneously for cross-genome comparison — use qmd.
- Run `git-crypt`, `bw`, or Vaultwarden commands — host responsibility.
- Modify files in more than one genome in the same operation.
- Create cross-genome wikilinks (e.g., `[[../genome-*/wiki/...]]`). All cross-domain connections must be routed via the navigation skill as raw stubs.
- Modify `core-karpathy` in any way.
### ASK FIRST
- Any operation that touches two or more genomes.
- Updating submodule pointers in master.
- Any key rotation procedure.
@ -80,55 +76,58 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
---
## Cross-Genome Pull (Navigation Skill)
## Cross-Genome Lint
*Manual, monthly — requires operator initiation. Not automated.*
Cross-genome knowledge moves by **pull, never push**: the genome you are working in draws material *in*; nothing is ever written into another genome. The cross-genome reading is performed by a deterministic collector **outside any agent's context**, so the agent still operates within ONE genome (Immutable Rule 1 holds). The `cross_source` registry flag decides which genomes may be read as sources.
1. Use `qmd search "<concept>"` to find pages covering the same concept across genomes.
2. Identify:
- Concepts defined in 2+ genomes with potentially conflicting definitions.
- Entities referenced across genomes without a canonical cross-genome wikilink.
- Concepts in genome-X that should link to genome-Y but don't.
3. Report findings. Do not modify any files.
4. For each finding: create a conflict note in the genome where resolution belongs, following that genome's §Conflict procedure.
There is **no separate synthesis step**: retrieving and then distilling twice would only add LLM cost and lose information. The collector *retrieves* (like a search) and deposits the result as a raw; the working genome's own ingest *distills* it once, for this genome's needs.
---
### How it works
## Reference Operations
Two actors:
### Add a genome
```bash
make add-genome NAME=genome-newname DESC="Domain description"
```
Then update the architecture diagram in this file.
1. **Collector** (`collect-crossgen.sh`, deterministic, agent-free). Clones each genome flagged `cross_source: yes` **read-only at its remote HEAD** — a disposable checkout, for freshness; never the pinned submodule state. The clone is **keyless**, so `private/` stays an encrypted blob and is unreadable. It indexes the public wikis with `qmd`, runs `qmd search "<topic>"`, and assembles a **dossier**: the text of the matching pages plus per-excerpt provenance (source genome, page, HEAD short-sha, date), with every `[[wikilink]]` neutralized to plain text. It deposits the dossier as **one** raw in the working genome at `raw/articles/crossgen-<topic>-<YYYY-MM-DD>.md`, commits, and pushes. Nothing is written to any source genome.
2. **Target ingest.** The working genome's standard ingest reads that raw as an ordinary source and distills it into wiki pages for the local domain — one semantic pass → PR → human gate. Same gate as any other source.
### When to pull
Pull is initiated deliberately (operator- or context-driven, never on a timer). Produce a crossgen raw ONLY when all three hold:
1. **Ownership elsewhere.** The concept, entity, or pattern is defined and maintained in another genome, and you need it framed for the working domain.
2. **Structural relevance.** It influences decisions, patterns, or entities here — not a casual mention.
3. **No fresh local coverage.** `qmd search "<concept>"` in the working genome returns nothing, or only a stub that needs enrichment.
If in doubt, do NOT pull. A missed cross-reference is cheaper than crossgen spam.
### Boundaries (enforced by the master)
- **Sources are restricted to `cross_source: yes` genomes.** A genome flagged `no` (e.g., a client / confidential file) is NEVER read as a source — the collector skips it physically. The wall is structural, not a matter of the agent's discipline.
- **Keyless collection.** The collector holds no git-crypt key, so `private/` stays ciphertext and cannot be read — privacy does not depend on the agent behaving.
- **Sources are read-only, at HEAD.** No write, commit, branch, or PR in any genome other than the one being worked on.
- **NEVER `git submodule update --remote`.** Read other genomes via disposable read-only clones — never by moving this master's submodule pointers (that is ASK FIRST).
- The deposited raw must contain **no wikilinks and no private data**; it is processed by the working genome's normal ingest + human gate.
### Output raw (the only artifact written)
**Path (in the working genome):** `raw/articles/crossgen-<topic>-<YYYY-MM-DD>.md`
Plain text. No YAML frontmatter (raw is immutable input). **No wikilinks of any kind** — `[[...]]` from source pages are flattened to plain text so they never become broken cross-references here.
```markdown
> Cross-genome pull | Into: genome-<working> | Query: "<topic>" | Date: YYYY-MM-DD
## From genome-<a> — wiki/concepts/<x>.md (HEAD <short-sha>)
[retrieved page text — wikilinks flattened to plain text, no private data]
## From genome-<b> — wiki/entities/<y>.md (HEAD <short-sha>)
[retrieved page text]
### Sync submodules
```bash
make sync
```
**Rules:**
### Update core-karpathy reference
```bash
git submodule update --remote core-karpathy
git add core-karpathy
git commit -m "chore: update core-karpathy to latest gist"
git push
```
- **Deterministic deposit.** The raw is written by the collector (the skill's mechanical side), never edited by an agent — agents never create, modify, or delete files in any `raw/`. Each pull is a **new, dated** file (raw is immutable).
- **Distillation happens at ingest, once.** The working genome's normal ingest turns the dossier into wiki pages and **deduplicates against existing pages** via its §Conflict procedure. There is no pre-summarization.
- **Bound large retrievals deterministically** (top-N pages / relevant sections) rather than adding an LLM pass — keeps the dossier-raw and the ingest job reasonable at any scale.
- *Optional (large + expensive-cloud deployments only):* a cheap **local** pre-distillation may be inserted before an expensive cloud ingest to shrink its input. This is an opt-in optimization; the default is no synthesis.
### Clone (full)
```bash
git clone --recurse-submodules \
{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}.git
```
After cloning, unlock each genome on the host before starting an agent session.
### Key rotation (emergency)
If a key is compromised: `gcrypt_rotate_key "<genome>"` from project root.
Update the Vaultwarden Secure Note with the new base64-encoded key.
Revoke access from previous key holders.
### Key registry
| Genome | Vaultwarden Secure Note | Temp key file |
|--------|------------------------|---------------|
| genome-dev | `genome-dev key` | `keys/genome-dev.key` |
| genome-finance | `genome-finance key` | `keys/genome-finance.key` |
| genome-homelab | `genome-homelab key` | `keys/genome-homelab.key` |
Temp key files in `keys/` are post-export only. Delete after upload to Vaultwarden.

View file

@ -1,45 +0,0 @@
## Summary
<!-- One sentence: goal of this session and source processed. -->
<!--
REVIEW GUIDELINES (write the guideline as the FIRST word of your review):
REWORK: <what to fix> -> same branch, guided retry
RESTART: <why restart> -> close PR, start over from scratch
SPLIT: <how to split> -> close PR, reopen as separate branches
REJECT: <why not> -> close PR, no retry
MERGE -> approve and merge
Rules: one concern per directive; be specific to lines/pages; name the principle
that was violated; describe the DESIRED STATE; avoid saying “do better.”
-->
## Pages Created
| Path | Type | Maturity |
| ----------------- | --------------------------------- | -------- |
| `[[folder/slug]]` | entity / concept / source / query | draft |
## Pages Modified
| Path | Change |
| ----------------- | ----------------------------------------- |
| `[[folder/slug]]` | Added cross-reference to `[[other/page]]` |
## Contradictions Found
- [ ] None
- [ ] `n` conflict file(s) created — listed below
## Private Data Accessed
- [ ] No — `PRIVATE_CONTEXT: disabled`
- [ ] Yes — `PRIVATE_CONTEXT: enabled` · outputs in `wiki/private/` only
## Scoped Lint (post-ingest)
- [ ] Frontmatter valid on all touched pages
- [ ] No broken wikilinks on touched pages
- [ ] No issues found
- [ ] Issues found (list):

View file

@ -11,8 +11,9 @@ set -euo pipefail
FAILED=0
# Verify git-crypt is initialized
if ! git-crypt status >/dev/null 2>&1; then
printf "\n[CRITICAL] git-crypt not initialized.\n"
if [[ ! -d ".git-crypt" ]]; then
printf "\n\033[0;31m[CRITICAL] git-crypt not initialized.\033[0m\n"
printf "Run 'git-crypt init' and 'make setup' before committing.\n"
exit 1
fi

View file

@ -1,45 +0,0 @@
# {{MASTER_REPO}}
Master (umbrella) repository for the Knowledge Genome network.
| Field | Value |
| ---------- | -------------------------------------------------- |
| Owner | `{{FORGEJO_USER}}` |
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
| Scaffolded | `{{DATE}}` |
## What this repo is
This repository does **not** hold knowledge itself. It is the orchestrator: each genome
is a Git submodule, plus `core-karpathy` as a read-only reference pattern. Cross-genome
coordination rules live in `AGENTS.md`.
```text
{{MASTER_REPO}}/
├── core-karpathy/ ← reference pattern — read-only, never modify
├── genome-*/ ← one submodule per genome (own AGENTS.md, own git-crypt)
└── AGENTS.md ← cross-genome coordinator (boundaries only)
```
## Working with submodules
```bash
# Clone with all genomes
git clone --recurse-submodules {{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}.git
# Pull the latest pointers for every genome
git submodule update --remote --merge
# Operate inside a single genome (one genome at a time — see AGENTS.md)
cd genome-<name>
```
## Rules of the road
- Operate within **one genome at a time**; no commits spanning multiple genomes.
- `core-karpathy` is read-only.
- Never commit to `main` in a genome — PRs only, no self-merge.
- Private data (`**/private/**`) is git-crypt encrypted and never leaves the local network.
Genome-level operations are governed by each genome's own `AGENTS.md`. This README and the
master `AGENTS.md` govern boundaries only.

View file

@ -12,35 +12,34 @@ private: false
**[AGENT INSTRUCTION]**
This is the primary navigation file. Read it first on every session before accessing individual pages.
Append new entries at the bottom of the relevant section — do not reorder or rewrite sections.
Alphabetical sorting is handled by the post-processor (index-append.py); the pre-commit hook only enforces the security policy.
Alphabetical sorting is handled automatically by the pre-commit hook.
Update `last_updated` in the YAML frontmatter on every edit.
Entry format: `- [[folder/slug]] — One-line summary. \`maturity: &lt;value&gt;\``
Entry format: `- [[folder/slug]] — One-line summary. \`maturity: <value>\``
---
## Sources (`wiki/sources/`)
*Ingested raw materials. One entry per processed source.*
_Ingested raw materials. One entry per processed source._
## Entities (`wiki/entities/`)
*People, organisations, tools, projects.*
_People, organisations, tools, projects._
## Concepts (`wiki/concepts/`)
*Theories, methodologies, patterns, architectural decisions.*
_Theories, methodologies, patterns, architectural decisions._
## Queries (`wiki/queries/`)
*Synthesised answers worth preserving. Archived explorations and analyses.*
_Synthesised answers worth preserving. Archived explorations and analyses._
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*Created automatically when the agent detects contradictions between sources.*
*Do not summarise entries here — list slugs only to avoid surfacing unresolved claims.*
*Remove entry once the operator has resolved and closed the corresponding PR.*
_Created automatically when the agent detects contradictions between sources._
_Do not summarise entries here — list slugs only to avoid surfacing unresolved claims._
_Remove entry once the operator has resolved and closed the corresponding PR._
## Private Synthesis (`wiki/private/`)
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
*Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo.*
*List slug names ONLY. Do not append summaries — prevents metadata leakage.*

View file

@ -22,13 +22,11 @@ Append new entries at the bottom using the format defined below.
## Entry Format
### Required header (enables shell parsing):
```text
## [YYYY-MM-DD] TYPE | Subject or title
```
### Required metadata block for all agent-generated entries:
```markdown
- run_id: `<short-uuid or session-identifier>`
- model: `<model-name-and-version>`
@ -40,7 +38,6 @@ Append new entries at the bottom using the format defined below.
**Valid TYPEs:** `INGEST` | `LINT` | `QUERY` | `CONFLICT` | `CONFIG` | `SECURITY`
**Parse examples:**
```bash
# Last 5 entries
grep "^## \[" wiki/log.md | tail -5
@ -57,7 +54,7 @@ grep "^## \[2026-05" wiki/log.md
## [{{DATE}}] CONFIG | Genome scaffolded
- run_id: `system-init`
- model: `scaffold.sh`
- context_read: _(none — initial scaffold)_
- model: `setup-knowledge-genome.sh`
- context_read: *(none — initial scaffold)*
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
- reasoning: Initial directory structure and encryption layer initialized by setup script.

View file

@ -1,56 +0,0 @@
# Tests
Deterministic tests for the mechanical layer of the framework — **no LLM, no GPU, no
network**. They simulate pi's output with fixtures and exercise the scripts directly, so
they run anywhere (laptop, CI, a git hook). They do **not** belong on vm101 or in n8n.
## What's covered
| File | Covers |
|------|--------|
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse + duplicate-slug advisory (edit-distance math, self-match skip, once-per-run) |
| `structure.bats` | `lib/structure.sh` report/sync |
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
`run-ingest.bats` auto-`skip`s if `jq` is missing; everything else needs only bash + git
(+ `python3` for the index tests).
## Install bats
```bash
# Debian/Ubuntu
sudo apt install bats
# or pinned, as a vendored submodule
git submodule add https://github.com/bats-core/bats-core.git test/bats
```
## Run
```bash
bats tests/ # whole suite
bats tests/lint.bats # one file
bats -f "sorted" tests/scripts.bats # filter by name
```
Each test builds its own throwaway genome under `BATS_TEST_TMPDIR` (auto-cleaned) with a
local bare git remote, so `open-pr.sh` run with `DRY_RUN=1` can branch/commit/push without
touching Forgejo.
## Makefile targets
```make
test:
@bats tests/
verify-structure:
@bash scripts/verify-genomes.sh
sync-structure:
@bash scripts/verify-genomes.sh --sync
```
## Note on `helpers.bash`
`FIXTURE_DIRS` in `helpers.bash` must match `GENOME_DIRS` in `lib/structure.sh`. If you
change the canonical layout, update both (the structure tests assume a clean baseline).

View file

@ -1,18 +0,0 @@
#!/usr/bin/env bats
# Per-test setup: load the shared helpers, then source the library under test
# so that `clean_start` is defined in the test's shell.
setup() {
  load 'helpers'
  # LIB_DIR (set by helpers.bash) already resolves to "${REPO_ROOT}/lib", so a
  # single source is enough. stderr is left visible so a broken or missing
  # library fails loudly instead of being silently retried from the same path.
  source "${LIB_DIR}/clean-start.sh"
}
# clean_start must leave the checkout identical to origin/<INGEST_BASE>:
# fast-forwarded to origin, tracked edits reverted, untracked files removed.
@test "clean_start: aligns to origin/base, reverts tracked edits, removes untracked" {
  G="$(make_fixture_genome)"; cd "$G"

  # Put origin one commit ahead of the local branch.
  echo "from origin" >> wiki/index.md
  git add -A && git commit -q -m "origin ahead" && git push -q
  git reset --hard HEAD~1                  # local BEHIND origin/main

  echo "local junk" >> wiki/log.md         # tracked edit, uncommitted
  echo "scratch" > scratch.txt             # genuinely untracked

  INGEST_BASE="main" clean_start

  git diff --quiet origin/main             # aligned to origin
  grep -q "from origin" wiki/index.md      # forwarded to origin state

  # A bare `! cmd` is exempt from errexit, so on a non-final line it can never
  # fail the test. Assert the negative through `run` + an explicit status check.
  run grep -q "local junk" wiki/log.md
  [ "$status" -ne 0 ]                      # tracked edit reverted

  [ ! -f scratch.txt ]                     # untracked removed
}

View file

@ -1,104 +0,0 @@
#!/usr/bin/env bash
# tests/helpers.bash — shared helpers for the bats suite.
REPO_ROOT="$(cd "${BATS_TEST_DIRNAME}/.." && pwd)"
LIB_DIR="${REPO_ROOT}/lib"
SKILL_SCRIPTS="${REPO_ROOT}/skills/ingest/scripts"
# Canonical dirs a fresh genome must contain (kept in sync with lib/structure.sh).
FIXTURE_DIRS=(
raw/articles raw/transcripts raw/code-packs raw/assets raw/private
wiki/sources wiki/entities wiki/concepts wiki/queries wiki/private
)
# make_fixture_genome → echoes the path to a throwaway genome checkout with a
# local bare remote, the full canonical structure, and rendered index/log.
# Uses BATS_TEST_TMPDIR so bats cleans it up automatically.
make_fixture_genome() {
local base; base="$(mktemp -d "${BATS_TEST_TMPDIR:-/tmp}/genome.XXXXXX")"
git init --bare -q "${base}/origin.git"
local g="${base}/genome"
local d
for d in "${FIXTURE_DIRS[@]}"; do mkdir -p "${g}/${d}"; touch "${g}/${d}/.gitkeep"; done
cat > "${g}/wiki/index.md" <<'EOF'
---
title: "Index — genome-test"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---
# Master Index: genome-test
---
## Sources (`wiki/sources/`)
*Ingested raw materials.*
## Entities (`wiki/entities/`)
*People, tools.*
## Concepts (`wiki/concepts/`)
*Patterns.*
## Queries (`wiki/queries/`)
*Answers.*
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*slugs only.*
## Private Synthesis (`wiki/private/`)
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
EOF
cat > "${g}/wiki/log.md" <<'EOF'
---
title: "Operations Log — genome-test"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---
# Operations Log
---
## [2026-01-01] CONFIG | scaffolded
- run_id: `init`
EOF
echo "raw test" > "${g}/raw/articles/test.md"
mkdir -p "${base}/nohooks"
(
cd "${g}"
git init -q
# Hermetic: ignore the user's global git config (signing, global hooks);
# otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here.
git config --local user.name "Framework Test"
git config --local user.email "test@genome.local"
git config --local commit.gpgsign false
git config --local core.hooksPath "${base}/nohooks"
git branch -M main
git remote add origin "${base}/origin.git"
git add .
git commit -q -m "chore: initial scaffold"
git push -q -u origin main
)
echo "${g}"
}

View file

@ -1,44 +0,0 @@
#!/usr/bin/env bats
# tests/index-remove.bats — index-append.py --remove mode.
setup() {
load 'helpers'
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
g_src="$(make_fixture_genome)"; export g="$g_src"
}
# --remove must drop exactly the requested wikilink entry and leave siblings alone.
@test "index --remove: deletes the matching entry, keeps the others" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/b]] — B. `maturity: draft`'
  # Precondition: both entries are present before the removal.
  grep -q 'sources/a' wiki/index.md
  grep -q 'sources/b' wiki/index.md

  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]

  # A bare `! grep` on a non-final line is exempt from errexit and would never
  # fail the test; check the negative through `run` and an explicit status.
  run grep -q '\[\[sources/a\]\]' wiki/index.md
  [ "$status" -ne 0 ]
  grep -q 'sources/b' wiki/index.md
}
@test "index --remove: idempotent when the entry is absent" {
cd "$g"
run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/does-not-exist'
[ "$status" -eq 0 ]
[[ "$output" == *'nothing to remove'* ]]
}
# Removing an entry must also refresh `last_updated` in the index frontmatter.
@test "index --remove: bumps last_updated" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  # set last_updated to an old date, then remove and check it moved
  sed -i 's/^last_updated:.*/last_updated: 2000-01-01/' wiki/index.md

  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]

  # `! grep` on a non-final line cannot fail a bats test (errexit ignores
  # negated commands), so the stale date is checked via `run` + status.
  run grep -q '2000-01-01' wiki/index.md
  [ "$status" -ne 0 ]
  grep -q "last_updated: $(date +%F)" wiki/index.md
}
@test "index --remove: rejects passing both --entry and --remove" {
cd "$g"
run python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — x' --remove 'sources/a'
[ "$status" -eq 2 ]
}

View file

@ -1,29 +0,0 @@
#!/usr/bin/env bats
setup() {
load 'helpers'
source "$LIB_DIR/output.sh"
source "$LIB_DIR/lint.sh"
}
@test "lint tolerates source_path/source_sha256 in source frontmatter" {
G="$(make_fixture_genome)"
mkdir -p "$G/wiki/sources"
cat > "$G/wiki/sources/test-source.md" <<'EOFMD'
---
title: "Test Source"
type: source
domain: genome-test
maturity: draft
last_updated: 2026-06-25
private: false
tags: [test]
source_path: raw/articles/test.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Test Source
body
EOFMD
run lint_markdown_file "$G/wiki/sources/test-source.md" genome-test
[ "$status" -eq 0 ]
}

View file

@ -1,148 +0,0 @@
#!/usr/bin/env bats
# tests/lint.bats — lib/lint.sh validators and the scoped-lint wrapper.
load helpers
setup() {
source "$LIB_DIR/output.sh"
source "$LIB_DIR/lint.sh"
}
# Fixture writer: creates a minimal wiki page with YAML frontmatter at <path>.
# Only `type` ($2) and `domain` ($3) are caller-controlled; the remaining fields
# are fixed (title "T", tags [x], maturity draft, private false) and
# `last_updated` is stamped with today's date. The heredoc delimiter is
# unquoted on purpose so $2, $3 and $(date +%F) are expanded.
write_page() { # write_page <path> <type> <domain>
cat > "$1" <<EOF
---
title: "T"
type: $2
domain: $3
tags: [x]
maturity: draft
last_updated: $(date +%F)
private: false
---
body
EOF
}
@test "lint_markdown_file: a clean page passes (0 errors)" {
G="$(make_fixture_genome)"
write_page "$G/wiki/sources/good.md" source genome-test
run lint_markdown_file "$G/wiki/sources/good.md" genome-test
[ "$status" -eq 0 ]
}
@test "lint_markdown_file: invalid type + wrong domain are caught" {
G="$(make_fixture_genome)"
write_page "$G/wiki/sources/bad.md" banana wrong-genome
run lint_markdown_file "$G/wiki/sources/bad.md" genome-test
[ "$status" -ne 0 ]
}
@test "check_privacy_consistency: a private/ file without 'private: true' fails" {
G="$(make_fixture_genome)"
# page sits in wiki/private/ but is flagged private: false → leak
write_page "$G/wiki/private/p.md" private genome-test
run check_privacy_consistency "$G/wiki/private/p.md"
[ "$status" -ne 0 ]
}
@test "check_page_size: a >800-line page errors" {
G="$(make_fixture_genome)"
{ write_page "$G/wiki/sources/big.md" source genome-test; yes "x" | head -n 850 >> "$G/wiki/sources/big.md"; }
run check_page_size "$G/wiki/sources/big.md"
[ "$status" -ne 0 ]
}
@test "scoped-lint: aggregates findings and exits non-zero on errors" {
G="$(make_fixture_genome)"
write_page "$G/wiki/sources/bad.md" banana wrong-genome
cd "$G"
export KG_LIB_DIR="$LIB_DIR"
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/bad.md
[ "$status" -ne 0 ]
[[ "$output" == *"error(s)"* ]]
}
@test "scoped-lint: a clean page passes (exit 0)" {
G="$(make_fixture_genome)"
write_page "$G/wiki/sources/good.md" source genome-test
cd "$G"
export KG_LIB_DIR="$LIB_DIR"
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
[ "$status" -eq 0 ]
}
# --- duplicate-slug advisory (check_duplicates + its distance helpers) --------
# These guard the dedup feature: correct edit-distance math, the warn-only
# contract, the exact-self-match skip (run-ingest appends new slugs to the
# index before lint runs), and that the advisory fires once per run, not once
# per file.
@test "levenshtein: identical strings have distance 0" {
run levenshtein cat cat
[ "$status" -eq 0 ]
[ "$output" -eq 0 ]
}
@test "levenshtein: kitten→sitting is 3 (textbook case)" {
run levenshtein kitten sitting
[ "$output" -eq 3 ]
}
@test "similarity: identical strings score 100" {
run similarity gpu-pricing gpu-pricing
[ "$output" -eq 100 ]
}
@test "check_duplicates: warns on a near-duplicate of an indexed concept" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routings.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" == *"≈"* ]]
[[ "$output" == *"llm-routings"* ]]
}
@test "check_duplicates: silent when the new slug is unlike anything indexed" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/budget-hardware.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
@test "check_duplicates: an exact self-match is not flagged (index already has the slug)" {
G="$(make_fixture_genome)"; cd "$G"
# run-ingest step 1 inserts this run's slug into the index BEFORE lint runs;
# the slug must not be reported as a duplicate of itself.
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routing.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
@test "scoped-lint: duplicate advisory fires once across multiple files, not per file" {
G="$(make_fixture_genome)"
write_page "$G/wiki/concepts/data-pipelines.md" concept genome-test
write_page "$G/wiki/concepts/other-topic.md" concept genome-test
printf -- '- [[concepts/data-pipeline]] — x\n' >> "$G/wiki/index.md"
cat > "$G/.ingest-manifest.json" <<'JSON'
{"raw_source":"src","pages":[
{"path":"wiki/concepts/data-pipelines.md","status":"created"},
{"path":"wiki/concepts/other-topic.md","status":"created"}
]}
JSON
cd "$G"
export KG_LIB_DIR="$LIB_DIR"
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test \
wiki/concepts/data-pipelines.md wiki/concepts/other-topic.md
[ "$status" -eq 0 ]
[ "$(grep -c "≈" <<< "$output")" -eq 1 ]
}

View file

@ -1,48 +0,0 @@
#!/usr/bin/env bats
# open-pr-rolling.bats — a re-ingest of the same slug updates the OPEN PR's branch
# (force-with-lease) instead of failing. Uses the local bare remote from make_fixture_genome.
load helpers
setup_file() { :; }
# Re-ingesting the same slug must rebuild the PR branch from base and push it
# over the previous one, rather than failing or stacking a second commit.
@test "open-pr: re-ingest of the same slug rolls the branch forward (force-with-lease)" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  G="$(make_fixture_genome)"; cd "$G"
  export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1

  # Keep the PR body under the per-test temp dir so bats cleans it up; a bare
  # `mktemp` would leave a stray file in the system temp dir on every run.
  body="$(mktemp "${BATS_TEST_TMPDIR:-/tmp}/pr-body.XXXXXX")"; echo body > "$body"

  # first ingest of slug x (v1)
  mkdir -p wiki/sources; printf 'v1\n' > wiki/sources/x.md
  run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
  [ "$status" -eq 0 ]
  git rev-parse --verify feat/ai-ingest-x
  first="$(git rev-parse feat/ai-ingest-x)"

  # simulate clean_start back to base, then an edited re-ingest (v2)
  git switch -q main; git reset -q --hard origin/main; git clean -q -fd
  printf 'v2-edited\n' > wiki/sources/x.md
  run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
  [ "$status" -eq 0 ]
  second="$(git rev-parse feat/ai-ingest-x)"

  # the branch was REBUILT from base (diverged), not appended: second is not a descendant of first
  run git merge-base --is-ancestor "$first" "$second"
  [ "$status" -ne 0 ]

  # origin received the v2 content (force-with-lease pushed the rebuilt branch)
  git fetch -q origin
  run git show "origin/feat/ai-ingest-x:wiki/sources/x.md"
  [ "$status" -eq 0 ]
  [[ "$output" == *"v2-edited"* ]]
}
# An explicit --branch (used by the prune flow) must still be honoured: the
# named branch has to exist locally after open-pr.sh runs.
@test "open-pr: prune branch override still works after the rolling change" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  G="$(make_fixture_genome)"; cd "$G"
  export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1

  # PR body lives under the per-test temp dir so it is removed with the test,
  # instead of leaking a file into the system temp dir via a bare `mktemp`.
  body="$(mktemp "${BATS_TEST_TMPDIR:-/tmp}/pr-body.XXXXXX")"; echo body > "$body"

  mkdir -p wiki/sources; printf 'p\n' > wiki/sources/p.md
  run bash "$SKILL_SCRIPTS/open-pr.sh" --branch "chore/prune-orphans-2026-06-30" \
    --title "chore: prune 1 orphaned source(s)" --body-file "$body" --base main
  [ "$status" -eq 0 ]
  git rev-parse --verify "chore/prune-orphans-2026-06-30"
}

View file

@ -1,38 +0,0 @@
#!/usr/bin/env bats
# Per-test setup for orphan-wiki.sh: builds a fixture genome, moves it under
# GENOMES_ROOT as "fixture-genome", and removes the default raw file so each
# test decides exactly which raw/source pairs exist.
setup() {
  load 'helpers'
  # Every test in this file pipes the script output into jq; skip instead of
  # failing when jq is unavailable, as the other jq-dependent suites do.
  command -v jq >/dev/null 2>&1 || skip "jq not installed"

  export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  export INGEST_BASE="main"
  export KG_LIB_DIR="${LIB_DIR}" # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR

  g_src="$(make_fixture_genome)"
  export g_name="fixture-genome"
  mv "$g_src" "${GENOMES_ROOT}/${g_name}"
  export g="${GENOMES_ROOT}/${g_name}"

  ( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
}
@test "orphan-wiki: no orphans when raw and source page match" {
mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
mkdir -p "${g}/wiki/sources"
printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}
@test "orphan-wiki: detects orphaned source page" {
mkdir -p "${g}/wiki/sources"
printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "orphan"'
}
@test "orphan-wiki: ignores legacy pages without source_path" {
mkdir -p "${g}/wiki/sources"
printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}

View file

@ -1,91 +0,0 @@
#!/usr/bin/env bats
# Per-test setup for pending-raw.sh: builds a fixture genome, moves it under
# GENOMES_ROOT as "fixture-genome", and starts from an empty raw/ baseline.
setup() {
  load 'helpers'
  # All assertions below parse the script's JSON with jq; skip rather than fail
  # when jq is missing, consistent with the other jq-dependent suites.
  command -v jq >/dev/null 2>&1 || skip "jq not installed"

  export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  export INGEST_BASE="main"
  export KG_LIB_DIR="${LIB_DIR}"

  g_src="$(make_fixture_genome)"
  export g_name="fixture-genome"
  mv "$g_src" "${GENOMES_ROOT}/${g_name}"
  export g="${GENOMES_ROOT}/${g_name}"

  # FIX: make_fixture_genome ships raw/articles/test.md with no source page, which would
  # otherwise count as a permanent 'new' and break every count assertion. Clear it so each
  # test controls exactly what is pending (verified: count base becomes 0).
  ( cd "$g" && rm -f raw/articles/test.md && git add -A \
    && git commit -q -m "test: clear default raw" && git push -q )
}
@test "pending-raw: detects a brand new raw file" {
echo "new content" > "${g}/raw/articles/new-file.md"
( cd "$g" && git add . && git commit -q -m "add raw" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].path == "raw/articles/new-file.md"'
echo "$output" | jq -e '.detail[0].reason == "new"'
}
@test "pending-raw: skips up-to-date files" {
echo "ok content" > "${g}/raw/articles/ok-file.md"
hash_ok="$(sha256sum "${g}/raw/articles/ok-file.md" | cut -d' ' -f1)"
cat > "${g}/wiki/sources/ok-file.md" <<FM
---
source_sha256: $hash_ok
---
FM
( cd "$g" && git add . && git commit -q -m "add ok" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
@test "pending-raw: flags modified files" {
echo "content v1" > "${g}/raw/articles/mod-file.md"
hash_v1="$(sha256sum "${g}/raw/articles/mod-file.md" | cut -d' ' -f1)"
cat > "${g}/wiki/sources/mod-file.md" <<FM
---
source_sha256: $hash_v1
---
FM
( cd "$g" && git add . && git commit -q -m "v1" && git push -q )
echo "content v2" > "${g}/raw/articles/mod-file.md"
( cd "$g" && git add . && git commit -q -m "v2" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "modified"'
}
@test "pending-raw: nested subdirectory yields prefixed slug" {
mkdir -p "${g}/raw/articles/sub-b"
echo "subdir content" > "${g}/raw/articles/sub-b/file.md"
( cd "$g" && git add . && git commit -q -m "subdir" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.files[0] == "raw/articles/sub-b/file.md"'
}
@test "pending-raw: excludes noise (.stfolder, .gitkeep)" {
touch "${g}/raw/articles/.gitkeep"
mkdir -p "${g}/raw/articles/.stfolder"
touch "${g}/raw/articles/.stfolder/sync.log"
( cd "$g" && git add . && git commit -q -m "noise" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
@test "pending-raw: reports both files on a slug collision" {
mkdir -p "${g}/raw/articles/cibo"
echo "c1" > "${g}/raw/articles/cibo-pane.md"
echo "c2" > "${g}/raw/articles/cibo/pane.md"
( cd "$g" && git add . && git commit -q -m "collision" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 2'
}

View file

@ -1,68 +0,0 @@
#!/usr/bin/env bats
# tests/permissions.bats
# Locks down the repo's file permissions, so that a `cp`/deploy preserves
# executability and the "Permission denied" failure (e.g. ingest-semantic.py
# launched directly) does not come back.
#
# Principle:
# - scripts with a shebang that are launched directly -> executable (git mode 100755)
# - *sourced* libraries (lib/, providers/, registry.sh, globals.env) -> NOT executable (100644)
# Repository root, resolved relative to this test file's directory.
REPO="${BATS_TEST_DIRNAME}/.."
# Entry points / executable scripts (all have a shebang; some are also run by hand for debugging)
EXECUTABLES=(
skills/ingest/scripts/ingest-semantic.py
skills/ingest/scripts/run-ingest.sh
skills/ingest/scripts/scoped-lint.sh
skills/ingest/scripts/open-pr.sh
skills/ingest/scripts/log-append.sh
skills/ingest/scripts/slug.sh
skills/ingest/scripts/pending-raw.sh
skills/ingest/scripts/orphan-wiki.sh
skills/ingest/scripts/index-append.py
scripts/add-genome.sh
scripts/setup.sh
scripts/setup-genomes.sh
scripts/setup-master.sh
scripts/lint-genomes.sh
scripts/verify-genomes.sh
)
# Sourced libraries: these must NOT be executable.
LIBRARIES=(
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/clean-start.sh
providers/forgejo.sh providers/github.sh
registry.sh globals.env
)
# git_mode <path>
# Prints the mode recorded in the git index for <path> inside $REPO (the first
# field of `git ls-files -s`). Prints nothing when the path is not tracked.
git_mode() {
  local tracked_path="$1"
  git -C "$REPO" ls-files -s -- "$tracked_path" \
    | awk '{print $1}'
}
@test "executable scripts have the +x bit on disk" {
for f in "${EXECUTABLES[@]}"; do
[ -x "${REPO}/${f}" ] || { echo "NON eseguibile su disco: $f"; return 1; }
done
}
@test "executable scripts are recorded 100755 in git" {
for f in "${EXECUTABLES[@]}"; do
mode="$(git_mode "$f")"
[ -n "$mode" ] || { echo "non tracciato in git: $f"; return 1; }
[ "$mode" = "100755" ] || { echo "git mode $mode (atteso 100755): $f"; return 1; }
done
}
# Every library in LIBRARIES that is tracked by git must be recorded with mode
# 100644; paths with no index entry are treated as optional and skipped.
@test "sourced libraries are NOT executable in git (100644)" {
for f in "${LIBRARIES[@]}"; do
mode="$(git_mode "$f")"
[ -z "$mode" ] && continue # untracked/optional -> skip
[ "$mode" = "100644" ] || { echo "git mode $mode (atteso 100644, e' sourced): $f"; return 1; }
done
}
@test "executable shell scripts pass bash -n (syntax)" {
for f in "${EXECUTABLES[@]}"; do
case "$f" in
*.sh) bash -n "${REPO}/${f}" || { echo "syntax error: $f"; return 1; } ;;
esac
done
}

View file

@ -1,75 +0,0 @@
#!/usr/bin/env bats
# raw-commit-quiet.bats — quiet-window behaviour of genome-raw-commit.sh.
# No Syncthing (no API key -> default author); pushes to a local bare repo via GENOME_PUSH_URL.
setup() {
SCRIPT="${BATS_TEST_DIRNAME}/../deploy/nexus/genome-raw-commit.sh"
export HOME="${BATS_TEST_TMPDIR}/home"; mkdir -p "$HOME/.config"
root="${BATS_TEST_TMPDIR}/vaults"; mkdir -p "$root"
bare="${BATS_TEST_TMPDIR}/origin.git"; git init -q --bare "$bare"
cat > "$HOME/.config/knowledge-genome.env" <<EOF
GENOME_VAULTS_ROOT=$root
GENOME_BASE=main
FORGEJO_USER=n8n-bot
FORGEJO_HOST=127.0.0.1:3001
FORGEJO_OWNER=Keru
COMMITTER_NAME=n8n-bot
COMMITTER_EMAIL=n8n-bot@homelab
DEFAULT_AUTHOR_NAME=Tester
DEFAULT_AUTHOR_EMAIL=tester@local
EOF
export g="genome-test"; export vault="$root/$g"
git clone -q "$bare" "$vault" 2>/dev/null || mkdir -p "$vault"
( cd "$vault"
git init -q 2>/dev/null || true
git config user.name n8n-bot; git config user.email n8n-bot@homelab; git config commit.gpgsign false
git checkout -q -b main 2>/dev/null || git switch -q main
mkdir -p raw/articles; echo seed > raw/articles/.gitkeep
git add -A; git commit -q -m init
git remote add origin "$bare" 2>/dev/null || git remote set-url origin "$bare"
git push -q -u origin main )
export GENOME_PUSH_URL="$bare" # test seam -> push to the local bare repo
}
# Snapshot the raw/ paths currently tracked in the vault's git index into
# ${BATS_TEST_TMPDIR}/f.txt, so tests can grep the committed file list.
files() {
  (
    cd "$vault" && git ls-files raw/
  ) > "${BATS_TEST_TMPDIR}/f.txt"
}
# Quiet window: a just-written raw file is held back, a settled one is
# committed; once the held file settles, the next run commits it too.
@test "raw-commit: holds a freshly-written raw, commits it once it settles" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  echo "still typing" > "$vault/raw/articles/hot.md"            # fresh -> hot
  echo "finished" > "$vault/raw/articles/stable.md"
  touch -d "10 minutes ago" "$vault/raw/articles/stable.md"     # settled

  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.status=="ok"'
  files
  grep -q 'raw/articles/stable.md' "${BATS_TEST_TMPDIR}/f.txt"  # committed
  # `! grep` on a non-final line is exempt from errexit and would never fail
  # the test, so the "held back" check goes through `run` + explicit status.
  run grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt"
  [ "$status" -ne 0 ]                                           # held back

  touch -d "10 minutes ago" "$vault/raw/articles/hot.md"        # now it settles
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  files
  grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt"     # now committed
}
@test "raw-commit: noop with held count while everything is still settling" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
echo "typing" > "$vault/raw/articles/wip.md" # fresh -> hot
run bash "$SCRIPT" "$g"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.status=="noop"'
echo "$output" | jq -e '.held==1'
}
@test "raw-commit: a deletion is committed immediately (not subject to the quiet window)" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
# commit a settled file first
echo done > "$vault/raw/articles/old.md"; touch -d "10 minutes ago" "$vault/raw/articles/old.md"
run bash "$SCRIPT" "$g"; [ "$status" -eq 0 ]
files; grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
# now delete it -> should commit the removal even though "just changed"
rm "$vault/raw/articles/old.md"
run bash "$SCRIPT" "$g"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.status=="ok"'
files; ! grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
}

View file

@ -1,211 +0,0 @@
#!/usr/bin/env bats
# tests/run-ingest.bats — end-to-end orchestrator test (no LLM, no network).
# Simulates pi's output (a source page + manifest) and runs the mechanical pass.
load helpers
@test "run-ingest: DRY_RUN end-to-end updates index + log and opens a dry PR" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
# --- simulate the semantic pass that pi would have done ---
cat > wiki/sources/test-source.md <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-03
private: false
---
body
EOF
cat > .ingest-manifest.json <<'EOF'
{
"raw_source": "raw/articles/test.md",
"model": "qwen3.5-9b",
"reasoning": "Ingested the test source.",
"pr_summary": "Ingest of test: 1 source page.",
"contradictions": "None",
"pages": [
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
]
}
EOF
export KG_LIB_DIR="$LIB_DIR"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
export DRY_RUN=1
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
[ "$status" -eq 0 ]
[[ "$output" == *'"status":"ok"'* ]]
[[ "$output" == *'"lint_clean":true'* ]]
[[ "$output" == *'"conflict":false'* ]]
# side effects on the working tree
grep -q 'sources/test-source' wiki/index.md
grep -q 'INGEST | test' wiki/log.md
git rev-parse --verify feat/ai-ingest-test
}
@test "run-ingest: a conflict page is labelled and lands in the Conflicts section" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
cat > wiki/queries/conflict-pricing-2026-06-03.md <<'EOF'
---
title: "Conflict: pricing"
type: conflict
domain: genome-test
maturity: draft
last_updated: 2026-06-03
private: false
---
conflict body
EOF
cat > .ingest-manifest.json <<'EOF'
{
"raw_source": "raw/articles/test.md",
"model": "m",
"reasoning": "Flagged a contradiction.",
"pr_summary": "Conflict on pricing.",
"contradictions": "1 conflict file created — pricing",
"pages": [
{"path": "wiki/queries/conflict-pricing-2026-06-03.md", "summary": "ignored", "maturity": "draft", "status": "created"}
]
}
EOF
export KG_LIB_DIR="$LIB_DIR"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
export DRY_RUN=1
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
[ "$status" -eq 0 ]
[[ "$output" == *'"conflict":true'* ]]
# listed by slug under the Conflicts section
grep -q 'queries/conflict-pricing-2026-06-03' wiki/index.md
}
@test "run-ingest: records INGEST_MODEL in the log (manifest carries no model field)" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
cat > wiki/sources/test-source.md <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-04
private: false
---
body
EOF
# New contract: NO "model" field — the orchestrator supplies it via INGEST_MODEL.
cat > .ingest-manifest.json <<'EOF'
{
"raw_source": "raw/articles/test.md",
"reasoning": "Ingested the test source.",
"pr_summary": "Ingest of test: 1 source page.",
"contradictions": "None",
"pages": [
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
]
}
EOF
export KG_LIB_DIR="$LIB_DIR"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
export INGEST_MODEL="qwen-test-tag"
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
[ "$status" -eq 0 ]
[[ "$output" == *'"status":"ok"'* ]]
grep -q 'qwen-test-tag' wiki/log.md
}
@test "run-ingest: rejects a manifest path that escapes wiki/ (traversal)" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
cat > .ingest-manifest.json <<'EOF'
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
"pages":[{"path":"wiki/../etc/passwd","summary":"x","maturity":"draft","status":"created"}] }
EOF
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
[ "$status" -ne 0 ]
[[ "$output" == *'"status":"error"'* ]]
}
@test "run-ingest: honours INGEST_BASE for the PR base" {
  # Skip (do not fail) when jq is not installed.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi

  G="$(make_fixture_genome)"
  cd "$G"

  # Source page referenced by the manifest.
  cat > wiki/sources/test-source.md <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-04
private: false
---
body
EOF

  cat > .ingest-manifest.json <<'EOF'
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
"pages":[{"path":"wiki/sources/test-source.md","summary":"s","maturity":"draft","status":"created"}] }
EOF

  export KG_LIB_DIR="$LIB_DIR"
  export FORGEJO_URL=http://x
  export FORGEJO_USER=u
  export FORGEJO_TOKEN=t
  export DRY_RUN=1
  # Non-default base branch name; the test expects it to appear in the output.
  export INGEST_BASE="develop"

  run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test

  [ "$status" -eq 0 ]
  [[ "$output" == *"develop"* ]]
}
@test "run-ingest: branch name matches slug.sh --raw for nested raw paths" {
  # Skip (do not fail) when jq is not installed.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi

  G="$(make_fixture_genome)"
  cd "$G"
  mkdir -p wiki/sources

  # Source page whose raw file lives one directory deeper (raw/articles/cibo/).
  cat > wiki/sources/cibo-il-pane.md <<'EOFMD'
---
title: "Il Pane"
type: source
domain: genome-test
tags: [cibo]
maturity: draft
last_updated: 2026-06-25
private: false
source_path: raw/articles/cibo/il-pane.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Il Pane
body
EOFMD

  # NOTE(review): this manifest still carries a "model" key — confirm that is
  # intentional (e.g. tolerated for backward compatibility).
  cat > .ingest-manifest.json <<'EOFJSON'
{
"raw_source": "raw/articles/cibo/il-pane.md",
"model": "qwen3.5-9b",
"reasoning": "Ingest.",
"pr_summary": "Ingest summary.",
"contradictions": "None",
"pages": [
{"path": "wiki/sources/cibo-il-pane.md", "summary": "Summary.", "maturity": "draft", "status": "created"}
]
}
EOFJSON

  export KG_LIB_DIR="$LIB_DIR"
  export FORGEJO_URL="http://forgejo.local"
  export FORGEJO_USER="u"
  export FORGEJO_TOKEN="t"
  export DRY_RUN=1

  run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test

  [ "$status" -eq 0 ]
  # The folder-prefixed slug must appear somewhere in the script output.
  [[ "$output" == *"cibo-il-pane"* ]]
}

View file

@ -1,68 +0,0 @@
#!/usr/bin/env bats
# tests/run-prune.bats — prune orphaned sources (no LLM, no network; DRY_RUN).
setup() {
  load 'helpers'

  # Script under test and the environment it is run with.
  export PRUNE="${SKILL_SCRIPTS}/run-prune.sh"
  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  export INGEST_BASE="main"
  export KG_LIB_DIR="${LIB_DIR}"
  export FORGEJO_URL="http://forgejo.local"
  export FORGEJO_USER="u"
  export FORGEJO_TOKEN="t"
  export DRY_RUN=1

  # Build a fixture genome and move it under GENOMES_ROOT with a fixed name.
  fixture_src="$(make_fixture_genome)"
  export g_name="fixture-genome"
  mv "$fixture_src" "${GENOMES_ROOT}/${g_name}"
  export g="${GENOMES_ROOT}/${g_name}"

  # Remove raw/articles/test.md from the fixture, then commit and push that
  # change. Done in a subshell so the caller's working directory is untouched.
  (
    cd "$g" &&
      rm -f raw/articles/test.md &&
      git add -A &&
      git commit -q -m clear &&
      git push -q
  )
}
@test "run-prune: removes only the orphaned source + its index entry, opens a dry PR" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  cd "$g"

  # kept: its raw file exists. orphan: its raw file (gone.md) is missing.
  echo content > raw/articles/kept.md
  h="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
  printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" > wiki/sources/kept.md
  printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md

  # Register both pages in the index so the removal of one entry is observable.
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/kept]] — kept. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/orphan]] — orphan. `maturity: draft`'
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"status":"ok"'* ]]
  [[ "$output" == *'"count":1'* ]]

  # only the orphan page is gone
  [ ! -f wiki/sources/orphan.md ]
  [ -f wiki/sources/kept.md ]

  # index reflects the removal.
  # A bare `! grep ...` that is not the last command of a Bats test never
  # fails the test (errexit ignores negated commands), so the absence check is
  # done via `run` and an explicit status: grep exits 1 when nothing matches.
  run grep -q 'sources/orphan' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q 'sources/kept' wiki/index.md

  # committed on a chore/ branch (NOT feat/ai-ingest-*)
  git rev-parse --verify "chore/prune-orphans-$(date +%F)"
}
@test "run-prune: no orphans -> count 0 and no PR/branch" {
  # Skip (do not fail) when jq is not installed.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi

  cd "$g"

  # A single source page whose raw file exists, with its sha256 recorded.
  echo content > raw/articles/kept.md
  kept_sha="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
  printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$kept_sha" > wiki/sources/kept.md
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"count":0'* ]]

  # With nothing to prune, today's chore/ branch must not exist.
  run git rev-parse --verify "chore/prune-orphans-$(date +%F)"
  [ "$status" -ne 0 ]
}
@test "run-prune: refuses when an orphan path would escape wiki/ (defense in depth)" {
  # NOTE(review): despite the title, the body only checks that one orphan page
  # (raw file missing) is pruned successfully; no escaping path and no page
  # without source_path is created here — confirm the intended scenario.
  if ! command -v jq >/dev/null 2>&1; then
    skip "jq not installed"
  fi

  cd "$g"

  # Page pointing at raw/articles/gone.md, which does not exist: the orphan.
  printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"

  [ "$status" -eq 0 ]
  [[ "$output" == *'"count":1'* ]]
  [ ! -f wiki/sources/orphan.md ]
}

View file

@ -1,102 +0,0 @@
#!/usr/bin/env bats
# tests/scripts.bats — unit tests for the deterministic skill scripts.
load helpers
@test "slug: path with extension and spaces" {
  # A raw path with spaces, capitals and a .md extension is expected to come
  # back as "my-test-source".
  raw_path="raw/articles/My Test Source.md"

  run bash "$SKILL_SCRIPTS/slug.sh" "$raw_path"

  [ "$status" -eq 0 ]
  [ "$output" = "my-test-source" ]
}
@test "slug: punctuation and repeats collapse to single hyphens" {
  # "!!" followed by a space must not yield consecutive hyphens.
  phrase="Qualche Concetto!! Strano"

  run bash "$SKILL_SCRIPTS/slug.sh" "$phrase"

  [ "$output" = "qualche-concetto-strano" ]
}
@test "log-append: appends a well-formed INGEST entry with a run_id" {
  G="$(make_fixture_genome)"
  cd "$G"

  # Arguments for one complete INGEST entry; no --run-id is passed.
  entry_args=(
    --type INGEST --subject foo --model m
    --context "[[raw/x]]" --output "[[sources/foo]]" --reasoning "why"
  )

  run bash "$SKILL_SCRIPTS/log-append.sh" "${entry_args[@]}"
  [ "$status" -eq 0 ]

  # The log must contain the heading fragment, a run_id line and the model line.
  grep -q "INGEST | foo" wiki/log.md
  grep -q '^- run_id: `' wiki/log.md
  grep -q '^- model: `m`' wiki/log.md
}
@test "log-append: rejects an invalid TYPE" {
  G="$(make_fixture_genome)"
  cd "$G"

  # BOGUS is the type under test; the script is expected to exit non-zero.
  bad_type="BOGUS"
  run bash "$SKILL_SCRIPTS/log-append.sh" --type "$bad_type" --subject foo

  [ "$status" -ne 0 ]
}
@test "index-append: inserts under the right section and keeps it sorted" {
  G="$(make_fixture_genome)"; cd "$G"

  # Insert in reverse alphabetical order so that sorting is actually exercised.
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/zzz]] — z. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/aaa]] — a. `maturity: draft`'

  # Line numbers of both entries in the index.
  a=$(grep -n 'sources/aaa' wiki/index.md | cut -d: -f1)
  z=$(grep -n 'sources/zzz' wiki/index.md | cut -d: -f1)

  # One assertion per line: in an `A && B` list a failing A does not trigger
  # errexit, so a combined check would not fail the test on its own.
  [ -n "$a" ]
  [ -n "$z" ]
  [ "$a" -lt "$z" ]
}
@test "index-append: bumps frontmatter last_updated to today" {
  G="$(make_fixture_genome)"
  cd "$G"

  python3 "$SKILL_SCRIPTS/index-append.py" --section Concepts --entry '- [[concepts/x]] — x. `maturity: draft`'

  # The index must contain a last_updated line carrying today's date (date +%F).
  grep -q "^last_updated: $(date +%F)$" wiki/index.md
}
@test "index-append: is idempotent for the same entry" {
  G="$(make_fixture_genome)"
  cd "$G"

  # Append the exact same entry twice.
  dup_entry='- [[sources/dup]] — d. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry "$dup_entry"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry "$dup_entry"

  # Exactly one line of the index may mention it.
  [ "$(grep -c 'sources/dup' wiki/index.md)" -eq 1 ]
}
@test "index-append: updates an existing entry by wikilink path (no duplicate)" {
  G="$(make_fixture_genome)"; cd "$G"

  # Same wikilink twice, with a different summary and maturity the second time.
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — old summary. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — new summary. `maturity: stable`'

  # One entry only, and it carries the new text.
  [ "$(grep -c 'sources/foo' wiki/index.md)" -eq 1 ]
  grep -q 'new summary' wiki/index.md

  # Absence check via `run` + explicit status instead of `! grep`: a negated
  # command only fails a Bats test when it happens to be the last command, so
  # this form stays correct if assertions are added after it. grep exits 1
  # when nothing matches.
  run grep -q 'old summary' wiki/index.md
  [ "$status" -eq 1 ]
}
@test "slug: refuses an all-symbols input (no empty slug)" {
  # The stem "!!!" contains no letters or digits.
  symbols_only="!!!.md"

  run bash "$SKILL_SCRIPTS/slug.sh" "$symbols_only"

  [ "$status" -ne 0 ]
  # Either nothing was printed, or what was printed has no branch prefix in it.
  [ -z "$output" ] || [[ "$output" != *"feat/ai-ingest-"* ]]
}
@test "index-append: self-heals a frontmatter missing last_updated" {
  G="$(make_fixture_genome)"
  cd "$G"

  # Overwrite the index with a frontmatter that has no last_updated key.
  cat > wiki/index.md <<'EOF'
---
title: "Index"
type: index
domain: genome-test
maturity: stable
private: false
---
# Index
## Sources (`wiki/sources/`)
*x*
EOF

  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'

  # After the append, a last_updated line with today's date must be present.
  grep -q "^last_updated: $(date +%F)$" wiki/index.md
}
@test "log-append: dedup on stable run_id prevents duplicate entries" {
  G="$(make_fixture_genome)"
  cd "$G"

  # The same explicit run id and the same arguments are used for both calls.
  fixed_run_id="test-stable-run-id-001"
  append_args=(
    --run-id "$fixed_run_id" --type INGEST --subject "test" --model "m"
    --context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
  )

  # First call: must succeed.
  run bash "$SKILL_SCRIPTS/log-append.sh" "${append_args[@]}"
  [ "$status" -eq 0 ]

  # Second call: must also succeed and say the entry is already present.
  run bash "$SKILL_SCRIPTS/log-append.sh" "${append_args[@]}"
  [ "$status" -eq 0 ]
  [[ "$output" == *"already present"* ]]

  # The run id may occur on exactly one line of the log. `|| true` keeps a
  # zero-match grep (exit 1) from aborting the assignment.
  occurrences="$(grep -cF "run_id: \`${fixed_run_id}\`" wiki/log.md || true)"
  [ "$occurrences" -eq 1 ]
}

View file

@ -1,30 +0,0 @@
#!/usr/bin/env bats
setup() {
  # Load the shared helpers, then record the path of the script under test.
  load 'helpers'
  SLUG="$SKILL_SCRIPTS/slug.sh"
}
@test "slug --raw: flat file remains unchanged" {
  # A file directly under raw/articles/ is expected to slug to its bare stem.
  flat_path="raw/articles/il-pane.md"

  run bash "$SLUG" --raw "$flat_path"

  [ "$status" -eq 0 ]
  [ "$output" = "il-pane" ]
}
@test "slug --raw: nested file gets folder prefix" {
  # A file in raw/articles/cibo/ is expected to slug to "cibo-" plus its stem.
  nested_path="raw/articles/cibo/il-pane.md"

  run bash "$SLUG" --raw "$nested_path"

  [ "$status" -eq 0 ]
  [ "$output" = "cibo-il-pane" ]
}
@test "slug --raw: distinct subdirs avoid collision" {
  # Same file name in two different sub-folders.
  slug_cibo="$(bash "$SLUG" --raw "raw/articles/cibo/pane.md")"
  slug_storia="$(bash "$SLUG" --raw "raw/articles/storia/pane.md")"

  # The two slugs must differ.
  [ "$slug_cibo" != "$slug_storia" ]
}
@test "slug --raw: Bash and Python-calling-bash agree (single implementation)" {
  raw="raw/articles/cibo/il-pane.md"

  # Slug obtained by invoking the script directly from bash.
  b="$(bash "$SLUG" --raw "$raw")"

  # Slug obtained by invoking the same script through Python's subprocess.
  # The script path and raw path are passed as argv instead of being spliced
  # into the Python source, so a path containing quotes or backslashes cannot
  # break or alter the Python program.
  p="$(python3 -c "import subprocess,sys;print(subprocess.check_output(['bash',sys.argv[1],'--raw',sys.argv[2]],text=True).strip())" "$SLUG" "$raw")"

  [ "$b" = "$p" ]
}

View file

@ -1,40 +0,0 @@
#!/usr/bin/env bats
# tests/structure.bats — canonical-structure verify/sync.
load helpers
setup() {
  # Source output.sh first, then structure.sh, both from LIB_DIR, so the
  # structure_* functions can be called directly by the tests.
  for lib in output.sh structure.sh; do
    source "$LIB_DIR/$lib"
  done
}
@test "structure_report: a full fixture has no drift" {
  # An untouched fixture genome must be reported with exit status 0.
  G="$(make_fixture_genome)"

  run structure_report "$G"

  [ "$status" -eq 0 ]
}
@test "structure_report: flags a missing canonical dir" {
  G="$(make_fixture_genome)"

  # Delete wiki/private from the fixture before reporting.
  missing_dir="$G/wiki/private"
  rm -rf "$missing_dir"

  run structure_report "$G"

  # Non-zero exit, and the report names the missing directory.
  [ "$status" -ne 0 ]
  [[ "$output" == *"wiki/private"* ]]
}
@test "structure_report: notes an extra dir but does not fail on it" {
  G="$(make_fixture_genome)"

  # Add a directory (wiki/experiments) on top of the fixture.
  extra_dir="$G/wiki/experiments"
  mkdir -p "$extra_dir"

  run structure_report "$G"

  # Still exit status 0, but the extra directory is mentioned in the output.
  [ "$status" -eq 0 ]
  [[ "$output" == *"experiments"* ]]
}
@test "structure_sync: creates missing dirs and is idempotent" {
  G="$(make_fixture_genome)"

  # Remove two directories from the fixture, then let structure_sync restore them.
  rm -rf "$G/wiki/private" "$G/raw/transcripts"
  structure_sync "$G"

  # One assertion per line: in an `A && B` list a failing A does not trigger
  # errexit, so a combined check would silently pass when the first directory
  # is missing.
  [ -d "$G/wiki/private" ]
  [ -d "$G/raw/transcripts" ]

  # After the sync the report must come back clean.
  run structure_report "$G"
  [ "$status" -eq 0 ]

  structure_sync "$G" # second run: nothing to do
}