Compare commits

...
Sign in to create a new pull request.

79 commits

Author SHA1 Message Date
06a16f1e81 Merge branch 'release/1.13.0' into main 2026-07-02 17:55:42 +02:00
a1f521d43f Update version 2026-07-02 17:55:32 +02:00
3c29d36656 feat(n8n): Harden all genome workflow JS nodes with defensive coding 2026-07-02 12:54:14 +02:00
0267661de8 Merge branch 'release/1.12.0' into main 2026-07-02 10:17:12 +02:00
15f6d7f4a7 Merge branch 'release/1.12.0' into develop 2026-07-02 10:17:12 +02:00
12fc807147 Update version 2026-07-02 10:17:00 +02:00
5cfdce66b8 Merge branch 'feature/quiet-window-rolling-PR' into develop 2026-07-02 10:16:15 +02:00
f27704f054 test(open-pr): add test for rolling PR branch updates 2026-07-02 10:16:02 +02:00
6c4468cc12 test(raw-commit): add tests for quiet window behavior 2026-07-02 10:16:02 +02:00
eed2251c28 feat(raw-commit): implement raw file quiet window for ingest 2026-07-02 10:16:02 +02:00
bf657c3708 feat(raw-commit): introduce GENOME_PUSH_URL test seam 2026-07-02 10:16:02 +02:00
601277fce4 Merge branch 'release/1.11.1' into main 2026-07-02 10:15:45 +02:00
4f9fbdec8b Merge branch 'release/1.11.1' into develop 2026-07-02 10:15:45 +02:00
fd5c07043c Update version 2026-07-02 10:15:32 +02:00
c4aba8507c feat(n8n): add workflow for genome content pruning on raw/ file deletion 2026-07-01 19:53:57 +02:00
1c6d7a4ecd feat(n8n): add 'pi prune' command to n8n-pi-wrap 2026-07-01 19:37:14 +02:00
066db00e89 test(ingest): add tests for run-prune.sh 2026-07-01 19:37:14 +02:00
c0659d5ce9 feat(ingest): introduce run-prune.sh for orphaned source removal 2026-07-01 19:37:14 +02:00
101eef98aa feat(infra): enhance open-pr.sh for rolling PRs and custom branches 2026-07-01 19:37:14 +02:00
8082bc3003 test(ingest): add tests for index-append.py --remove 2026-07-01 19:37:14 +02:00
990118de71 feat(ingest): allow index-append.py to remove entries 2026-07-01 19:37:14 +02:00
95b3866549 feat(ingest-semantic.py): Add pre-flight context window check 2026-07-01 18:24:25 +02:00
111ffd266a refactor(genome-ingest): Use run-one-ingest and improve filtering 2026-07-01 18:24:25 +02:00
88aa6a0798 refactor: Remove old raw-commit workflow 2026-07-01 18:24:25 +02:00
79c4f6dde2 feat: Implement PR review directive workflow 2026-07-01 18:24:25 +02:00
047330b384 feat: Introduce run-one-ingest sub-workflow 2026-07-01 18:24:25 +02:00
c8b45d537c feat: Add global n8n error handling workflow 2026-07-01 18:24:25 +02:00
799cc1f2e0 Merge branch 'release/1.11.0' into main 2026-06-30 10:33:14 +02:00
e57b811956 Merge branch 'release/1.11.0' into develop 2026-06-30 10:33:14 +02:00
02c3a39101 Update version 2026-06-30 10:33:01 +02:00
19529531cf feat(ingest): Implement 'pi ingest-rework' command 2026-06-30 10:32:18 +02:00
c5f113de52 feat(ingest): Add raw source marker to PR description 2026-06-30 10:32:18 +02:00
06e877ec1d feat(ingest): Allow semantic ingest to accept feedback 2026-06-30 10:32:18 +02:00
3360388f00 doc: Add PR review guidelines to template 2026-06-30 10:32:18 +02:00
5fb2baedef trivial: Fix indentation in n8n-pi-wrap service script 2026-06-30 10:32:18 +02:00
bab4e987c3 Merge branch 'release/1.10.0' into main 2026-06-27 17:20:40 +02:00
896e2adfa4 Merge branch 'release/1.10.0' into develop 2026-06-27 17:20:40 +02:00
8d0e1b91cc Update version 2026-06-27 17:20:20 +02:00
f962a7fb13 fix: Implement retry logic for Ollama model API calls 2026-06-27 17:18:32 +02:00
3c9b24c3b2 feature: Make ingest log entries idempotent with stable run_id 2026-06-27 17:18:22 +02:00
13d08866ef feature: Add pi orphan-wiki command to detect unlinked pages 2026-06-27 17:18:15 +02:00
31340f9d19 refactor: Integrate shared clean_start into ingest preparation 2026-06-27 17:18:09 +02:00
e0a39d8a15 refactor: Extract git clean start logic into shared library 2026-06-27 17:18:02 +02:00
502fdcc3bd Merge branch 'release/1.9.1' into main 2026-06-27 14:44:07 +02:00
aaec7002d7 Merge branch 'release/1.9.1' into develop 2026-06-27 14:44:07 +02:00
bea1b70af4 Update version 2026-06-27 14:43:57 +02:00
f74f755d87 Add Python cache files to .gitignore 2026-06-27 14:42:56 +02:00
cdee98d7fa Merge branch 'release/1.9.0' into main 2026-06-27 12:17:16 +02:00
4349b7f2a2 Merge branch 'release/1.9.0' into develop 2026-06-27 12:17:16 +02:00
1b19a03971 Update version 2026-06-27 12:17:03 +02:00
32c722a6ae chore: Integrate pi pending-raw command into n8n-pi-wrap 2026-06-27 12:16:05 +02:00
918d632b41 feat: Implement pending-raw.sh to identify changed sources 2026-06-27 12:16:05 +02:00
0ff98e1ebd feat: Enhance ingest to track raw source path and SHA256 hash 2026-06-27 12:16:05 +02:00
e62ad0c831 feat: Add slug.sh --raw for deterministic raw file slugging 2026-06-27 12:16:05 +02:00
eeb2c6d48a Merge branch 'release/1.8.1' into main 2026-06-25 17:38:05 +02:00
64125d91b4 Merge branch 'release/1.8.1' into develop 2026-06-25 17:38:05 +02:00
5fb6a09a96 Update version 2026-06-25 17:37:44 +02:00
e33f4653f1 test: improve hermetic git repository setup in helpers 2026-06-25 17:36:56 +02:00
79f0ef9ac6 refactor(tests): improve formatting of private wiki instructions 2026-06-25 17:36:56 +02:00
69c189955b Merge branch 'release/1.8.0' into main 2026-06-25 13:09:21 +02:00
efc86e11a4 Merge branch 'release/1.8.0' into develop 2026-06-25 13:09:21 +02:00
1cb3da41c3 Update version 2026-06-25 13:09:10 +02:00
940eb49a9e refactor: Standardize n8n workflow file naming to kebab-case 2026-06-25 13:08:29 +02:00
13b6d47574 feat(n8n): Add scheduled Genome raw file committer 2026-06-25 12:59:34 +02:00
2e557ad48f feat(n8n): Implement automated Genome ingest workflow 2026-06-25 12:59:34 +02:00
4462d18866 feat(n8n): Add manual Genome ingest workflow (scratch) 2026-06-25 12:59:34 +02:00
c77a2b02b9 deploy: nexus: Add Syncthing folder marker for raw vault 2026-06-25 12:55:45 +02:00
5339a7018b Merge branch 'release/1.7.0' into main 2026-06-21 19:25:20 +02:00
bcfb618869 Merge branch 'release/1.7.0' into develop 2026-06-21 19:25:20 +02:00
52db07c9b1 Update version 2026-06-21 19:25:08 +02:00
23ede343df deploy/n8n: Improve robustness of diff base resolution for ingestion 2026-06-21 19:24:28 +02:00
a3de9f673f deploy/nexus: Refactor raw commit push strategy for robustness 2026-06-21 19:24:28 +02:00
813961544e Merge branch 'release/1.6.3' into main 2026-06-21 15:53:17 +02:00
d26fdc857a Merge branch 'release/1.6.3' into develop 2026-06-21 15:53:17 +02:00
0a98e5f2ba Update version 2026-06-21 15:53:04 +02:00
9ec3d9d785 feat: Add 'pi changed-raw' command to list changed raw files 2026-06-21 15:52:22 +02:00
0912ef457a Merge branch 'release/1.6.2' into main 2026-06-21 15:35:29 +02:00
05d7b99807 Merge branch 'release/1.6.2' into develop 2026-06-21 15:35:29 +02:00
d37c2d51c2 Merge branch 'release/1.6.1' into main 2026-06-21 14:34:30 +02:00
35 changed files with 3234 additions and 155 deletions

2
.gitignore vendored
View file

@ -8,3 +8,5 @@
/master-knowledge-genome/ /master-knowledge-genome/
/keys/ /keys/
*.key *.key
__pycache__/
*.pyc

View file

@ -1,5 +1,5 @@
# ============================================================================= # =============================================================================
# Knowledge Genome - Makefile v. 1.6.2 # Knowledge Genome - Makefile v. 1.13.0
# Orchestrates the setup and management of the knowledge base. # Orchestrates the setup and management of the knowledge base.
# ============================================================================= # =============================================================================

View file

@ -0,0 +1,773 @@
{
"name": "Genome: PR review",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-pr-review-23319ab8687b16f10e0f278fb920c112",
"options": {}
},
"id": "58df1ca9-e48e-4834-b231-d97c974cd01b",
"name": "Webhook PR Review",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
2272,
1344
],
"webhookId": "61ff3a5baa304571"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// THE only parser of the review side: parse the directive, VALIDATE, prepare the rework payload.\n// Security: only allow-listed maintainers may drive the gate; destructive directives require a\n// feat/ai-ingest-* branch on the expected base; raw_source is recovered from a machine-readable\n// marker that run-ingest.sh writes into the PR body.\nconst ALLOWED_SENDERS = ['Keru']; // <-- maintainers allowed to issue directives\nconst BASE = 'develop';\n\n// n8n Run Once for Each Item: $json is the current webhook payload\nconst j = $json.body || $json;\nif (!j || typeof j !== 'object') {\n return { directive: 'INVALID', reason: 'malformed webhook payload' };\n}\n\nconst review = j.review || null;\nconst comment = j.comment || null;\nconst pr = j.pull_request || j.issue || null;\n\n// Extract directive text from review content or comment body\nconst body = String(\n (review && review.content) ||\n (comment && comment.body) ||\n ''\n);\nconst sender = String((j.sender && j.sender.login) || 'unknown');\n\n// Match directive at the start of the text (case-insensitive)\nconst m = body.match(/^\\s*(REWORK|RESTART|REVERT\\s+\\d+|SPLIT|REJECT|MERGE)\\s*:?/i);\nif (!m) return { directive: 'NONE' };\n\nconst headTok = m[1].toUpperCase().replace(/\\s+/g, ' ');\nconst directive = headTok.startsWith('REVERT') ? 'REVERT' : headTok;\nconst feedback = body.slice(m[0].length).trim() || '(nessun dettaglio fornito)';\n\n// Extract PR metadata safely\nconst prNumber = (pr && pr.number) || null;\nconst branch = (pr && pr.head && pr.head.ref) || null;\nconst base = (pr && pr.base && pr.base.ref) || null;\nconst repo = (pr && pr.base && pr.base.repo && pr.base.repo.name) ||\n (j.repository && j.repository.name) || null;\nconst owner = (pr && pr.base && pr.base.repo && pr.base.repo.owner && pr.base.repo.owner.login) ||\n (j.repository && j.repository.owner && j.repository.owner.login) || null;\nconst prBody = (pr && pr.body) || (j.issue && j.issue.body) || '';\n\n// Recover raw_source from machine-readable marker: <!-- kg:raw=path -->\n// Restricted to valid path characters, no spaces, no HTML breaking\nconst rawMatch = prBody.match(/<!--\\s*kg:raw=([^\\s>]+)\\s*-->/);\nconst raw = rawMatch ? rawMatch[1] : null;\n\n// REVERT is reserved for future Step 7 implementation\nif (directive === 'REVERT') {\n return { directive: 'NONE', note: 'REVERT reserved for Step 7' };\n}\n\n// Authorization gate\nif (!ALLOWED_SENDERS.includes(sender)) {\n return {\n directive: 'UNAUTHORIZED',\n attempted: directive,\n sender,\n prNumber,\n owner,\n repo\n };\n}\n\n// Validation rules\nconst okGenome = !!repo && /^[a-z0-9][a-z0-9-]{0,63}$/.test(repo);\nconst okPr = !!prNumber && /^[0-9]+$/.test(String(prNumber));\nconst okBranch = !!branch && /^feat\\/ai-ingest-[a-z0-9-]+$/.test(branch);\nconst okBase = base === BASE;\nconst okRaw = (directive === 'MERGE')\n ? true\n : (!!raw && raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw));\n\nif (!okGenome || !okPr || !okBase || (directive !== 'MERGE' && !okBranch) || !okRaw) {\n return {\n directive: 'INVALID',\n attempted: directive,\n prNumber,\n owner,\n repo,\n why: { okGenome, okPr, okBranch, okBase, okRaw }\n };\n}\n\n// Encode feedback for safe transport through SSH/scripts\nconst feedback_b64 = Buffer.from(feedback, 'utf8').toString('base64');\n\nreturn {\n directive,\n prNumber,\n branch,\n base,\n repo,\n owner,\n sender,\n raw,\n feedback,\n feedback_b64\n};"
},
"id": "c668f595-0a28-4bd3-9125-22fee9350d78",
"name": "Parse & validate",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2496,
1344
]
},
{
"parameters": {
"rules": {
"values": [
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "MERGE",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "4960f0868bc54687"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REWORK",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "34002fdd92834d38"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "RESTART",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "d412a74e32ac4f0c"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "SPLIT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "c0810b33fa474ca0"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "531039e699c44cea"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "UNAUTHORIZED",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "cfbd691d2e9a4c2a"
}
],
"combinator": "and"
}
},
{
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"leftValue": "={{ $json.directive }}",
"rightValue": "INVALID",
"operator": {
"type": "string",
"operation": "equals"
},
"id": "251f5b7beea6424a"
}
],
"combinator": "and"
}
}
]
},
"options": {
"fallbackOutput": "none"
}
},
"id": "489736cc-bab6-4664-8087-91b6d9ff31ad",
"name": "Switch",
"type": "n8n-nodes-base.switch",
"typeVersion": 3.4,
"position": [
2736,
1344
]
},
{
"parameters": {
"method": "POST",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}/merge",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"Do\": \"merge\"\n}",
"options": {
"timeout": 15000
}
},
"id": "3440cb8d-ae4c-4523-ae13-ee5667d24252",
"name": "Forgejo Merge PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2976,
1104
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "cc369b5fc3d246a4",
"leftValue": "={{ $('Parse & validate').first().json.branch }}",
"rightValue": "feat/ai-ingest-",
"operator": {
"type": "string",
"operation": "startsWith"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "e6d45fce-83d0-44ca-9fa4-86558fec1a0f",
"name": "Guardia feat/",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2976,
1328
]
},
{
"parameters": {
"method": "PATCH",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"state\": \"closed\"\n}",
"options": {
"timeout": 15000
}
},
"id": "1601f705-c758-4df6-a3bd-e3ac2e202c94",
"name": "Forgejo Close PR",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3200,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"method": "DELETE",
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/branches/{{ encodeURIComponent($('Parse & validate').first().json.branch) }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"options": {
"timeout": 15000
}
},
"id": "c2ff2247-efe1-4809-a435-9973188d61bb",
"name": "Forgejo Delete Branch",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3424,
1296
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
}
}
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "55cf6c2a6c7d4d79",
"leftValue": "={{ $('Parse & validate').first().json.directive }}",
"rightValue": "REJECT",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "a1dbbc06-555d-4a1d-8fbf-ee75f617e98a",
"name": "E' REJECT?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
3648,
1296
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "7fc3e648-4712-4eef-a6f3-12c8805ade1f",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3648,
1168
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $('Parse & validate').first().json.repo }}",
"raw": "={{ $('Parse & validate').first().json.raw }}",
"mode": "rework",
"feedback_b64": "={{ $('Parse & validate').first().json.feedback_b64 }}",
"reason": "={{ $('Parse & validate').first().json.directive }}",
"prevPr": "={{ String($('Parse & validate').first().json.prNumber || '') }}"
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "9704c050-5c63-49fd-a26d-efbae9d92175",
"name": "Run one ingest (rework)",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
3856,
1168
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// merged (MERGE) / closed (REJECT). The HTTP node replaced $json with the API response,\n// so we read context from the parser via node reference (single review -> .first() is safe).\n// Fallback values prevent crashes if the parser node is unreachable.\nconst p = $('Parse & validate').first().json || {};\nconst repo = p.repo || 'unknown';\nconst owner = p.owner || 'unknown';\nconst prNumber = p.prNumber || '?';\nconst base = p.base || 'develop';\nconst branch = p.branch || 'unknown';\nconst sender = p.sender || 'unknown';\nconst directive = p.directive || 'UNKNOWN';\nconst feedback = p.feedback || '';\n\nconst repoUrl = (owner && repo && repo !== 'unknown')\n ? `https://git.keruhomelab.com/${owner}/${repo}`\n : '';\nconst prUrl = (repoUrl && prNumber !== '?')\n ? `${repoUrl}/pulls/${prNumber}`\n : '';\n\nlet n;\nif (directive === 'MERGE') {\n n = {\n topic: 'genome-ingest',\n title: `${repo} · PR #${prNumber} mergiata`,\n priority: 'default',\n tags: 'twisted_rightwards_arrows',\n click: prUrl,\n actions: `view, Vedi la PR, ${prUrl}`,\n body: `PR #${prNumber} mergiata su \\`${base}\\` da **${sender}**.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${repo} · PR #${prNumber} chiusa`,\n priority: 'default',\n tags: 'wastebasket',\n click: repoUrl,\n actions: '',\n body: `**REJECT** di **${sender}**: PR #${prNumber} chiusa e branch \\`${branch}\\` rimosso. Nessun nuovo tentativo.\\n> ${feedback}`\n };\n}\n\nreturn n;"
},
"id": "1ce634fd-d402-4a84-9ba1-04673ddffce9",
"name": "Build ntfy action",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3856,
1344
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Security / near-miss: unauthorized sender, invalid directive, or the feat/ guard.\n// On all three paths Switch/Guardia pass the parser output through, so $json carries the directive + context.\nconst d = $json || {};\nconst directive = d.directive || 'UNKNOWN';\nconst attempted = d.attempted || directive;\nconst sender = d.sender || 'unknown';\nconst prNumber = d.prNumber || '?';\nconst branch = d.branch || 'unknown';\nconst owner = d.owner || '';\nconst repo = d.repo || '';\n\nconst repoUrl = (owner && repo) ? `https://git.keruhomelab.com/${owner}/${repo}` : '';\n\nlet n;\nif (directive === 'UNAUTHORIZED') {\n n = {\n topic: 'genome-ingest',\n title: `Sicurezza · direttiva non autorizzata`,\n priority: 'high',\n tags: 'no_entry',\n click: repoUrl,\n actions: '',\n body: `**${sender}** ha tentato \\`${attempted}\\` su PR #${prNumber}, ma non è tra i maintainer autorizzati. **Nessuna azione** eseguita.`\n };\n} else if (directive === 'INVALID') {\n n = {\n topic: 'genome-ingest',\n title: `Direttiva non applicata`,\n priority: 'low',\n tags: 'information_source',\n click: repoUrl,\n actions: '',\n body: `\\`${attempted}\\` su PR #${prNumber} ignorata: precondizioni non soddisfatte (branch / base / marker raw).`\n };\n} else {\n // Guardia feat/ false branch: destructive action on a non-feat/ai-ingest-* branch\n n = {\n topic: 'genome-ingest',\n title: `Sicurezza · branch protetto`,\n priority: 'high',\n tags: 'no_entry',\n click: repoUrl,\n actions: '',\n body: `Rifiutata azione distruttiva (\\`${attempted || directive}\\`) sul branch \\`${branch}\\`: non è un \\`feat/ai-ingest-*\\`. **Nessuna modifica.**`\n };\n}\n\nreturn n;"
},
"id": "32b16592-5126-4cc2-a3f2-d1bda58ac724",
"name": "Build ntfy sicurezza",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3200,
1536
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "4d45b486-de42-4c7f-be21-b5bfbc05fd44",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
4080,
1424
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook PR Review": {
"main": [
[
{
"node": "Parse & validate",
"type": "main",
"index": 0
}
]
]
},
"Parse & validate": {
"main": [
[
{
"node": "Switch",
"type": "main",
"index": 0
}
]
]
},
"Switch": {
"main": [
[
{
"node": "Forgejo Merge PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Guardia feat/",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Merge PR": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
]
]
},
"Guardia feat/": {
"main": [
[
{
"node": "Forgejo Close PR",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy sicurezza",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Close PR": {
"main": [
[
{
"node": "Forgejo Delete Branch",
"type": "main",
"index": 0
}
]
]
},
"Forgejo Delete Branch": {
"main": [
[
{
"node": "E' REJECT?",
"type": "main",
"index": 0
}
]
]
},
"E' REJECT?": {
"main": [
[
{
"node": "Build ntfy action",
"type": "main",
"index": 0
}
],
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "Run one ingest (rework)",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy action": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy sicurezza": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "22998a54-cd9a-4b57-9c80-df97085a997c",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "iho7kFQsXbGIxG7P",
"tags": []
}

View file

@ -0,0 +1,170 @@
{
"name": "Genome: ingest MANUALE (scratch)",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
0,
0
],
"id": "2101e704-6275-419d-9963-29a142e5811c",
"name": "Esegui manualmente"
},
{
"parameters": {
"authentication": "privateKey",
"command": "ssh vm101 'pi ingest genome-test raw/articles/il-grano-saraceno.md'"
},
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
224,
0
],
"id": "8ade2def-2d53-4860-88a5-2ca734c6e54a",
"name": "SSH: pi ingest (manuale)",
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// ultima riga JSON di run-ingest.sh (ha 'run_id=' davanti)\nconst out = ($json.stdout || '').trim();\nconst line = out.split('\\n').filter(l => l.trim().startsWith('{')).pop();\nif (!line) return { status: 'error', reason: 'nessuna riga JSON run-ingest', raw: out };\ntry { return JSON.parse(line); } catch (e) { return { status: 'error', reason: 'JSON non parsabile', raw: line }; }"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
0
],
"id": "d84cdeaf-612a-454c-8b4d-31824ae6d71e",
"name": "Parse ingest"
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "const d=$json;let n;\nif (d.status==='ok'){\n n={title:`Ingest ${d.slug}: PR aperta`,priority:'default',tags:'inbox_tray',\n body:`\\u2705 ${d.slug}: PR aperta (lint ${d.lint_clean?'clean':'KO'}${d.conflict?', CONFLITTO':''})\\n\\n\\ud83d\\udd17 ${d.pr_url}`};\n} else if (d.status==='pr_failed'){\n n={title:`Ingest ${d.slug}: PR FALLITA`,priority:'high',tags:'warning',\n body:`\\u26a0\\ufe0f ${d.slug}: semantic/lint ok ma PR non aperta.\\n\\n${(d.detail||'').split('\\n')[0]}`};\n} else {\n n={title:'Ingest: ERRORE',priority:'high',tags:'rotating_light',\n body:`\\u274c ${d.reason||'errore'}\\n\\n${(d.raw||'').slice(0,300)}`};\n}\nreturn n;"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
672,
0
],
"id": "eadd9275-b38c-416b-b15e-0999f70a05fb",
"name": "Build ntfy"
},
{
"parameters": {
"method": "POST",
"url": "http://ntfy/homelab-genome",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
880,
0
],
"id": "63ab577b-893a-4b3d-8f13-b377be778099",
"name": "ntfy: send notification",
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Esegui manualmente": {
"main": [
[
{
"node": "SSH: pi ingest (manuale)",
"type": "main",
"index": 0
}
]
]
},
"SSH: pi ingest (manuale)": {
"main": [
[
{
"node": "Parse ingest",
"type": "main",
"index": 0
}
]
]
},
"Parse ingest": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send notification",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "df06ce3b-1ea8-43be-91ff-02c77972cfe2",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "RNoSaRLYG9vcMn6M",
"tags": []
}

View file

@ -0,0 +1,419 @@
{
"name": "Genome: ingest",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push",
"options": {}
},
"id": "8c44b478-1a95-4c3b-8ac1-d7c57e228414",
"name": "Webhook",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1520,
1728
],
"webhookId": "cf215f5d31e04dd2"
},
{
"parameters": {
"jsCode": "// Bell filter: proceed ONLY on develop pushes that actually touch raw/.\n// Returning [] stops the flow (no node needed).\n// Performance: never wake vm101 for wiki-only pushes (e.g. an ingest PR merged back to develop).\n// pending-raw remains the source of truth.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository && b.repository.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect all touched paths safely (added, modified, removed)\nconst commits = Array.isArray(b.commits) ? b.commits : [];\nconst touched = [];\nfor (const c of commits) {\n if (!c || typeof c !== 'object') continue;\n for (const key of ['added', 'modified', 'removed']) {\n const list = c[key];\n if (!Array.isArray(list)) continue;\n for (const p of list) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n touched.push(p);\n }\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was touched\nif (touched.length === 0) return [];\n\nreturn [{ json: { genome, touchedCount: touched.length } }];"
},
"id": "604787c7-4e83-468e-9a98-3ac084203040",
"name": "Gate push",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1744,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "f93073a3-7753-4ce1-9ef1-2a0c16386543",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1952,
1728
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi pending-raw {{ $('Gate push').first().json.genome }}'"
},
"id": "876dbdaf-3620-4c2c-a65b-336f0b11198c",
"name": "SSH: pending-raw",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2176,
1728
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Parse pending-raw -> one item per raw, carrying everything run-one-ingest needs.\n// Unsafe filenames (spaces / odd chars) are NOT ingested -> a 'badname' item -> ntfy.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\ntry {\n d = JSON.parse(out);\n} catch (e) {\n return [{ json: { _kind: 'error', reason: 'pending-raw non parsabile', raw: out.substring(0, 500) } }];\n}\n\nif (!d || typeof d !== 'object') {\n return [{ json: { _kind: 'error', reason: 'pending-raw non è un oggetto JSON', raw: out.substring(0, 500) } }];\n}\n\nconst files = Array.isArray(d.files) ? d.files : [];\nif (files.length === 0) return [];\n\n// Build reason map from detail array\nconst why = {};\nfor (const it of (Array.isArray(d.detail) ? d.detail : [])) {\n if (it && typeof it.path === 'string' && typeof it.reason === 'string') {\n why[it.path] = it.reason;\n }\n}\n\nconst SAFE = /^[A-Za-z0-9._\\/-]+$/;\nconst items = [];\nfor (const raw of files) {\n if (typeof raw !== 'string') {\n items.push({ json: { _kind: 'badname', genome: d.genome, raw: String(raw),\n hint: String(raw).replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid' } });\n continue;\n }\n if (SAFE.test(raw)) {\n items.push({ json: { _kind: 'ingest', genome: d.genome, raw,\n mode: 'ingest', feedback_b64: '', reason: why[raw] || 'new', prevPr: '' } });\n } else {\n const hint = raw.replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid';\n items.push({ json: { _kind: 'badname', genome: d.genome, raw, hint } });\n }\n}\nreturn items;"
},
"id": "f5bbbed3-222e-4129-a764-7cf47d69c5ce",
"name": "Split raw files",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2400,
1728
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict",
"version": 2
},
"conditions": [
{
"id": "cbacf5d98d594ba5",
"leftValue": "={{ $json._kind }}",
"rightValue": "ingest",
"operator": {
"type": "string",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "5398e2c4-c7ca-4ca4-a2d7-e75077453b7c",
"name": "Nome valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
2624,
1728
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "VIi2ovb5gJxNJLbg",
"mode": "list",
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
"cachedResultName": "Genome: run-one-ingest"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"genome": "={{ $json.genome }}",
"raw": "={{ $json.raw }}",
"mode": "ingest",
"feedback_b64": "",
"reason": "={{ $json.reason }}",
"prevPr": ""
},
"matchingColumns": [],
"schema": [
{
"id": "genome",
"displayName": "genome",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "raw",
"displayName": "raw",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "feedback_b64",
"displayName": "feedback_b64",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "reason",
"displayName": "reason",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
},
{
"id": "prevPr",
"displayName": "prevPr",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {
"waitForSubWorkflow": false
}
},
"id": "0f274662-62bb-448b-ae4b-47e4bbcfd35a",
"name": "Run one ingest",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
2832,
1616
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Build ntfy notification for files with invalid names.\n// Run Once for Each Item: $json is the current badname item.\nconst d = $json || {};\nconst genome = d.genome || 'unknown';\nconst raw = String(d.raw || 'unknown');\nconst hint = String(d.hint || 'unknown');\n\n// Escape backticks to avoid breaking markdown\nconst rawEsc = raw.replace(/`/g, '\\`');\nconst hintEsc = hint.replace(/`/g, '\\`');\n\nreturn {\n topic: 'genome-ingest',\n title: `${genome} · file da rinominare`,\n priority: 'high',\n tags: 'warning',\n click: '',\n actions: '',\n body: `Il file \\`${rawEsc}\\` ha spazi o caratteri non ammessi e **non** è stato ingerito.\\nRinominalo in: \\`${hintEsc}\\``\n};"
},
"id": "0f785bcd-cdc6-4dac-9ced-1c5cfa3453dc",
"name": "Build ntfy badname",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2832,
1840
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "9cd2bde3-6846-4855-ad01-e3a4cdbce208",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
3056,
1840
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook": {
"main": [
[
{
"node": "Gate push",
"type": "main",
"index": 0
}
]
]
},
"Gate push": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: pending-raw",
"type": "main",
"index": 0
}
]
]
},
"SSH: pending-raw": {
"main": [
[
{
"node": "Split raw files",
"type": "main",
"index": 0
}
]
]
},
"Split raw files": {
"main": [
[
{
"node": "Nome valido?",
"type": "main",
"index": 0
}
]
]
},
"Nome valido?": {
"main": [
[
{
"node": "Run one ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy badname",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy badname": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "63863925-606f-4200-824c-52f1919f2bb1",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "mUJUuQxcDiiPWcUE",
"tags": []
}

View file

@ -0,0 +1,128 @@
{
"name": "Genome: on-error",
"nodes": [
{
"parameters": {},
"id": "f715ed51-95e6-475f-8aa5-d0df531cc7cf",
"name": "Error Trigger",
"type": "n8n-nodes-base.errorTrigger",
"typeVersion": 1,
"position": [
688,
-32
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Global error handler: set this workflow as the \"Error Workflow\" in each genome workflow's\n// Settings. Catches ANY node failure (SSH down, Forgejo 4xx/5xx, etc.) and notifies once.\n// Run Once for Each Item: $json is the error trigger payload.\nconst e = $json.execution || {};\nconst w = $json.workflow || {};\n\n// Safely extract error message from various shapes\nconst rawMsg = (e.error && (e.error.message || e.error.description)) || 'errore sconosciuto';\nconst msg = String(rawMsg).trim();\n\nconst lastNode = e.lastNodeExecuted ? ` (nodo: ${e.lastNodeExecuted})` : '';\nconst workflowName = w.name || 'n8n';\nconst executionUrl = e.url || '';\n\n// Escape markdown to avoid breaking the notification body\nconst msgEsc = msg.replace(/`/g, '\\`').replace(/\\n/g, '\\n');\n\nreturn {\n topic: 'genome-ingest',\n title: `Workflow KO · ${workflowName}`,\n priority: 'high',\n tags: 'rotating_light',\n click: executionUrl,\n actions: executionUrl ? `view, Apri l'esecuzione, ${executionUrl}` : '',\n body: `**${workflowName}** è fallito${lastNode}.\\n\\n${msgEsc}`\n};"
},
"id": "dd39bc0f-918a-4645-8f04-540ac9089311",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
928,
-32
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "a9ee90f3-d7fe-445d-96af-12caef46473f",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1152,
-32
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Error Trigger": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate"
},
"versionId": "036161c9-c934-474e-9b4f-634259f2a866",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "7Vws3gCX3QnjM3oD",
"tags": []
}

View file

@ -0,0 +1,326 @@
{
"name": "Genome: prune",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "forgejo-push-prune",
"options": {}
},
"id": "d31388b9-c6d6-4f28-9a6c-b381922bf5e0",
"name": "Webhook prune",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2.1,
"position": [
1232,
-64
],
"webhookId": "d6ac11900058434e"
},
{
"parameters": {
"jsCode": "// Gate: proceed ONLY on develop pushes that REMOVED at least one file under raw/.\n// Additions/modifications are handled by the ingest flow; this flow reacts to deletions only.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository?.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect removed paths safely\nconst removed = [];\nfor (const c of (b.commits || [])) {\n if (!c || !Array.isArray(c.removed)) continue;\n for (const p of c.removed) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n removed.push(p);\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was removed\nif (removed.length === 0) return [];\n\nreturn [{ json: { genome, removedCount: removed.length } }];"
},
"id": "84848a31-d099-459e-bd03-67abc2cf2b77",
"name": "Gate prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1456,
-64
]
},
{
"parameters": {
"workflowId": {
"__rl": true,
"value": "zbtRXWsLt56nEIfz",
"mode": "list",
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
"cachedResultName": "Power Manager"
},
"workflowInputs": {
"mappingMode": "defineBelow",
"value": {
"mode": "ensure-on"
},
"matchingColumns": [
"mode"
],
"schema": [
{
"id": "mode",
"displayName": "mode",
"required": false,
"defaultMatch": false,
"display": true,
"canBeUsedToMatch": true,
"type": "string",
"removed": false
}
],
"attemptToConvertTypes": false,
"convertFieldsToString": true
},
"options": {}
},
"id": "175e4191-eb1b-4e5d-8d82-c39205753152",
"name": "Power Manager - ensure-on",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1.3,
"position": [
1680,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi orphan-wiki {{ $('Gate prune').first().json.genome }}'"
},
"id": "598f20f8-d668-48da-90e3-1bfada3ace92",
"name": "SSH: orphan-wiki",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
1904,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"jsCode": "// Gate: proceed to prune only if orphan-wiki actually found orphans.\n// run-prune re-derives independently anyway (no detected-vs-pruned race);\n// this gate just avoids taking the lock for nothing.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\n\ntry {\n d = JSON.parse(out);\n} catch (e) {\n // Malformed JSON from orphan-wiki — log and stop\n return [{ json: { _gate: 'parse-error', raw: out.substring(0, 500) } }];\n}\n\n// Strict validation: d must be object with numeric count > 0\nif (!d || typeof d !== 'object' || typeof d.count !== 'number' || d.count <= 0) {\n return []; // 0 orphans or missing count -> stop silently\n}\n\nreturn [{ json: { genome: d.genome, count: d.count } }];"
},
"id": "3b644d61-26d8-4024-baed-bcb4ad169a6a",
"name": "Orfani?",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2112,
-64
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "=ssh vm101 'pi prune {{ $json.genome }}'"
},
"id": "a8cae2c2-6f2f-4ef6-add9-287195aa84b5",
"name": "SSH: prune",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
2336,
-64
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Extract the last JSON line from SSH stdout (the command may print logs before/after).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout' };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream nodes\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n count: r.count,\n pr_url: r.pr_url,\n genome: r.genome,\n _raw: line?.substring(0, 500)\n};"
},
"id": "da1ab42c-32e1-4c4d-82a1-925fcee1a098",
"name": "Parse prune",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2560,
-64
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Build ntfy notification for genome pruning.\n// Run Once for Each Item: $json is the parsed prune result.\nconst d = $json;\nconst genome = d.genome || 'unknown';\n\nlet n;\nif (d.status === 'ok') {\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 potatura ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: 'default',\n tags: 'broom',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `${d.count} sorgente/i orfane proposte per la rimozione. **Approva la PR** per potare, oppure chiudila da Forgejo per annullare.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 errore potatura`.trim(),\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto durante la potatura'}.`\n };\n}\n\nreturn n;"
},
"id": "ebe99407-6038-4f8f-a73f-7dc7b0a011e0",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
2784,
-64
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0bd3654e-a73d-4c3a-83ed-9f57ca4aad24",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
2992,
-64
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"Webhook prune": {
"main": [
[
{
"node": "Gate prune",
"type": "main",
"index": 0
}
]
]
},
"Gate prune": {
"main": [
[
{
"node": "Power Manager - ensure-on",
"type": "main",
"index": 0
}
]
]
},
"Power Manager - ensure-on": {
"main": [
[
{
"node": "SSH: orphan-wiki",
"type": "main",
"index": 0
}
]
]
},
"SSH: orphan-wiki": {
"main": [
[
{
"node": "Orfani?",
"type": "main",
"index": 0
}
]
]
},
"Orfani?": {
"main": [
[
{
"node": "SSH: prune",
"type": "main",
"index": 0
}
]
]
},
"SSH: prune": {
"main": [
[
{
"node": "Parse prune",
"type": "main",
"index": 0
}
]
]
},
"Parse prune": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "999f640c-aae6-42aa-9a95-aba26987e9d0",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "smH5Qrv7CQnTtdAF",
"tags": []
}

View file

@ -0,0 +1,266 @@
{
"name": "Genome: run-one-ingest",
"nodes": [
{
"parameters": {
"inputSource": "passthrough"
},
"id": "b1b7ba8e-1e45-4f76-adc0-089180715975",
"name": "On ingest request",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"typeVersion": 1.1,
"position": [
224,
624
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// SECURITY chokepoint: every ingest to vm101 passes here. Re-validate inputs (defense in depth:\n// callers + the SSH wrapper also validate) and assemble the exact command. Charset-validated\n// fields are safe inside the single-quoted remote command -> no shell injection.\n// Run Once for Each Item: $json is the current ingest request.\nconst d = $json || {};\nconst genome = String(d.genome || '').toLowerCase().trim();\nconst raw = String(d.raw || '');\nconst mode = String(d.mode || 'ingest');\nconst fb = String(d.feedback_b64 || '');\n\nconst okGenome = /^[a-z0-9][a-z0-9-]{0,63}$/.test(genome);\nconst okMode = (mode === 'ingest' || mode === 'rework');\nconst okRaw = raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw);\n// feedback_b64 is required only for rework mode; for ingest it can be empty\nconst okFb = (mode === 'ingest') || /^[A-Za-z0-9+/=]+$/.test(fb);\n\nif (!okGenome || !okMode || !okRaw || !okFb) {\n return {\n _ok: false,\n genome,\n mode,\n _reason: `bad input (genome:${okGenome} mode:${okMode} raw:${okRaw} fb:${okFb})`\n };\n}\n\n// Build SSH command: single-quoted remote command prevents shell injection\nconst ssh_cmd = (mode === 'rework')\n ? `ssh vm101 'pi ingest-rework ${genome} ${raw} ${fb}'`\n : `ssh vm101 'pi ingest ${genome} ${raw}'`;\n\nreturn {\n _ok: true,\n ssh_cmd,\n genome,\n raw,\n mode,\n reason: String(d.reason || ''),\n prevPr: String(d.prevPr || '')\n};"
},
"id": "8e538237-0e0e-4308-b2c8-631a52b31185",
"name": "Guard & build cmd",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
448,
624
]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"typeValidation": "loose",
"version": 2
},
"conditions": [
{
"id": "4507e3a8b9714c7e",
"leftValue": "={{ $json._ok }}",
"rightValue": true,
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "4b249e76-7ab6-4aa3-886d-06b865931cf6",
"name": "Input valido?",
"type": "n8n-nodes-base.if",
"typeVersion": 2.2,
"position": [
672,
624
]
},
{
"parameters": {
"authentication": "privateKey",
"command": "={{ $json.ssh_cmd }}"
},
"id": "8740ae9a-4094-48b2-a9a4-d40d501e09f6",
"name": "SSH: ingest",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [
880,
544
],
"credentials": {
"sshPrivateKey": {
"id": "GJQjKzte7Hjdfz89",
"name": "n8n container -> n8n-runner@nexus"
}
}
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// run-ingest.sh prints one JSON line; the wrapper may instead print {status:busy|error,...}.\n// Take the last {...} line from stdout (logs may precede/follow).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout', raw: out.substring(0, 500) };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream Build ntfy\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n pr_url: r.pr_url || '',\n slug: r.slug || '',\n lint_clean: r.lint_clean || false,\n conflict: r.conflict || false,\n stage: r.stage || '',\n detail: r.detail || '',\n log: r.log || '',\n _raw: line?.substring(0, 500)\n};"
},
"id": "928344e3-0712-42e0-b1a8-f5caff489746",
"name": "Parse result",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1104,
544
]
},
{
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// One builder for ingest + rework outcomes. Title is plain ASCII; the icon comes from Tags\n// (ntfy shortcodes); navigation is via Click (tap) + Actions (button) so it works on every\n// client.\n// Run Once for Each Item: $json is the current parsed result.\n// We read the original request context from the Guard node (same execution, no executeWorkflow in between).\nconst g = $('Guard & build cmd').item.json || {};\nconst verb = (g.mode === 'rework') ? 'rework' : 'ingest';\nconst d = $json || {};\nconst genome = g.genome || 'unknown';\n\n// Build notification based on status\nlet n;\n\nif (g._ok === false) {\n // Input validation failed (Guard & build cmd rejected it)\n n = {\n title: `Errore ${verb}: input non valido`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `Richiesta di ${verb} rifiutata.\\n${g._reason || 'motivo sconosciuto'}`\n };\n} else if (d.status === 'ok') {\n // Success: PR opened\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n const lint = d.lint_clean ? 'lint pulito' : 'lint con avvisi';\n const conflict = d.conflict ? ' · ⚠️ conflitto da risolvere' : '';\n const prevPr = g.prevPr ? ` · sostituisce #${g.prevPr}` : '';\n const reason = (g.reason && verb === 'ingest') ? ` (${g.reason})` : '';\n\n n = {\n title: `${genome} · ${verb} ${d.slug || ''} ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: d.conflict ? 'high' : 'default',\n tags: d.conflict ? 'warning' : 'white_check_mark',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `**${d.slug || 'sorgente'}** ${verb === 'rework' ? 
'rilavorata' : 'ingerita'}`\n + reason + prevPr\n + `.\\n${lint}${conflict}.`\n };\n} else if (d.status === 'busy') {\n // Another ingest is already running on this genome\n n = {\n title: `${genome} · ${verb} in coda`,\n priority: 'min',\n tags: 'hourglass_flowing_sand',\n click: '',\n actions: '',\n body: `Un altro ingest era in corso su questo genoma. La fonte resta pendente e verrà ripresa al prossimo campanello.`\n };\n} else if (d.status === 'pr_failed') {\n // Semantic/lint ok but PR could not be opened\n const detailLine = String(d.detail || '').split('\\n')[0] || 'dettaglio non disponibile';\n n = {\n title: `${genome} · ${d.slug || ''}: PR non aperta`,\n priority: 'high',\n tags: 'warning',\n click: '',\n actions: '',\n body: `Semantic e lint ok, ma la PR non si è aperta.\\n${detailLine}`\n };\n} else {\n // Generic error (including parse errors)\n const stage = d.stage ? ` (stage: ${d.stage})` : '';\n const log = d.log ? `\\nLog: ${d.log}` : '';\n n = {\n title: `${genome} · errore ${verb}`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto'}${stage}.${log}`\n };\n}\n\nn.topic = 'genome-ingest';\nreturn n;"
},
"id": "9062dfba-02ba-4abc-8be6-828c0b353114",
"name": "Build ntfy",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1328,
624
]
},
{
"parameters": {
"method": "POST",
"url": "=http://ntfy/{{ $json.topic }}",
"authentication": "genericCredentialType",
"genericAuthType": "httpBearerAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Title",
"value": "={{ $json.title }}"
},
{
"name": "Priority",
"value": "={{ $json.priority }}"
},
{
"name": "Tags",
"value": "={{ $json.tags }}"
},
{
"name": "Click",
"value": "={{ $json.click }}"
},
{
"name": "Actions",
"value": "={{ $json.actions }}"
},
{
"name": "Markdown",
"value": "yes"
}
]
},
"sendBody": true,
"contentType": "raw",
"rawContentType": "Raw / Text",
"body": "={{ $json.body }}",
"options": {
"timeout": 15000
}
},
"id": "0c2b4d9b-2700-4815-b47c-8523bc4eb2ff",
"name": "ntfy: send",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.4,
"position": [
1552,
624
],
"credentials": {
"httpHeaderAuth": {
"id": "TBPXSWOF63k9mvm8",
"name": "ntfy-token"
},
"httpBearerAuth": {
"id": "nCv4CUN7Ef086Ewj",
"name": "Bearer Auth account"
}
}
}
],
"pinData": {},
"connections": {
"On ingest request": {
"main": [
[
{
"node": "Guard & build cmd",
"type": "main",
"index": 0
}
]
]
},
"Guard & build cmd": {
"main": [
[
{
"node": "Input valido?",
"type": "main",
"index": 0
}
]
]
},
"Input valido?": {
"main": [
[
{
"node": "SSH: ingest",
"type": "main",
"index": 0
}
],
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"SSH: ingest": {
"main": [
[
{
"node": "Parse result",
"type": "main",
"index": 0
}
]
]
},
"Parse result": {
"main": [
[
{
"node": "Build ntfy",
"type": "main",
"index": 0
}
]
]
},
"Build ntfy": {
"main": [
[
{
"node": "ntfy: send",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1",
"binaryMode": "separate",
"timeSavedMode": "fixed",
"errorWorkflow": "7Vws3gCX3QnjM3oD",
"callerPolicy": "workflowsFromSameOwner",
"availableInMCP": false
},
"versionId": "fd8c1cf6-c5df-4074-b777-113349e32a03",
"meta": {
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
},
"id": "VIi2ovb5gJxNJLbg",
"tags": []
}

View file

@ -91,6 +91,14 @@ EOF
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null \ grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null \
|| echo 'raw/.stignore' >> "${vault}/.git/info/exclude" || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
# Syncthing folder marker: must exist on disk (locally, NOT on Git).
# Without it, Syncthing refuses to scan (“folder marker missing”).
mkdir -p "${vault}/raw/.stfolder"
# .stfolder must not be included in genome commits
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null \
|| echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"
# ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ──────── # ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ────────
folder_state="skipped(no api key)" folder_state="skipped(no api key)"
if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then

View file

@ -28,7 +28,8 @@ set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
vault="${GENOME_VAULTS_ROOT}/${genome}" vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public" fid="${genome}-public"
authors_map="${GENOME_VAULTS_ROOT}/.authors.json" authors_map="${GENOME_VAULTS_ROOT}/.authors.json"
clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git" # GENOME_PUSH_URL is a test seam: defaults to the Forgejo loopback URL in production.
clone_url="${GENOME_PUSH_URL:-http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git}"
export GIT_ASKPASS=/usr/local/bin/genome-askpass export GIT_ASKPASS=/usr/local/bin/genome-askpass
[[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; } [[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; }
@ -42,8 +43,30 @@ grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/
git add -A -- raw/ git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
# --- Quiet window: only commit raw files that have STOPPED changing. ----------------
# While a note is being written (Obsidian autosave -> Syncthing -> here) its mtime stays
# fresh; we leave it UNSTAGED so a half-written note never triggers an ingest. A file is
# committed only after it has been still for RAW_QUIET_MINUTES. Deletions (nothing on disk)
# are stable by definition and pass straight through. Deterministic — no model in the loop.
#
# Outputs (read by the code that follows):
#   quiet_min - the quiet window in whole minutes (default 2)
#   held      - number of staged raw files that were unstaged because they are still "hot"
quiet_min="${RAW_QUIET_MINUTES:-2}"
# A non-numeric value would make `find -mmin` fail; its error is discarded below, so the
# window would be silently disabled. Fall back to the default instead.
if [[ ! "$quiet_min" =~ ^[0-9]+$ ]]; then
  printf 'raw quiet window: invalid RAW_QUIET_MINUTES=%s, using 2\n' "$quiet_min" >&2
  quiet_min=2
fi
held=0
# NUL-delimited listing (-z): without it git C-quotes paths containing non-ASCII or special
# bytes (e.g. "raw/caff\303\250.md"), the -e test below fails on the quoted form, and a
# still-changing note would be committed immediately.
while IFS= read -r -d '' f; do
  [[ -n "$f" ]] || continue
  # Only an existing file can be "hot"; a staged deletion has nothing on disk to settle.
  if [[ -e "$f" && -n "$(find "$f" -mmin -"$quiet_min" 2>/dev/null)" ]]; then
    # Unstage only; the working-tree file is left as-is for a later run.
    git reset -q -- "$f" 2>/dev/null || true
    held=$((held+1))
  fi
done < <(git diff --cached --name-only -z -- raw/)
if git diff --cached --quiet; then if git diff --cached --quiet; then
if [[ "$held" -gt 0 ]]; then
printf '{"status":"noop","reason":"raw still settling","genome":"%s","held":%d,"quiet_minutes":%d}\n' \
"$genome" "$held" "$quiet_min"
else
printf '{"status":"noop","genome":"%s"}\n' "$genome" printf '{"status":"noop","genome":"%s"}\n' "$genome"
fi
exit 0 exit 0
fi fi
@ -87,13 +110,32 @@ for key in "${!G_FILES[@]}"; do
summary="${summary}${summary:+; }${G_NAME[$key]}:${short}" summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
done done
# Pull in any remote advances (e.g. a merged wiki PR), then push # Push to origin/<base>. The vault is SCRATCH, so we never do an interactive rebase
# (which can conflict when the same raw file is edited repeatedly). Strategy:
# try a fast-forward push; if origin moved, re-apply our raw changes on top of a
# fresh origin/<base> and push again. Deterministic, conflict-free.
git fetch -q origin git fetch -q origin
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then if ! git push -q "$clone_url" "HEAD:${GENOME_BASE}" 2>/dev/null; then
git rebase -q "origin/${GENOME_BASE}" \ # origin advanced: capture our just-made tree for raw/, realign hard, re-apply, retry once.
|| { git rebase --abort 2>/dev/null || true; printf '{"status":"error","reason":"rebase-conflict","genome":"%s"}\n' "$genome"; exit 1; } tmp="$(mktemp -d)"
cp -a raw/. "$tmp"/ 2>/dev/null || true
git reset -q --hard "origin/${GENOME_BASE}"
git clean -q -fd
cp -a "$tmp"/. raw/ 2>/dev/null || true
rm -rf "$tmp"
git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
if git diff --cached --quiet; then
# our content already matches origin -> nothing to push, report ok-noop-after-realign
printf '{"status":"ok","genome":"%s","base":"%s","commits":0,"head":"%s","summary":"already in sync after realign","files":[]}\n' \
"$genome" "$GENOME_BASE" "$(git rev-parse --short HEAD)"
exit 0
fi
git commit -q --author="${DEFAULT_AUTHOR_NAME} <${DEFAULT_AUTHOR_EMAIL}>" \
-m "raw(${genome}): re-apply after realign" -- raw/ || true
git push -q "$clone_url" "HEAD:${GENOME_BASE}" \
|| { printf '{"status":"error","reason":"push-failed-after-realign","genome":"%s"}\n' "$genome"; exit 1; }
fi fi
git push -q "$clone_url" "HEAD:${GENOME_BASE}"
head="$(git rev-parse --short HEAD)" head="$(git rev-parse --short HEAD)"
# `files` array: local (file://) and remote (Forgejo web) link for each committed raw # `files` array: local (file://) and remote (Forgejo web) link for each committed raw

View file

@ -2,6 +2,22 @@
set -eu set -eu
cmd="${SSH_ORIGINAL_COMMAND:-}" cmd="${SSH_ORIGINAL_COMMAND:-}"
case "$cmd" in case "$cmd" in
"pi pending-raw "*)
genome="${cmd#pi pending-raw }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi pending-raw ${genome}"
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
# Run from the DEPLOYED skill dir (same place as ingest-semantic.py / run-ingest.sh on
# lines 54/59), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
;;
"pi orphan-wiki "*)
genome="${cmd#pi orphan-wiki }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
;;
"pi run") "pi run")
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)" logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
prompt=$(cat) prompt=$(cat)
@ -33,20 +49,25 @@ case "$cmd" in
esac esac
logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}" logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"
# Per-genome lock: serialize writes; never two concurrent ingests on the same genome.
exec 9>"/run/lock/kg-ingest-${genome}.lock" 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
if ! flock -n 9; then
echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'
exit 0
fi
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; } cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# The raw file must actually exist under the genome's raw/ dir. # The raw file must actually exist under the genome's raw/ dir.
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; } [ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
# Clean start on the configured base (develop), pinned to the remote. Destroys only # Clean start on the configured base (single source of truth in lib/clean-start.sh).
# vm101's scratch checkout (never a shared branch, never a force-push) — this is by design. : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
# `clean -fd` also removes leftover UNTRACKED files (e.g. wiki/sources/* or a stale source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
# .ingest-manifest.json from a half-finished previous run) that `reset --hard` won't touch. || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
git fetch -q origin \ clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
&& git switch -q "${INGEST_BASE:-main}" 2>/dev/null \
&& git reset -q --hard "origin/${INGEST_BASE:-main}" \
&& git clean -q -fd
# SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest. # SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
# (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.) # (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
@ -58,6 +79,107 @@ case "$cmd" in
# MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line # MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}" exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
;; ;;
"pi prune "*)
# Prune orphaned sources. Takes the same per-genome lock as ingest (serializes writes per
# genome), runs clean_start, then run-prune.sh (which re-derives the orphans and opens a gated PR).
genome="${cmd#pi prune }"
# Genome name: non-empty, lowercase letters / digits / '-' only.
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi prune ${genome}"
# Open the lock file on fd 9: try /run/lock first, otherwise /tmp.
exec 9>"/run/lock/kg-ingest-${genome}.lock" 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
# Non-blocking lock: if it is already held, report "busy" and exit 0 instead of waiting.
if ! flock -n 9; then
echo '{"status":"busy","reason":"another ingest/prune is running for this genome","genome":"'"$genome"'"}'
exit 0
fi
# Load the environment file with auto-export on, so the exec'd script inherits the settings.
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# KG_LIB_DIR may be overridden from the environment; the default is the orchestrator's lib dir.
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
# exec replaces this wrapper with run-prune.sh, which receives the genome name.
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-prune.sh" "${genome}"
;;
"pi ingest-rework "*)
# args: <genome> <raw_path> <feedback_base64> (3 tokens).
# The feedback travels base64-encoded in argv: the n8n SSH node does not pass stdin, and this
# way the review's metacharacters (quotes, newlines, $(...)) are neutralized.
args="${cmd#pi ingest-rework }"
# Split on the first two spaces: genome, raw_path, and the remainder as the base64 feedback.
genome="${args%% *}"; tmp="${args#* }"
raw_path="${tmp%% *}"; fb_b64="${tmp#* }"
# If a split left its input unchanged there was no separating space, i.e. a token is missing.
if [ "$genome" = "$args" ] || [ "$raw_path" = "$tmp" ] || [ -z "$fb_b64" ]; then
echo '{"status":"error","reason":"usage: pi ingest-rework <genome> <raw_path> <feedback_b64>"}'; exit 1
fi
# Input validation: genome charset, raw_path must live under raw/, contain no '..' or '//',
# use only a conservative character set, and the feedback must look like base64.
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome"}'; exit 1;; esac
case "$raw_path" in raw/*) : ;; *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;; esac
case "$raw_path" in *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;; esac
case "$raw_path" in *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;; esac
case "$fb_b64" in *[!A-Za-z0-9+/=]*) echo '{"status":"error","reason":"feedback not base64"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi ingest-rework ${genome} ${raw_path}"
# A decode failure is not fatal here (stderr discarded, `|| true`); the feedback may then be
# empty or partial. NOTE(review): confirm that running a rework without feedback is intended.
feedback="$(printf '%s' "$fb_b64" | base64 -d 2>/dev/null || true)"
# Per-genome lock: serializes with normal ingests.
exec 9>"/run/lock/kg-ingest-${genome}.lock" 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
if ! flock -n 9; then
echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'; exit 0
fi
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# The raw file is checked relative to the genome checkout, before clean_start runs.
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
# Semantic step: its stdout+stderr go to a temp log whose path is reported on failure.
log="$(mktemp -t pi-rework.XXXXXX.log)"
# The decoded feedback is handed to the semantic script through the INGEST_FEEDBACK env var.
INGEST_FEEDBACK="$feedback" \
"${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
>"$log" 2>&1 \
|| { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"rework failed\",\"log\":\"${log}\"}"; exit 1; }
# Mechanical step: exec replaces this wrapper with run-ingest.sh.
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
;;
"pi changed-raw "*)
# List raw/ files changed between two commits, one per line (the webhook payload
# does NOT include file lists, so vm101's checkout computes the diff itself).
rest="${cmd#pi changed-raw }"
genome="${rest%% *}"
range="${rest#* }"
before="${range%% *}"
after="${range#* }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
case "$before$after" in *[!a-f0-9]*|"") echo '{"status":"error","reason":"invalid commit range"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi changed-raw ${genome} ${before}..${after}"
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
git fetch -q origin
# Resolve the diff base robustly:
# - before all-zero (brand-new branch) or unreachable (force-push) -> fall back to after~1
# - if even after~1 is missing (root commit) -> list all raw files in `after`
base="$before"
case "$before" in *[!0]*) : ;; *) base="" ;; esac # all-zero -> empty
if [ -n "$base" ] && ! git cat-file -e "${base}^{commit}" 2>/dev/null; then base=""; fi
if [ -z "$base" ]; then
if git cat-file -e "${after}~1^{commit}" 2>/dev/null; then base="${after}~1"; else base=""; fi
fi
if [ -n "$base" ]; then
files="$(git diff --name-only --diff-filter=d "${base}" "${after}" -- raw/ 2>/dev/null \
| grep -vE '(^|/)\.st(folder|ignore)' || true)"
else
# no usable base: enumerate raw files present at `after`
files="$(git ls-tree -r --name-only "${after}" -- raw/ 2>/dev/null \
| grep -vE '(^|/)\.st(folder|ignore)' || true)"
fi
# emit a JSON array via jq (safe escaping)
printf '%s\n' "$files" | grep -c . >/dev/null 2>&1 || files=""
if [ -z "$files" ]; then
echo '{"status":"ok","genome":"'"$genome"'","count":0,"files":[]}'
else
printf '%s\n' "$files" | jq -R . | jq -s \
--arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
fi
;;
"ollama list") "ollama list")
logger -t n8n-pi-wrap "ok: ollama list" logger -t n8n-pi-wrap "ok: ollama list"
exec /usr/local/bin/ollama list exec /usr/local/bin/ollama list

18
lib/clean-start.sh Normal file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# =============================================================================
# lib/clean-start.sh — single source of truth for the pre-session reset.
# Caller must already be INSIDE the genome checkout.
# Aligns the working tree to origin/<base>. Never force-pushes a shared branch.
# Tolerates a missing remote branch (first-setup scenario).
# NOTE: sourced library — no `set -euo pipefail` (would leak into the caller).
# =============================================================================
#######################################
# Align the current checkout to origin/<base> and drop untracked leftovers.
# The caller must already be inside the genome checkout.
# Globals:   INGEST_BASE (read) - base branch name, defaults to "main"
# Arguments: none
# Returns:   0 on success, 1 if any git step fails
#######################################
clean_start() {
  local base="${INGEST_BASE:-main}"
  git fetch -q origin || return 1
  # Switch to the base branch, creating it locally if it does not exist yet.
  git switch -q "$base" 2>/dev/null || git checkout -q -b "$base" || return 1
  # Only hard-reset when the remote really has this branch (first-setup tolerance).
  # The full ref name is used because a bare pattern is matched against the TAIL of a ref:
  # "main" would also match refs/heads/feature/main, and the reset below would then fail.
  if git ls-remote --exit-code --heads origin "refs/heads/${base}" >/dev/null 2>&1; then
    git reset -q --hard "origin/${base}" || return 1
  fi
  # Remove untracked files and directories left behind by a previous run.
  git clean -q -fd || return 1
}

View file

@ -1,11 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# ============================================================================= # =============================================================================
# skills/ingest/scripts/index-append.py # skills/ingest/scripts/index-append.py
# Insert an entry line into the correct section of wiki/index.md and keep that # Insert OR remove an entry line in wiki/index.md, keeping the target section
# section's entries alphabetically ordered. Bumps frontmatter last_updated. # alphabetically ordered. Bumps frontmatter last_updated.
# #
# index-append.py --section Sources \ # index-append.py --section Sources \
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`' # --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
# index-append.py --remove 'sources/foo' # delete the entry by wikilink
# ============================================================================= # =============================================================================
import argparse import argparse
import datetime import datetime
@ -17,14 +18,116 @@ LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
HEADER_RE = re.compile(r"^## ") HEADER_RE = re.compile(r"^## ")
def bump_last_updated(lines, today):
    """Bump (or self-heal) last_updated inside the leading frontmatter block.

    Mutates ``lines`` in place and returns None.

    lines: list of file lines without trailing newlines.
    today: date string written as the new ``last_updated`` value.

    Frontmatter is only recognised when ``---`` is the first non-blank line.
    A ``---`` further down (e.g. a Markdown horizontal rule) is body content and
    must not be mistaken for a frontmatter fence, otherwise the self-heal branch
    would insert a ``last_updated:`` line into the document body.
    """
    fm_open = False
    fm_close_idx = None
    bumped = False
    for i, ln in enumerate(lines):
        stripped = ln.strip()
        if not fm_open:
            if stripped == "---":
                fm_open = True
            elif stripped:
                # Real content before any fence: the file has no frontmatter.
                break
            continue
        if stripped == "---":
            fm_close_idx = i  # the closing fence
            break
        if ln.startswith("last_updated:"):
            lines[i] = f"last_updated: {today}"
            bumped = True
    if not fm_open:
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
    elif not bumped and fm_close_idx is not None:
        # Self-heal: frontmatter present but the key is missing; insert before the close.
        lines.insert(fm_close_idx, f"last_updated: {today}")
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)
def do_remove(lines, link, today):
    """Remove every entry line whose wikilink == link. Idempotent.

    lines: list of file lines without trailing newlines.
    link:  wikilink target to delete, e.g. ``sources/foo``.
    today: date string used to bump ``last_updated`` when something is removed.

    Returns the new list of lines. When the entry is already absent the content
    comes back unchanged: last_updated is NOT bumped, so a repeated no-op call
    never produces a spurious diff.
    """
    kept = []
    removed = 0
    for ln in lines:
        m = LINK_RE.match(ln)
        if m and m.group(1) == link:
            removed += 1
            continue
        kept.append(ln)
    if removed:
        # Only a real change counts as an update of the index.
        bump_last_updated(kept, today)
        print(f"index-append: removed [[{link}]] ({removed} line(s))")
    else:
        # Idempotent: the goal state (entry absent) already holds.
        print(f"index-append: [[{link}]] not present, nothing to remove")
    return kept
def do_append(lines, section, entry, today):
    """Insert or refresh ``entry`` in ``section``, keeping entries alphabetically sorted.

    lines:   list of file lines without trailing newlines.
    section: section name; matched as a prefix of the text after ``## ``.
    entry:   full index line to insert.
    today:   date string used to bump ``last_updated`` when the index changes.

    Returns the new list of lines, the untouched ``lines`` when the identical
    entry is already present, or None when the section does not exist.
    last_updated is bumped only when the section is actually modified, so a
    skipped call leaves the file content unchanged.
    """
    # Locate the target section [start, end)
    start = None
    for i, ln in enumerate(lines):
        if HEADER_RE.match(ln) and ln[3:].startswith(section):
            start = i
            break
    if start is None:
        print(f"index-append: section '{section}' not found", file=sys.stderr)
        return None
    end = len(lines)
    for i in range(start + 1, len(lines)):
        if HEADER_RE.match(lines[i]):
            end = i
            break

    # Split the section body into intro (non-entry) lines and entry lines.
    body = lines[start + 1:end]
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    entries = [ln for ln in body if ENTRY_RE.match(ln)]

    # Deduplicate by wikilink path, not by exact line: a re-ingest with a changed
    # summary should UPDATE the existing entry rather than add a second line.
    new_m = LINK_RE.match(entry)
    new_link = new_m.group(1) if new_m else None
    if new_link is not None:
        replaced = False
        for idx, ln in enumerate(entries):
            m = LINK_RE.match(ln)
            if m and m.group(1) == new_link:
                if ln == entry:
                    print("index-append: entry already present, skipping")
                    return lines
                entries[idx] = entry  # same page, refreshed text
                replaced = True
                break
        if not replaced:
            entries.append(entry)
    else:
        # No parseable wikilink: fall back to exact-line dedup.
        if entry in entries:
            print("index-append: entry already present, skipping")
            return lines
        entries.append(entry)

    entries.sort(key=str.casefold)
    # Normalise the intro: drop trailing blank lines before re-assembling.
    while intro and intro[-1].strip() == "":
        intro.pop()
    new_section = intro + [""] + entries + [""]
    result = lines[:start + 1] + new_section + lines[end:]
    # The index really changed: record the update date now.
    bump_last_updated(result, today)
    print(f"index-append: added to {section}")
    return result
def main() -> int: def main() -> int:
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument("--section", required=True, ap.add_argument("--section", help="Section name (required with --entry)")
help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts") ap.add_argument("--entry", help="Full index line to insert")
ap.add_argument("--entry", required=True, help="Full index line to insert") ap.add_argument("--remove", metavar="WIKILINK",
help="Remove the entry with this wikilink, e.g. sources/foo")
ap.add_argument("--file", default="wiki/index.md") ap.add_argument("--file", default="wiki/index.md")
args = ap.parse_args() args = ap.parse_args()
if bool(args.remove) == bool(args.entry):
print("index-append: provide exactly one of --entry or --remove", file=sys.stderr)
return 2
if args.entry and not args.section:
print("index-append: --entry requires --section", file=sys.stderr)
return 2
try: try:
with open(args.file, encoding="utf-8") as fh: with open(args.file, encoding="utf-8") as fh:
lines = fh.read().splitlines() lines = fh.read().splitlines()
@ -33,90 +136,15 @@ def main() -> int:
return 1 return 1
today = datetime.date.today().isoformat() today = datetime.date.today().isoformat()
if args.remove:
# 1. Bump last_updated inside the first frontmatter block out = do_remove(lines, args.remove, today)
fm_open = False else:
fm_close_idx = None out = do_append(lines, args.section, args.entry, today)
bumped = False if out is None:
for i, ln in enumerate(lines):
if ln.strip() == "---":
if not fm_open:
fm_open = True
continue
fm_close_idx = i # the closing ---
break
if fm_open and ln.startswith("last_updated:"):
lines[i] = f"last_updated: {today}"
bumped = True
if not fm_open:
print("index-append: warning: no frontmatter found, last_updated not bumped",
file=sys.stderr)
elif not bumped and fm_close_idx is not None:
# self-heal: frontmatter present but missing the key — insert it before the close
lines.insert(fm_close_idx, f"last_updated: {today}")
print("index-append: last_updated key was missing — inserted", file=sys.stderr)
# 2. Locate the target section [start, end)
start = None
for i, ln in enumerate(lines):
if HEADER_RE.match(ln) and ln[3:].startswith(args.section):
start = i
break
if start is None:
print(f"index-append: section '{args.section}' not found in {args.file}",
file=sys.stderr)
return 1 return 1
end = len(lines)
for i in range(start + 1, len(lines)):
if HEADER_RE.match(lines[i]):
end = i
break
# 3. Split the section body into intro (non-entry) and entries
body = lines[start + 1:end]
intro = [ln for ln in body if not ENTRY_RE.match(ln)]
entries = [ln for ln in body if ENTRY_RE.match(ln)]
# Deduplicate by wikilink PATH, not by exact line: a re-ingest with a changed
# summary/maturity should UPDATE the existing entry, not add a duplicate line.
new_m = LINK_RE.match(args.entry)
new_link = new_m.group(1) if new_m else None
if new_link is not None:
replaced = False
for idx, ln in enumerate(entries):
m = LINK_RE.match(ln)
if m and m.group(1) == new_link:
if ln == args.entry:
print("index-append: entry already present, skipping")
return 0
entries[idx] = args.entry # same page, refreshed text
replaced = True
break
if not replaced:
entries.append(args.entry)
else:
# No parseable wikilink — fall back to exact-line dedup.
if args.entry in entries:
print("index-append: entry already present, skipping")
return 0
entries.append(args.entry)
entries.sort(key=str.casefold)
# Normalise intro: drop trailing blanks, keep header + comment(s)
while intro and intro[-1].strip() == "":
intro.pop()
new_section = intro + [""] + entries + [""]
lines = lines[:start + 1] + new_section + lines[end:]
with open(args.file, "w", encoding="utf-8") as fh: with open(args.file, "w", encoding="utf-8") as fh:
fh.write("\n".join(lines) + "\n") fh.write("\n".join(out) + "\n")
print(f"index-append: added to {args.section}")
return 0 return 0

View file

@ -20,7 +20,7 @@
# #
# Emits a single JSON status line on stdout (for n8n / logs). # Emits a single JSON status line on stdout (for n8n / logs).
# ============================================================================= # =============================================================================
import json, os, re, sys, datetime, urllib.request, urllib.error import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
# --- config (override via env; these live in ~/.config/knowledge-genome.env) --- # --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat") OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
@ -32,6 +32,7 @@ TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5). # Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
THINK = os.environ.get("INGEST_THINK") THINK = os.environ.get("INGEST_THINK")
TODAY = datetime.date.today().isoformat() TODAY = datetime.date.today().isoformat()
FEEDBACK = os.environ.get("INGEST_FEEDBACK", "").strip()
def die(stage, reason): def die(stage, reason):
@ -59,6 +60,15 @@ with open(raw_rel, "r", encoding="utf-8") as fh:
if not source_text.strip(): if not source_text.strip():
die("preflight", "source is empty: " + raw_rel) die("preflight", "source is empty: " + raw_rel)
# --- pre-flight check: if the prompt exceeds context window, exit cleanly with stage:input ---
# Conservative estimate: ~4 chars/token for mixed IT/EN text
SAFETY_MARGIN = 4096 # room for system prompt + JSON response
MAX_SOURCE_TOKENS = NUM_CTX - SAFETY_MARGIN
MAX_SOURCE_CHARS = MAX_SOURCE_TOKENS * 4
if len(source_text) > MAX_SOURCE_CHARS:
die("input", f"source too large ({len(source_text)} chars, limit ~{MAX_SOURCE_CHARS}). "
f"Use the SPLIT directive or divide the document.")
# --- read existing index to avoid duplicate slugs --- # --- read existing index to avoid duplicate slugs ---
existing_entities = set() existing_entities = set()
@ -209,41 +219,53 @@ SCHEMA = {
} }
def call_model(): def call_model(max_retries=2, base_delay=2.0):
# format existing names as a human-readable list """Call Ollama with retry on transient errors (connection, timeout, malformed JSON).
Retries up to max_retries times with exponential backoff. Does NOT retry on
content errors (schema violations, empty response) those are model issues."""
existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)" existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)" existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
prompt = SYSTEM_PROMPT.format( user_content = (
existing_entities=existing_ents, ("REVISION REQUESTED BY THE MAINTAINER (address this explicitly):\n"
existing_concepts=existing_conc, + FEEDBACK + "\n\n") if FEEDBACK else ""
) + (
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."
) )
payload = { payload = {
"model": MODEL, "model": MODEL,
"messages": [ "messages": [
{"role": "system", "content": prompt}, {"role": "system", "content": prompt},
{"role": "user", "content": {"role": "user", "content": user_content },
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
], ],
"format": SCHEMA, # schema-constrained generation "format": SCHEMA,
"stream": False, "stream": False,
# deterministic extraction; repetition penalties OFF for structured output
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX}, "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
} }
if THINK is not None: if THINK is not None:
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on") payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
data = json.dumps(payload).encode("utf-8") data = json.dumps(payload).encode("utf-8")
req = urllib.request.Request(
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"}) last_error = None
for attempt in range(max_retries + 1):
if attempt > 0:
delay = base_delay * (2 ** (attempt - 1))
print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
time.sleep(delay)
req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
try: try:
with urllib.request.urlopen(req, timeout=TIMEOUT) as r: with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
resp = json.loads(r.read().decode("utf-8")) resp = json.loads(r.read().decode("utf-8"))
except urllib.error.URLError as e: except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError) as e:
die("model", "ollama request failed: " + str(e)) last_error = f"connection/transport error: {e}"; continue
except json.JSONDecodeError as e:
last_error = f"invalid JSON from Ollama API: {e}"; continue
content = ((resp.get("message") or {}).get("content") or "").strip() content = ((resp.get("message") or {}).get("content") or "").strip()
# schema-constrained, but stay defensive if a model wraps it in a fence
if content.startswith("```"): if content.startswith("```"):
content = content.strip("`") content = content.strip("`")
brace = content.find("{") brace = content.find("{")
@ -252,12 +274,25 @@ def call_model():
try: try:
return json.loads(content) return json.loads(content)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
die("model", "model did not return valid JSON: " + str(e)) last_error = f"model did not return valid JSON: {e}"
if len(content) < 10:
continue # likely truncated -> retry
break # long but malformed -> model issue, stop
die("model", last_error or "model call failed after retries")
# --- run the semantic pass --- # --- run the semantic pass ---
sem = call_model() sem = call_model()
source_slug = slugify(os.path.splitext(os.path.basename(raw_rel))[0])
# Source of truth: slug from slug.sh --raw (deterministic, path-aware, collision-proof)
source_slug = subprocess.check_output(
["bash", os.path.join(os.path.dirname(__file__), "slug.sh"), "--raw", raw_rel],
text=True
).strip()
with open(raw_rel, "rb") as f:
src_sha = hashlib.sha256(f.read()).hexdigest()
pages = [] pages = []
# 1. source page — canonical summary of THIS source (re)written # 1. source page — canonical summary of THIS source (re)written
@ -273,7 +308,10 @@ src_tags = ([slugify(e.get("name", "")) for e in sem.get("entities", [])]
+ [slugify(c.get("name", "")) for c in sem.get("concepts", [])])[:8] + [slugify(c.get("name", "")) for c in sem.get("concepts", [])])[:8]
os.makedirs("wiki/sources", exist_ok=True) os.makedirs("wiki/sources", exist_ok=True)
with open(src_path, "w", encoding="utf-8") as f: with open(src_path, "w", encoding="utf-8") as f:
f.write(frontmatter("source", src_title, src_tags)) fm = frontmatter("source", src_title, src_tags)
# Inject tracking fields before the closing '---' (first newline-dash-dash-dash-newline)
fm = fm.replace("\n---\n", f"\nsource_path: {raw_rel}\nsource_sha256: {src_sha}\n---\n", 1)
f.write(fm)
f.write(f"\n# {src_title}\n\n{src_body}\n") f.write(f"\n# {src_title}\n\n{src_body}\n")
pages.append({"path": src_path, pages.append({"path": src_path,
"summary": twords(src_title), "summary": twords(src_title),

View file

@ -21,6 +21,7 @@ while [[ $# -gt 0 ]]; do
--context) context="$2"; shift 2 ;; --context) context="$2"; shift 2 ;;
--output) output="$2"; shift 2 ;; --output) output="$2"; shift 2 ;;
--reasoning) reasoning="$2"; shift 2 ;; --reasoning) reasoning="$2"; shift 2 ;;
--run-id) run_id_arg="$2"; shift 2 ;;
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;; *) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
esac esac
done done
@ -35,9 +36,15 @@ esac
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; } [[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')" run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
today="$(date +%Y-%m-%d)" today="$(date +%Y-%m-%d)"
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
echo "run_id=${run_id}"
exit 0
fi
{ {
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject" printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
printf -- '- run_id: `%s`\n' "$run_id" printf -- '- run_id: `%s`\n' "$run_id"

View file

@ -16,10 +16,11 @@ set -euo pipefail
: "${FORGEJO_USER:?missing FORGEJO_USER}" : "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}" : "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"
slug="" title="" body_file="" base="main" label="" slug="" title="" body_file="" base="main" label="" branch=""
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
--slug) slug="$2"; shift 2 ;; --slug) slug="$2"; shift 2 ;;
--branch) branch="$2"; shift 2 ;;
--title) title="$2"; shift 2 ;; --title) title="$2"; shift 2 ;;
--body-file) body_file="$2"; shift 2 ;; --body-file) body_file="$2"; shift 2 ;;
--base) base="$2"; shift 2 ;; --base) base="$2"; shift 2 ;;
@ -28,16 +29,23 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
: "${slug:?--slug required}"
: "${title:?--title required}" : "${title:?--title required}"
: "${body_file:?--body-file required}" : "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; } [[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }
branch="feat/ai-ingest-${slug}" # --branch overrides the default; otherwise derive the ingest branch from --slug.
# (run-prune passes its own chore/prune-orphans-* branch; run-ingest passes --slug.)
if [[ -z "$branch" ]]; then
: "${slug:?--slug or --branch required}"
branch="feat/ai-ingest-${slug}"
fi
repo="$(basename -s .git "$(git config --get remote.origin.url)")" repo="$(basename -s .git "$(git config --get remote.origin.url)")"
# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main) # 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
git switch -c "$branch" 2>/dev/null || git switch "$branch" # Rolling PR: -C force-resets the branch label to the current base (we are on it after
# clean_start) and CARRIES the freshly-written wiki/ changes, so a re-ingest of the same
# source rebuilds the branch cleanly instead of hitting a dirty-switch refusal.
git switch -C "$branch"
git add wiki/ git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that # Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.). # happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
@ -46,7 +54,10 @@ if git diff --cached --quiet -- wiki/; then
exit 1 exit 1
fi fi
git commit -m "$title" -- wiki/ git commit -m "$title" -- wiki/
git push -u origin "$branch" # Try a normal push (new branch / fast-forward). If the branch was rebuilt from base and
# diverged, force-with-lease updates the open PR in place — the lease refuses to clobber if
# origin moved unexpectedly since our fetch, so concurrent work is never lost.
git push -u origin "$branch" 2>/dev/null || git push -u --force-with-lease origin "$branch"
# DRY_RUN: local git work done; skip the Forgejo API (offline tests). # DRY_RUN: local git work done; skip the Forgejo API (offline tests).
if [[ -n "${DRY_RUN:-}" ]]; then if [[ -n "${DRY_RUN:-}" ]]; then

View file

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# =============================================================================
# orphan-wiki.sh — find source pages whose raw source no longer exists.
# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
# Read-only: no lock needed (same policy as pending-raw).
# =============================================================================
# Strict mode: abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail
# $1 = genome name (required); resolved below as a directory under base_dir.
genome="${1:?usage: orphan-wiki.sh <genome>}"
# Root that holds the genome checkouts; GENOMES_ROOT overrides the ~/genomes default.
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
# Failures are reported as a JSON object on stdout (exit 1), not as stderr text.
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# Clean start on the configured base (single source of truth in lib/clean-start.sh).
# KG_LIB_DIR may be preset by the caller; otherwise it defaults to the path below.
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
# Any failure to source the library (not only a missing file) is reported as "not found".
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
# NOTE(review): clean_start presumably resets the checkout to the base branch; if so this
# script is not strictly read-only — confirm it cannot race a run holding the genome lock.
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
#######################################
# Print the value of the first "<key>:" line that starts at column 0 of a file.
# Leading/trailing whitespace (including a trailing CR) and one pair of matching
# surrounding quotes are removed, so `source_path: "raw/x.md"  ` yields raw/x.md.
# Pure bash on purpose: no pipeline, hence no SIGPIPE failure under `pipefail`.
# Arguments: $1 - frontmatter key, $2 - file to read
# Outputs:   the value on stdout; nothing when the key is absent
# Returns:   0 on success (key present or not), 1 when the file is unreadable
#######################################
frontmatter_value() {
  local key=$1 file=$2 line value
  if [[ ! -r "$file" ]]; then
    printf 'frontmatter_value: cannot read %s\n' "$file" >&2
    return 1
  fi
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" == "${key}:"* ]] || continue
    value="${line#"${key}:"}"
    value="${value#"${value%%[![:space:]]*}"}"   # trim leading whitespace
    value="${value%"${value##*[![:space:]]}"}"   # trim trailing whitespace / CR
    case "$value" in
      \"*\") value="${value#\"}"; value="${value%\"}" ;;
      \'*\') value="${value#\'}"; value="${value%\'}" ;;
    esac
    printf '%s\n' "$value"
    return 0
  done < "$file"
  return 0
}

# Collect source pages whose recorded raw file is gone (paths relative to the cwd).
declare -a ORPH=()
for page in wiki/sources/*.md; do
  [[ -e "$page" ]] || continue          # unmatched glob stays literal: skip it
  sp="$(frontmatter_value source_path "$page")"
  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done
#######################################
# Print the JSON envelope {status, genome, count, files[], detail[]} for a set
# of orphaned pages. Every string goes through jq, so a genome name or path that
# contains quotes, backslashes or tabs cannot break the JSON.
# Arguments: $1 - genome name, $2.. - orphaned page paths (may be none)
# Outputs:   one compact JSON line when there are no orphans (same bytes as
#            before for ordinary names), pretty-printed JSON otherwise
#######################################
emit_orphan_report() {
  local g=$1
  shift
  if (( $# == 0 )); then
    jq -nc --arg g "$g" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
    return
  fi
  # -R reads raw lines, -n + `inputs` collects them all in a single jq process.
  printf '%s\n' "$@" \
    | jq -Rn --arg g "$g" '
        [inputs | {path: ., reason: "orphan"}]
        | {status:"ok", genome:$g, count:length, files:[.[].path], detail:.}'
}

# ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
emit_orphan_report "$genome" ${ORPH[@]+"${ORPH[@]}"}

View file

@ -0,0 +1,64 @@
#!/usr/bin/env bash
# =============================================================================
# pending-raw.sh — deterministic "what needs ingesting" calculator.
# Reads the clean base checkout and classifies each raw/articles/*.md as:
# new -> no wiki/sources/<slug>.md
# modified -> page exists but its source_sha256 != current file hash
# Emits the same JSON envelope as changed-raw (drop-in), plus detail[] for ntfy.
# =============================================================================
# Strict mode: abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail
# $1 = genome name (required); resolved below as a directory under base_dir.
genome="${1:?usage: pending-raw.sh <genome>}"
# Root that holds the genome checkouts; GENOMES_ROOT overrides the ~/genomes default.
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
# Failures are reported as a JSON object on stdout (exit 1), not as stderr text.
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# Clean start on the configured base (single source of truth in lib/clean-start.sh).
# KG_LIB_DIR may be preset by the caller; otherwise it defaults to the path below.
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
# Any failure to source the library (not only a missing file) is reported as "not found".
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
# slug.sh is taken from the directory this script lives in, independent of the cwd
# (which is now the genome checkout).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SLUG="${SCRIPT_DIR}/slug.sh"
#######################################
# Print the value of the first "<key>:" line that starts at column 0 of a file.
# Leading/trailing whitespace (including a trailing CR) and one pair of matching
# surrounding quotes are removed. Pure bash: no pipeline, so no SIGPIPE failure
# under `pipefail`.
# Arguments: $1 - frontmatter key, $2 - file to read
# Outputs:   the value on stdout; nothing when the key is absent
# Returns:   0 on success (key present or not), 1 when the file is unreadable
#######################################
frontmatter_value() {
  local key=$1 file=$2 line value
  if [[ ! -r "$file" ]]; then
    printf 'frontmatter_value: cannot read %s\n' "$file" >&2
    return 1
  fi
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" == "${key}:"* ]] || continue
    value="${line#"${key}:"}"
    value="${value#"${value%%[![:space:]]*}"}"   # trim leading whitespace
    value="${value%"${value##*[![:space:]]}"}"   # trim trailing whitespace / CR
    case "$value" in
      \"*\") value="${value#\"}"; value="${value%\"}" ;;
      \'*\') value="${value#\'}"; value="${value%\'}" ;;
    esac
    printf '%s\n' "$value"
    return 0
  done < "$file"
  return 0
}

# NEW: raws with no source page yet. MOD: raws whose page records another hash.
declare -a NEW=()
declare -a MOD=()
declare -A SEEN_SLUG=()   # slug -> last raw path that produced it (collision check)
if [[ -d raw/articles ]]; then
  while IFS= read -r -d '' f; do
    rel="${f#./}"
    case "$rel" in
      */.stfolder/*|*/.stignore|*/.gitkeep) continue ;;   # sync noise, never ingested
    esac
    slug="$("$SLUG" --raw "$rel")" || continue
    # Residual collision (two distinct raws -> same slug): warn, do not silence.
    if [[ -n "${SEEN_SLUG[$slug]:-}" && "${SEEN_SLUG[$slug]}" != "$rel" ]]; then
      # The warning is advisory: a missing or failing logger must not abort the
      # whole calculation under `set -e`.
      logger -t pending-raw "warn: slug collision ${slug}: ${SEEN_SLUG[$slug]} <-> ${rel}" \
        2>/dev/null || true
    fi
    SEEN_SLUG[$slug]="$rel"
    page="wiki/sources/${slug}.md"
    if [[ ! -f "$page" ]]; then
      NEW+=("$rel")
    else
      cur="$(sha256sum "$rel" | cut -d' ' -f1)"
      rec="$(frontmatter_value source_sha256 "$page")"
      # sha256sum prints lower-case hex; accept an upper-case recorded hash too.
      if [[ "$cur" != "${rec,,}" ]]; then
        MOD+=("$rel")
      fi
    fi
  # Byte-wise sort makes the result order independent of directory traversal order.
  done < <(find raw/articles -type f -name '*.md' -print0 2>/dev/null | LC_ALL=C sort -z)
fi
#######################################
# Print the JSON envelope {status, genome, count, files[], detail[]} for the
# pending raws. Every string goes through jq, so a genome name or path that
# contains quotes, backslashes or tabs cannot break the JSON.
# Globals:   NEW (read) - raw paths with no source page
#            MOD (read) - raw paths whose page records another hash
# Arguments: $1 - genome name
# Outputs:   one compact JSON line when nothing is pending (same bytes as before
#            for ordinary names), pretty-printed JSON otherwise
#######################################
pending_report() {
  local g=$1 x
  if (( ${#NEW[@]} == 0 && ${#MOD[@]} == 0 )); then
    jq -nc --arg g "$g" '{status: "ok", genome: $g, count: 0, files: [], detail: []}'
    return
  fi
  {
    # The reason goes first so a tab inside a path cannot be taken for the separator.
    # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
    for x in ${NEW[@]+"${NEW[@]}"}; do printf 'new\t%s\n' "$x"; done
    for x in ${MOD[@]+"${MOD[@]}"}; do printf 'modified\t%s\n' "$x"; done
  } | jq -Rn --arg g "$g" '
        [inputs | split("\t") | {path: (.[1:] | join("\t")), reason: .[0]}]
        | {status: "ok", genome: $g, count: length, files: [.[].path], detail: .}'
}

pending_report "$genome"

View file

@ -53,7 +53,7 @@ contradictions="$(jq -r '.contradictions // "None"' "$manifest")"
[[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing" [[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"
slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}" slug="$(bash "${SCRIPTS}/slug.sh" --raw "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"
# --- collect touched paths --- # --- collect touched paths ---
mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest") mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest")
@ -107,8 +107,12 @@ done < <(jq -r '.pages[] | select(.status=="created")
| [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest") | [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
# --- 2. log entry --- # --- 2. log entry ---
# Stable run_id: deterministic from the input (raw path + content hash). Survives wrapper
# re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
src_sha="$(sha256sum "$raw_source" 2>/dev/null | cut -d' ' -f1)" || src_sha="unknown"
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")" out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \ bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \ --context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|| fail "log" "log-append failed" || fail "log" "log-append failed"
@ -127,6 +131,7 @@ body="$(mktemp)"
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash) trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
{ {
echo "<!-- kg:raw=${raw_source} -->" # marker for the rejection loop (invisible in the render)
echo "## Summary" echo "## Summary"
echo "$pr_summary" echo "$pr_summary"
echo "" echo ""

View file

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# =============================================================================
# skills/ingest/scripts/run-prune.sh
# Symmetric companion to run-ingest: prune source pages whose raw source no
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
# race. Removes ONLY the pages it derived plus their index entries, commits
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
# operator approves the deletion; principle 2). Never deletes of its own accord.
#
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
# prune`) has already taken the per-genome lock and done clean_start, exactly
# like `pi ingest` — so this script does neither.
#
# run-prune.sh <genome>
#
# Emits a single JSON result line on stdout for n8n to parse.
# =============================================================================
# Strict mode: abort on command errors, unset variables and failed pipeline stages.
set -euo pipefail
# $1 = genome name. Required, but not referenced again in the code shown here: the
# script operates on the current directory.
genome="${1:?usage: run-prune.sh <genome>}"
# Directory containing this script; the sibling helpers (index-append.py, open-pr.sh)
# are invoked from it.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# fail <stage> <reason>: print a one-line JSON error object on stdout and exit 1.
fail() {
jq -nc --arg stage "$1" --arg reason "$2" '{status:"error", stage:$stage, reason:$reason}'
exit 1
}
# The jq check uses a literal echo because fail() itself needs jq to build its output.
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","reason":"jq missing"}'; exit 1; }
command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"
# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when its frontmatter source_path points at
# a raw file that no longer exists. Legacy pages without source_path are ignored.

#######################################
# Print the value of the first "<key>:" line that starts at column 0 of a file.
# Leading/trailing whitespace (including a trailing CR) and one pair of matching
# surrounding quotes are removed, so a quoted or space-padded source_path is not
# mistaken for a missing file — which here would propose deleting a live page.
# Pure bash: no pipeline, so no SIGPIPE failure under `pipefail`.
# Arguments: $1 - frontmatter key, $2 - file to read
# Outputs:   the value on stdout; nothing when the key is absent
# Returns:   0 on success (key present or not), 1 when the file is unreadable
#######################################
frontmatter_value() {
  local key=$1 file=$2 line value
  if [[ ! -r "$file" ]]; then
    printf 'frontmatter_value: cannot read %s\n' "$file" >&2
    return 1
  fi
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" == "${key}:"* ]] || continue
    value="${line#"${key}:"}"
    value="${value#"${value%%[![:space:]]*}"}"   # trim leading whitespace
    value="${value%"${value##*[![:space:]]}"}"   # trim trailing whitespace / CR
    case "$value" in
      \"*\") value="${value#\"}"; value="${value%\"}" ;;
      \'*\') value="${value#\'}"; value="${value%\'}" ;;
    esac
    printf '%s\n' "$value"
    return 0
  done < "$file"
  return 0
}

declare -a ORPH=()
for page in wiki/sources/*.md; do
  [[ -e "$page" ]] || continue          # unmatched glob stays literal: skip it
  sp="$(frontmatter_value source_path "$page")"
  [[ -n "$sp" ]] || continue            # legacy page: no source_path recorded
  [[ -f "$sp" ]] || ORPH+=("$page")
done
# Nothing to prune: report it and stop before touching the tree or opening a PR.
if [[ ${#ORPH[@]} -eq 0 ]]; then
jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
exit 0
fi
# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
# PRUNED collects the wiki-relative link names (no "wiki/" prefix, no ".md") that
# were actually removed.
declare -a PRUNED=()
for page in "${ORPH[@]}"; do
# Guard 1: only paths under wiki/ may be removed.
case "$page" in
wiki/*) : ;;
*) fail "prune" "refusing to remove outside wiki/: ${page}" ;;
esac
# Guard 2: reject any path containing "..".
case "$page" in *..*) fail "prune" "path traversal in page: ${page}" ;; esac
# Skip entries that are not (or no longer) regular files.
[[ -f "$page" ]] || continue
rm -f "$page"
link="${page#wiki/}"; link="${link%.md}" # e.g. sources/foo
# A failed index update aborts the run after the page file was already removed.
python3 "${SCRIPTS}/index-append.py" --remove "$link" \
|| fail "index" "index-append --remove failed for ${link}"
PRUNED+=("$link")
done
# --- assemble the PR body ---
# date_tag (YYYY-MM-DD) also names the branch below.
date_tag="$(date +%F)"
body="$(mktemp)"
# Remove the temporary body file on every exit path.
trap 'rm -f "$body"' EXIT
{
echo "## Prune orphaned sources"
echo ""
echo "These source pages reference a \`source_path\` whose raw file no longer exists"
echo "in \`raw/\`. Removing them keeps the wiki in sync with git (the source of truth)."
echo ""
echo "| Removed page |"
echo "|--------------|"
for l in "${PRUNED[@]}"; do echo "| \`wiki/${l}.md\` |"; done
} > "$body"
# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
branch="chore/prune-orphans-${date_tag}"
# open-pr.sh's stdout and stderr are captured together; its exit status is kept in
# pr_rc so the JSON result below is emitted even when it fails.
pr_out="$( bash "${SCRIPTS}/open-pr.sh" \
--branch "$branch" \
--title "chore: prune ${#PRUNED[@]} orphaned source(s)" \
--body-file "$body" --base "${INGEST_BASE:-main}" 2>&1 )" && pr_rc=0 || pr_rc=$?
# The PR URL is taken from the first captured line that starts with "PR opened: ";
# it stays empty when there is no such line.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"
# --- result line for n8n ---
# status is "ok" or "pr_failed"; detail carries the captured open-pr.sh output.
jq -nc \
--arg status "$([[ $pr_rc -eq 0 ]] && echo ok || echo pr_failed)" \
--argjson count "${#PRUNED[@]}" \
--arg pr_url "$pr_url" \
--arg detail "$pr_out" \
--argjson pruned "$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s .)" \
'{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'
# Exit non-zero after printing the result when the PR step failed.
[[ $pr_rc -eq 0 ]] || exit 1

View file

@ -7,6 +7,18 @@
# ============================================================================= # =============================================================================
set -euo pipefail set -euo pipefail
#######################################
# Derive a page slug from a raw path: strip "raw/" and the bucket name, strip the
# file extension, sanitise each remaining path component to lower-case
# [a-z0-9-] and join the components with "-".
#   raw/articles/cibo/il-pane.md -> cibo-il-pane
# The extension is stripped from the file name only, so a dot in a directory
# name (v1.2/notes) no longer swallows the rest of the path. Components that
# sanitise to nothing are dropped, so they cannot leave stray or doubled hyphens
# (or a lone "-" that would pass the non-empty check).
# Arguments: $1 - raw path (raw/<bucket>/<rel/path>)
# Outputs:   the slug on stdout; an error message on stderr when it is empty
# Returns:   0 on success, 1 when the slug would be empty
#######################################
slug_from_raw() {
  local raw=$1 rel base dir slug
  rel="${raw#raw/}"; rel="${rel#*/}"   # strip "raw/" and the bucket name
  base="${rel##*/}"                    # file name
  dir="${rel%"$base"}"                 # leading directories incl. trailing "/", or ""
  base="${base%.*}"                    # strip extension (file name only)
  slug="$(printf '%s\n' "${dir}${base}" | tr '/' '\n' \
    | sed -E 's/[^a-zA-Z0-9]+/-/g; s/^-+//; s/-+$//; /^$/d' \
    | tr '[:upper:]' '[:lower:]' | paste -sd- -)"
  [[ -n "$slug" ]] || { echo "slug: empty result for input '${raw}'" >&2; return 1; }
  printf '%s\n' "$slug"
}

if [[ "${1:-}" == "--raw" ]]; then
  slug_from_raw "${2:?usage: slug.sh --raw <raw/bucket/rel/path>}" || exit 1
  exit 0
fi
input="${1:?usage: slug.sh <path-or-title>}" input="${1:?usage: slug.sh <path-or-title>}"
# Strip directory and extension when given a path # Strip directory and extension when given a path

View file

@ -2,6 +2,19 @@
<!-- One sentence: goal of this session and source processed. --> <!-- One sentence: goal of this session and source processed. -->
<!--
REVIEW GUIDELINES (write the guideline as the FIRST word of your review):
REWORK: <what to fix> -> same branch, guided retry
RESTART: <why restart> -> close PR, start over from scratch
SPLIT: <how to split> -> close PR, reopen as separate branches
REJECT: <why not> -> close PR, no retry
MERGE -> approve and merge
Rules: one concern per directive; be specific to lines/pages; name the principle
that was violated; describe the DESIRED STATE; avoid saying “do better.”
-->
Translated with DeepL.com (free version)
## Pages Created ## Pages Created
| Path | Type | Maturity | | Path | Type | Maturity |

18
tests/clean-start.bats Normal file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env bats
# clean-start.bats — clean_start must leave the checkout identical to origin/<base>.

# Load the shared helpers, then the library under test (LIB_DIR first, repo lib/ as fallback).
setup() {
  load 'helpers'
  source "${LIB_DIR}/clean-start.sh" 2>/dev/null || source "${REPO_ROOT}/lib/clean-start.sh"
}

@test "clean_start: aligns to origin/base, reverts tracked edits, removes untracked" {
  G="$(make_fixture_genome)"; cd "$G"
  echo "from origin" >> wiki/index.md
  git add -A && git commit -q -m "origin ahead" && git push -q
  git reset --hard HEAD~1                 # local BEHIND origin/main
  echo "local junk" >> wiki/log.md        # tracked edit, uncommitted
  echo "scratch" > scratch.txt            # genuinely untracked
  INGEST_BASE="main" clean_start
  git diff --quiet origin/main            # aligned to origin
  grep -q "from origin" wiki/index.md     # forwarded to origin state
  # A bare `! grep` that is not the last command never fails a bats test, so the
  # negative check goes through `run` and an explicit status assertion instead.
  run grep -q "local junk" wiki/log.md    # tracked edit reverted
  [ "$status" -eq 1 ]
  [ ! -f scratch.txt ]                    # untracked removed
}

View file

@ -57,8 +57,8 @@ private: false
## Private Synthesis (`wiki/private/`) ## Private Synthesis (`wiki/private/`)
*Restricted access. Requires PRIVATE_CONTEXT: enabled and unlocked repo.* _Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
*List slug names ONLY. Do not append summaries — prevents metadata leakage.* _List slug names ONLY. Do not append summaries — prevents metadata leakage._
EOF EOF
cat > "${g}/wiki/log.md" <<'EOF' cat > "${g}/wiki/log.md" <<'EOF'
@ -88,16 +88,17 @@ EOF
git init -q git init -q
# Hermetic: ignore the user's global git config (signing, global hooks); # Hermetic: ignore the user's global git config (signing, global hooks);
# otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here. # otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here.
git config commit.gpgsign false git config --local user.name "Framework Test"
git config core.hooksPath "${base}/nohooks" git config --local user.email "test@genome.local"
git config user.email t@t git config --local commit.gpgsign false
git config user.name tester git config --local core.hooksPath "${base}/nohooks"
git add .
git commit -qm init
git branch -M main git branch -M main
git remote add origin "${base}/origin.git" git remote add origin "${base}/origin.git"
git add .
git commit -q -m "chore: initial scaffold"
git push -q -u origin main git push -q -u origin main
) >/dev/null )
echo "${g}" echo "${g}"
} }

44
tests/index-remove.bats Normal file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env bats
# tests/index-remove.bats — index-append.py --remove mode.

# Each test gets its own fixture genome, exported as $g.
setup() {
  load 'helpers'
  export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  g_src="$(make_fixture_genome)"; export g="$g_src"
}

@test "index --remove: deletes the matching entry, keeps the others" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/b]] — B. `maturity: draft`'
  grep -q 'sources/a' wiki/index.md
  grep -q 'sources/b' wiki/index.md
  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]
  # A bare `! grep` that is not the last command never fails a bats test, so the
  # negative check goes through `run` and an explicit status assertion instead.
  run grep -q '\[\[sources/a\]\]' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q 'sources/b' wiki/index.md
}

@test "index --remove: idempotent when the entry is absent" {
  cd "$g"
  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/does-not-exist'
  [ "$status" -eq 0 ]
  [[ "$output" == *'nothing to remove'* ]]
}

@test "index --remove: bumps last_updated" {
  cd "$g"
  python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
  # set last_updated to an old date, then remove and check it moved
  sed -i 's/^last_updated:.*/last_updated: 2000-01-01/' wiki/index.md
  run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
  [ "$status" -eq 0 ]
  # Same bats gotcha as above: assert the old date is gone via `run`.
  run grep -q '2000-01-01' wiki/index.md
  [ "$status" -eq 1 ]
  grep -q "last_updated: $(date +%F)" wiki/index.md
}

@test "index --remove: rejects passing both --entry and --remove" {
  cd "$g"
  run python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — x' --remove 'sources/a'
  [ "$status" -eq 2 ]
}

View file

@ -0,0 +1,29 @@
#!/usr/bin/env bats
# Lint regression test: the provenance keys source_path and source_sha256 in a source
# page's frontmatter must not make lint_markdown_file fail.
# Loads the shared helpers and sources output.sh before lint.sh.
setup() {
load 'helpers'
source "$LIB_DIR/output.sh"
source "$LIB_DIR/lint.sh"
}
@test "lint tolerates source_path/source_sha256 in source frontmatter" {
G="$(make_fixture_genome)"
mkdir -p "$G/wiki/sources"
# Quoted heredoc delimiter: the page is written literally, with no expansion.
cat > "$G/wiki/sources/test-source.md" <<'EOFMD'
---
title: "Test Source"
type: source
domain: genome-test
maturity: draft
last_updated: 2026-06-25
private: false
tags: [test]
source_path: raw/articles/test.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Test Source
body
EOFMD
# Second argument is the domain value used in the page's frontmatter above.
run lint_markdown_file "$G/wiki/sources/test-source.md" genome-test
[ "$status" -eq 0 ]
}

View file

@ -0,0 +1,48 @@
#!/usr/bin/env bats
# open-pr-rolling.bats — a re-ingest of the same slug updates the OPEN PR's branch
# (force-with-lease) instead of failing. Uses the local bare remote from make_fixture_genome.
load helpers
# Intentional no-op: there is no per-file setup.
setup_file() { :; }
@test "open-pr: re-ingest of the same slug rolls the branch forward (force-with-lease)" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
# DRY_RUN=1 keeps open-pr.sh off the Forgejo API; the FORGEJO_* values are placeholders.
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
# NOTE(review): this mktemp file is never removed; a path under BATS_TEST_TMPDIR would
# be cleaned up by bats.
body="$(mktemp)"; echo body > "$body"
# first ingest of slug x (v1)
mkdir -p wiki/sources; printf 'v1\n' > wiki/sources/x.md
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
[ "$status" -eq 0 ]
git rev-parse --verify feat/ai-ingest-x
# Remember the first branch tip so the rebuild can be told apart from an append.
first="$(git rev-parse feat/ai-ingest-x)"
# simulate clean_start back to base, then an edited re-ingest (v2)
git switch -q main; git reset -q --hard origin/main; git clean -q -fd
printf 'v2-edited\n' > wiki/sources/x.md
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
[ "$status" -eq 0 ]
second="$(git rev-parse feat/ai-ingest-x)"
# the branch was REBUILT from base (diverged), not appended: second is not a descendant of first
run git merge-base --is-ancestor "$first" "$second"
[ "$status" -ne 0 ]
# origin received the v2 content (force-with-lease pushed the rebuilt branch)
git fetch -q origin
run git show "origin/feat/ai-ingest-x:wiki/sources/x.md"
[ "$status" -eq 0 ]
[[ "$output" == *"v2-edited"* ]]
}
@test "open-pr: prune branch override still works after the rolling change" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
body="$(mktemp)"; echo body > "$body"
mkdir -p wiki/sources; printf 'p\n' > wiki/sources/p.md
# No --slug here: --branch alone must be accepted and used as the branch name.
run bash "$SKILL_SCRIPTS/open-pr.sh" --branch "chore/prune-orphans-2026-06-30" \
--title "chore: prune 1 orphaned source(s)" --body-file "$body" --base main
[ "$status" -eq 0 ]
git rev-parse --verify "chore/prune-orphans-2026-06-30"
}

38
tests/orphan-wiki.bats Normal file
View file

@ -0,0 +1,38 @@
#!/usr/bin/env bats
# orphan-wiki.bats — orphan-wiki.sh must report source pages whose source_path no longer
# exists and ignore pages that record no source_path.
# Every test commits and pushes its fixture changes before running the script.
setup() {
load 'helpers'
export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
export INGEST_BASE="main"
export KG_LIB_DIR="${LIB_DIR}" # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR
g_src="$(make_fixture_genome)"
export g_name="fixture-genome"
# Move the fixture to GENOMES_ROOT/<name>, where the script resolves its genome argument.
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
export g="${GENOMES_ROOT}/${g_name}"
# Drop the fixture's default raw file so each test controls the raw/ contents itself.
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
}
@test "orphan-wiki: no orphans when raw and source page match" {
mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
mkdir -p "${g}/wiki/sources"
printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}
@test "orphan-wiki: detects orphaned source page" {
mkdir -p "${g}/wiki/sources"
# source_path points at a raw file that was never created.
printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "orphan"'
}
@test "orphan-wiki: ignores legacy pages without source_path" {
mkdir -p "${g}/wiki/sources"
printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
run bash "$ORPHAN" "$g_name"
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
}

91
tests/pending-raw.bats Normal file
View file

@ -0,0 +1,91 @@
#!/usr/bin/env bats
# pending-raw.bats — pending-raw.sh must classify raw/articles/*.md files as new or
# modified and leave up-to-date files and sync noise out of the result.
# Every test commits and pushes its fixture changes before running the script.
setup() {
load 'helpers'
export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
export INGEST_BASE="main"
export KG_LIB_DIR="${LIB_DIR}"
g_src="$(make_fixture_genome)"
export g_name="fixture-genome"
# Move the fixture to GENOMES_ROOT/<name>, where the script resolves its genome argument.
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
export g="${GENOMES_ROOT}/${g_name}"
# FIX: make_fixture_genome ships raw/articles/test.md with no source page, which would
# otherwise count as a permanent 'new' and break every count assertion. Clear it so each
# test controls exactly what is pending (verified: count base becomes 0).
( cd "$g" && rm -f raw/articles/test.md && git add -A \
&& git commit -q -m "test: clear default raw" && git push -q )
}
@test "pending-raw: detects a brand new raw file" {
echo "new content" > "${g}/raw/articles/new-file.md"
( cd "$g" && git add . && git commit -q -m "add raw" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].path == "raw/articles/new-file.md"'
echo "$output" | jq -e '.detail[0].reason == "new"'
}
@test "pending-raw: skips up-to-date files" {
echo "ok content" > "${g}/raw/articles/ok-file.md"
hash_ok="$(sha256sum "${g}/raw/articles/ok-file.md" | cut -d' ' -f1)"
# Unquoted heredoc delimiter: $hash_ok is expanded into the page's frontmatter.
cat > "${g}/wiki/sources/ok-file.md" <<FM
---
source_sha256: $hash_ok
---
FM
( cd "$g" && git add . && git commit -q -m "add ok" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
@test "pending-raw: flags modified files" {
echo "content v1" > "${g}/raw/articles/mod-file.md"
hash_v1="$(sha256sum "${g}/raw/articles/mod-file.md" | cut -d' ' -f1)"
cat > "${g}/wiki/sources/mod-file.md" <<FM
---
source_sha256: $hash_v1
---
FM
( cd "$g" && git add . && git commit -q -m "v1" && git push -q )
# Change the raw file after its hash was recorded; the page still holds the v1 hash.
echo "content v2" > "${g}/raw/articles/mod-file.md"
( cd "$g" && git add . && git commit -q -m "v2" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.detail[0].reason == "modified"'
}
# NOTE(review): the assertions below check the reported raw path, not the slug itself.
@test "pending-raw: nested subdirectory yields prefixed slug" {
mkdir -p "${g}/raw/articles/sub-b"
echo "subdir content" > "${g}/raw/articles/sub-b/file.md"
( cd "$g" && git add . && git commit -q -m "subdir" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 1'
echo "$output" | jq -e '.files[0] == "raw/articles/sub-b/file.md"'
}
@test "pending-raw: excludes noise (.stfolder, .gitkeep)" {
touch "${g}/raw/articles/.gitkeep"
mkdir -p "${g}/raw/articles/.stfolder"
touch "${g}/raw/articles/.stfolder/sync.log"
( cd "$g" && git add . && git commit -q -m "noise" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 0'
}
@test "pending-raw: reports both files on a slug collision" {
# cibo-pane.md and cibo/pane.md are two distinct raws; both must still be listed.
mkdir -p "${g}/raw/articles/cibo"
echo "c1" > "${g}/raw/articles/cibo-pane.md"
echo "c2" > "${g}/raw/articles/cibo/pane.md"
( cd "$g" && git add . && git commit -q -m "collision" && git push -q )
run bash "$PENDING" "$g_name"
[ "$status" -eq 0 ]
echo "$output" | jq -e '.count == 2'
}

View file

@ -17,6 +17,8 @@ EXECUTABLES=(
skills/ingest/scripts/open-pr.sh skills/ingest/scripts/open-pr.sh
skills/ingest/scripts/log-append.sh skills/ingest/scripts/log-append.sh
skills/ingest/scripts/slug.sh skills/ingest/scripts/slug.sh
skills/ingest/scripts/pending-raw.sh
skills/ingest/scripts/orphan-wiki.sh
skills/ingest/scripts/index-append.py skills/ingest/scripts/index-append.py
scripts/add-genome.sh scripts/add-genome.sh
scripts/setup.sh scripts/setup.sh
@ -28,7 +30,7 @@ EXECUTABLES=(
# Librerie sourced: NON devono essere eseguibili. # Librerie sourced: NON devono essere eseguibili.
LIBRARIES=( LIBRARIES=(
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/clean-start.sh
providers/forgejo.sh providers/github.sh providers/forgejo.sh providers/github.sh
registry.sh globals.env registry.sh globals.env
) )

View file

@ -0,0 +1,75 @@
#!/usr/bin/env bats
# raw-commit-quiet.bats — quiet-window behaviour of genome-raw-commit.sh.
# No Syncthing (no API key -> default author); pushes to a local bare repo via GENOME_PUSH_URL.

# Build an isolated HOME with its env file, a bare "origin" and a seeded vault
# checkout on main for every test.
setup() {
  SCRIPT="${BATS_TEST_DIRNAME}/../deploy/nexus/genome-raw-commit.sh"
  export HOME="${BATS_TEST_TMPDIR}/home"; mkdir -p "$HOME/.config"
  root="${BATS_TEST_TMPDIR}/vaults"; mkdir -p "$root"
  bare="${BATS_TEST_TMPDIR}/origin.git"; git init -q --bare "$bare"
  cat > "$HOME/.config/knowledge-genome.env" <<EOF
GENOME_VAULTS_ROOT=$root
GENOME_BASE=main
FORGEJO_USER=n8n-bot
FORGEJO_HOST=127.0.0.1:3001
FORGEJO_OWNER=Keru
COMMITTER_NAME=n8n-bot
COMMITTER_EMAIL=n8n-bot@homelab
DEFAULT_AUTHOR_NAME=Tester
DEFAULT_AUTHOR_EMAIL=tester@local
EOF
  export g="genome-test"; export vault="$root/$g"
  git clone -q "$bare" "$vault" 2>/dev/null || mkdir -p "$vault"
  ( cd "$vault"
    git init -q 2>/dev/null || true
    git config user.name n8n-bot; git config user.email n8n-bot@homelab; git config commit.gpgsign false
    git checkout -q -b main 2>/dev/null || git switch -q main
    mkdir -p raw/articles; echo seed > raw/articles/.gitkeep
    git add -A; git commit -q -m init
    git remote add origin "$bare" 2>/dev/null || git remote set-url origin "$bare"
    git push -q -u origin main )
  export GENOME_PUSH_URL="$bare"   # test seam -> push to the local bare repo
}

# Snapshot the raw/ files tracked in the vault's index into f.txt.
files() { ( cd "$vault" && git ls-files raw/ ) > "${BATS_TEST_TMPDIR}/f.txt"; }

@test "raw-commit: holds a freshly-written raw, commits it once it settles" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  echo "still typing" > "$vault/raw/articles/hot.md"            # fresh -> hot
  echo "finished" > "$vault/raw/articles/stable.md"
  touch -d "10 minutes ago" "$vault/raw/articles/stable.md"     # settled
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.status=="ok"'
  files
  grep -q 'raw/articles/stable.md' "${BATS_TEST_TMPDIR}/f.txt"  # committed
  # A bare `! grep` that is not the last command never fails a bats test, so the
  # "held back" check goes through `run` and an explicit status assertion.
  run grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt" # held back
  [ "$status" -eq 1 ]
  touch -d "10 minutes ago" "$vault/raw/articles/hot.md"        # now it settles
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  files
  grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt"     # now committed
}

@test "raw-commit: noop with held count while everything is still settling" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  echo "typing" > "$vault/raw/articles/wip.md"                  # fresh -> hot
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.status=="noop"'
  echo "$output" | jq -e '.held==1'
}

@test "raw-commit: a deletion is committed immediately (not subject to the quiet window)" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  # commit a settled file first
  echo done > "$vault/raw/articles/old.md"; touch -d "10 minutes ago" "$vault/raw/articles/old.md"
  run bash "$SCRIPT" "$g"; [ "$status" -eq 0 ]
  files; grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
  # now delete it -> should commit the removal even though "just changed"
  rm "$vault/raw/articles/old.md"
  run bash "$SCRIPT" "$g"
  [ "$status" -eq 0 ]
  echo "$output" | jq -e '.status=="ok"'
  files
  run grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
  [ "$status" -eq 1 ]
}

View file

@ -171,3 +171,41 @@ EOF
[ "$status" -eq 0 ] [ "$status" -eq 0 ]
[[ "$output" == *"develop"* ]] [[ "$output" == *"develop"* ]]
} }
# Nested raw path: raw/articles/cibo/il-pane.md is expected to map to the slug
# "cibo-il-pane". The test only asserts that run-ingest.sh succeeds and that its output
# contains that slug.
@test "run-ingest: branch name matches slug.sh --raw for nested raw paths" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
G="$(make_fixture_genome)"; cd "$G"
mkdir -p wiki/sources
# Source page the manifest below reports as created (quoted heredoc: written literally).
cat > wiki/sources/cibo-il-pane.md <<'EOFMD'
---
title: "Il Pane"
type: source
domain: genome-test
tags: [cibo]
maturity: draft
last_updated: 2026-06-25
private: false
source_path: raw/articles/cibo/il-pane.md
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
---
# Il Pane
body
EOFMD
# Manifest consumed by run-ingest.sh; raw_source is the nested path under test.
cat > .ingest-manifest.json <<'EOFJSON'
{
"raw_source": "raw/articles/cibo/il-pane.md",
"model": "qwen3.5-9b",
"reasoning": "Ingest.",
"pr_summary": "Ingest summary.",
"contradictions": "None",
"pages": [
{"path": "wiki/sources/cibo-il-pane.md", "summary": "Summary.", "maturity": "draft", "status": "created"}
]
}
EOFJSON
export KG_LIB_DIR="$LIB_DIR"
# DRY_RUN=1 with placeholder FORGEJO_* values: no real Forgejo instance is involved.
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
[ "$status" -eq 0 ]
[[ "$output" == *"cibo-il-pane"* ]]
}

68
tests/run-prune.bats Normal file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env bats
# tests/run-prune.bats — prune orphaned sources (no LLM, no network; DRY_RUN).
setup() {
load 'helpers'
export PRUNE="${SKILL_SCRIPTS}/run-prune.sh"
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
export INGEST_BASE="main"
export KG_LIB_DIR="${LIB_DIR}"
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
export DRY_RUN=1
g_src="$(make_fixture_genome)"; export g_name="fixture-genome"
mv "$g_src" "${GENOMES_ROOT}/${g_name}"; export g="${GENOMES_ROOT}/${g_name}"
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m clear && git push -q )
}
@test "run-prune: removes only the orphaned source + its index entry, opens a dry PR" {
command -v jq >/dev/null 2>&1 || skip "jq not installed"
cd "$g"
# kept: raw exists. orphan: raw missing.
echo content > raw/articles/kept.md
h="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" > wiki/sources/kept.md
printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/kept]] — kept. `maturity: draft`'
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/orphan]] — orphan. `maturity: draft`'
git add -A && git commit -q -m setup && git push -q
run bash "$PRUNE" "$g_name"
[ "$status" -eq 0 ]
[[ "$output" == *'"status":"ok"'* ]]
[[ "$output" == *'"count":1'* ]]
# only the orphan page is gone
[ ! -f wiki/sources/orphan.md ]
[ -f wiki/sources/kept.md ]
# index reflects the removal
! grep -q 'sources/orphan' wiki/index.md
grep -q 'sources/kept' wiki/index.md
# committed on a chore/ branch (NOT feat/ai-ingest-*)
git rev-parse --verify "chore/prune-orphans-$(date +%F)"
}
@test "run-prune: no orphans -> count 0 and no PR/branch" {
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  cd "$g"

  # A single source page whose raw file is present, recorded with its real digest.
  raw_file="raw/articles/kept.md"
  printf 'content\n' > "$raw_file"
  raw_digest="$(sha256sum "$raw_file" | cut -d' ' -f1)"
  printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$raw_digest" > wiki/sources/kept.md
  git add -A \
    && git commit -q -m setup \
    && git push -q

  # The prune run succeeds and reports zero removals.
  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  [[ "$output" == *'"count":0'* ]]

  # Today's prune branch must not resolve.
  prune_branch="chore/prune-orphans-$(date +%F)"
  run git rev-parse --verify "$prune_branch"
  [ "$status" -ne 0 ]
}
@test "run-prune: refuses when an orphan path would escape wiki/ (defense in depth)" {
  # NOTE(review): the title describes a path-escape refusal, but no escaping
  # path is constructed here. What this test pins is that a page without
  # source_path is left alone while a page with a missing raw file is pruned.
  # Confirm whether a dedicated path-escape case is still needed.
  command -v jq >/dev/null 2>&1 || skip "jq not installed"
  cd "$g"

  # legacy page without source_path is ignored; a page with a missing raw is the orphan.
  printf -- '---\ntitle: legacy\n---\nbody\n' > wiki/sources/legacy.md
  printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
  git add -A && git commit -q -m setup && git push -q

  run bash "$PRUNE" "$g_name"
  [ "$status" -eq 0 ]
  # Exactly one removal: the orphan, not the legacy page.
  [[ "$output" == *'"count":1'* ]]
  [ ! -f wiki/sources/orphan.md ]
  [ -f wiki/sources/legacy.md ]
}

View file

@ -86,3 +86,17 @@ EOF
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`' python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
grep -q "^last_updated: $(date +%F)$" wiki/index.md grep -q "^last_updated: $(date +%F)$" wiki/index.md
} }
@test "log-append: dedup on stable run_id prevents duplicate entries" {
  genome_dir="$(make_fixture_genome)"
  cd "$genome_dir"
  stable_id="test-stable-run-id-001"

  # The same argument list is used for both invocations.
  entry_args=(
    --run-id "$stable_id" --type INGEST --subject "test" --model "m"
    --context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
  )

  # First call succeeds.
  run bash "$SKILL_SCRIPTS/log-append.sh" "${entry_args[@]}"
  [ "$status" -eq 0 ]

  # Second call with the same run id also succeeds and reports the duplicate.
  run bash "$SKILL_SCRIPTS/log-append.sh" "${entry_args[@]}"
  [ "$status" -eq 0 ]
  [[ "$output" == *"already present"* ]]

  # The log holds the run id exactly once. `|| true` absorbs grep's exit 1 on
  # zero matches so the count itself is what gets compared.
  needle="run_id: \`${stable_id}\`"
  matches="$(grep -cF "$needle" wiki/log.md || true)"
  [ "$matches" -eq 1 ]
}

30
tests/slug.bats Normal file
View file

@ -0,0 +1,30 @@
#!/usr/bin/env bats
setup() {
  # Load the shared helpers file, then record the path of the script under test.
  load 'helpers'
  SLUG="$SKILL_SCRIPTS/slug.sh"
}
@test "slug --raw: flat file remains unchanged" {
  # A file sitting directly under raw/articles/ slugs to its bare stem.
  raw_path="raw/articles/il-pane.md"
  want="il-pane"

  run bash "$SLUG" --raw "$raw_path"
  [ "$status" -eq 0 ]
  [ "$output" = "$want" ]
}
@test "slug --raw: nested file gets folder prefix" {
  # A file one folder below raw/articles/ gets that folder name prepended.
  raw_path="raw/articles/cibo/il-pane.md"
  want="cibo-il-pane"

  run bash "$SLUG" --raw "$raw_path"
  [ "$status" -eq 0 ]
  [ "$output" = "$want" ]
}
@test "slug --raw: distinct subdirs avoid collision" {
  # Same file name under two different folders must yield two different slugs.
  slug_cibo="$(bash "$SLUG" --raw "raw/articles/cibo/pane.md")"
  slug_storia="$(bash "$SLUG" --raw "raw/articles/storia/pane.md")"

  [ "$slug_cibo" != "$slug_storia" ]
}
@test "slug --raw: Bash and Python-calling-bash agree (single implementation)" {
  raw_path="raw/articles/cibo/il-pane.md"

  # Slug obtained by invoking the script directly.
  b="$(bash "$SLUG" --raw "$raw_path")"

  # Slug obtained by having Python spawn the same script. The script path and
  # the input are handed over via argv rather than spliced into the Python
  # source, so a path containing quotes or backslashes cannot change the
  # program that runs.
  p="$(python3 -c 'import subprocess, sys; print(subprocess.check_output(["bash", sys.argv[1], "--raw", sys.argv[2]], text=True).strip())' "$SLUG" "$raw_path")"

  [ "$b" = "$p" ]
}