Compare commits
No commits in common. "main" and "release/1.0.0" have entirely different histories.
main
...
release/1.
71 changed files with 288 additions and 6376 deletions
|
|
@ -1,29 +0,0 @@
|
|||
root = true
|
||||
|
||||
# Whitespace / EOL / indent per TUTTI i tipi — cross-editor, zero dipendenze.
|
||||
# Non tocca mai il CONTENUTO (quindi i placeholder {{...}} sono al sicuro qui).
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
# Markdown: preserva i "due spazi" di fine riga (hard break) → non trimmare.
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.{sh,bash}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[*.{py,pyi}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
[*.{yml,yaml,json}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
12
.gitignore
vendored
12
.gitignore
vendored
|
|
@ -1,12 +0,0 @@
|
|||
# VS Code — only shared workspace settings
|
||||
.vscode/*
|
||||
!.vscode/
|
||||
!.vscode/settings.json
|
||||
!.vscode/extensions.json
|
||||
|
||||
# framework
|
||||
/master-knowledge-genome/
|
||||
/keys/
|
||||
*.key
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
# Template engine — contengono i placeholder {{...}}: NON formattare mai.
|
||||
templates/
|
||||
|
||||
# Contenuto di proprietà dell'agente / generato (di norma in repo separati,
|
||||
# elencato qui per sicurezza se apri un genoma nello stesso workspace).
|
||||
wiki/
|
||||
genomes/
|
||||
raw/
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
{
|
||||
"printWidth": 100,
|
||||
"tabWidth": 2,
|
||||
"proseWrap": "preserve"
|
||||
}
|
||||
8
.vscode/extensions.json
vendored
8
.vscode/extensions.json
vendored
|
|
@ -1,8 +0,0 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"esbenp.prettier-vscode",
|
||||
"editorconfig.editorconfig",
|
||||
"timonwong.shellcheck"
|
||||
],
|
||||
"unwantedRecommendations": ["dbaeumer.vscode-eslint", "ms-vscode.vscode-typescript-next"]
|
||||
}
|
||||
18
.vscode/settings.json
vendored
18
.vscode/settings.json
vendored
|
|
@ -1,18 +0,0 @@
|
|||
{
|
||||
"editor.formatOnSave": true,
|
||||
"prettier.requireConfig": true,
|
||||
|
||||
"files.associations": {
|
||||
"templates/**/*.md": "plaintext"
|
||||
},
|
||||
|
||||
"[markdown]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
},
|
||||
"[json]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
},
|
||||
"[jsonc]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
}
|
||||
}
|
||||
53
Makefile
53
Makefile
|
|
@ -1,25 +1,22 @@
|
|||
# =============================================================================
|
||||
# Knowledge Genome - Makefile v. 1.13.0
|
||||
# Knowledge Genome - Makefile v. 1.0.0
|
||||
# Orchestrates the setup and management of the knowledge base.
|
||||
# =============================================================================
|
||||
|
||||
include globals.env
|
||||
export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
|
||||
|
||||
.PHONY: setup add-genome status lint lock doctor sync test verify-structure sync-structure help
|
||||
.PHONY: setup add-genome status lint lock doctor sync help
|
||||
|
||||
help:
|
||||
@echo "Available commands:"
|
||||
@echo " make setup - Full system initialization"
|
||||
@echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo] [CROSS=yes|no]"
|
||||
@echo " make status - Check submodule and encryption status"
|
||||
@echo " make lint - Verify schema, privacy flags, and metadata"
|
||||
@echo " make verify-structure - Report directory drift across all genomes"
|
||||
@echo " make sync-structure - Create any missing canonical dirs (safe)"
|
||||
@echo " make test - Run the bats test suite (no LLM/GPU needed)"
|
||||
@echo " make lock - Lock all encrypted files across all genomes"
|
||||
@echo " make doctor - Verify all required tools are installed"
|
||||
@echo " make sync - Sync submodules and report unpushed commits"
|
||||
@echo " make setup - Full system initialization"
|
||||
@echo " make add-genome - Register and scaffold a new genome"
|
||||
@echo " make status - Check submodule and encryption status"
|
||||
@echo " make lint - Verify schema, privacy flags, and metadata"
|
||||
@echo " make lock - Lock all encrypted files across all genomes"
|
||||
@echo " make doctor - Verify all required tools are installed"
|
||||
@echo " make sync - Sync submodules and report unpushed commits"
|
||||
|
||||
lint:
|
||||
@bash scripts/lint-genomes.sh
|
||||
|
|
@ -30,27 +27,16 @@ setup:
|
|||
add-genome:
|
||||
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
|
||||
echo "Error: NAME and DESC are required."; \
|
||||
echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo] [CROSS=yes|no]"; \
|
||||
echo "Usage: make add-genome NAME=my-genome DESC='My description'"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)" "$(or $(CROSS),no)"
|
||||
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)"
|
||||
|
||||
status:
|
||||
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
|
||||
@echo "--- Master Status ---"
|
||||
@cd $(MASTER_REPO) && git submodule status
|
||||
@echo "--- Encryption Status (per genome) ---"
|
||||
@cd $(MASTER_REPO) && git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true'
|
||||
|
||||
verify-structure:
|
||||
@bash scripts/verify-genomes.sh
|
||||
|
||||
sync-structure:
|
||||
@bash scripts/verify-genomes.sh --sync
|
||||
|
||||
test:
|
||||
@command -v bats >/dev/null 2>&1 || { echo " MISSING: bats (sudo apt install bats)"; exit 1; }
|
||||
@bats tests/
|
||||
@git submodule status
|
||||
@echo "--- Encryption Status (First 10 files) ---"
|
||||
@git-crypt status | head -n 10
|
||||
|
||||
doctor:
|
||||
@echo "Checking required tools..."
|
||||
|
|
@ -59,20 +45,17 @@ doctor:
|
|||
@command -v curl >/dev/null 2>&1 || { echo " MISSING: curl"; exit 1; }
|
||||
@command -v jq >/dev/null 2>&1 || { echo " MISSING: jq"; exit 1; }
|
||||
@command -v bw >/dev/null 2>&1 || echo " OPTIONAL: bw (Bitwarden CLI) not found — key injection will be manual."
|
||||
@command -v python3 >/dev/null 2>&1 || echo " OPTIONAL: python3 not found — needed for 'make test' and the ingest skill (index-append.py), not for setup."
|
||||
@echo "System ready."
|
||||
|
||||
sync:
|
||||
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
|
||||
@echo "Syncing submodules..."
|
||||
@cd $(MASTER_REPO) && git submodule update --init --recursive
|
||||
@git submodule update --init --recursive
|
||||
@echo "--- Unpushed commits per genome ---"
|
||||
@cd $(MASTER_REPO) && git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true'
|
||||
@git submodule foreach 'git log --oneline @{u}.. 2>/dev/null | head -5 || true'
|
||||
|
||||
lock:
|
||||
@[ -d "$(MASTER_REPO)" ] || { echo "Master non trovato. Esegui 'make setup'."; exit 1; }
|
||||
@echo "Locking master repository..."
|
||||
@cd $(MASTER_REPO) && git-crypt lock 2>/dev/null || true
|
||||
@git-crypt lock 2>/dev/null || true
|
||||
@echo "Locking all submodules..."
|
||||
@cd $(MASTER_REPO) && git submodule foreach 'git-crypt lock 2>/dev/null || true'
|
||||
@git submodule foreach 'git-crypt lock 2>/dev/null || true'
|
||||
@echo "All genomes securely locked."
|
||||
|
|
|
|||
507
README.md
507
README.md
|
|
@ -19,17 +19,16 @@ and a human-in-the-loop Git Flow for quality control.
|
|||
5. [Configuration](#configuration)
|
||||
6. [Quick Start](#quick-start)
|
||||
7. [Makefile Reference](#makefile-reference)
|
||||
8. [Testing](#testing)
|
||||
9. [Genome Lifecycle](#genome-lifecycle)
|
||||
10. [Security Model](#security-model)
|
||||
11. [Key Management](#key-management)
|
||||
12. [Agent Sessions](#agent-sessions)
|
||||
13. [Workflows](#workflows)
|
||||
14. [Knowledge Quality](#knowledge-quality)
|
||||
15. [Knowledge Schema](#knowledge-schema)
|
||||
16. [Collaboration Model](#collaboration-model)
|
||||
17. [Optional Extensions](#optional-extensions)
|
||||
18. [Troubleshooting](#troubleshooting)
|
||||
8. [Genome Lifecycle](#genome-lifecycle)
|
||||
9. [Security Model](#security-model)
|
||||
10. [Key Management](#key-management)
|
||||
11. [Agent Sessions](#agent-sessions)
|
||||
12. [Workflows](#workflows)
|
||||
13. [Knowledge Quality](#knowledge-quality)
|
||||
14. [Knowledge Schema](#knowledge-schema)
|
||||
15. [Collaboration Model](#collaboration-model)
|
||||
16. [Optional Extensions](#optional-extensions)
|
||||
17. [Troubleshooting](#troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -50,7 +49,6 @@ evolving synthesis. Knowledge is compiled once and kept current.
|
|||
Contradictions have been flagged. The synthesis already reflects everything ingested.
|
||||
|
||||
This means:
|
||||
|
||||
- No vector database.
|
||||
- No embedding pipeline.
|
||||
- No external retrieval infrastructure.
|
||||
|
|
@ -77,11 +75,6 @@ master-knowledge-genome/ ← Root orchestrator (submodule registry)
|
|||
└── AGENTS.md ← Global coordination schema (cross-genome rules)
|
||||
```
|
||||
|
||||
> The genome names above (`genome-dev`, `genome-finance`, `genome-homelab`) are
|
||||
> **illustrative** — they show the kind of multi-domain layout this orchestrator targets.
|
||||
> The shipped `registry.sh` defines a single disposable sandbox, **`genome-test`**; you
|
||||
> create real genomes yourself with `make add-genome` (see the registry examples below).
|
||||
|
||||
Each genome is an independent git repository:
|
||||
|
||||
```text
|
||||
|
|
@ -110,24 +103,16 @@ genome-{name}/
|
|||
|
||||
### Three layers
|
||||
|
||||
| Layer | Path | Owner | Rule |
|
||||
| ----------- | ----------- | ----------- | ----------------------------------------------------- |
|
||||
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
||||
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
||||
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
||||
|
||||
### Linked projects (optional)
|
||||
|
||||
A genome can optionally declare a **linked project repository** — a separate repo where
|
||||
the knowledge in that genome is meant to be applied (e.g. `genome-dev` linked to an app
|
||||
repo). The link is recorded as a third field in the registry and rendered into the
|
||||
genome's `AGENTS.md` (`## Linked Project`). A genome with no link is _knowledge-only_ and
|
||||
behaves exactly as before. See [Configuration](#configuration).
|
||||
| Layer | Path | Owner | Rule |
|
||||
|-------|------|-------|------|
|
||||
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
||||
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
||||
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
||||
|
||||
### Framework structure
|
||||
|
||||
```text
|
||||
knowledge-genome-orchestrator/ ← This repository (setup tooling)
|
||||
knowledge-genome-setup/ ← This repository (setup tooling)
|
||||
├── globals.env ← Static KEY=VALUE config (Make-includable)
|
||||
├── registry.sh ← Bash-only: GENOMES array + dynamic paths
|
||||
├── Makefile ← Entry point for all operations
|
||||
|
|
@ -135,7 +120,6 @@ knowledge-genome-orchestrator/ ← This repository (setup tooling)
|
|||
│ ├── output.sh ← Terminal helpers (colors, log levels)
|
||||
│ ├── deps.sh ← Dependency validation
|
||||
│ ├── scaffold.sh ← Template rendering engine
|
||||
│ ├── structure.sh ← Canonical genome layout (single source of truth)
|
||||
│ ├── lint.sh ← Per-file validation functions
|
||||
│ └── git-crypt.sh ← git-crypt lifecycle (init, export, verify, rotate)
|
||||
├── providers/
|
||||
|
|
@ -146,46 +130,18 @@ knowledge-genome-orchestrator/ ← This repository (setup tooling)
|
|||
│ ├── setup-master.sh ← Master repo initialisation
|
||||
│ ├── setup-genomes.sh ← Genome provisioning loop
|
||||
│ ├── add-genome.sh ← Add a single new genome
|
||||
│ ├── lint-genomes.sh ← Quality control across all genomes
|
||||
│ └── verify-genomes.sh ← Structure verify / --sync across all genomes
|
||||
├── templates/
|
||||
│ ├── agents-genome.md ← Per-genome agent contract template
|
||||
│ ├── agents-master.md ← Master coordination schema template
|
||||
│ ├── readme-master.md ← Master repo README template
|
||||
│ ├── wiki-index.md ← Index template (rendered per genome)
|
||||
│ ├── wiki-log.md ← Log template (rendered per genome)
|
||||
│ ├── pr-description.md ← PR review checklist template
|
||||
│ ├── pre-commit.sh ← Security hook template
|
||||
│ ├── gitattributes ← Git encryption rules template
|
||||
│ └── gitignore ← Git ignore template
|
||||
├── skills/
|
||||
│ └── ingest/ ← pi skill: deployed to the AI node (vm101)
|
||||
│ ├── SKILL.md ← Semantic-only contract (read/edit, emits manifest)
|
||||
│ ├── references/ ← On-demand reference docs for the agent
|
||||
│ └── scripts/ ← Deterministic post-processor (runs outside the agent)
|
||||
│ ├── run-ingest.sh ← Orchestrator: consumes the manifest, emits one JSON line
|
||||
│ ├── slug.sh ← Slug normalisation
|
||||
│ ├── index-append.py ← Sorted insert into wiki/index.md + last_updated bump
|
||||
│ ├── log-append.sh ← Append a wiki/log.md entry
|
||||
│ ├── scoped-lint.sh ← Lint only the pages touched this run (reuses lib/lint.sh)
|
||||
│ └── open-pr.sh ← Branch / commit / push / open PR (DRY_RUN seam for tests)
|
||||
└── tests/ ← bats suite — deterministic, no LLM/GPU (see Testing)
|
||||
├── helpers.bash
|
||||
├── scripts.bats
|
||||
├── lint.bats
|
||||
├── structure.bats
|
||||
└── run-ingest.bats
|
||||
│ └── lint-genomes.sh ← Quality control across all genomes
|
||||
└── templates/
|
||||
├── agents-genome.md ← Per-genome agent contract template
|
||||
├── agents-master.md ← Master coordination schema template
|
||||
├── wiki-index.md ← Index template (rendered per genome)
|
||||
├── wiki-log.md ← Log template (rendered per genome)
|
||||
├── pr-description.md ← PR review checklist template
|
||||
├── pre-commit.sh ← Security hook template
|
||||
├── gitattributes ← Git encryption rules template
|
||||
└── gitignore ← Git ignore template
|
||||
```
|
||||
|
||||
> The `skills/ingest/` directory is version-controlled here but **deployed** to the AI
|
||||
> node (vm101) under `~/.pi/agent/skills/ingest`. The agent (`pi`) does only semantic work
|
||||
> and writes a manifest; `run-ingest.sh` does the mechanical steps. See [Workflows → Ingest](#ingest).
|
||||
>
|
||||
> `ingest-semantic.py` makes one schema-constrained call to the local model and returns JSON; `run-ingest.sh` then performs the index, log, lint, and PR steps.
|
||||
> Semantic JSON extraction → deterministic wiki conform + manifest.
|
||||
>
|
||||
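> To deploy, run `cp -r skills/ingest/* ~/.pi/agent/skills/ingest/` after `make setup` (`-r` is required so the `references/` and `scripts/` subdirectories are copied too). The skill is updated via `git pull` on the laptop and pushed to vm101 over SSH in the n8n flow.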
|
||||
|
||||
---
|
||||
|
||||
## System Requirements
|
||||
|
|
@ -198,11 +154,7 @@ All tools (git-crypt, bw, qmd) have native Linux binaries.
|
|||
### macOS — full support
|
||||
|
||||
All scripts are compatible with macOS. Requirements:
|
||||
|
||||
- bash 3.2+ (macOS default) — supported for the **setup scripts** (`make` targets, scaffolding).
|
||||
Two things need bash 4+: the `ingest` skill (`mapfile`), which runs on the Linux AI node (not a
|
||||
constraint on the macOS setup machine); and `gcrypt_rotate_key` (`compgen -G`), which **does**
|
||||
run on the laptop. For key rotation on macOS, use Homebrew bash (`brew install bash`).
|
||||
- bash 3.2+ (macOS default) — fully supported. All `bash 4+` constructs removed.
|
||||
- GNU coreutils not required — BSD variants of `date`, `grep`, `sed` all handled.
|
||||
- `git-crypt`: install via Homebrew — `brew install git-crypt`
|
||||
- `jq`, `curl`: pre-installed or via Homebrew
|
||||
|
|
@ -214,7 +166,6 @@ If you use Homebrew bash (`brew install bash`), the scripts work identically to
|
|||
**Git Bash and native Windows are not supported.**
|
||||
|
||||
Reasons:
|
||||
|
||||
- `git-crypt` has no native Windows binary.
|
||||
- Process substitution `<(...)` used for runtime key injection is not available
|
||||
in Git Bash or PowerShell.
|
||||
|
|
@ -228,42 +179,37 @@ All setup and runtime operations work identically to native Linux inside WSL2.
|
|||
|
||||
The system is designed for a homelab architecture:
|
||||
|
||||
| Component | Recommended | Role |
|
||||
| --------------- | ------------------------- | --------------------------------------------------------------- |
|
||||
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
||||
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
||||
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
||||
| Local LLM | 14B–32B quantised | Active wiki maintenance sessions |
|
||||
| Large LLM | 70B (async) | Deep reflection, complex synthesis (scheduled, not interactive) |
|
||||
| Component | Recommended | Role |
|
||||
|-----------|-------------|------|
|
||||
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
||||
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
||||
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
||||
| Local LLM | 14B–32B quantised | Active wiki maintenance sessions |
|
||||
| Large LLM | 70B (async) | Deep reflection, complex synthesis (scheduled, not interactive) |
|
||||
|
||||
> **On VRAM constraints:** with a 16GB card and a 14B model, the KV cache budget
|
||||
> is ~6GB — approximately 32k tokens of effective context. Every token in `AGENTS.md`,
|
||||
> the index, and the log tail is a cost. This is why all agent files are token-optimised
|
||||
> and sessions are kept to one source at a time.
|
||||
|
||||
> **Reference deployment:** the table above is a target profile, not a hard requirement.
|
||||
> The current setup runs a single 16GB GPU (RTX 5060 Ti) with a ~9B model for interactive
|
||||
> ingest, and offloads heavy/async synthesis to a cloud model. Smaller models work — they
|
||||
> just make the "one source per session" discipline and the token budget matter more.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Required
|
||||
|
||||
| Tool | Purpose |
|
||||
| ----------- | -------------------------------- |
|
||||
| `git` | Version control |
|
||||
| `git-crypt` | Transparent file encryption |
|
||||
| `curl` | REST API calls to Forgejo/GitHub |
|
||||
| `jq` | JSON parsing |
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `git` | Version control |
|
||||
| `git-crypt` | Transparent file encryption |
|
||||
| `curl` | REST API calls to Forgejo/GitHub |
|
||||
| `jq` | JSON parsing |
|
||||
|
||||
### Optional
|
||||
|
||||
| Tool | Purpose |
|
||||
| ----- | ----------------------------------------------------------------------- |
|
||||
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
||||
| `qmd` | Local BM25 + vector search for Markdown files with MCP server interface |
|
||||
|
||||
> **`bw` vs `bws`:** Use `bw` (standard Bitwarden CLI). `bws` is the Bitwarden
|
||||
|
|
@ -336,18 +282,14 @@ resolution. Never included by Make.
|
|||
|
||||
```bash
|
||||
# Dynamic paths (resolved at source time)
|
||||
WORK_DIR="${HOME}/knowledge-genome-orchestrator"
|
||||
WORK_DIR="${HOME}/knowledge-genome-setup"
|
||||
KEYS_DIR="${WORK_DIR}/keys"
|
||||
|
||||
# Genome registry — format: "name|description|linked_repo|cross_source"
|
||||
# The third and fourth fields are OPTIONAL:
|
||||
# - linked_repo left empty → knowledge-only genome (no linked project)
|
||||
# - owner/repo → genome is linked to that project repository (rendered into AGENTS.md)
|
||||
# - cross_source → yes|no (default no): whether the cross-genome collector may read this genome as a source
|
||||
# Genome registry — format: "name|description"
|
||||
GENOMES=(
|
||||
"genome-dev|Web development, TUI, Angular, software architecture|myorg/my-app|no"
|
||||
"genome-finance|Personal finance, investments, market analysis||no"
|
||||
"genome-homelab|Infrastructure, network configs, architecture logs||no"
|
||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
||||
"genome-finance|Personal finance, investments, market analysis"
|
||||
"genome-homelab|Infrastructure, network configs, architecture logs"
|
||||
)
|
||||
```
|
||||
|
||||
|
|
@ -370,8 +312,8 @@ export GITHUB_TOKEN="your_github_token"
|
|||
|
||||
```bash
|
||||
# 1. Clone the setup framework
|
||||
git clone <setup-repo-url> knowledge-genome-orchestrator
|
||||
cd knowledge-genome-orchestrator
|
||||
git clone <setup-repo-url> knowledge-genome-setup
|
||||
cd knowledge-genome-setup
|
||||
|
||||
# 2. Configure your environment
|
||||
cp globals.env.example globals.env # edit with your values
|
||||
|
|
@ -405,7 +347,6 @@ make setup
|
|||
- Commits submodule pointer in master repo
|
||||
|
||||
After setup completes:
|
||||
|
||||
- Upload all files in `keys/` to Vaultwarden (see Key Management)
|
||||
- Delete key files from disk: `rm keys/*.key`
|
||||
|
||||
|
|
@ -413,19 +354,16 @@ After setup completes:
|
|||
|
||||
## Makefile Reference
|
||||
|
||||
| Target | Description |
|
||||
| ----------------------------------------------------- | ------------------------------------------------------------------------------------- |
|
||||
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
||||
| `make add-genome NAME=x DESC="y" [LINKED=owner/repo]` | Scaffold and register a single new genome (optional linked project) |
|
||||
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
||||
| `make verify-structure` | Report directory drift of each genome vs the canonical layout (`lib/structure.sh`) |
|
||||
| `make sync-structure` | Create any missing canonical directories across all genomes (safe, idempotent) |
|
||||
| `make test` | Run the bats test suite (deterministic; no LLM/GPU/network) — see [Testing](#testing) |
|
||||
| `make status` | Show submodule status and per-genome git-crypt encryption state |
|
||||
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
||||
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if the optional bw or python3 is missing |
|
||||
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
||||
| `make help` | Print all available targets |
|
||||
| Target | Description |
|
||||
|--------|-------------|
|
||||
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
||||
| `make add-genome NAME=x DESC="y"` | Scaffold and register a single new genome |
|
||||
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
||||
| `make status` | Show submodule status and first 10 git-crypt encryption states |
|
||||
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
||||
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if bw missing |
|
||||
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
||||
| `make help` | Print all available targets |
|
||||
|
||||
### Examples
|
||||
|
||||
|
|
@ -436,12 +374,6 @@ make doctor
|
|||
# Add a new genome after initial setup
|
||||
make add-genome NAME=genome-research DESC="Academic papers and deep research"
|
||||
|
||||
# Add a genome linked to a project repository
|
||||
make add-genome NAME=genome-dev DESC="Web development" LINKED=myorg/my-app
|
||||
|
||||
# Check every genome against the canonical directory layout
|
||||
make verify-structure
|
||||
|
||||
# Run full lint pass (bash deterministic checks)
|
||||
make lint
|
||||
|
||||
|
|
@ -454,38 +386,6 @@ make lock
|
|||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
The mechanical layer (slug, index, log, lint, structure, the ingest orchestrator) is
|
||||
covered by a [bats](https://github.com/bats-core/bats-core) suite. The tests are
|
||||
**deterministic and have zero dependency on the LLM, the GPU, or the network** — they
|
||||
simulate the agent's output with fixtures and exercise the scripts directly, so they run
|
||||
anywhere git + bash live (laptop, CI, a git hook). They are **not** meant to run on the AI
|
||||
node or via n8n.
|
||||
|
||||
```bash
|
||||
sudo apt install bats # once
|
||||
make test # or: bats tests/
|
||||
```
|
||||
|
||||
| File | Covers |
|
||||
| ----------------- | ------------------------------------------------------------------------------ |
|
||||
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` |
|
||||
| `structure.bats` | `lib/structure.sh` report / sync |
|
||||
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||
|
||||
Each test builds its own throwaway genome with a local bare remote, configured to ignore
|
||||
the operator's global git settings (signing, global hooks) so the suite is hermetic. The
|
||||
`run-ingest` tests auto-`skip` if `jq` is absent. If you change the canonical layout in
|
||||
`lib/structure.sh`, update `FIXTURE_DIRS` in `tests/helpers.bash` to match.
|
||||
|
||||
> Why this matters: the only non-deterministic part of the system is the model. Pinning
|
||||
> the mechanical layer with tests means that when an ingest misbehaves, you know it's the
|
||||
> model or the prompt — not the plumbing.
|
||||
|
||||
---
|
||||
|
||||
## Genome Lifecycle
|
||||
|
||||
### Initial setup
|
||||
|
|
@ -507,7 +407,6 @@ After adding: upload the new key to Vaultwarden and delete the key file.
|
|||
### Removing a genome
|
||||
|
||||
Manual process:
|
||||
|
||||
```bash
|
||||
# In master repo
|
||||
git submodule deinit genome-name
|
||||
|
|
@ -522,17 +421,16 @@ git push
|
|||
When a genome is scaffolded, `render_template` replaces these placeholders in all
|
||||
template files:
|
||||
|
||||
| Placeholder | Source | Example |
|
||||
| ----------------------- | ----------- | ------------------------------ |
|
||||
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
||||
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
||||
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
||||
| `{{LINKED_PROJECT}}` | registry.sh | `myorg/my-app` (or `none`) |
|
||||
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
||||
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
||||
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
||||
| `{{MASTER_REPO}}` | globals.env | `master-knowledge-genome` |
|
||||
| `{{DATE}}` | runtime | `2026-05-11` |
|
||||
| Placeholder | Source | Example |
|
||||
|-------------|--------|---------|
|
||||
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
||||
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
||||
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
||||
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
||||
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
||||
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
||||
| `{{MASTER_REPO}}` | globals.env | `master-knowledge-genome` |
|
||||
| `{{DATE}}` | runtime | `2026-05-11` |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -543,9 +441,9 @@ template files:
|
|||
Each genome uses a unique symmetric AES-256-CTR key managed by git-crypt.
|
||||
Two directories in every genome are always encrypted:
|
||||
|
||||
| Directory | Contents | On remote |
|
||||
| --------------- | --------------------------- | ------------------ |
|
||||
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
||||
| Directory | Contents | On remote |
|
||||
|-----------|----------|-----------|
|
||||
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
||||
| `wiki/private/` | Private synthesis and notes | Opaque binary blob |
|
||||
|
||||
All other directories (`raw/articles/`, `wiki/sources/`, etc.) are plaintext.
|
||||
|
|
@ -592,17 +490,6 @@ This means: any file matching `**/private/**` in `.gitattributes` is protected,
|
|||
including future `private/` directories created anywhere in the repo.
|
||||
The hook never needs updating when the encryption rules change.
|
||||
|
||||
### Untrusted agent output — manifest validation
|
||||
|
||||
The ingest agent's output is stochastic: a hallucinated manifest could carry a missing field,
|
||||
a wrong type, or a malicious path such as `wiki/../../etc/passwd`. `run-ingest.sh` therefore
|
||||
**validates the manifest before trusting any field** — it must be well-formed JSON with a
|
||||
string `raw_source` and an array `pages`, and **every `path` must be a string under `wiki/`
|
||||
with no `..`**. Anything else fails fast with a structured `{"status":"error"}` and no
|
||||
filesystem access outside the wiki, so a bad path can't drive a read or a lint outside the
|
||||
knowledge tree. This is the trust boundary between the (stochastic) model and the
|
||||
(deterministic, tested) post-processor.
|
||||
|
||||
### PRIVATE_CONTEXT toggle
|
||||
|
||||
The `PRIVATE_CONTEXT` toggle in `AGENTS.md` controls whether the LLM agent
|
||||
|
|
@ -615,7 +502,6 @@ PRIVATE_CONTEXT: enabled ← Agent may read/write private/. Requires git-cryp
|
|||
```
|
||||
|
||||
Rules:
|
||||
|
||||
- Never inferred. Never carried over from a previous session.
|
||||
- `enabled` requires the operator to confirm that `git-crypt unlock` has run on the host.
|
||||
- Per-genome, per-session: enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||
|
|
@ -644,7 +530,6 @@ The key flows: Vaultwarden → `bw get notes` → `base64 -d` → kernel pipe
|
|||
At no point is the key written to any file on disk.
|
||||
|
||||
Lock a genome when the session ends:
|
||||
|
||||
```bash
|
||||
git-crypt lock
|
||||
```
|
||||
|
|
@ -659,11 +544,11 @@ git-crypt lock
|
|||
|
||||
Each genome key is stored as a base64-encoded Secure Note in Vaultwarden:
|
||||
|
||||
| Genome | Vaultwarden Note Name |
|
||||
| ---------------- | --------------------- |
|
||||
| `genome-dev` | `genome-dev key` |
|
||||
| `genome-finance` | `genome-finance key` |
|
||||
| `genome-homelab` | `genome-homelab key` |
|
||||
| Genome | Vaultwarden Note Name |
|
||||
|--------|----------------------|
|
||||
| `genome-dev` | `genome-dev key` |
|
||||
| `genome-finance` | `genome-finance key` |
|
||||
| `genome-homelab` | `genome-homelab key` |
|
||||
|
||||
After `make setup` or `make add-genome`, key files are exported to `keys/`.
|
||||
Upload procedure:
|
||||
|
|
@ -701,19 +586,13 @@ git clone https://git.yourserver.com/yourusername/genome-dev.git
|
|||
If a key is lost or compromised:
|
||||
|
||||
```bash
|
||||
# From the knowledge-genome-orchestrator/ directory
|
||||
# From the knowledge-genome-setup/ directory
|
||||
source lib/git-crypt.sh
|
||||
# If gcrypt_rotate_key operates on the CWD: cd into .../master-knowledge-genome/genome-dev
|
||||
# If it navigates by name instead: cd into .../master-knowledge-genome
|
||||
cd ~/knowledge-genome-orchestrator/master-knowledge-genome
|
||||
cd ~/knowledge-genome-setup/genome-dev
|
||||
gcrypt_rotate_key "genome-dev"
|
||||
```
|
||||
|
||||
> **macOS:** `gcrypt_rotate_key` uses `compgen -G` (bash 4+). The stock macOS bash 3.2 is not
|
||||
> enough — run rotation under Homebrew bash (`brew install bash`).
|
||||
|
||||
`gcrypt_rotate_key` performs:
|
||||
|
||||
1. Unlocks repo with existing key
|
||||
2. Removes old key material
|
||||
3. Generates new symmetric key via `git-crypt init`
|
||||
|
|
@ -724,16 +603,13 @@ gcrypt_rotate_key "genome-dev"
|
|||
> **Limitation:** git history still contains blobs encrypted with the old key.
|
||||
> Anyone with the old key and git history access can decrypt them. To purge old
|
||||
> encrypted blobs from history:
|
||||
>
|
||||
> ```bash
|
||||
> git filter-repo --invert-paths --path raw/private --path wiki/private
|
||||
> git push --force origin main
|
||||
> ```
|
||||
>
|
||||
> This rewrites all commit hashes — coordinate with any collaborators first.
|
||||
|
||||
After rotation:
|
||||
|
||||
- Upload new key to Vaultwarden (replace existing note)
|
||||
- Delete both `keys/genome-dev.key` and `keys/genome-dev-rotated-*.key` from disk
|
||||
- Revoke access from previous key holders
|
||||
|
|
@ -745,7 +621,6 @@ After rotation:
|
|||
### Prerequisites for every session
|
||||
|
||||
Before starting an LLM agent session on a genome:
|
||||
|
||||
1. The host (AI server) runs `git-crypt unlock` for the required genomes
|
||||
2. The orchestrator prepares context: `tail -n 20 wiki/log.md`
|
||||
3. Declare `PRIVATE_CONTEXT` state explicitly in the opening prompt
|
||||
|
|
@ -756,8 +631,7 @@ The agent executes in this order at the start of every session:
|
|||
|
||||
1. Read `wiki/index.md` — primary catalog of all pages and maturity
|
||||
2. Read the last 20 log entries (injected by the orchestrator — the agent does NOT open `wiki/log.md` directly)
|
||||
3. For tasks involving related pages: if the optional `qmd` extension is installed,
|
||||
`qmd search "<query>"` before opening files; otherwise navigate from `wiki/index.md`
|
||||
3. For tasks involving related pages: `qmd search "<query>"` before opening any files
|
||||
4. Operate on individual files — never scan entire directories
|
||||
|
||||
### One source per session
|
||||
|
|
@ -777,13 +651,12 @@ sequentially — not one session with 5 files.
|
|||
### n8n automation
|
||||
|
||||
For Forgejo webhook → automated ingest:
|
||||
|
||||
1. Forgejo sends webhook on push to `raw/`
|
||||
2. n8n receives webhook, identifies new files
|
||||
3. n8n starts one agent session per new file (sequential, not parallel)
|
||||
4. Each session: realign the checkout to the base (`git switch <base> && git reset --hard origin/<base>`), then inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Phase 1 agent (`/skill:ingest`) writes the manifest; Phase 2 `run-ingest.sh` opens the PR, then **stops**
|
||||
6. Human reviews — **merge to accept**, or close the PR + delete the `feat` branch to reject
|
||||
4. Each session: inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Agent ingest workflow runs, opens PR
|
||||
6. Human reviews and merges PR
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -791,79 +664,27 @@ For Forgejo webhook → automated ingest:
|
|||
|
||||
### Ingest
|
||||
|
||||
Triggered by a new file in `raw/` (manual or via webhook). Ingest is split into two
|
||||
phases so that the small local model spends its limited context only on judgement, and
|
||||
all the deterministic bookkeeping happens outside the model's loop.
|
||||
Triggered by a new file in `raw/` (manual or via webhook).
|
||||
|
||||
**Phase 1 — agent (semantic only).** The `ingest` skill gives the agent read/edit tools
|
||||
only (no shell). It:
|
||||
|
||||
1. Reads the source once
|
||||
2. Creates `wiki/sources/<slug>.md` — summary and key points
|
||||
3. Per entity (person, tool, organisation): creates or updates `wiki/entities/<name>.md`
|
||||
4. Per concept (pattern, theory, decision): creates or updates `wiki/concepts/<name>.md`
|
||||
5. Checks each touched page for contradictions → applies Conflict Resolution if found
|
||||
6. Writes `.ingest-manifest.json` (the list of pages it created/modified, the model name,
|
||||
a one-line reasoning, the PR summary, and any contradictions) — then **stops**
|
||||
|
||||
**Phase 2 — `run-ingest.sh` (deterministic, outside the agent).** The post-processor first
|
||||
**validates the manifest** — well-formed JSON, expected shape, and every page path confined to
|
||||
`wiki/` with no `..` (see [Security Model](#security-model)) — then does the mechanical work the
|
||||
model must not waste context on:
|
||||
|
||||
7. Inserts each page into the correct `wiki/index.md` section **in alphabetical order**,
|
||||
deduplicated by wikilink (a re-ingest updates the entry, never duplicates it), and bumps the
|
||||
index `last_updated` (`index-append.py`)
|
||||
8. Appends the `INGEST | <slug>` entry to `wiki/log.md` (the model name comes from the
|
||||
orchestrator via `INGEST_MODEL` — the agent cannot reliably know its own tag)
|
||||
9. Runs scoped lint on exactly the pages touched this run (`scoped-lint.sh`, reusing
|
||||
`lib/lint.sh`), including a **duplicate-slug advisory**: a slug created this run that is
|
||||
highly similar to an entity/concept already in `wiki/index.md` is flagged in the PR so a
|
||||
human can merge them. It is advisory only — it never fails the lint or blocks the PR
|
||||
(threshold tunable via `KG_DUP_THRESHOLD`, default 70)
|
||||
10. Commits **only `wiki/`** on `feat/ai-ingest-<slug>` and opens a PR against the integration
|
||||
base (`INGEST_BASE`, default `main`); the body matches the `templates/pr-description.md`
|
||||
structure (Summary / Pages / Contradictions / Scoped Lint)
|
||||
11. Emits a single compact JSON line (status, slug, PR url, lint_clean, conflict) for n8n
|
||||
|
||||
The agent never runs git, never edits the index/log mechanically, and never lints — those
|
||||
are deterministic and tested (see [Testing](#testing)). Invocation on the AI node:
|
||||
|
||||
```bash
|
||||
pi --mode json -p "/skill:ingest raw/articles/<file>.md" # phase 1 → writes manifest
|
||||
run-ingest.sh <genome> # phase 2 → index/log/lint/PR
|
||||
```
|
||||
1. Read source once
|
||||
2. Create `wiki/sources/<slug>.md` — summary and key points
|
||||
3. Per entity (person, tool, organisation): create or update `wiki/entities/<name>.md`
|
||||
4. Per concept (pattern, theory, decision): create or update `wiki/concepts/<name>.md`
|
||||
5. Check each touched page for contradictions → apply Conflict Resolution if found
|
||||
6. Append entry to `wiki/index.md` (bottom of relevant section — do not reorder)
|
||||
7. Append log entry: `INGEST | <slug>`
|
||||
8. Run scoped lint on pages created or modified in this session; report in PR
|
||||
9. Commit on `feat/ai-ingest-<slug>`; open PR using `templates/pr-description.md`
|
||||
|
||||
For private sources (`PRIVATE_CONTEXT: enabled` required):
|
||||
|
||||
- All output goes to `wiki/private/<slug>.md` only
|
||||
- PR title: `[PRIVATE] ingest: <slug>`
|
||||
|
||||
**Branch lifecycle & the manual gate.** `run-ingest.sh` / `open-pr.sh` are deliberately
|
||||
"dumb": they create the `feat/ai-ingest-<slug>` branch, commit only `wiki/`, open the PR, and
|
||||
stop. They never reset, revert, or touch the integration branch — that lifecycle belongs to
|
||||
the orchestrator, around the human gate:
|
||||
|
||||
- **Before each session** the orchestrator realigns the checkout to the base
|
||||
(`git fetch && git switch <base> && git reset --hard origin/<base>`) — a reset of the _local_
|
||||
checkout to match the remote, never a force-push to the shared branch.
|
||||
- **After the PR opens, everything stops** until a human approves: one source per session,
|
||||
sequential, no new ingest until the pending PR is closed.
|
||||
- **Approve = merge. Reject = close the PR and delete the remote `feat` branch.** To undo an
|
||||
already-merged ingest, open a _revert PR_ against the base — never rewrite history on a
|
||||
shared branch.
|
||||
|
||||
The PR base is configurable via `INGEST_BASE` (default `main`). Per-page `maturity` already
|
||||
encodes stability and tags/releases mark versioned snapshots, so `main` is the integration
|
||||
branch today. If a linked project later _consumes_ a genome, set `INGEST_BASE=develop` to
|
||||
buffer ingests on `develop` and cut manual `develop → main` releases — no code change.
|
||||
|
||||
### Query
|
||||
|
||||
Triggered by an operator question.
|
||||
|
||||
1. `qmd search "<query>"` (if the optional qmd extension is installed) → identify
|
||||
candidate pages; otherwise start from `wiki/index.md`
|
||||
1. `qmd search "<query>"` → identify candidate pages
|
||||
2. Read candidate pages directly (qmd already returns file paths — no intermediate index lookup)
|
||||
3. Synthesise answer with `[[wikilink]]` citations
|
||||
4. If answer is non-trivial: save as `wiki/queries/<slug>.md` and append to index
|
||||
|
|
@ -876,13 +697,11 @@ For general orientation without a specific query: read `wiki/index.md` directly.
|
|||
The lint workflow is split between deterministic bash checks and semantic LLM judgment.
|
||||
|
||||
**Step 1 — operator runs bash linter:**
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
The bash linter checks automatically:
|
||||
|
||||
- YAML frontmatter validity (all mandatory fields present)
|
||||
- Domain consistency (domain field matches genome name)
|
||||
- Type validity (value from allowed list)
|
||||
|
|
@ -894,7 +713,6 @@ The bash linter checks automatically:
|
|||
**Step 2 — operator provides bash output to LLM agent:**
|
||||
|
||||
The agent applies semantic judgment to findings the bash linter cannot make:
|
||||
|
||||
- **Orphan pages** (from bash list): for each orphan, identify 1-3 existing pages
|
||||
that should link to it; propose specific additions
|
||||
- **Implicit concepts** (from bash term frequency list): determine if a candidate
|
||||
|
|
@ -917,28 +735,22 @@ The PR description uses `templates/pr-description.md`:
|
|||
|
||||
```markdown
|
||||
## Summary
|
||||
|
||||
One sentence: goal of this session and source processed.
|
||||
|
||||
## Pages Created
|
||||
|
||||
| Path | Type | Maturity |
|
||||
|
||||
## Pages Modified
|
||||
|
||||
| Path | Change |
|
||||
|
||||
## Contradictions Found
|
||||
|
||||
[ ] None / [ ] n conflict file(s) created
|
||||
[ ] None / [ ] n conflict file(s) created
|
||||
|
||||
## Private Data Accessed
|
||||
|
||||
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
||||
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
||||
|
||||
## Scoped Lint (post-ingest)
|
||||
|
||||
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
||||
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
||||
```
|
||||
|
||||
This makes human review fast and structured: read the table, scan the diff,
|
||||
|
|
@ -964,36 +776,25 @@ The operator resolves the conflict, updates relevant pages, closes the PR.
|
|||
|
||||
Pages have a `last_updated` field in frontmatter. During lint passes:
|
||||
|
||||
| Maturity | Threshold | Action |
|
||||
| -------- | --------- | -------------------------------------- |
|
||||
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| Maturity | Threshold | Action |
|
||||
|----------|-----------|--------|
|
||||
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
|
||||
The agent proposes re-validation but does not change `maturity` without new source evidence.
|
||||
|
||||
### Cross-genome references
|
||||
### Cross-genome lint
|
||||
|
||||
> **Status: planned.** The cross-genome collector and **navigation skill** described in this
|
||||
> section are specified but **not yet implemented** in this release — only the `ingest` skill
|
||||
> ships today. What follows documents the intended design and the boundary contract it will honour.
|
||||
A manual, monthly operation. Not automated in CI/CD — the token cost and coordination
|
||||
complexity are not justified at this scale.
|
||||
|
||||
Cross-domain knowledge moves by **pull, never push**: the genome you are working in draws
|
||||
material _in_; nothing is ever written into another genome. There are **no cross-genome
|
||||
wikilinks** — submodule pointers make relative paths brittle.
|
||||
|
||||
When the working genome needs a concept that lives elsewhere, the **navigation skill** handles
|
||||
it in the same two-phase shape as ingest:
|
||||
|
||||
1. A deterministic collector clones the relevant genomes **read-only at HEAD** (fresh — never the
|
||||
pinned submodule state) and assembles a dossier of excerpts with provenance.
|
||||
2. A semantic pass reads only that dossier; the skill then deposits **one** abstract, non-private
|
||||
raw into the working genome at `raw/articles/crossgen-<topic>-<date>.md`.
|
||||
3. That raw goes through the working genome's normal ingest → PR → human gate, like any source.
|
||||
|
||||
Which genomes may be read as **sources** is gated by a per-genome `cross_source: yes|no` flag: a
|
||||
confidential genome (e.g. a client file) is marked `no` and is never read as a source — the wall
|
||||
is structural, not a matter of the agent's discipline. The master `AGENTS.md` holds the full
|
||||
boundary contract.
|
||||
1. Operator initiates a master-repo agent session
|
||||
2. Agent uses `qmd search "<concept>"` across the multi-genome index to find:
|
||||
- Concepts defined in 2+ genomes with potentially conflicting definitions
|
||||
- Entities referenced cross-genome without canonical cross-genome wikilinks
|
||||
- Concepts in genome-X that should link to genome-Y
|
||||
3. Agent reports findings — does not modify files
|
||||
4. For each finding: create conflict note in the genome where resolution belongs
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -1015,45 +816,50 @@ private: true | false
|
|||
---
|
||||
```
|
||||
|
||||
| Field | Rules |
|
||||
| ---------------------- | ------------------------------------------------------------------------ |
|
||||
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
||||
| `maturity: draft` | Single source or unvalidated |
|
||||
| `maturity: stable` | Confirmed by 2+ independent sources |
|
||||
| `maturity: deprecated` | Superseded — add `> **DEPRECATED:** <reason>` callout at top |
|
||||
| `private: true` | Required on all pages in `wiki/private/` and `raw/private/` |
|
||||
| Field | Rules |
|
||||
|-------|-------|
|
||||
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
||||
| `maturity: draft` | Single source or unvalidated |
|
||||
| `maturity: stable` | Confirmed by 2+ independent sources |
|
||||
| `maturity: deprecated` | Superseded — add `> **DEPRECATED:** <reason>` callout at top |
|
||||
| `private: true` | Required on all pages in `wiki/private/` and `raw/private/` |
|
||||
|
||||
Do not use semantic versioning for content. Git history tracks every change.
|
||||
`maturity` captures epistemic state; `last_updated` tracks recency.
|
||||
|
||||
### Page types and directories
|
||||
|
||||
| Type | Directory | Description |
|
||||
| ---------- | ---------------------------- | -------------------------------------------- |
|
||||
| `source` | `wiki/sources/` | One page per processed raw source |
|
||||
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
||||
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
||||
| `query` | `wiki/queries/` | Preserved answers and analyses |
|
||||
| `conflict` | `wiki/queries/conflict-*.md` | Unresolved contradictions |
|
||||
| `private` | `wiki/private/` | Private synthesis (PRIVATE_CONTEXT: enabled) |
|
||||
| `index` | `wiki/index.md` | Primary navigation catalog (singleton) |
|
||||
| `log` | `wiki/log.md` | Operations ledger (singleton) |
|
||||
| Type | Directory | Description |
|
||||
|------|-----------|-------------|
|
||||
| `source` | `wiki/sources/` | One page per processed raw source |
|
||||
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
||||
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
||||
| `query` | `wiki/queries/` | Preserved answers and analyses |
|
||||
| `conflict` | `wiki/queries/conflict-*.md` | Unresolved contradictions |
|
||||
| `private` | `wiki/private/` | Private synthesis (PRIVATE_CONTEXT: enabled) |
|
||||
| `index` | `wiki/index.md` | Primary navigation catalog (singleton) |
|
||||
| `log` | `wiki/log.md` | Operations ledger (singleton) |
|
||||
|
||||
### Page size limits
|
||||
|
||||
| Limit | Lines | Action |
|
||||
| -------- | ----- | ----------------------------------- |
|
||||
| Soft cap | 400 | Bash linter warns |
|
||||
| Hard cap | 800 | Bash linter errors — split the page |
|
||||
| Limit | Lines | Action |
|
||||
|-------|-------|--------|
|
||||
| Soft cap | 400 | Bash linter warns |
|
||||
| Hard cap | 800 | Bash linter errors — split the page |
|
||||
|
||||
These limits ensure pages fit within the LLM context window without attention degradation
|
||||
and keep the wiki atomically navigable.
|
||||
|
||||
### Linking conventions
|
||||
|
||||
- **Intra-genome:** `[[folder/file]]` — Obsidian wikilinks only.
|
||||
- **Cross-genome:** NOT supported via wikilink — submodule pointers make relative paths brittle. When the working genome needs a concept that lives elsewhere, the navigation skill **pulls it in** as one abstract raw under _this_ genome's `raw/articles/`, which then goes through normal ingest. See [Cross-genome references](#cross-genome-references).
|
||||
- **External:** `[text](https://...)` — standard Markdown.
|
||||
| Type | Format |
|
||||
|------|--------|
|
||||
| Internal (same genome) | `[[folder/slug]]` — Obsidian wikilinks only |
|
||||
| Cross-genome | `[[../genome-target/wiki/folder/slug]]` |
|
||||
| External | `[text](https://url)` — standard Markdown |
|
||||
|
||||
Never use `[text](relative/path)` for internal references. Obsidian wikilinks are
|
||||
bidirectional and appear in the graph view.
|
||||
|
||||
### Log format
|
||||
|
||||
|
|
@ -1072,26 +878,25 @@ Every operation appends one entry to `wiki/log.md`:
|
|||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||
|
||||
Parse examples:
|
||||
|
||||
```bash
|
||||
grep "^## \[" wiki/log.md | tail -5 # Last 5 entries
|
||||
grep "^## \[" wiki/log.md | grep "CONFLICT" # All conflicts
|
||||
grep "^## \[2026-05" wiki/log.md # Entries from a specific month
|
||||
```
|
||||
|
||||
`ingest-semantic.py` receives the source text plus the existing entity/concept names (from the index) as prompt context.
|
||||
The orchestrator always injects only `tail -n 20 wiki/log.md` into agent context.
|
||||
The LLM never loads the full log.
|
||||
|
||||
---
|
||||
|
||||
## Collaboration Model
|
||||
|
||||
| Role | Key access | Permitted operations |
|
||||
| -------------- | ----------------- | ----------------------------------------------------------------------------- |
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Never | `PRIVATE_CONTEXT` must be `disabled`; private data stays on local network |
|
||||
| Role | Key access | Permitted operations |
|
||||
|------|-----------|----------------------|
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Never | `PRIVATE_CONTEXT` must be `disabled`; private data stays on local network |
|
||||
|
||||
Grant collaborator access: add as Forgejo contributor with Write role.
|
||||
Never share the git-crypt key — collaborators operate exclusively in public directories.
|
||||
|
|
@ -1125,7 +930,6 @@ qmd serve --port 3333
|
|||
Obsidian is the recommended wiki browser. Open any genome directory as an Obsidian vault.
|
||||
|
||||
Recommended setup:
|
||||
|
||||
- **Graph view** — visualise page connections; spot orphans and hubs instantly
|
||||
- **Obsidian Web Clipper** — browser extension to clip articles directly to `raw/articles/`
|
||||
as Markdown
|
||||
|
|
@ -1139,16 +943,13 @@ Note: `.obsidian/` is in `.gitignore`. Workspace and plugin settings are local
|
|||
|
||||
### n8n automation
|
||||
|
||||
Pipeline: n8n → SSH → `ingest-semantic.py <genome> <raw>` → `run-ingest.sh <genome>`.
|
||||
|
||||
n8n (running on the storage node) can automate the ingest pipeline:
|
||||
|
||||
1. Forgejo webhook fires on push to a genome's `raw/` directory
|
||||
2. n8n flow identifies new files
|
||||
3. For each new file: starts one agent session (sequential — never parallel)
|
||||
4. Each session receives: `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Phase 1 — agent runs `/skill:ingest` (semantic → writes manifest); Phase 2 —
|
||||
`run-ingest.sh` does index/log/lint and opens the PR, returning one JSON line to n8n
|
||||
5. Agent runs ingest workflow and opens PR
|
||||
6. Human reviews the PR
|
||||
|
||||
Key constraint: one source per session, sessions sequential.
|
||||
|
|
@ -1158,13 +959,11 @@ Never batch multiple sources into one agent session.
|
|||
|
||||
If the AI compute node has an Intel NPU (e.g. Core Ultra series):
|
||||
|
||||
- Background/auxiliary tasks (OCR of `raw/assets/`, async summarisation, or qmd
|
||||
re-indexing **if** the optional qmd extension is in use) → Intel NPU via OpenVINO
|
||||
- Background tasks (embedding updates, index refresh) → Intel NPU via OpenVINO
|
||||
- Active reasoning sessions (ingest, query, synthesis) → GPU
|
||||
|
||||
Note: the core system has no embedding pipeline (see [Core Philosophy](#core-philosophy)),
|
||||
so there is nothing to embed here — the NPU is only for auxiliary work. This keeps the
|
||||
GPU's KV cache free for interactive sessions and lowers power draw for background jobs.
|
||||
This keeps the GPU's KV cache free for interactive work and reduces power consumption
|
||||
for background operations.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -1192,7 +991,6 @@ sudo apt install git git-crypt curl jq
|
|||
The staged file is in a path matching `**/private/**` but is not encrypted.
|
||||
|
||||
Fix options:
|
||||
|
||||
1. Verify `.gitattributes` contains `**/private/** filter=git-crypt diff=git-crypt -text`
|
||||
2. Run `git-crypt init` if git-crypt is not initialised in this repo
|
||||
3. Run `git-crypt status` to check the encryption state of all files
|
||||
|
|
@ -1213,7 +1011,6 @@ git commit -m "fix: re-stage private files for encryption"
|
|||
### Agent returns stale or missing cross-references
|
||||
|
||||
Likely causes:
|
||||
|
||||
1. Session was too long — KV cache degraded. Use one source per session.
|
||||
2. `wiki/index.md` was not read at session start — agent lacked the page catalog.
|
||||
3. qmd index is stale — re-index: `qmd index <genome>/wiki/`
|
||||
|
|
|
|||
|
|
@ -1,773 +0,0 @@
|
|||
{
|
||||
"name": "Genome: PR review",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {
|
||||
"httpMethod": "POST",
|
||||
"path": "forgejo-pr-review-23319ab8687b16f10e0f278fb920c112",
|
||||
"options": {}
|
||||
},
|
||||
"id": "58df1ca9-e48e-4834-b231-d97c974cd01b",
|
||||
"name": "Webhook PR Review",
|
||||
"type": "n8n-nodes-base.webhook",
|
||||
"typeVersion": 2.1,
|
||||
"position": [
|
||||
2272,
|
||||
1344
|
||||
],
|
||||
"webhookId": "61ff3a5baa304571"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// THE only parser of the review side: parse the directive, VALIDATE, prepare the rework payload.\n// Security: only allow-listed maintainers may drive the gate; destructive directives require a\n// feat/ai-ingest-* branch on the expected base; raw_source is recovered from a machine-readable\n// marker that run-ingest.sh writes into the PR body.\nconst ALLOWED_SENDERS = ['Keru']; // <-- maintainers allowed to issue directives\nconst BASE = 'develop';\n\n// n8n Run Once for Each Item: $json is the current webhook payload\nconst j = $json.body || $json;\nif (!j || typeof j !== 'object') {\n return { directive: 'INVALID', reason: 'malformed webhook payload' };\n}\n\nconst review = j.review || null;\nconst comment = j.comment || null;\nconst pr = j.pull_request || j.issue || null;\n\n// Extract directive text from review content or comment body\nconst body = String(\n (review && review.content) ||\n (comment && comment.body) ||\n ''\n);\nconst sender = String((j.sender && j.sender.login) || 'unknown');\n\n// Match directive at the start of the text (case-insensitive)\nconst m = body.match(/^\\s*(REWORK|RESTART|REVERT\\s+\\d+|SPLIT|REJECT|MERGE)\\s*:?/i);\nif (!m) return { directive: 'NONE' };\n\nconst headTok = m[1].toUpperCase().replace(/\\s+/g, ' ');\nconst directive = headTok.startsWith('REVERT') ? 
'REVERT' : headTok;\nconst feedback = body.slice(m[0].length).trim() || '(nessun dettaglio fornito)';\n\n// Extract PR metadata safely\nconst prNumber = (pr && pr.number) || null;\nconst branch = (pr && pr.head && pr.head.ref) || null;\nconst base = (pr && pr.base && pr.base.ref) || null;\nconst repo = (pr && pr.base && pr.base.repo && pr.base.repo.name) ||\n (j.repository && j.repository.name) || null;\nconst owner = (pr && pr.base && pr.base.repo && pr.base.repo.owner && pr.base.repo.owner.login) ||\n (j.repository && j.repository.owner && j.repository.owner.login) || null;\nconst prBody = (pr && pr.body) || (j.issue && j.issue.body) || '';\n\n// Recover raw_source from machine-readable marker: <!-- kg:raw=path -->\n// Restricted to valid path characters, no spaces, no HTML breaking\nconst rawMatch = prBody.match(/<!--\\s*kg:raw=([^\\s>]+)\\s*-->/);\nconst raw = rawMatch ? rawMatch[1] : null;\n\n// REVERT is reserved for future Step 7 implementation\nif (directive === 'REVERT') {\n return { directive: 'NONE', note: 'REVERT reserved for Step 7' };\n}\n\n// Authorization gate\nif (!ALLOWED_SENDERS.includes(sender)) {\n return {\n directive: 'UNAUTHORIZED',\n attempted: directive,\n sender,\n prNumber,\n owner,\n repo\n };\n}\n\n// Validation rules\nconst okGenome = !!repo && /^[a-z0-9][a-z0-9-]{0,63}$/.test(repo);\nconst okPr = !!prNumber && /^[0-9]+$/.test(String(prNumber));\nconst okBranch = !!branch && /^feat\\/ai-ingest-[a-z0-9-]+$/.test(branch);\nconst okBase = base === BASE;\nconst okRaw = (directive === 'MERGE')\n ? 
true\n : (!!raw && raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw));\n\nif (!okGenome || !okPr || !okBase || (directive !== 'MERGE' && !okBranch) || !okRaw) {\n return {\n directive: 'INVALID',\n attempted: directive,\n prNumber,\n owner,\n repo,\n why: { okGenome, okPr, okBranch, okBase, okRaw }\n };\n}\n\n// Encode feedback for safe transport through SSH/scripts\nconst feedback_b64 = Buffer.from(feedback, 'utf8').toString('base64');\n\nreturn {\n directive,\n prNumber,\n branch,\n base,\n repo,\n owner,\n sender,\n raw,\n feedback,\n feedback_b64\n};"
|
||||
},
|
||||
"id": "c668f595-0a28-4bd3-9125-22fee9350d78",
|
||||
"name": "Parse & validate",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2496,
|
||||
1344
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"rules": {
|
||||
"values": [
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "MERGE",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "4960f0868bc54687"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "REWORK",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "34002fdd92834d38"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "RESTART",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "d412a74e32ac4f0c"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "SPLIT",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "c0810b33fa474ca0"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "REJECT",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "531039e699c44cea"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "UNAUTHORIZED",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "cfbd691d2e9a4c2a"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
},
|
||||
{
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"leftValue": "={{ $json.directive }}",
|
||||
"rightValue": "INVALID",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
},
|
||||
"id": "251f5b7beea6424a"
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"fallbackOutput": "none"
|
||||
}
|
||||
},
|
||||
"id": "489736cc-bab6-4664-8087-91b6d9ff31ad",
|
||||
"name": "Switch",
|
||||
"type": "n8n-nodes-base.switch",
|
||||
"typeVersion": 3.4,
|
||||
"position": [
|
||||
2736,
|
||||
1344
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}/merge",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpHeaderAuth",
|
||||
"sendBody": true,
|
||||
"specifyBody": "json",
|
||||
"jsonBody": "={\n \"Do\": \"merge\"\n}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "3440cb8d-ae4c-4523-ae13-ee5667d24252",
|
||||
"name": "Forgejo Merge PR",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
2976,
|
||||
1104
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "loose",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"id": "cc369b5fc3d246a4",
|
||||
"leftValue": "={{ $('Parse & validate').first().json.branch }}",
|
||||
"rightValue": "feat/ai-ingest-",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "startsWith"
|
||||
}
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "e6d45fce-83d0-44ca-9fa4-86558fec1a0f",
|
||||
"name": "Guardia feat/",
|
||||
"type": "n8n-nodes-base.if",
|
||||
"typeVersion": 2.2,
|
||||
"position": [
|
||||
2976,
|
||||
1328
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "PATCH",
|
||||
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/pulls/{{ $('Parse & validate').first().json.prNumber }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpHeaderAuth",
|
||||
"sendBody": true,
|
||||
"specifyBody": "json",
|
||||
"jsonBody": "={\n \"state\": \"closed\"\n}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "1601f705-c758-4df6-a3bd-e3ac2e202c94",
|
||||
"name": "Forgejo Close PR",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
3200,
|
||||
1296
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "DELETE",
|
||||
"url": "=https://git.keruhomelab.com/api/v1/repos/{{ $('Parse & validate').first().json.owner }}/{{ $('Parse & validate').first().json.repo }}/branches/{{ encodeURIComponent($('Parse & validate').first().json.branch) }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpHeaderAuth",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "c2ff2247-efe1-4809-a435-9973188d61bb",
|
||||
"name": "Forgejo Delete Branch",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
3424,
|
||||
1296
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"leftValue": "",
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"id": "55cf6c2a6c7d4d79",
|
||||
"leftValue": "={{ $('Parse & validate').first().json.directive }}",
|
||||
"rightValue": "REJECT",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
}
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "a1dbbc06-555d-4a1d-8fbf-ee75f617e98a",
|
||||
"name": "E' REJECT?",
|
||||
"type": "n8n-nodes-base.if",
|
||||
"typeVersion": 2.2,
|
||||
"position": [
|
||||
3648,
|
||||
1296
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"workflowId": {
|
||||
"__rl": true,
|
||||
"value": "zbtRXWsLt56nEIfz",
|
||||
"mode": "list",
|
||||
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
|
||||
"cachedResultName": "Power Manager"
|
||||
},
|
||||
"workflowInputs": {
|
||||
"mappingMode": "defineBelow",
|
||||
"value": {
|
||||
"mode": "ensure-on"
|
||||
},
|
||||
"matchingColumns": [
|
||||
"mode"
|
||||
],
|
||||
"schema": [
|
||||
{
|
||||
"id": "mode",
|
||||
"displayName": "mode",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
}
|
||||
],
|
||||
"attemptToConvertTypes": false,
|
||||
"convertFieldsToString": true
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "7fc3e648-4712-4eef-a6f3-12c8805ade1f",
|
||||
"name": "Power Manager - ensure-on",
|
||||
"type": "n8n-nodes-base.executeWorkflow",
|
||||
"typeVersion": 1.3,
|
||||
"position": [
|
||||
3648,
|
||||
1168
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"workflowId": {
|
||||
"__rl": true,
|
||||
"value": "VIi2ovb5gJxNJLbg",
|
||||
"mode": "list",
|
||||
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
|
||||
"cachedResultName": "Genome: run-one-ingest"
|
||||
},
|
||||
"workflowInputs": {
|
||||
"mappingMode": "defineBelow",
|
||||
"value": {
|
||||
"genome": "={{ $('Parse & validate').first().json.repo }}",
|
||||
"raw": "={{ $('Parse & validate').first().json.raw }}",
|
||||
"mode": "rework",
|
||||
"feedback_b64": "={{ $('Parse & validate').first().json.feedback_b64 }}",
|
||||
"reason": "={{ $('Parse & validate').first().json.directive }}",
|
||||
"prevPr": "={{ String($('Parse & validate').first().json.prNumber || '') }}"
|
||||
},
|
||||
"matchingColumns": [],
|
||||
"schema": [
|
||||
{
|
||||
"id": "genome",
|
||||
"displayName": "genome",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "raw",
|
||||
"displayName": "raw",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "mode",
|
||||
"displayName": "mode",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "feedback_b64",
|
||||
"displayName": "feedback_b64",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "reason",
|
||||
"displayName": "reason",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "prevPr",
|
||||
"displayName": "prevPr",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
}
|
||||
],
|
||||
"attemptToConvertTypes": false,
|
||||
"convertFieldsToString": true
|
||||
},
|
||||
"options": {
|
||||
"waitForSubWorkflow": false
|
||||
}
|
||||
},
|
||||
"id": "9704c050-5c63-49fd-a26d-efbae9d92175",
|
||||
"name": "Run one ingest (rework)",
|
||||
"type": "n8n-nodes-base.executeWorkflow",
|
||||
"typeVersion": 1.3,
|
||||
"position": [
|
||||
3856,
|
||||
1168
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Builds the ntfy notification for a completed action: PR merged (MERGE) or PR closed (REJECT).\n// The preceding HTTP node replaced $json with the API response, so the review context is read\n// from the 'Parse & validate' node by reference (a single review per run, hence .first()).\n// Returns { topic, title, priority, tags, click, actions, body }, the fields read by 'ntfy: send'.\nconst p = $('Parse & validate').first().json || {};\nconst repo = p.repo || 'unknown';\nconst owner = p.owner || 'unknown';\nconst prNumber = p.prNumber || '?';\nconst base = p.base || 'develop';\nconst branch = p.branch || 'unknown';\nconst sender = p.sender || 'unknown';\nconst directive = p.directive || 'UNKNOWN';\nconst feedback = String(p.feedback || '');\n\n// Build links only from real values: the 'unknown' / '?' placeholders must never reach a URL.\n// owner falls back to 'unknown', so a plain truthiness test on it would always pass.\nconst repoUrl = (owner !== 'unknown' && repo !== 'unknown')\n  ? `https://git.keruhomelab.com/${owner}/${repo}`\n  : '';\nconst prUrl = (repoUrl && prNumber !== '?')\n  ? `${repoUrl}/pulls/${prNumber}`\n  : '';\n\nlet n;\nif (directive === 'MERGE') {\n  n = {\n    topic: 'genome-ingest',\n    title: `${repo} · PR #${prNumber} mergiata`,\n    priority: 'default',\n    tags: 'twisted_rightwards_arrows',\n    click: prUrl,\n    // A view action needs a URL: send no action at all when the PR link is unknown.\n    actions: prUrl ? `view, Vedi la PR, ${prUrl}` : '',\n    body: `PR #${prNumber} mergiata su \\`${base}\\` da **${sender}**.`\n  };\n} else {\n  // Prefix every feedback line with '>' so feedback containing blank lines stays inside the\n  // quote, and drop the quote entirely when the reviewer left no feedback.\n  const quoted = feedback\n    ? '\\n' + feedback.split('\\n').map((line) => `> ${line}`).join('\\n')\n    : '';\n  n = {\n    topic: 'genome-ingest',\n    title: `${repo} · PR #${prNumber} chiusa`,\n    priority: 'default',\n    tags: 'wastebasket',\n    click: repoUrl,\n    actions: '',\n    body: `**REJECT** di **${sender}**: PR #${prNumber} chiusa e branch \\`${branch}\\` rimosso. Nessun nuovo tentativo.${quoted}`\n  };\n}\n\nreturn n;"
|
||||
},
|
||||
"id": "1ce634fd-d402-4a84-9ba1-04673ddffce9",
|
||||
"name": "Build ntfy action",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
3856,
|
||||
1344
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Notification for the security / near-miss paths: unauthorized sender, invalid directive,\n// or the feat/ guard refusing a destructive action.\n// Per the original author's note, Switch / Guardia pass the parser output through on all three\n// paths, so $json is expected to carry the directive plus its context.\nconst src = $json || {};\nconst directive = src.directive || 'UNKNOWN';\n// Falls back to the directive, which is itself never empty, so attempted is always set.\nconst attempted = src.attempted || directive;\nconst sender = src.sender || 'unknown';\nconst prNumber = src.prNumber || '?';\nconst branch = src.branch || 'unknown';\nconst owner = src.owner || '';\nconst repo = src.repo || '';\n\n// Link to the repository only when both parts of its path are known.\nconst repoUrl = owner && repo ? `https://git.keruhomelab.com/${owner}/${repo}` : '';\n\n// All three notifications share topic, click target and empty actions; only four fields vary.\nconst notify = (title, priority, tags, body) => ({\n  topic: 'genome-ingest',\n  title,\n  priority,\n  tags,\n  click: repoUrl,\n  actions: '',\n  body\n});\n\nswitch (directive) {\n  case 'UNAUTHORIZED':\n    return notify(\n      'Sicurezza · direttiva non autorizzata',\n      'high',\n      'no_entry',\n      `**${sender}** ha tentato \\`${attempted}\\` su PR #${prNumber}, ma non è tra i maintainer autorizzati. **Nessuna azione** eseguita.`\n    );\n  case 'INVALID':\n    return notify(\n      'Direttiva non applicata',\n      'low',\n      'information_source',\n      `\\`${attempted}\\` su PR #${prNumber} ignorata: precondizioni non soddisfatte (branch / base / marker raw).`\n    );\n  default:\n    // Any other directive: the false branch of the feat/ guard, i.e. a destructive action\n    // requested on a branch that is not feat/ai-ingest-*.\n    return notify(\n      'Sicurezza · branch protetto',\n      'high',\n      'no_entry',\n      `Rifiutata azione distruttiva (\\`${attempted}\\`) sul branch \\`${branch}\\`: non è un \\`feat/ai-ingest-*\\`. **Nessuna modifica.**`\n    );\n}"
|
||||
},
|
||||
"id": "32b16592-5126-4cc2-a3f2-d1bda58ac724",
|
||||
"name": "Build ntfy sicurezza",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
3200,
|
||||
1536
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=http://ntfy/{{ $json.topic }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
},
|
||||
{
|
||||
"name": "Click",
|
||||
"value": "={{ $json.click }}"
|
||||
},
|
||||
{
|
||||
"name": "Actions",
|
||||
"value": "={{ $json.actions }}"
|
||||
},
|
||||
{
|
||||
"name": "Markdown",
|
||||
"value": "yes"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "4d45b486-de42-4c7f-be21-b5bfbc05fd44",
|
||||
"name": "ntfy: send",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
4080,
|
||||
1424
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"Webhook PR Review": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Parse & validate",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Parse & validate": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Switch",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Switch": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Forgejo Merge PR",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Power Manager - ensure-on",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Guardia feat/",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Guardia feat/",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Guardia feat/",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy sicurezza",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy sicurezza",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Forgejo Merge PR": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy action",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Guardia feat/": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Forgejo Close PR",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy sicurezza",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Forgejo Close PR": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Forgejo Delete Branch",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Forgejo Delete Branch": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "E' REJECT?",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"E' REJECT?": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy action",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Power Manager - ensure-on",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Power Manager - ensure-on": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Run one ingest (rework)",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy action": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy sicurezza": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": true,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate",
|
||||
"timeSavedMode": "fixed",
|
||||
"errorWorkflow": "7Vws3gCX3QnjM3oD",
|
||||
"callerPolicy": "workflowsFromSameOwner",
|
||||
"availableInMCP": false
|
||||
},
|
||||
"versionId": "22998a54-cd9a-4b57-9c80-df97085a997c",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "iho7kFQsXbGIxG7P",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,170 +0,0 @@
|
|||
{
|
||||
"name": "Genome: ingest MANUALE (scratch)",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {},
|
||||
"type": "n8n-nodes-base.manualTrigger",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
0,
|
||||
0
|
||||
],
|
||||
"id": "2101e704-6275-419d-9963-29a142e5811c",
|
||||
"name": "Esegui manualmente"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"authentication": "privateKey",
|
||||
"command": "ssh vm101 'pi ingest genome-test raw/articles/il-grano-saraceno.md'"
|
||||
},
|
||||
"type": "n8n-nodes-base.ssh",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
224,
|
||||
0
|
||||
],
|
||||
"id": "8ade2def-2d53-4860-88a5-2ca734c6e54a",
|
||||
"name": "SSH: pi ingest (manuale)",
|
||||
"credentials": {
|
||||
"sshPrivateKey": {
|
||||
"id": "GJQjKzte7Hjdfz89",
|
||||
"name": "n8n container -> n8n-runner@nexus"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Extracts the result of run-ingest.sh from the SSH stdout: the LAST line that starts with '{'.\n// NOTE(review): the original note says the output has a 'run_id=' part in front of it - confirm\n// against run-ingest.sh whether that is a separate line or a prefix on the JSON line.\n// Returns the parsed object, or { status: 'error', reason, raw } when no usable line is found.\nconst stdout = ($json.stdout || '').trim();\n\n// Walk all lines and remember the most recent JSON-looking one.\nlet jsonLine;\nfor (const candidate of stdout.split('\\n')) {\n  if (candidate.trim().startsWith('{')) {\n    jsonLine = candidate;\n  }\n}\n\nif (!jsonLine) {\n  return { status: 'error', reason: 'nessuna riga JSON run-ingest', raw: stdout };\n}\n\ntry {\n  return JSON.parse(jsonLine);\n} catch (err) {\n  // Keep the offending line so the notification can show what was received.\n  return { status: 'error', reason: 'JSON non parsabile', raw: jsonLine };\n}"
|
||||
},
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
448,
|
||||
0
|
||||
],
|
||||
"id": "d84cdeaf-612a-454c-8b4d-31824ae6d71e",
|
||||
"name": "Parse ingest"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Maps the parsed run-ingest result ($json) to the title / priority / tags / body fields read\n// by the ntfy request node. Three outcomes: 'ok', 'pr_failed', and anything else (error).\nconst result = $json;\n\nif (result.status === 'ok') {\n  const lint = result.lint_clean ? 'clean' : 'KO';\n  const conflict = result.conflict ? ', CONFLITTO' : '';\n  return {\n    title: `Ingest ${result.slug}: PR aperta`,\n    priority: 'default',\n    tags: 'inbox_tray',\n    body: `\\u2705 ${result.slug}: PR aperta (lint ${lint}${conflict})\\n\\n\\ud83d\\udd17 ${result.pr_url}`\n  };\n}\n\nif (result.status === 'pr_failed') {\n  // Only the first line of the failure detail goes into the notification.\n  const firstDetailLine = (result.detail || '').split('\\n')[0];\n  return {\n    title: `Ingest ${result.slug}: PR FALLITA`,\n    priority: 'high',\n    tags: 'warning',\n    body: `\\u26a0\\ufe0f ${result.slug}: semantic/lint ok ma PR non aperta.\\n\\n${firstDetailLine}`\n  };\n}\n\n// Any other status is reported as an error, with at most 300 characters of the raw output.\nconst reason = result.reason || 'errore';\nconst excerpt = (result.raw || '').slice(0, 300);\nreturn {\n  title: 'Ingest: ERRORE',\n  priority: 'high',\n  tags: 'rotating_light',\n  body: `\\u274c ${reason}\\n\\n${excerpt}`\n};"
|
||||
},
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
672,
|
||||
0
|
||||
],
|
||||
"id": "eadd9275-b38c-416b-b15e-0999f70a05fb",
|
||||
"name": "Build ntfy"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "http://ntfy/homelab-genome",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {}
|
||||
},
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
880,
|
||||
0
|
||||
],
|
||||
"id": "63ab577b-893a-4b3d-8f13-b377be778099",
|
||||
"name": "ntfy: send notification",
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"Esegui manualmente": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "SSH: pi ingest (manuale)",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"SSH: pi ingest (manuale)": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Parse ingest",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Parse ingest": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send notification",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": false,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate"
|
||||
},
|
||||
"versionId": "df06ce3b-1ea8-43be-91ff-02c77972cfe2",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "RNoSaRLYG9vcMn6M",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,419 +0,0 @@
|
|||
{
|
||||
"name": "Genome: ingest",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {
|
||||
"httpMethod": "POST",
|
||||
"path": "forgejo-push",
|
||||
"options": {}
|
||||
},
|
||||
"id": "8c44b478-1a95-4c3b-8ac1-d7c57e228414",
|
||||
"name": "Webhook",
|
||||
"type": "n8n-nodes-base.webhook",
|
||||
"typeVersion": 2.1,
|
||||
"position": [
|
||||
1520,
|
||||
1728
|
||||
],
|
||||
"webhookId": "cf215f5d31e04dd2"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "// Bell filter: let the flow continue ONLY for pushes to develop that touch something under raw/.\n// Returning [] ends the run here, so no separate stop node is needed.\n// Rationale (from the workflow author): wiki-only pushes, e.g. an ingest PR merged back into\n// develop, must not wake vm101; pending-raw stays the source of truth.\nconst payload = $input.first().json;\nconst push = payload.body || payload;\nconst ref = String(push.ref || '');\nconst genome = String((push.repository && push.repository.name) || '').toLowerCase().trim();\n\n// Guard 1: only the develop branch.\nif (ref !== 'refs/heads/develop') return [];\n\n// Guard 2: the repository name must be lowercase alphanumerics / hyphens, 1-64 characters,\n// and must not start with a hyphen.\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Gather the added / modified / removed paths of every well-formed commit and keep the\n// string entries that live under raw/.\nconst CHANGE_KEYS = ['added', 'modified', 'removed'];\nconst rawPaths = (Array.isArray(push.commits) ? push.commits : [])\n  .filter((commit) => commit && typeof commit === 'object')\n  .flatMap((commit) => CHANGE_KEYS.flatMap((key) => (Array.isArray(commit[key]) ? commit[key] : [])))\n  .filter((path) => typeof path === 'string' && path.startsWith('raw/'));\n\n// Guard 3: nothing under raw/ was touched.\nif (rawPaths.length === 0) return [];\n\nreturn [{ json: { genome, touchedCount: rawPaths.length } }];"
|
||||
},
|
||||
"id": "604787c7-4e83-468e-9a98-3ac084203040",
|
||||
"name": "Gate push",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
1744,
|
||||
1728
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"workflowId": {
|
||||
"__rl": true,
|
||||
"value": "zbtRXWsLt56nEIfz",
|
||||
"mode": "list",
|
||||
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
|
||||
"cachedResultName": "Power Manager"
|
||||
},
|
||||
"workflowInputs": {
|
||||
"mappingMode": "defineBelow",
|
||||
"value": {
|
||||
"mode": "ensure-on"
|
||||
},
|
||||
"matchingColumns": [
|
||||
"mode"
|
||||
],
|
||||
"schema": [
|
||||
{
|
||||
"id": "mode",
|
||||
"displayName": "mode",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
}
|
||||
],
|
||||
"attemptToConvertTypes": false,
|
||||
"convertFieldsToString": true
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "f93073a3-7753-4ce1-9ef1-2a0c16386543",
|
||||
"name": "Power Manager - ensure-on",
|
||||
"type": "n8n-nodes-base.executeWorkflow",
|
||||
"typeVersion": 1.3,
|
||||
"position": [
|
||||
1952,
|
||||
1728
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"authentication": "privateKey",
|
||||
"command": "=ssh vm101 'pi pending-raw {{ $('Gate push').first().json.genome }}'"
|
||||
},
|
||||
"id": "876dbdaf-3620-4c2c-a65b-336f0b11198c",
|
||||
"name": "SSH: pending-raw",
|
||||
"type": "n8n-nodes-base.ssh",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
2176,
|
||||
1728
|
||||
],
|
||||
"credentials": {
|
||||
"sshPrivateKey": {
|
||||
"id": "GJQjKzte7Hjdfz89",
|
||||
"name": "n8n container -> n8n-runner@nexus"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "// Parses the pending-raw JSON printed on stdout and emits one item per raw file, carrying\n// every field the run-one-ingest sub-workflow is given (genome, raw, mode, feedback_b64, reason, prevPr).\n// File names with spaces or other characters outside the safe set are NOT ingested: they become\n// 'badname' items with a suggested replacement name. Unparseable output becomes one 'error' item.\nconst stdout = String($input.first().json.stdout || '').trim();\n\n// Single error item carrying at most 500 characters of the output that could not be used.\nconst failure = (reason) => [{ json: { _kind: 'error', reason, raw: stdout.substring(0, 500) } }];\n\nlet pending;\ntry {\n  pending = JSON.parse(stdout);\n} catch (err) {\n  return failure('pending-raw non parsabile');\n}\nif (!pending || typeof pending !== 'object') {\n  return failure('pending-raw non è un oggetto JSON');\n}\n\nconst files = Array.isArray(pending.files) ? pending.files : [];\nif (files.length === 0) return [];\n\n// path -> reason lookup built from the optional detail array; malformed entries are skipped.\nconst reasonByPath = {};\n(Array.isArray(pending.detail) ? pending.detail : []).forEach((entry) => {\n  if (entry && typeof entry.path === 'string' && typeof entry.reason === 'string') {\n    reasonByPath[entry.path] = entry.reason;\n  }\n});\n\nconst SAFE = /^[A-Za-z0-9._\\/-]+$/;\n// Suggested name: every run of disallowed characters becomes a hyphen, then lowercase.\nconst suggest = (name) => name.replace(/[^A-Za-z0-9._\\/-]+/g, '-').toLowerCase() || 'invalid';\nconst genome = pending.genome;\n\nreturn files.map((entry) => {\n  const isString = typeof entry === 'string';\n  const raw = isString ? entry : String(entry);\n  if (isString && SAFE.test(raw)) {\n    return { json: { _kind: 'ingest', genome, raw,\n      mode: 'ingest', feedback_b64: '', reason: reasonByPath[raw] || 'new', prevPr: '' } };\n  }\n  // Non-string entries and unsafe names are both reported, never ingested.\n  return { json: { _kind: 'badname', genome, raw, hint: suggest(raw) } };\n});"
|
||||
},
|
||||
"id": "f5bbbed3-222e-4129-a764-7cf47d69c5ce",
|
||||
"name": "Split raw files",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2400,
|
||||
1728
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"leftValue": "",
|
||||
"typeValidation": "strict",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"id": "cbacf5d98d594ba5",
|
||||
"leftValue": "={{ $json._kind }}",
|
||||
"rightValue": "ingest",
|
||||
"operator": {
|
||||
"type": "string",
|
||||
"operation": "equals"
|
||||
}
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "5398e2c4-c7ca-4ca4-a2d7-e75077453b7c",
|
||||
"name": "Nome valido?",
|
||||
"type": "n8n-nodes-base.if",
|
||||
"typeVersion": 2.2,
|
||||
"position": [
|
||||
2624,
|
||||
1728
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"workflowId": {
|
||||
"__rl": true,
|
||||
"value": "VIi2ovb5gJxNJLbg",
|
||||
"mode": "list",
|
||||
"cachedResultUrl": "/workflow/VIi2ovb5gJxNJLbg",
|
||||
"cachedResultName": "Genome: run-one-ingest"
|
||||
},
|
||||
"workflowInputs": {
|
||||
"mappingMode": "defineBelow",
|
||||
"value": {
|
||||
"genome": "={{ $json.genome }}",
|
||||
"raw": "={{ $json.raw }}",
|
||||
"mode": "ingest",
|
||||
"feedback_b64": "",
|
||||
"reason": "={{ $json.reason }}",
|
||||
"prevPr": ""
|
||||
},
|
||||
"matchingColumns": [],
|
||||
"schema": [
|
||||
{
|
||||
"id": "genome",
|
||||
"displayName": "genome",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "raw",
|
||||
"displayName": "raw",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "mode",
|
||||
"displayName": "mode",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "feedback_b64",
|
||||
"displayName": "feedback_b64",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "reason",
|
||||
"displayName": "reason",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
},
|
||||
{
|
||||
"id": "prevPr",
|
||||
"displayName": "prevPr",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
}
|
||||
],
|
||||
"attemptToConvertTypes": false,
|
||||
"convertFieldsToString": true
|
||||
},
|
||||
"options": {
|
||||
"waitForSubWorkflow": false
|
||||
}
|
||||
},
|
||||
"id": "0f274662-62bb-448b-ae4b-47e4bbcfd35a",
|
||||
"name": "Run one ingest",
|
||||
"type": "n8n-nodes-base.executeWorkflow",
|
||||
"typeVersion": 1.3,
|
||||
"position": [
|
||||
2832,
|
||||
1616
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Builds the ntfy notification for items that did NOT pass the 'Nome valido?' check.\n// Runs once per item: $json is either a 'badname' item (a file to rename) or an 'error' item\n// (pending-raw output that could not be parsed); both kinds are emitted by 'Split raw files'.\n// Returns { topic, title, priority, tags, click, actions, body }, the fields read by 'ntfy: send'.\nconst d = $json || {};\nconst genome = d.genome || 'unknown';\nconst raw = String(d.raw || 'unknown');\nconst hint = String(d.hint || 'unknown');\n\n// Wraps text in a Markdown code span that survives embedded backticks: the delimiter is one\n// backtick longer than the longest backtick run inside the text (CommonMark rule). Backslash\n// escapes are not honoured inside a code span, and the former replace() call re-inserted a\n// bare backtick, so it never protected anything. Text without backticks renders as before.\nconst codeSpan = (text) => {\n  const longest = (text.match(/`+/g) || []).reduce((max, run) => Math.max(max, run.length), 0);\n  if (longest === 0) return '`' + text + '`';\n  const fence = '`'.repeat(longest + 1);\n  return `${fence} ${text} ${fence}`;\n};\n\nif (d._kind === 'error') {\n  // Parse failures share this branch of the IF node: report them as such instead of asking\n  // the user to rename a file that does not exist. Error items carry no genome.\n  return {\n    topic: 'genome-ingest',\n    title: 'Genome ingest · pending-raw non valido',\n    priority: 'high',\n    tags: 'rotating_light',\n    click: '',\n    actions: '',\n    body: `Impossibile leggere l'elenco dei raw in attesa: ${d.reason || 'errore sconosciuto'}.\\n\\nOutput: ${codeSpan(raw.replace(/\\s+/g, ' '))}`\n  };\n}\n\nreturn {\n  topic: 'genome-ingest',\n  title: `${genome} · file da rinominare`,\n  priority: 'high',\n  tags: 'warning',\n  click: '',\n  actions: '',\n  body: `Il file ${codeSpan(raw)} ha spazi o caratteri non ammessi e **non** è stato ingerito.\\nRinominalo in: ${codeSpan(hint)}`\n};"
|
||||
},
|
||||
"id": "0f785bcd-cdc6-4dac-9ced-1c5cfa3453dc",
|
||||
"name": "Build ntfy badname",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2832,
|
||||
1840
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=http://ntfy/{{ $json.topic }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
},
|
||||
{
|
||||
"name": "Click",
|
||||
"value": "={{ $json.click }}"
|
||||
},
|
||||
{
|
||||
"name": "Actions",
|
||||
"value": "={{ $json.actions }}"
|
||||
},
|
||||
{
|
||||
"name": "Markdown",
|
||||
"value": "yes"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "9cd2bde3-6846-4855-ad01-e3a4cdbce208",
|
||||
"name": "ntfy: send",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
3056,
|
||||
1840
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"Webhook": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Gate push",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Gate push": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Power Manager - ensure-on",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Power Manager - ensure-on": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "SSH: pending-raw",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"SSH: pending-raw": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Split raw files",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Split raw files": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Nome valido?",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Nome valido?": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Run one ingest",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy badname",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy badname": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": true,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate",
|
||||
"timeSavedMode": "fixed",
|
||||
"errorWorkflow": "7Vws3gCX3QnjM3oD",
|
||||
"callerPolicy": "workflowsFromSameOwner",
|
||||
"availableInMCP": false
|
||||
},
|
||||
"versionId": "63863925-606f-4200-824c-52f1919f2bb1",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "mUJUuQxcDiiPWcUE",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,128 +0,0 @@
|
|||
{
|
||||
"name": "Genome: on-error",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {},
|
||||
"id": "f715ed51-95e6-475f-8aa5-d0df531cc7cf",
|
||||
"name": "Error Trigger",
|
||||
"type": "n8n-nodes-base.errorTrigger",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
688,
|
||||
-32
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Global error handler: set this workflow as the \"Error Workflow\" in each genome workflow's\n// Settings. Catches ANY node failure (SSH down, Forgejo 4xx/5xx, etc.) and notifies once.\n// Run Once for Each Item: $json is the error trigger payload.\nconst e = $json.execution || {};\nconst w = $json.workflow || {};\n\n// Safely extract error message from various shapes\nconst rawMsg = (e.error && (e.error.message || e.error.description)) || 'errore sconosciuto';\nconst msg = String(rawMsg).trim();\n\nconst lastNode = e.lastNodeExecuted ? ` (nodo: ${e.lastNodeExecuted})` : '';\nconst workflowName = w.name || 'n8n';\nconst executionUrl = e.url || '';\n\n// Backslash-escape backticks so the error text cannot open/close a Markdown code span\n// in the notification body (replacement = backslash + backtick).\nconst msgEsc = msg.replace(/`/g, '\\\\`');\n\nreturn {\n topic: 'genome-ingest',\n title: `Workflow KO · ${workflowName}`,\n priority: 'high',\n tags: 'rotating_light',\n click: executionUrl,\n actions: executionUrl ? `view, Apri l'esecuzione, ${executionUrl}` : '',\n body: `**${workflowName}** è fallito${lastNode}.\\n\\n${msgEsc}`\n};"
|
||||
},
|
||||
"id": "dd39bc0f-918a-4645-8f04-540ac9089311",
|
||||
"name": "Build ntfy",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
928,
|
||||
-32
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=http://ntfy/{{ $json.topic }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
},
|
||||
{
|
||||
"name": "Click",
|
||||
"value": "={{ $json.click }}"
|
||||
},
|
||||
{
|
||||
"name": "Actions",
|
||||
"value": "={{ $json.actions }}"
|
||||
},
|
||||
{
|
||||
"name": "Markdown",
|
||||
"value": "yes"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "a9ee90f3-d7fe-445d-96af-12caef46473f",
|
||||
"name": "ntfy: send",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
1152,
|
||||
-32
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"Error Trigger": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": true,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate"
|
||||
},
|
||||
"versionId": "036161c9-c934-474e-9b4f-634259f2a866",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "7Vws3gCX3QnjM3oD",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,326 +0,0 @@
|
|||
{
|
||||
"name": "Genome: prune",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {
|
||||
"httpMethod": "POST",
|
||||
"path": "forgejo-push-prune",
|
||||
"options": {}
|
||||
},
|
||||
"id": "d31388b9-c6d6-4f28-9a6c-b381922bf5e0",
|
||||
"name": "Webhook prune",
|
||||
"type": "n8n-nodes-base.webhook",
|
||||
"typeVersion": 2.1,
|
||||
"position": [
|
||||
1232,
|
||||
-64
|
||||
],
|
||||
"webhookId": "d6ac11900058434e"
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "// Gate: proceed ONLY on develop pushes that REMOVED at least one file under raw/.\n// Additions/modifications are handled by the ingest flow; this flow reacts to deletions only.\nconst item = $input.first().json;\nconst b = item.body || item;\nconst ref = String(b.ref || '');\nconst genome = String((b.repository?.name) || '').toLowerCase().trim();\n\n// Branch filter\nif (ref !== 'refs/heads/develop') return [];\n\n// Genome name validation (DNS-like: lowercase alphanum + hyphen, 1-64 chars)\nif (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(genome)) return [];\n\n// Collect removed paths safely\nconst removed = [];\nfor (const c of (b.commits || [])) {\n if (!c || !Array.isArray(c.removed)) continue;\n for (const p of c.removed) {\n if (typeof p === 'string' && p.startsWith('raw/')) {\n removed.push(p);\n }\n }\n}\n\n// Gate: stop if nothing under raw/ was removed\nif (removed.length === 0) return [];\n\nreturn [{ json: { genome, removedCount: removed.length } }];"
|
||||
},
|
||||
"id": "84848a31-d099-459e-bd03-67abc2cf2b77",
|
||||
"name": "Gate prune",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
1456,
|
||||
-64
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"workflowId": {
|
||||
"__rl": true,
|
||||
"value": "zbtRXWsLt56nEIfz",
|
||||
"mode": "list",
|
||||
"cachedResultUrl": "/workflow/zbtRXWsLt56nEIfz",
|
||||
"cachedResultName": "Power Manager"
|
||||
},
|
||||
"workflowInputs": {
|
||||
"mappingMode": "defineBelow",
|
||||
"value": {
|
||||
"mode": "ensure-on"
|
||||
},
|
||||
"matchingColumns": [
|
||||
"mode"
|
||||
],
|
||||
"schema": [
|
||||
{
|
||||
"id": "mode",
|
||||
"displayName": "mode",
|
||||
"required": false,
|
||||
"defaultMatch": false,
|
||||
"display": true,
|
||||
"canBeUsedToMatch": true,
|
||||
"type": "string",
|
||||
"removed": false
|
||||
}
|
||||
],
|
||||
"attemptToConvertTypes": false,
|
||||
"convertFieldsToString": true
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "175e4191-eb1b-4e5d-8d82-c39205753152",
|
||||
"name": "Power Manager - ensure-on",
|
||||
"type": "n8n-nodes-base.executeWorkflow",
|
||||
"typeVersion": 1.3,
|
||||
"position": [
|
||||
1680,
|
||||
-64
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"authentication": "privateKey",
|
||||
"command": "=ssh vm101 'pi orphan-wiki {{ $('Gate prune').first().json.genome }}'"
|
||||
},
|
||||
"id": "598f20f8-d668-48da-90e3-1bfada3ace92",
|
||||
"name": "SSH: orphan-wiki",
|
||||
"type": "n8n-nodes-base.ssh",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
1904,
|
||||
-64
|
||||
],
|
||||
"credentials": {
|
||||
"sshPrivateKey": {
|
||||
"id": "GJQjKzte7Hjdfz89",
|
||||
"name": "n8n container -> n8n-runner@nexus"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "// Gate: proceed to prune only if orphan-wiki actually found orphans.\n// run-prune re-derives independently anyway (no detected-vs-pruned race);\n// this gate just avoids taking the lock for nothing.\nconst out = String($input.first().json.stdout || '').trim();\nlet d;\n\ntry {\n d = JSON.parse(out);\n} catch (e) {\n // Malformed JSON from orphan-wiki: fail the node so the execution stops here and the\n // configured error workflow reports it. Returning an item would let the flow continue\n // to 'SSH: prune' with no genome.\n throw new Error(`orphan-wiki: output JSON non parsabile: ${out.substring(0, 500)}`);\n}\n\n// Strict validation: d must be object with numeric count > 0\nif (!d || typeof d !== 'object' || typeof d.count !== 'number' || d.count <= 0) {\n return []; // 0 orphans or missing count -> stop silently\n}\n\n// Forward the genome already validated by 'Gate prune' rather than echoing the one parsed\n// from remote stdout: the next node interpolates it into a shell command.\nconst genome = $('Gate prune').first().json.genome;\n\nreturn [{ json: { genome, count: d.count } }];"
|
||||
},
|
||||
"id": "3b644d61-26d8-4024-baed-bcb4ad169a6a",
|
||||
"name": "Orfani?",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2112,
|
||||
-64
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"authentication": "privateKey",
|
||||
"command": "=ssh vm101 'pi prune {{ $json.genome }}'"
|
||||
},
|
||||
"id": "a8cae2c2-6f2f-4ef6-add9-287195aa84b5",
|
||||
"name": "SSH: prune",
|
||||
"type": "n8n-nodes-base.ssh",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
2336,
|
||||
-64
|
||||
],
|
||||
"credentials": {
|
||||
"sshPrivateKey": {
|
||||
"id": "GJQjKzte7Hjdfz89",
|
||||
"name": "n8n container -> n8n-runner@nexus"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Extract the last JSON line from SSH stdout (the command may print logs before/after).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout' };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream nodes\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n count: r.count,\n pr_url: r.pr_url,\n genome: r.genome,\n _raw: line?.substring(0, 500)\n};"
|
||||
},
|
||||
"id": "da1ab42c-32e1-4c4d-82a1-925fcee1a098",
|
||||
"name": "Parse prune",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2560,
|
||||
-64
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// Build ntfy notification for genome pruning.\n// Run Once for Each Item: $json is the parsed prune result.\nconst d = $json;\nconst genome = d.genome || 'unknown';\n\nlet n;\nif (d.status === 'ok') {\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 potatura ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: 'default',\n tags: 'broom',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `${d.count} sorgente/i orfane proposte per la rimozione. **Approva la PR** per potare, oppure chiudila da Forgejo per annullare.`\n };\n} else {\n n = {\n topic: 'genome-ingest',\n title: `${genome} \\u00b7 errore potatura`.trim(),\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto durante la potatura'}.`\n };\n}\n\nreturn n;"
|
||||
},
|
||||
"id": "ebe99407-6038-4f8f-a73f-7dc7b0a011e0",
|
||||
"name": "Build ntfy",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
2784,
|
||||
-64
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=http://ntfy/{{ $json.topic }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
},
|
||||
{
|
||||
"name": "Click",
|
||||
"value": "={{ $json.click }}"
|
||||
},
|
||||
{
|
||||
"name": "Actions",
|
||||
"value": "={{ $json.actions }}"
|
||||
},
|
||||
{
|
||||
"name": "Markdown",
|
||||
"value": "yes"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "0bd3654e-a73d-4c3a-83ed-9f57ca4aad24",
|
||||
"name": "ntfy: send",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
2992,
|
||||
-64
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"Webhook prune": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Gate prune",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Gate prune": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Power Manager - ensure-on",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Power Manager - ensure-on": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "SSH: orphan-wiki",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"SSH: orphan-wiki": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Orfani?",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Orfani?": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "SSH: prune",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"SSH: prune": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Parse prune",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Parse prune": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": true,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate",
|
||||
"timeSavedMode": "fixed",
|
||||
"errorWorkflow": "7Vws3gCX3QnjM3oD",
|
||||
"callerPolicy": "workflowsFromSameOwner",
|
||||
"availableInMCP": false
|
||||
},
|
||||
"versionId": "999f640c-aae6-42aa-9a95-aba26987e9d0",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "smH5Qrv7CQnTtdAF",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,266 +0,0 @@
|
|||
{
|
||||
"name": "Genome: run-one-ingest",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {
|
||||
"inputSource": "passthrough"
|
||||
},
|
||||
"id": "b1b7ba8e-1e45-4f76-adc0-089180715975",
|
||||
"name": "On ingest request",
|
||||
"type": "n8n-nodes-base.executeWorkflowTrigger",
|
||||
"typeVersion": 1.1,
|
||||
"position": [
|
||||
224,
|
||||
624
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// SECURITY chokepoint: every ingest to vm101 passes here. Re-validate inputs (defense in depth:\n// callers + the SSH wrapper also validate) and assemble the exact command. Charset-validated\n// fields are safe inside the single-quoted remote command -> no shell injection.\n// Run Once for Each Item: $json is the current ingest request.\nconst d = $json || {};\nconst genome = String(d.genome || '').toLowerCase().trim();\nconst raw = String(d.raw || '');\nconst mode = String(d.mode || 'ingest');\nconst fb = String(d.feedback_b64 || '');\n\nconst okGenome = /^[a-z0-9][a-z0-9-]{0,63}$/.test(genome);\nconst okMode = (mode === 'ingest' || mode === 'rework');\nconst okRaw = raw.startsWith('raw/') && !raw.includes('..') && /^[A-Za-z0-9._\\/-]+$/.test(raw);\n// feedback_b64 is required only for rework mode; for ingest it can be empty\nconst okFb = (mode === 'ingest') || /^[A-Za-z0-9+/=]+$/.test(fb);\n\nif (!okGenome || !okMode || !okRaw || !okFb) {\n return {\n _ok: false,\n genome,\n mode,\n _reason: `bad input (genome:${okGenome} mode:${okMode} raw:${okRaw} fb:${okFb})`\n };\n}\n\n// Build SSH command: single-quoted remote command prevents shell injection\nconst ssh_cmd = (mode === 'rework')\n ? `ssh vm101 'pi ingest-rework ${genome} ${raw} ${fb}'`\n : `ssh vm101 'pi ingest ${genome} ${raw}'`;\n\nreturn {\n _ok: true,\n ssh_cmd,\n genome,\n raw,\n mode,\n reason: String(d.reason || ''),\n prevPr: String(d.prevPr || '')\n};"
|
||||
},
|
||||
"id": "8e538237-0e0e-4308-b2c8-631a52b31185",
|
||||
"name": "Guard & build cmd",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
448,
|
||||
624
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"conditions": {
|
||||
"options": {
|
||||
"caseSensitive": true,
|
||||
"typeValidation": "loose",
|
||||
"version": 2
|
||||
},
|
||||
"conditions": [
|
||||
{
|
||||
"id": "4507e3a8b9714c7e",
|
||||
"leftValue": "={{ $json._ok }}",
|
||||
"rightValue": true,
|
||||
"operator": {
|
||||
"type": "boolean",
|
||||
"operation": "true",
|
||||
"singleValue": true
|
||||
}
|
||||
}
|
||||
],
|
||||
"combinator": "and"
|
||||
},
|
||||
"options": {}
|
||||
},
|
||||
"id": "4b249e76-7ab6-4aa3-886d-06b865931cf6",
|
||||
"name": "Input valido?",
|
||||
"type": "n8n-nodes-base.if",
|
||||
"typeVersion": 2.2,
|
||||
"position": [
|
||||
672,
|
||||
624
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"authentication": "privateKey",
|
||||
"command": "={{ $json.ssh_cmd }}"
|
||||
},
|
||||
"id": "8740ae9a-4094-48b2-a9a4-d40d501e09f6",
|
||||
"name": "SSH: ingest",
|
||||
"type": "n8n-nodes-base.ssh",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
880,
|
||||
544
|
||||
],
|
||||
"credentials": {
|
||||
"sshPrivateKey": {
|
||||
"id": "GJQjKzte7Hjdfz89",
|
||||
"name": "n8n container -> n8n-runner@nexus"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// run-ingest.sh prints one JSON line; the wrapper may instead print {status:busy|error,...}.\n// Take the last {...} line from stdout (logs may precede/follow).\n// Run Once for Each Item: $json is the current SSH result item.\nconst out = String($json.stdout || '').trim();\nconst jsonLines = out\n .split('\\n')\n .map(l => l.trim())\n .filter(l => l.startsWith('{') && l.endsWith('}'));\n\nconst line = jsonLines.pop(); // last JSON object line (command prints JSON last)\n\nlet r;\ntry {\n r = line ? JSON.parse(line) : { status: 'error', reason: 'nessuna riga JSON trovata in stdout', raw: out.substring(0, 500) };\n} catch (e) {\n r = { status: 'error', reason: 'JSON non parsabile', rawLine: line?.substring(0, 1000) };\n}\n\n// Ensure consistent shape for downstream Build ntfy\nreturn {\n status: r.status || 'error',\n reason: r.reason || 'errore sconosciuto',\n pr_url: r.pr_url || '',\n slug: r.slug || '',\n lint_clean: r.lint_clean || false,\n conflict: r.conflict || false,\n stage: r.stage || '',\n detail: r.detail || '',\n log: r.log || '',\n _raw: line?.substring(0, 500)\n};"
|
||||
},
|
||||
"id": "928344e3-0712-42e0-b1a8-f5caff489746",
|
||||
"name": "Parse result",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
1104,
|
||||
544
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"mode": "runOnceForEachItem",
|
||||
"jsCode": "// One builder for ingest + rework outcomes. Title is plain ASCII; the icon comes from Tags\n// (ntfy shortcodes); navigation is via Click (tap) + Actions (button) so it works on every\n// client.\n// Run Once for Each Item: $json is the current parsed result.\n// We read the original request context from the Guard node (same execution, no executeWorkflow in between).\nconst g = $('Guard & build cmd').item.json || {};\nconst verb = (g.mode === 'rework') ? 'rework' : 'ingest';\nconst d = $json || {};\nconst genome = g.genome || 'unknown';\n\n// Build notification based on status\nlet n;\n\nif (g._ok === false) {\n // Input validation failed (Guard & build cmd rejected it)\n n = {\n title: `Errore ${verb}: input non valido`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `Richiesta di ${verb} rifiutata.\\n${g._reason || 'motivo sconosciuto'}`\n };\n} else if (d.status === 'ok') {\n // Success: PR opened\n const pm = (d.pr_url || '').match(/\\/pulls\\/(\\d+)/);\n const num = pm ? `#${pm[1]}` : '';\n const lint = d.lint_clean ? 'lint pulito' : 'lint con avvisi';\n const conflict = d.conflict ? ' · ⚠️ conflitto da risolvere' : '';\n const prevPr = g.prevPr ? ` · sostituisce #${g.prevPr}` : '';\n const reason = (g.reason && verb === 'ingest') ? ` (${g.reason})` : '';\n\n n = {\n title: `${genome} · ${verb} ${d.slug || ''} ${num}`.replace(/\\s+/g, ' ').trim(),\n priority: d.conflict ? 'high' : 'default',\n tags: d.conflict ? 'warning' : 'white_check_mark',\n click: d.pr_url || '',\n actions: d.pr_url ? `view, Apri la PR, ${d.pr_url}` : '',\n body: `**${d.slug || 'sorgente'}** ${verb === 'rework' ? 
'rilavorata' : 'ingerita'}`\n + reason + prevPr\n + `.\\n${lint}${conflict}.`\n };\n} else if (d.status === 'busy') {\n // Another ingest is already running on this genome\n n = {\n title: `${genome} · ${verb} in coda`,\n priority: 'min',\n tags: 'hourglass_flowing_sand',\n click: '',\n actions: '',\n body: `Un altro ingest era in corso su questo genoma. La fonte resta pendente e verrà ripresa al prossimo campanello.`\n };\n} else if (d.status === 'pr_failed') {\n // Semantic/lint ok but PR could not be opened\n const detailLine = String(d.detail || '').split('\\n')[0] || 'dettaglio non disponibile';\n n = {\n title: `${genome} · ${d.slug || ''}: PR non aperta`,\n priority: 'high',\n tags: 'warning',\n click: '',\n actions: '',\n body: `Semantic e lint ok, ma la PR non si è aperta.\\n${detailLine}`\n };\n} else {\n // Generic error (including parse errors)\n const stage = d.stage ? ` (stage: ${d.stage})` : '';\n const log = d.log ? `\\nLog: ${d.log}` : '';\n n = {\n title: `${genome} · errore ${verb}`,\n priority: 'high',\n tags: 'rotating_light',\n click: '',\n actions: '',\n body: `${d.reason || 'errore sconosciuto'}${stage}.${log}`\n };\n}\n\nn.topic = 'genome-ingest';\nreturn n;"
|
||||
},
|
||||
"id": "9062dfba-02ba-4abc-8be6-828c0b353114",
|
||||
"name": "Build ntfy",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
1328,
|
||||
624
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"method": "POST",
|
||||
"url": "=http://ntfy/{{ $json.topic }}",
|
||||
"authentication": "genericCredentialType",
|
||||
"genericAuthType": "httpBearerAuth",
|
||||
"sendHeaders": true,
|
||||
"headerParameters": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "Title",
|
||||
"value": "={{ $json.title }}"
|
||||
},
|
||||
{
|
||||
"name": "Priority",
|
||||
"value": "={{ $json.priority }}"
|
||||
},
|
||||
{
|
||||
"name": "Tags",
|
||||
"value": "={{ $json.tags }}"
|
||||
},
|
||||
{
|
||||
"name": "Click",
|
||||
"value": "={{ $json.click }}"
|
||||
},
|
||||
{
|
||||
"name": "Actions",
|
||||
"value": "={{ $json.actions }}"
|
||||
},
|
||||
{
|
||||
"name": "Markdown",
|
||||
"value": "yes"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sendBody": true,
|
||||
"contentType": "raw",
|
||||
"rawContentType": "Raw / Text",
|
||||
"body": "={{ $json.body }}",
|
||||
"options": {
|
||||
"timeout": 15000
|
||||
}
|
||||
},
|
||||
"id": "0c2b4d9b-2700-4815-b47c-8523bc4eb2ff",
|
||||
"name": "ntfy: send",
|
||||
"type": "n8n-nodes-base.httpRequest",
|
||||
"typeVersion": 4.4,
|
||||
"position": [
|
||||
1552,
|
||||
624
|
||||
],
|
||||
"credentials": {
|
||||
"httpHeaderAuth": {
|
||||
"id": "TBPXSWOF63k9mvm8",
|
||||
"name": "ntfy-token"
|
||||
},
|
||||
"httpBearerAuth": {
|
||||
"id": "nCv4CUN7Ef086Ewj",
|
||||
"name": "Bearer Auth account"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"pinData": {},
|
||||
"connections": {
|
||||
"On ingest request": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Guard & build cmd",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Guard & build cmd": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Input valido?",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Input valido?": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "SSH: ingest",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"SSH: ingest": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Parse result",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Parse result": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Build ntfy",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Build ntfy": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "ntfy: send",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"active": true,
|
||||
"settings": {
|
||||
"executionOrder": "v1",
|
||||
"binaryMode": "separate",
|
||||
"timeSavedMode": "fixed",
|
||||
"errorWorkflow": "7Vws3gCX3QnjM3oD",
|
||||
"callerPolicy": "workflowsFromSameOwner",
|
||||
"availableInMCP": false
|
||||
},
|
||||
"versionId": "fd8c1cf6-c5df-4074-b777-113349e32a03",
|
||||
"meta": {
|
||||
"instanceId": "96b2f0ec76a4400bbd481c617b24b3b87024cc7a913efacccaf9fc85722e7417"
|
||||
},
|
||||
"id": "VIi2ovb5gJxNJLbg",
|
||||
"tags": []
|
||||
}
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
# Componenti di Sistema — Gestione Sincronizzazione e Automazione Genoma
|
||||
|
||||
Questo modulo contiene gli script di backend che vengono installati sul server `nexus` per gestire il ciclo di vita dei vault locali (scratch di lavoro), l'integrazione con Syncthing e l'autocommit dei file grezzi (`raw/`) provenienti dai dispositivi mobili o desktop (es. Obsidian).
|
||||
|
||||
## Architettura dei File di Sistema
|
||||
|
||||
Gli script sono progettati per girare in un ambiente multi-utente protetto, dove l'istanza globale di `n8n` (tramite l'utente di sistema `n8n-runner`) pilota le operazioni senza possedere i diritti di lettura/scrittura diretti sui file del genoma o sui segreti di configurazione.
|
||||
|
||||
### 1. Posizionamento e Permessi degli Script
|
||||
|
||||
I file inclusi in questa cartella devono essere installati sul server di produzione nella directory `/usr/local/bin/` con privilegi di esecuzione globali, ma modificabili solo da `root`.
|
||||
|
||||
- **Destinazione:** `/usr/local/bin/`
|
||||
- **Proprietario (Owner):** `root:root`
|
||||
- **Permessi (Chmod):** `0755` (`-rwxr-xr-x`)
|
||||
|
||||
#### Elenco degli Script:
|
||||
|
||||
- `ensure-genome-vault`: Script idempotente che inizializza o riallinea il vault locale clonandolo da Forgejo (in loopback) sul branch `develop`, configura gli `.stignore` ed effettua il provisioning automatico della cartella condivisa su Syncthing via API.
|
||||
- `genome-askpass`: Helper di autenticazione per Git (`GIT_ASKPASS`). Intercetta le richieste di credenziali di Git durante i cloni e i push HTTP su Forgejo, iniettando l'utente e il token applicativo senza esporli nei log di sistema o negli argomenti dei processi.
|
||||
- `genome-raw-commit`: Script di polling periodico invocato da n8n. Isola i file modificati nella cartella `raw/`, interroga Syncthing per capire quale dispositivo (e quindi quale autore umano) ha generato la modifica, crea commit atomici attribuiti al singolo autore e pusha le modifiche su Forgejo (`develop`).
|
||||
|
||||
---
|
||||
|
||||
## Modello di Sicurezza e Visibilità
|
||||
|
||||
Per garantire l'isolamento del sistema operativo, l'infrastruttura si basa su tre livelli di confinamento:
|
||||
|
||||
### A. Variabili d'Ambiente Protette (`.env`)
|
||||
|
||||
Le credenziali (Token Forgejo, API Key Syncthing) risiedono nella Home dell'utente operativo del servizio (`homelab`) e sono completamente invisibili a n8n e ad altri utenti del sistema.
|
||||
|
||||
- **Path:** `/home/homelab/.config/knowledge-genome.env`
|
||||
- **Permessi:** `0600` (`-rw-------`), di proprietà esclusiva di `homelab:homelab`.
|
||||
|
||||
#### env
|
||||
|
||||
Nel file `~/.config/knowledge-genome.env`.
|
||||
|
||||
```text
|
||||
# knowledge-genome.env Configuration Profile
|
||||
# Requirements: Must be owned by the service user with 0600 permissions.
|
||||
|
||||
# Vault path and operational branch
|
||||
GENOME_VAULTS_ROOT=/srv/genome-vaults
|
||||
GENOME_BASE=develop
|
||||
|
||||
# Forgejo Target Instance
|
||||
# Replace 127.0.0.1 with vm101 IP if Forgejo is hosted on the virtual machine
|
||||
FORGEJO_HOST=127.0.0.1:3001
|
||||
FORGEJO_OWNER=Keru
|
||||
FORGEJO_USER=n8n-bot
|
||||
FORGEJO_TOKEN="............"
|
||||
|
||||
# Git Commit Identity
|
||||
COMMITTER_NAME=n8n-bot
|
||||
COMMITTER_EMAIL=n8n-bot@homelab
|
||||
DEFAULT_AUTHOR_NAME="Matteo Cherubini"
|
||||
DEFAULT_AUTHOR_EMAIL=matteo@keruhomelab.com
|
||||
|
||||
# Syncthing Target Instance
|
||||
# Replace 127.0.0.1 with vm101 IP if Syncthing API is hosted on the virtual machine
|
||||
SYNCTHING_URL=http://127.0.0.1:8384
|
||||
SYNCTHING_API_KEY="............"
|
||||
```
|
||||
|
||||
### B. Confine dei Privilegi in Sudoers
|
||||
|
||||
L'utente di automazione `n8n-runner` (usato dall'agente SSH di n8n) non ha accesso alla shell e non può invocare comandi arbitrari. Può unicamente chiamare i due script principali impersonando l'utente `homelab` senza l'inserimento della password.
|
||||
|
||||
Configurazione da applicare in `/etc/sudoers.d/n8n-genome` (con permessi rigorosi `0440`):
|
||||
|
||||
```text
|
||||
n8n-runner ALL=(homelab) NOPASSWD: /usr/local/bin/ensure-genome-vault, /usr/local/bin/genome-raw-commit
|
||||
```
|
||||
|
||||
### C. Directory dei Vault
|
||||
|
||||
I dati veri e propri sincronizzati da Syncthing risiedono isolati in `/srv/genome-vaults/`.
|
||||
|
||||
- **Proprietario:** `homelab:homelab` (UID/GID 1000); questo permette la convivenza nativa e fluida tra il demone Syncthing in esecuzione nel container e gli script Git locali.
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
#!/bin/bash
|
||||
# ensure-genome-vault <genome> [--status-only]
|
||||
#
|
||||
# Idempotent, unified command for managing genome vaults.
|
||||
# Called by n8n during genome creation and as a safety net mechanism.
|
||||
#
|
||||
# Operation workflow:
|
||||
# - Vault absent -> Clone from Forgejo (loopback) + track develop branch
|
||||
# - Vault present -> Realign to origin/develop (treated as a rebuildable scratchpad)
|
||||
# - Post-clone/fetch -> Write raw/.stignore and register/update the Syncthing folder.
|
||||
#
|
||||
# Source of truth is Forgejo. Vaults are scratch spaces and not backed up directly.
|
||||
# All operations run locally via loopback.
|
||||
|
||||
set -euo pipefail
|
||||
genome="${1:?usage: ensure-genome-vault <genome> [--status-only]}"
|
||||
mode="${2:-}"
|
||||
|
||||
# Slug validation inside the script to prevent path/URL traversal:
|
||||
# Lowercase kebab-case, no '/', '..', or spaces.
|
||||
[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }
|
||||
|
||||
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
|
||||
: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
|
||||
: "${GENOME_BASE:=develop}"
|
||||
: "${FORGEJO_USER:=n8n-bot}"
|
||||
: "${FORGEJO_HOST:=127.0.0.1:3001}"
|
||||
: "${FORGEJO_OWNER:=Keru}"
|
||||
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
|
||||
|
||||
vault="${GENOME_VAULTS_ROOT}/${genome}"
|
||||
fid="${genome}-public"
|
||||
clone_url="http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git"
|
||||
export GIT_ASKPASS=/usr/local/bin/genome-askpass # Provides the n8n-bot token
|
||||
|
||||
mkdir -p "$GENOME_VAULTS_ROOT"
|
||||
|
||||
# ── 1. Clone (if missing) or realign (if present) ────────────────────────────
|
||||
if [[ ! -d "${vault}/.git" ]]; then
|
||||
[[ "$mode" == "--status-only" ]] && { printf '{"status":"absent","genome":"%s"}\n' "$genome"; exit 0; }
|
||||
git clone -q "$clone_url" "$vault"
|
||||
cd "$vault"
|
||||
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
|
||||
git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}" 2>/dev/null || git switch -q "$GENOME_BASE"
|
||||
else
|
||||
# develop does not exist on remote yet: create it from current base and publish
|
||||
git switch -q -c "$GENOME_BASE"
|
||||
git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
|
||||
fi
|
||||
state="cloned"
|
||||
else
|
||||
cd "$vault"
|
||||
if [[ "$mode" == "--status-only" ]]; then
|
||||
printf '{"status":"present","genome":"%s","head":"%s"}\n' "$genome" "$(git rev-parse --short HEAD)"
|
||||
exit 0
|
||||
fi
|
||||
git fetch -q origin
|
||||
if git show-ref --verify --quiet "refs/remotes/origin/${GENOME_BASE}"; then
|
||||
git switch -q "$GENOME_BASE" 2>/dev/null || git switch -q -c "$GENOME_BASE" --track "origin/${GENOME_BASE}"
|
||||
# GUARD: hard reset is allowed ONLY if raw/ has no uncommitted changes (only raw/ is checked).
|
||||
# If Syncthing has already written uncommitted raw files, DO NOT destroy them: soft fast-forward.
|
||||
if [[ -z "$(git status --porcelain -- raw/ 2>/dev/null)" ]]; then
|
||||
git reset -q --hard "origin/${GENOME_BASE}"
|
||||
state="realigned"
|
||||
else
|
||||
git merge -q --ff-only "origin/${GENOME_BASE}" 2>/dev/null || true
|
||||
state="realigned-kept-dirty"
|
||||
fi
|
||||
else
|
||||
git switch -q -c "$GENOME_BASE" 2>/dev/null || true
|
||||
git push -q "$clone_url" "${GENOME_BASE}:${GENOME_BASE}"
|
||||
state="base-created"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── 2. raw/.stignore + exclusion from git (infrastructure, not content) ────────────
|
||||
mkdir -p "${vault}/raw"
|
||||
cat > "${vault}/raw/.stignore" <<'EOF'
|
||||
// Knowledge Genome — Syncthing exclusions for raw/
|
||||
// NEVER unencrypted private data: git-crypt protects INSIDE the repo, not in Syncthing transit
|
||||
private
|
||||
// Obsidian / editor noise
|
||||
.obsidian
|
||||
.trash
|
||||
*.tmp
|
||||
workspace*.json
|
||||
// security
|
||||
.git
|
||||
EOF
|
||||
# .stignore must not be included in genome commits
|
||||
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null \
|
||||
|| echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
|
||||
|
||||
# Syncthing folder marker: must exist on disk (locally, NOT on Git).
|
||||
# Without it, Syncthing refuses to scan (“folder marker missing”).
|
||||
mkdir -p "${vault}/raw/.stfolder"
|
||||
|
||||
# .stfolder must not be included in genome commits
|
||||
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null \
|
||||
|| echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"
|
||||
|
||||
# ── 3. Idempotent Syncthing folder configuration (best-effort, does not block the vault) ────────
|
||||
# Register the Syncthing folder for this vault's raw/ directory, best-effort:
# nothing in this section may abort the script, because the vault itself is
# already usable at this point. Outcomes (reported as `syncthing_folder`):
#   skipped(no api key)         SYNCTHING_API_KEY is unset or empty
#   exists                      GET /rest/config/folders/<fid> succeeded
#   created                     folder built from Syncthing's defaults and PUT
#   error(check syncthing api)  fetching the defaults, jq, or the PUT failed
folder_state="skipped(no api key)"
if [[ -n "${SYNCTHING_API_KEY:-}" ]]; then
  if curl -fsS -o /dev/null -H "X-API-Key: ${SYNCTHING_API_KEY}" \
       "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
    folder_state="exists"
  # The payload is built INSIDE the condition on purpose: under
  # `set -euo pipefail` a bare `body="$(curl … | jq …)"` terminates the whole
  # script when curl fails (e.g. Syncthing unreachable), so the final JSON
  # line would never be printed. As a condition, a failure just selects the
  # error branch. The non-empty check covers jq succeeding on empty input.
  elif body="$(curl -fsS -H "X-API-Key: ${SYNCTHING_API_KEY}" \
                 "${SYNCTHING_URL}/rest/config/defaults/folder" \
               | jq --arg id "$fid" --arg label "${genome} (raw public)" --arg path "${vault}/raw" \
                   '.id=$id | .label=$label | .path=$path | .type="sendreceive"
                    | .fsWatcherEnabled=true | .rescanIntervalS=3600')" \
       && [[ -n "$body" ]] \
       && curl -fsS -o /dev/null -X PUT \
            -H "X-API-Key: ${SYNCTHING_API_KEY}" -H "Content-Type: application/json" \
            -d "$body" "${SYNCTHING_URL}/rest/config/folders/${fid}" 2>/dev/null; then
    folder_state="created"
  else
    folder_state="error(check syncthing api)"
  fi
fi
|
||||
|
||||
printf '{"status":"ok","genome":"%s","vault":"%s","state":"%s","syncthing_folder":"%s"}\n' \
|
||||
"$genome" "$vault" "$state" "$folder_state"
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
#!/bin/bash
#
# genome-askpass — credential helper meant to be wired in through GIT_ASKPASS
# for Forgejo HTTP authentication.
#
# Git calls this program with the prompt text as the first argument and reads
# the answer from stdout:
#   - prompt mentions "Username"/"username" -> prints FORGEJO_USER (default n8n-bot)
#   - any other prompt                      -> prints FORGEJO_TOKEN
#                                              (aborts if it is unset or empty)
#

set -eu

# Configuration (including the token) comes from the per-user env file.
source "${HOME}/.config/knowledge-genome.env"

prompt="${1:-}"
if [[ "$prompt" == *[Uu]sername* ]]; then
  printf '%s\n' "${FORGEJO_USER:-n8n-bot}"
else
  printf '%s\n' "${FORGEJO_TOKEN:?FORGEJO_TOKEN not set}"
fi
|
||||
|
|
@ -1,155 +0,0 @@
|
|||
#!/bin/bash
|
||||
# genome-raw-commit <genome>
|
||||
#
|
||||
# Commit the raw files that Syncthing has placed in the vault and push them to origin/<base>.
|
||||
# - Committer = n8n-bot (sole pusher); Author = the person who wrote it (Syncthing modifiedBy -> .authors.json)
|
||||
# - One commit per author (single-device => one commit). No-op if there is nothing.
|
||||
# - JSON output built with jq (safe escaping), with a `files` array:
|
||||
# for each raw -> file, author, local_path, local_url (file://), remote_url (Forgejo web).
|
||||
set -euo pipefail
|
||||
genome="${1:?usage: genome-raw-commit <genome>}"
|
||||
|
||||
# Input validation to prevent path or URL traversal inside the script
|
||||
[[ "$genome" =~ ^[a-z0-9][a-z0-9-]{0,63}$ ]] || { echo '{"status":"error","reason":"invalid genome name"}'; exit 1; }
|
||||
|
||||
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
|
||||
: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
|
||||
: "${GENOME_BASE:=develop}"
|
||||
: "${FORGEJO_USER:=n8n-bot}"
|
||||
: "${FORGEJO_HOST:=127.0.0.1:3001}"
|
||||
: "${FORGEJO_OWNER:=Keru}"
|
||||
: "${FORGEJO_WEB_BASE:=https://git.keruhomelab.com}" # human-facing URL for remote links (not the loopback)
|
||||
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
|
||||
: "${COMMITTER_NAME:=n8n-bot}"
|
||||
: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
|
||||
: "${DEFAULT_AUTHOR_NAME:=Unknown}"
|
||||
: "${DEFAULT_AUTHOR_EMAIL:=unknown@syncthing}"
|
||||
|
||||
vault="${GENOME_VAULTS_ROOT}/${genome}"
|
||||
fid="${genome}-public"
|
||||
authors_map="${GENOME_VAULTS_ROOT}/.authors.json"
|
||||
# GENOME_PUSH_URL is a test seam: defaults to the Forgejo loopback URL in production.
|
||||
clone_url="${GENOME_PUSH_URL:-http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git}"
|
||||
export GIT_ASKPASS=/usr/local/bin/genome-askpass
|
||||
|
||||
[[ -d "${vault}/.git" ]] || { printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"; exit 1; }
|
||||
cd "$vault"
|
||||
git config user.name "$COMMITTER_NAME"
|
||||
git config user.email "$COMMITTER_EMAIL"
|
||||
git config commit.gpgsign false
|
||||
|
||||
grep -qxF 'raw/.stignore' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stignore' >> "${vault}/.git/info/exclude"
|
||||
grep -qxF 'raw/.stfolder' "${vault}/.git/info/exclude" 2>/dev/null || echo 'raw/.stfolder' >> "${vault}/.git/info/exclude"
|
||||
|
||||
git add -A -- raw/
|
||||
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
|
||||
|
||||
# --- Quiet window: only commit raw files that have STOPPED changing. ----------------
|
||||
# While a note is being written (Obsidian autosave -> Syncthing -> here) its mtime stays
|
||||
# fresh; we leave it UNSTAGED so a half-written note never triggers an ingest. A file is
|
||||
# committed only after it has been still for RAW_QUIET_MINUTES. Deletions (nothing on disk)
|
||||
# are stable by definition and pass straight through. Deterministic — no model in the loop.
|
||||
# Quiet-window filter over the staged raw/ changes.
#   RAW_QUIET_MINUTES  minutes a file must have been untouched (default 2).
#                      A non-numeric value falls back to 2: `find -mmin` would
#                      otherwise fail silently (stderr is discarded) and hold
#                      nothing, and the `%d` in the noop JSON would fail too.
#   held               number of files un-staged because they are still "hot".
quiet_min="${RAW_QUIET_MINUTES:-2}"
[[ "$quiet_min" =~ ^[0-9]+$ ]] || quiet_min=2
quiet_min=$((10#$quiet_min))   # force base 10 so "08" is not parsed as octal
held=0
# Paths are read NUL-delimited (-z): the default newline output C-quotes any
# path with non-ASCII bytes ("raw/caf\303\251.md"), so `-e "$f"` was false and
# a note still being written slipped through the quiet window.
while IFS= read -r -d '' f; do
  [[ -z "$f" ]] && continue
  # Only an existing file can be "hot"; a staged deletion has nothing on disk to settle.
  if [[ -e "$f" && -n "$(find "$f" -mmin -"$quiet_min" 2>/dev/null)" ]]; then
    git reset -q -- "$f" 2>/dev/null || true
    held=$((held+1))
  fi
done < <(git diff --cached --name-only -z -- raw/)

# Nothing left staged: report a no-op (saying why, if files were held) and stop.
if git diff --cached --quiet; then
  if [[ "$held" -gt 0 ]]; then
    printf '{"status":"noop","reason":"raw still settling","genome":"%s","held":%d,"quiet_minutes":%d}\n' \
      "$genome" "$held" "$quiet_min"
  else
    printf '{"status":"noop","genome":"%s"}\n' "$genome"
  fi
  exit 0
fi
|
||||
|
||||
# resolve_dev <path>
#   <path> is relative to the vault (raw/...). Queries Syncthing's
#   /rest/db/file for folder $fid and prints `.local.modifiedBy` from the
#   answer. Prints nothing when SYNCTHING_API_KEY is unset/empty or when the
#   lookup fails; the function never fails.
resolve_dev() {
  if [[ -z "${SYNCTHING_API_KEY:-}" ]]; then
    return 0
  fi
  # Syncthing addresses files relative to the folder root, i.e. without "raw/".
  local rel="${1#raw/}"
  {
    curl -fsS --get "${SYNCTHING_URL}/rest/db/file" \
         -H "X-API-Key: ${SYNCTHING_API_KEY}" \
         --data-urlencode "folder=${fid}" \
         --data-urlencode "file=${rel}" 2>/dev/null \
      | jq -r '.local.modifiedBy // empty' 2>/dev/null
  } || true
}
|
||||
# author_for_dev <device id>
#   Prints "name<TAB>email" for the device, looked up in the JSON file
#   $authors_map (an object keyed by device id whose values carry `name` and
#   `email`). Falls back, per field, to DEFAULT_AUTHOR_NAME /
#   DEFAULT_AUTHOR_EMAIL when the id is empty, the map file is missing, the
#   entry is absent or not an object, or the field is missing, empty or not a
#   string.
#   The per-field fallback matters: a partial entry used to print the literal
#   string "null", and an empty name made the caller's tab-split `read` shift
#   the email into the name slot.
author_for_dev() {
  local dev="$1" name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
  local mapped_name="" mapped_email=""
  if [[ -n "$dev" && -f "$authors_map" ]]; then
    # `objects` / `strings` drop anything of the wrong type; `// empty` turns a
    # missing value into no output. jq errors (malformed map) count as "no entry".
    mapped_name="$(jq -r --arg d "$dev" '(.[$d] | objects | .name | strings) // empty' \
                     "$authors_map" 2>/dev/null || true)"
    mapped_email="$(jq -r --arg d "$dev" '(.[$d] | objects | .email | strings) // empty' \
                      "$authors_map" 2>/dev/null || true)"
    if [[ -n "$mapped_name" ]]; then name="$mapped_name"; fi
    if [[ -n "$mapped_email" ]]; then email="$mapped_email"; fi
  fi
  printf '%s\t%s' "$name" "$email"
}
|
||||
|
||||
# Collect per-file (relpath, author) and group by author for committing
|
||||
# Group the staged raw/ paths by author so each author gets one commit.
#   G_FILES[key]  newline-terminated list of paths for that author
#   G_NAME[key]   author name      G_EMAIL[key]  author email
#   ROWS          one "path<TAB>author name" entry per file (feeds the JSON report)
# where key is "Name <email>", the form `git commit --author` expects.
declare -A G_FILES G_NAME G_EMAIL
declare -a ROWS
# Paths are read NUL-delimited (-z): the newline output C-quotes paths with
# non-ASCII bytes, and the quoted spelling is not a valid pathspec for the
# later `git commit -- <files>`. Paths containing a tab or newline are still
# unsupported, because ROWS and G_FILES use those characters as separators.
while IFS= read -r -d '' f; do
  [[ -z "$f" ]] && continue
  dev="$(resolve_dev "$f")"
  IFS=$'\t' read -r aname aemail <<< "$(author_for_dev "$dev")"
  ROWS+=("${f}"$'\t'"${aname}")
  key="${aname} <${aemail}>"
  G_FILES["$key"]+="${f}"$'\n'
  G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"
done < <(git diff --cached --name-only -z -- raw/)
|
||||
|
||||
ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
|
||||
commits=0; summary=""
|
||||
for key in "${!G_FILES[@]}"; do
|
||||
mapfile -t files < <(printf '%s' "${G_FILES[$key]}")
|
||||
short="$(printf '%s\n' "${files[@]}" | sed 's#^raw/##' | paste -sd, -)"
|
||||
msg="$(printf 'raw(%s): sync %s\n\nAdded-by: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
|
||||
"$genome" "$short" "${G_NAME[$key]}" "$ts")"
|
||||
git commit -q --author="$key" -m "$msg" -- "${files[@]}"
|
||||
commits=$((commits+1))
|
||||
summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
|
||||
done
|
||||
|
||||
# Push to origin/<base>. The vault is SCRATCH, so we never do an interactive rebase
|
||||
# (which can conflict when the same raw file is edited repeatedly). Strategy:
|
||||
# try a fast-forward push; if origin moved, re-apply our raw changes on top of a
|
||||
# fresh origin/<base> and push again. Deterministic, conflict-free.
|
||||
git fetch -q origin
|
||||
if ! git push -q "$clone_url" "HEAD:${GENOME_BASE}" 2>/dev/null; then
|
||||
# origin advanced: capture our just-made tree for raw/, realign hard, re-apply, retry once.
|
||||
tmp="$(mktemp -d)"
|
||||
cp -a raw/. "$tmp"/ 2>/dev/null || true
|
||||
git reset -q --hard "origin/${GENOME_BASE}"
|
||||
git clean -q -fd
|
||||
cp -a "$tmp"/. raw/ 2>/dev/null || true
|
||||
rm -rf "$tmp"
|
||||
git add -A -- raw/
|
||||
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
|
||||
if git diff --cached --quiet; then
|
||||
# our content already matches origin -> nothing to push, report ok-noop-after-realign
|
||||
printf '{"status":"ok","genome":"%s","base":"%s","commits":0,"head":"%s","summary":"already in sync after realign","files":[]}\n' \
|
||||
"$genome" "$GENOME_BASE" "$(git rev-parse --short HEAD)"
|
||||
exit 0
|
||||
fi
|
||||
git commit -q --author="${DEFAULT_AUTHOR_NAME} <${DEFAULT_AUTHOR_EMAIL}>" \
|
||||
-m "raw(${genome}): re-apply after realign" -- raw/ || true
|
||||
git push -q "$clone_url" "HEAD:${GENOME_BASE}" \
|
||||
|| { printf '{"status":"error","reason":"push-failed-after-realign","genome":"%s"}\n' "$genome"; exit 1; }
|
||||
fi
|
||||
head="$(git rev-parse --short HEAD)"
|
||||
|
||||
# `files` array: local (file://) and remote (Forgejo web) link for each committed raw
|
||||
files_json="$(
|
||||
for row in "${ROWS[@]}"; do
|
||||
IFS=$'\t' read -r rel aname <<< "$row"
|
||||
jq -n --arg file "$rel" --arg author "$aname" \
|
||||
--arg lpath "${vault}/${rel}" \
|
||||
--arg lurl "file://${vault}/${rel}" \
|
||||
--arg rurl "${FORGEJO_WEB_BASE}/${FORGEJO_OWNER}/${genome}/src/branch/${GENOME_BASE}/${rel}" \
|
||||
'{file:$file, author:$author, local_path:$lpath, local_url:$lurl, remote_url:$rurl}'
|
||||
done | jq -s '.'
|
||||
)"
|
||||
|
||||
jq -n --arg genome "$genome" --arg base "$GENOME_BASE" --argjson commits "$commits" \
|
||||
--arg head "$head" --arg summary "$summary" --argjson files "$files_json" \
|
||||
'{status:"ok", genome:$genome, base:$base, commits:$commits, head:$head, summary:$summary, files:$files}'
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
# deploy/vm101
|
||||
|
||||
System artifacts deployed to **vm101** (the GPU ingest node). The repo is the
|
||||
source of truth; the live copies live in `/usr/local/bin/`. Edit here, then
|
||||
`sudo ./install.sh` on vm101 to push changes.
|
||||
|
||||
## Contents
|
||||
|
||||
- `n8n-pi-wrap` — forced-command wrapper that fronts every n8n→vm101 SSH call.
|
||||
- `install.sh` — installs the wrapper(s) into `/usr/local/bin` (idempotent).
|
||||
|
||||
## n8n-pi-wrap
|
||||
|
||||
The only entry point for the `n8n-runner` identity onto vm101. n8n never gets a
|
||||
shell here: whatever it sends arrives as `SSH_ORIGINAL_COMMAND`, and a `case`
|
||||
whitelist decides what runs. Anything outside the whitelist is denied and logged.
|
||||
|
||||
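Allowed commands (the table covers the core set; the wrapper's `case` whitelist also accepts `pi pending-raw`, `pi orphan-wiki`, `pi prune`, `pi ingest-rework` and `pi changed-raw`):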
|
||||
|
||||
| Command | What it does |
|
||||
|---|---|
|
||||
| `pi run` | one-shot prompt via stdin (proof-of-life / health) |
|
||||
| `pi ingest <genome> <raw_path>` | the real two-phase ingest (below) |
|
||||
| `ollama list` / `ollama ps` | model introspection |
|
||||
|
||||
### The two-phase ingest
|
||||
|
||||
`pi ingest` runs the clean-start + two phases, then stops:
|
||||
|
||||
1. **Clean start** — `git fetch && switch <INGEST_BASE> && reset --hard origin/<base> && clean -fd`.
|
||||
Destroys only vm101's *scratch* checkout (never a shared branch, never a
|
||||
force-push) — this determinism is by design.
|
||||
2. **Semantic** — `skills/ingest/scripts/ingest-semantic.py <genome> <raw_path>`
|
||||
drives `pi` to WRITE `wiki/*` pages + `.ingest-manifest.json`.
|
||||
NOTE: this is the script, NOT `pi -p "/skill:ingest ..."` (that form makes the
|
||||
model reply in chat and write nothing — the classic "manifest not found" trap).
|
||||
3. **Mechanical** — `skills/ingest/scripts/run-ingest.sh <genome>` validates the
|
||||
manifest, then index/log/scoped-lint/commit on `feat/ai-ingest-<slug>` and opens
|
||||
a PR onto `<INGEST_BASE>`. Emits one JSON line `{status,slug,pr_url,...}`.
|
||||
|
||||
The PR then waits for the human gate. One raw per session, sequential.
|
||||
|
||||
### Input hardening
|
||||
|
||||
Both inputs come from `SSH_ORIGINAL_COMMAND`, so both are validated:
|
||||
|
||||
- `genome` — kebab lowercase `^[a-z0-9-]+$`.
|
||||
- `raw_path` — must be under `raw/`, no `..` traversal, restricted charset
|
||||
`[A-Za-z0-9._/-]`, and the file must exist. Rejected paths return a JSON error.
|
||||
|
||||
Config (`INGEST_BASE`, `GENOMES_ROOT`, `INGEST_MODEL`, Forgejo token) is sourced
|
||||
from `~/.config/knowledge-genome.env` (0600, owner-only).
|
||||
|
||||
## Install / update
|
||||
|
||||
```bash
|
||||
# on vm101
|
||||
cd ~/knowledge-genome-orchestrator/deploy/vm101
|
||||
sudo ./install.sh
|
||||
```
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
#!/bin/bash
# deploy/vm101/install.sh — install vm101 wrappers from repo -> /usr/local/bin (idempotent).
# Run ON vm101 with sudo:  sudo ./install.sh
#
# The wrapper is syntax-checked BEFORE it is installed: checking the installed
# copy afterwards (as this script used to) means a broken wrapper has already
# replaced the working one by the time the error is reported.
set -euo pipefail

# Directory this script lives in, so it works from any cwd.
here="$(cd "$(dirname "$0")" && pwd)"

# `set -e` aborts here, leaving /usr/local/bin untouched, if the check fails.
bash -n "${here}/n8n-pi-wrap"
echo "syntax: ok"

install -m 0755 "${here}/n8n-pi-wrap" /usr/local/bin/n8n-pi-wrap
echo "installed: /usr/local/bin/n8n-pi-wrap"
|
||||
|
|
@ -1,196 +0,0 @@
|
|||
#!/bin/bash
|
||||
set -eu
|
||||
cmd="${SSH_ORIGINAL_COMMAND:-}"
|
||||
case "$cmd" in
|
||||
"pi pending-raw "*)
|
||||
genome="${cmd#pi pending-raw }"
|
||||
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
|
||||
logger -t n8n-pi-wrap "ok: pi pending-raw ${genome}"
|
||||
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
|
||||
# Run from the DEPLOYED skill dir (same place as ingest-semantic.py / run-ingest.sh on
|
||||
# the `pi ingest` path below), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
|
||||
exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
|
||||
;;
|
||||
"pi orphan-wiki "*)
|
||||
genome="${cmd#pi orphan-wiki }"
|
||||
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
|
||||
logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
|
||||
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
|
||||
exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
|
||||
;;
|
||||
"pi run")
|
||||
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
|
||||
prompt=$(cat)
|
||||
exec /usr/local/bin/pi --no-tools --mode json -p "$prompt" </dev/null
|
||||
;;
|
||||
"pi ingest "*)
|
||||
# `pi ingest <genome> <raw_path>`: validate both tokens, take the per-genome
# lock, realign the checkout (clean_start), then run the semantic step and
# hand over to the mechanical step. Every rejection prints one JSON line.
#
# Strict positional parse: EXACTLY `pi ingest <genome> <raw_path>` (two tokens).
rest="${cmd#pi ingest }"
genome="${rest%% *}"
raw_path="${rest#* }"
# reject: missing second token, or any extra token (a space left in raw_path)
if [ "$genome" = "$rest" ] || [ -z "$raw_path" ] || [ "$raw_path" != "${raw_path#* }" ]; then
    echo '{"status":"error","reason":"usage: pi ingest <genome> <raw_path>"}'; exit 1
fi
# genome slug: kebab lowercase only
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
# raw_path whitelist: MUST live under raw/, no traversal, restricted charset.
#   - must start with "raw/"   - no ".." segment   - no absolute path / leading slash
#   - allowed chars: [A-Za-z0-9._/-]  (kebab slugs + subdirs like raw/articles/foo.md)
case "$raw_path" in
    raw/*) : ;;
    *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;;
esac
case "$raw_path" in
    *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;;
esac
case "$raw_path" in
    *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;;
esac

logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"

# Per-genome lock: serialize writes; never two concurrent ingests on the same genome.
# The braces scope `2>/dev/null` to this one attempt. Written as
# `exec 9>… 2>/dev/null`, the stderr redirect is applied to the shell itself and
# silences every later command, including the run-ingest.sh exec'd at the end.
{ exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'
    exit 0
fi

set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

# The raw file must actually exist under the genome's raw/ dir. Checked AFTER
# clean_start so the test runs against the tree that will be ingested: before
# the realign, a raw file that only exists on the remote base is reported as
# missing, and a leftover local file passes and is then removed by clean_start.
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }

# SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
# (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
# Its combined output goes to a temp log whose path is reported on failure.
log="$(mktemp -t pi-ingest.XXXXXX.log)"
"${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
    >"$log" 2>&1 \
    || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"ingest-semantic failed\",\"log\":\"${log}\"}"; exit 1; }

# MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
|
||||
;;
|
||||
"pi prune "*)
|
||||
# `pi prune <genome>`: prune orphaned sources. Takes the same lock as ingest
# (serialises writes per genome), runs clean_start, then hands over to
# run-prune.sh (which re-derives the orphans and opens a gated PR).
genome="${cmd#pi prune }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi prune ${genome}"

# The braces scope `2>/dev/null` to this one attempt. Written as
# `exec 9>… 2>/dev/null`, the stderr redirect is applied to the shell itself and
# silences every later command, including the run-prune.sh exec'd at the end.
{ exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest/prune is running for this genome","genome":"'"$genome"'"}'
    exit 0
fi

set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Realign the checkout to the configured base before pruning.
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

exec "${HOME}/.pi/agent/skills/ingest/scripts/run-prune.sh" "${genome}"
|
||||
;;
|
||||
"pi ingest-rework "*)
|
||||
# `pi ingest-rework <genome> <raw_path> <feedback_base64>` (3 tokens): re-run
# the ingest of one raw file with reviewer feedback, exported to the semantic
# step as INGEST_FEEDBACK.
# The feedback travels base64-encoded in argv: the n8n SSH node does not pass
# stdin, and this way the review's metacharacters (quotes, newlines, $(...))
# are neutralised.
args="${cmd#pi ingest-rework }"
genome="${args%% *}"; tmp="${args#* }"
raw_path="${tmp%% *}"; fb_b64="${tmp#* }"
if [ "$genome" = "$args" ] || [ "$raw_path" = "$tmp" ] || [ -z "$fb_b64" ]; then
    echo '{"status":"error","reason":"usage: pi ingest-rework <genome> <raw_path> <feedback_b64>"}'; exit 1
fi
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome"}'; exit 1;; esac
case "$raw_path" in raw/*) : ;; *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;; esac
case "$raw_path" in *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;; esac
case "$raw_path" in *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;; esac
# The charset check also rejects any extra token: a space is not a base64 character.
case "$fb_b64" in *[!A-Za-z0-9+/=]*) echo '{"status":"error","reason":"feedback not base64"}'; exit 1;; esac

# Decoding stays tolerant (`|| true`) so input that base64 flags as invalid but
# still partly decodes keeps working, but a payload that decodes to nothing is
# rejected: a rework without feedback would silently repeat the plain ingest.
feedback="$(printf '%s' "$fb_b64" | base64 -d 2>/dev/null || true)"
[ -n "$feedback" ] || { echo '{"status":"error","reason":"feedback decodes to empty"}'; exit 1; }

logger -t n8n-pi-wrap "ok: pi ingest-rework ${genome} ${raw_path}"

# Per-genome lock: serialises with normal ingests.
# The braces scope `2>/dev/null` to this one attempt. Written as
# `exec 9>… 2>/dev/null`, the stderr redirect is applied to the shell itself and
# silences every later command, including the run-ingest.sh exec'd at the end.
{ exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null || exec 9>"/tmp/kg-ingest-${genome}.lock"
if ! flock -n 9; then
    echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'; exit 0
fi
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
    || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

# Checked AFTER clean_start so the test runs against the realigned tree, not
# against whatever the previous session left in the checkout.
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }

# Semantic step with the feedback in its environment; combined output goes to
# a temp log whose path is reported on failure.
log="$(mktemp -t pi-rework.XXXXXX.log)"
INGEST_FEEDBACK="$feedback" \
    "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
    >"$log" 2>&1 \
    || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"rework failed\",\"log\":\"${log}\"}"; exit 1; }
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
|
||||
;;
|
||||
"pi changed-raw "*)
|
||||
# `pi changed-raw <genome> <before> <after>`: print, as one JSON object, the
# raw/ files changed between two commits (the webhook payload does NOT include
# file lists, so vm101's checkout computes the diff itself).
# Output: {"status":"ok","genome":…,"count":N,"files":[…]}; deleted files and
# Syncthing's .stfolder/.stignore entries are left out.
rest="${cmd#pi changed-raw }"
genome="${rest%% *}"
range="${rest#* }"
before="${range%% *}"
after="${range#* }"
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
# Exactly two SHAs are required, each validated on its own. Checking only the
# concatenation "$before$after" let a missing or empty SHA through (a single
# SHA was silently used as both ends), which yielded a bogus empty file list.
if [ "$genome" = "$rest" ] || [ "$before" = "$range" ]; then
    echo '{"status":"error","reason":"invalid commit range"}'; exit 1
fi
case "$before" in ""|*[!a-f0-9]*) echo '{"status":"error","reason":"invalid commit range"}'; exit 1;; esac
# A third token would leave a space in $after, which the charset check rejects.
case "$after" in ""|*[!a-f0-9]*) echo '{"status":"error","reason":"invalid commit range"}'; exit 1;; esac
logger -t n8n-pi-wrap "ok: pi changed-raw ${genome} ${before}..${after}"
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
cd "${GENOMES_ROOT}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
# Report a failed fetch as JSON like every other error; a bare failure under
# `set -e` would end the script with no output at all.
git fetch -q origin || { echo '{"status":"error","reason":"fetch failed"}'; exit 1; }
# Resolve the diff base robustly:
#   - before all-zero (brand-new branch) or unreachable (force-push) -> fall back to after~1
#   - if even after~1 is missing (root commit) -> list all raw files in `after`
base="$before"
case "$before" in *[!0]*) : ;; *) base="" ;; esac            # all-zero -> empty
if [ -n "$base" ] && ! git cat-file -e "${base}^{commit}" 2>/dev/null; then base=""; fi
if [ -z "$base" ]; then
    if git cat-file -e "${after}~1^{commit}" 2>/dev/null; then base="${after}~1"; else base=""; fi
fi
if [ -n "$base" ]; then
    # --diff-filter=d drops deletions: a removed raw has nothing to ingest.
    files="$(git diff --name-only --diff-filter=d "${base}" "${after}" -- raw/ 2>/dev/null \
        | grep -vE '(^|/)\.st(folder|ignore)' || true)"
else
    # no usable base: enumerate raw files present at `after`
    files="$(git ls-tree -r --name-only "${after}" -- raw/ 2>/dev/null \
        | grep -vE '(^|/)\.st(folder|ignore)' || true)"
fi
# emit a JSON array via jq (safe escaping); a list with no non-empty line counts as empty
printf '%s\n' "$files" | grep -c . >/dev/null 2>&1 || files=""
if [ -z "$files" ]; then
    echo '{"status":"ok","genome":"'"$genome"'","count":0,"files":[]}'
else
    printf '%s\n' "$files" | jq -R . | jq -s \
        --arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
fi
|
||||
;;
|
||||
"ollama list")
|
||||
logger -t n8n-pi-wrap "ok: ollama list"
|
||||
exec /usr/local/bin/ollama list
|
||||
;;
|
||||
"ollama ps")
|
||||
logger -t n8n-pi-wrap "ok: ollama ps"
|
||||
exec /usr/local/bin/ollama ps
|
||||
;;
|
||||
*)
|
||||
logger -t n8n-pi-wrap "denied: ${cmd:-<empty>}"
|
||||
echo "unauthorized command" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
|
@ -1,130 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# diagnose-run-ingest.sh
|
||||
# Run from the repo root: bash diagnose-run-ingest.sh
|
||||
# Builds the same fixture the bats test uses and runs run-ingest under `bash -x`
|
||||
# so we can see exactly which command makes it exit non-zero.
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
REPO="$(pwd)"
|
||||
RI="${REPO}/skills/ingest/scripts/run-ingest.sh"
|
||||
|
||||
echo "==================== ENV ===================="
|
||||
echo "bash: $(bash --version | head -1)"
|
||||
echo "git : $(git --version)"
|
||||
echo "jq : $(jq --version 2>/dev/null || echo MISSING)"
|
||||
echo "py : $(python3 --version 2>/dev/null || echo MISSING)"
|
||||
echo
|
||||
|
||||
echo "============ run-ingest.sh on disk ============"
|
||||
if [[ ! -f "$RI" ]]; then echo "NOT FOUND: $RI (run me from the repo root)"; exit 1; fi
|
||||
echo "-- helper invocations (want 'bash ...'): --"
|
||||
grep -nE 'log-append\.sh|scoped-lint\.sh|open-pr\.sh' "$RI"
|
||||
echo "-- result emitter (want 'jq -nc'): --"
|
||||
grep -nE 'jq -nc?|jq -n ' "$RI"
|
||||
echo
|
||||
|
||||
echo "============ build hermetic fixture ============"
|
||||
T="$(mktemp -d)"
|
||||
mkdir -p "$T/nohooks"
|
||||
git init --bare -q "$T/origin.git"
|
||||
g="$T/g"
|
||||
mkdir -p "$g"/{raw/articles,wiki/sources,wiki/entities,wiki/concepts,wiki/queries,wiki/private}
|
||||
|
||||
cat > "$g/wiki/index.md" <<'EOF'
|
||||
---
|
||||
title: "Index"
|
||||
type: index
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
last_updated: 2026-01-01
|
||||
private: false
|
||||
---
|
||||
|
||||
# Index
|
||||
|
||||
---
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*x*
|
||||
|
||||
|
||||
## Entities (`wiki/entities/`)
|
||||
*x*
|
||||
|
||||
|
||||
## Concepts (`wiki/concepts/`)
|
||||
*x*
|
||||
|
||||
|
||||
## Queries (`wiki/queries/`)
|
||||
*x*
|
||||
|
||||
|
||||
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
||||
*x*
|
||||
EOF
|
||||
|
||||
cat > "$g/wiki/log.md" <<'EOF'
|
||||
---
|
||||
title: "Log"
|
||||
type: log
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
last_updated: 2026-01-01
|
||||
private: false
|
||||
---
|
||||
|
||||
# Log
|
||||
|
||||
---
|
||||
|
||||
## [2026-01-01] CONFIG | init
|
||||
- run_id: `init`
|
||||
EOF
|
||||
|
||||
echo raw > "$g/raw/articles/test.md"
|
||||
|
||||
(
|
||||
cd "$g"
|
||||
git init -q
|
||||
git config commit.gpgsign false
|
||||
git config core.hooksPath "$T/nohooks"
|
||||
git config user.email t@t
|
||||
git config user.name t
|
||||
git add .
|
||||
git commit -qm init
|
||||
git branch -M main
|
||||
git remote add origin "$T/origin.git"
|
||||
git push -q -u origin main
|
||||
) && echo "fixture commit+push OK" || echo "FIXTURE SETUP FAILED (look above)"
|
||||
|
||||
cat > "$g/wiki/sources/test-source.md" <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-04
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
|
||||
cat > "$g/.ingest-manifest.json" <<'EOF'
|
||||
{ "raw_source":"raw/articles/test.md","model":"m","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||
"pages":[{"path":"wiki/sources/test-source.md","summary":"a source","maturity":"draft","status":"created"}] }
|
||||
EOF
|
||||
|
||||
echo
|
||||
echo "============ run-ingest (bash -x) ============"
|
||||
cd "$g"
|
||||
export KG_LIB_DIR="${REPO}/lib" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
bash -x "$RI" genome-test >"$T/out.txt" 2>"$T/trace.txt"
|
||||
rc=$?
|
||||
echo "EXIT=$rc"
|
||||
echo "-- run-ingest stdout (final JSON should be here): --"
|
||||
cat "$T/out.txt"
|
||||
echo "-- last 25 lines of the trace (the failing command is near the end): --"
|
||||
tail -n 25 "$T/trace.txt"
|
||||
|
|
@ -9,7 +9,7 @@ PROVIDER=forgejo
|
|||
|
||||
# --- FORGEJO ---
|
||||
FORGEJO_URL=https://git.keruhomelab.com
|
||||
FORGEJO_USER=Keru
|
||||
FORGEJO_USER=keru
|
||||
FORGEJO_SSH_PORT=222
|
||||
|
||||
# --- GITHUB (used when PROVIDER=github) ---
|
||||
|
|
|
|||
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/clean-start.sh — single source of truth for the pre-session reset.
|
||||
# Caller must already be INSIDE the genome checkout.
|
||||
# Aligns the working tree to origin/<base>. Never force-pushes a shared branch.
|
||||
# Tolerates a missing remote branch (first-setup scenario).
|
||||
# NOTE: sourced library — no `set -euo pipefail` (would leak into the caller).
|
||||
# =============================================================================
|
||||
|
||||
# clean_start
#   Resets the current working tree (the caller must already be inside the
#   genome checkout) to the base branch named by INGEST_BASE (default: main).
#   Returns 1 as soon as any git step fails, 0 otherwise.
clean_start() {
  local branch="${INGEST_BASE:-main}"

  if ! git fetch -q origin; then
    return 1
  fi

  # Move onto the base branch; create it locally if switching fails.
  if ! git switch -q "$branch" 2>/dev/null; then
    git checkout -q -b "$branch" || return 1
  fi

  # Only hard-reset when the remote actually has the branch, so a missing
  # remote branch is tolerated instead of being treated as an error.
  if git ls-remote --exit-code --heads origin "$branch" >/dev/null 2>&1; then
    git reset -q --hard "origin/${branch}" || return 1
  fi

  # Drop untracked files and directories left over from a previous session.
  git clean -q -fd || return 1
}
|
||||
|
|
@ -27,10 +27,6 @@ check_deps() {
|
|||
if ! command -v bw &>/dev/null; then
|
||||
warn "Optional tool 'bw' (Bitwarden CLI) not found. Vaultwarden integration will be manual."
|
||||
fi
|
||||
|
||||
if ! command -v python3 &>/dev/null; then
|
||||
warn "Optional tool 'python3' not found. Needed for 'make test' and the ingest skill (index-append.py), not for setup."
|
||||
fi
|
||||
}
|
||||
|
||||
check_git_identity() {
|
||||
|
|
|
|||
|
|
@ -21,29 +21,18 @@ gcrypt_export_key() {
|
|||
|
||||
gcrypt_verify() {
|
||||
local genome_name="$1"
|
||||
local key_path="${KEYS_DIR}/${genome_name}.key"
|
||||
|
||||
info "Verifying git-crypt configuration for ${genome_name}..."
|
||||
info "Verifying git-crypt status for ${genome_name}..."
|
||||
git-crypt lock
|
||||
|
||||
# `git-crypt status` reports the CONFIGURED status (from `.gitattributes`), not the
|
||||
# lock/unlock status of the working tree. Encrypted lines have their labels right-aligned
|
||||
# (with leading whitespace), so you CANNOT anchor on `^encrypted`.
|
||||
# We filter by private/ and distinguish “encrypted” from “not encrypted” without
|
||||
# relying on exact spacing.
|
||||
local status_out encrypted_count not_encrypted_count
|
||||
status_out=$(git-crypt status 2>/dev/null || true)
|
||||
encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^[[:space:]]*encrypted:' || true)
|
||||
not_encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^not encrypted:' || true)
|
||||
|
||||
if [[ "$encrypted_count" -gt 0 ]]; then
|
||||
success "Encryption configured: ${encrypted_count} private file(s) under git-crypt."
|
||||
if [[ "$not_encrypted_count" -gt 0 ]]; then
|
||||
warn "${not_encrypted_count} file(s) under private/ are NOT covered by the git-crypt filter — check .gitattributes (leak risk)."
|
||||
fi
|
||||
elif [[ "$not_encrypted_count" -gt 0 ]]; then
|
||||
warn "private/ files exist but none are covered by the git-crypt filter — check the .gitattributes filter (leak risk)."
|
||||
if file "raw/private/.gitkeep" 2>/dev/null | grep -q "data"; then
|
||||
success "Encryption verified: private/ directory is protected."
|
||||
else
|
||||
info "No private/ files present yet — nothing to verify."
|
||||
warn "Encryption check inconclusive. Run 'git-crypt status' manually."
|
||||
fi
|
||||
|
||||
[[ -f "$key_path" ]] && git-crypt unlock "$key_path"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -66,7 +55,7 @@ gcrypt_verify() {
|
|||
#
|
||||
# USAGE:
|
||||
# source lib/git-crypt.sh
|
||||
# cd ~/knowledge-genome-orchestrator/genome-dev
|
||||
# cd ~/knowledge-genome-setup/genome-dev
|
||||
# gcrypt_rotate_key "genome-dev"
|
||||
#
|
||||
# REQUIRES:
|
||||
|
|
@ -118,8 +107,6 @@ gcrypt_rotate_key() {
|
|||
|
||||
# 5. Re-stage private files so they are committed encrypted with the new key
|
||||
local staged=0
|
||||
# compgen -G requires bash 4+ for reliable glob expansion. macOS stock
|
||||
# bash is 3.2; use Homebrew bash (already recommended in README) for rotation.
|
||||
if compgen -G "raw/private/*" > /dev/null 2>&1; then
|
||||
git add raw/private/
|
||||
staged=1
|
||||
|
|
|
|||
133
lib/lint.sh
133
lib/lint.sh
|
|
@ -23,7 +23,7 @@ lint_markdown_file() {
|
|||
|
||||
# 1. Check frontmatter delimiters
|
||||
if [[ $(head -n 1 "$file") != "---" ]]; then
|
||||
error "Missing frontmatter start (---) in: $file"
|
||||
warn "Missing frontmatter start (---) in: $file"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
|
||||
|
|
@ -31,14 +31,14 @@ lint_markdown_file() {
|
|||
local mandatory_fields=("title:" "type:" "domain:" "maturity:" "last_updated:")
|
||||
for field in "${mandatory_fields[@]}"; do
|
||||
if ! grep -q "^${field}" "$file"; then
|
||||
error "Missing mandatory field '${field}' in: $file"
|
||||
warn "Missing mandatory field '${field}' in: $file"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
# 3. Check domain matches genome name
|
||||
if grep -q "^domain:" "$file" && ! grep -q "^domain: ${genome_name}" "$file"; then
|
||||
error "Domain mismatch in $file (expected '${genome_name}')"
|
||||
warn "Domain mismatch in $file (expected '${genome_name}')"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
|
||||
|
|
@ -70,8 +70,8 @@ check_valid_type() {
|
|||
done
|
||||
|
||||
if [[ $valid -eq 0 ]]; then
|
||||
error "Invalid type value '${type_value}' in: $file"
|
||||
error " Valid types: ${VALID_TYPES[*]}"
|
||||
warn "Invalid type value '${type_value}' in: $file"
|
||||
warn " Valid types: ${VALID_TYPES[*]}"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -144,8 +144,8 @@ check_knowledge_decay() {
|
|||
esac
|
||||
|
||||
if [[ $days_old -gt $threshold ]]; then
|
||||
error "STALE: $file"
|
||||
error " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})"
|
||||
warn "STALE: $file"
|
||||
warn " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})"
|
||||
return 1
|
||||
fi
|
||||
|
||||
|
|
@ -190,129 +190,12 @@ check_broken_links() {
|
|||
local links
|
||||
links=$(grep -oE '\[\[[^\]]+' "$file" 2>/dev/null | sed 's/^\[\[//' | cut -d'|' -f1)
|
||||
|
||||
# Cross-genome links (../other-genome/…) are not resolvable from a single
|
||||
# genome checkout and are skipped — they would always fall
|
||||
# through the two-level lookup and produce non-actionable warnings.
|
||||
while IFS= read -r link; do
|
||||
[[ -z "$link" ]] && continue
|
||||
|
||||
if [[ "$link" == ../* ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
for link in $links; do
|
||||
local target="$link"
|
||||
[[ "$target" != *.md ]] && target="${target}.md"
|
||||
|
||||
if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then
|
||||
warn "Potential broken link: [[$link]] in $file"
|
||||
fi
|
||||
done <<< "$links"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# levenshtein <s1> <s2>
|
||||
# Classic edit distance via a two-row rolling buffer, so every array subscript
|
||||
# is a single integer. The previous implementation used comma subscripts
|
||||
# (d[i,j]); in bash arithmetic the comma operator collapses to one dimension,
|
||||
# so the table aliased onto itself and returned wrong distances — it could not
|
||||
# even score two identical strings as 0. This form is portable to bash 3.2
|
||||
# (no associative arrays). Echoes the integer distance.
|
||||
# ---------------------------------------------------------------------------
|
||||
levenshtein() {
  local s1="$1" s2="$2"
  local rows=${#s1} cols=${#s2}

  # An empty operand costs exactly the other operand's length.
  if (( rows == 0 )); then
    echo "$cols"
    return
  fi
  if (( cols == 0 )); then
    echo "$rows"
    return
  fi

  # Two rolling rows of the DP table: `above` is row r-1, `here` is row r.
  local -a above=() here=()
  local r c best cand

  # Row 0: reaching the first c characters of s2 from "" takes c insertions.
  for (( c = 0; c <= cols; c++ )); do
    above[c]=$c
  done

  for (( r = 1; r <= rows; r++ )); do
    here[0]=$r
    for (( c = 1; c <= cols; c++ )); do
      # Substitution (free when the characters match) ...
      if [[ "${s1:r-1:1}" == "${s2:c-1:1}" ]]; then
        best=${above[c-1]}
      else
        best=$(( above[c-1] + 1 ))
      fi
      # ... versus deletion ...
      cand=$(( above[c] + 1 ))
      if (( cand < best )); then
        best=$cand
      fi
      # ... versus insertion.
      cand=$(( here[c-1] + 1 ))
      if (( cand < best )); then
        best=$cand
      fi
      here[c]=$best
    done
    above=( "${here[@]}" )
  done

  echo "${above[cols]}"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# similarity <s1> <s2>
|
||||
# Percentage similarity from the edit distance: 100 = identical, 0 = entirely
|
||||
# different. Two empty strings are treated as identical (100), so the divide
|
||||
# is always guarded.
|
||||
# ---------------------------------------------------------------------------
|
||||
similarity() {
  local s1="$1" s2="$2"

  # The longer of the two lengths is the denominator of the percentage.
  local longest=${#s2}
  if (( ${#s1} >= longest )); then
    longest=${#s1}
  fi

  # Two empty strings: identical by definition, and this avoids dividing by 0.
  if (( longest == 0 )); then
    echo "100"
    return
  fi

  local edits
  edits=$(levenshtein "$s1" "$s2")
  # Integer arithmetic: the quotient is truncated before it is subtracted.
  echo $(( 100 - (edits * 100 / longest) ))
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_duplicates <manifest>
|
||||
# Advisory only: warns when a page created this run has a slug suspiciously
|
||||
# close to an entity/concept already listed in wiki/index.md, so a human can
|
||||
# merge them in the PR rather than grow two near-identical pages. Never fails
|
||||
# the lint (always returns 0), exactly like check_broken_links.
|
||||
#
|
||||
# The threshold is tunable via KG_DUP_THRESHOLD (default 70). Exact self-matches
|
||||
# are skipped: step 1 of run-ingest.sh appends this run's new slugs to the index
|
||||
# BEFORE the lint runs, so without the skip every new slug would match itself at
|
||||
# 100%. A page that genuinely collides with a pre-existing file is reported by
|
||||
# the manifest as 'modified', not 'created', so skipping created==existing pairs
|
||||
# can never mask a real collision.
|
||||
# ---------------------------------------------------------------------------
|
||||
check_duplicates() {
  # Args:    $1  path to the ingest manifest (JSON).
  # Env:     KG_DUP_THRESHOLD  similarity percentage above which a pair is
  #          reported (default 70). Must be a non-negative integer; any other
  #          value is reported once and replaced by the default.
  # Returns: always 0 — this check is advisory and only emits warnings.
  local manifest="$1"
  [[ -f "$manifest" ]] || return 0
  command -v jq >/dev/null 2>&1 || return 0

  # New leaf slugs from pages created this run.
  local -a new_slugs=()
  local slug
  while IFS= read -r slug; do
    [[ -n "$slug" ]] && new_slugs+=("$slug")
  done < <(jq -r '.pages[]? | select(.status=="created") | .path
                  | split("/")[-1] | sub("\\.md$";"")' "$manifest" 2>/dev/null)

  # Existing entity/concept slugs already catalogued in the index.
  local -a existing_slugs=()
  if [[ -f "wiki/index.md" ]]; then
    local line
    while IFS= read -r line; do
      if [[ $line =~ \[\[(entities|concepts)/([a-z0-9-]+)\]\] ]]; then
        existing_slugs+=("${BASH_REMATCH[2]}")
      fi
    done < "wiki/index.md"
  fi

  (( ${#new_slugs[@]} && ${#existing_slugs[@]} )) || return 0

  # Validate the threshold before it reaches an arithmetic context. Unchecked,
  # "70%" raises a syntax error on every comparison (silently suppressing all
  # warnings), and a bare word is evaluated as a variable name (usually 0).
  local threshold="${KG_DUP_THRESHOLD:-70}"
  if [[ ! "$threshold" =~ ^[0-9]+$ ]]; then
    warn "KG_DUP_THRESHOLD='${threshold}' is not a non-negative integer — using 70"
    threshold=70
  fi
  # Force base 10 so a zero-padded value such as 089 is not parsed as octal.
  threshold=$(( 10#$threshold ))

  local new exist sim
  for new in "${new_slugs[@]}"; do
    for exist in "${existing_slugs[@]}"; do
      [[ "$new" == "$exist" ]] && continue   # skip exact self-match (see header)
      sim=$(similarity "$new" "$exist")
      if (( sim > threshold )); then
        warn "Possible duplicate: '${new}' ≈ '${exist}' (${sim}% similar) — review in PR"
      fi
    done
  done
  return 0
}
|
||||
|
|
|
|||
|
|
@ -4,9 +4,6 @@
|
|||
# Directory structure creation and template rendering engine.
|
||||
# =============================================================================
|
||||
|
||||
# Canonical directory layout lives in one place (lib/structure.sh).
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/structure.sh"
|
||||
|
||||
render_template() {
|
||||
local template_file="$1"
|
||||
local output_file="$2"
|
||||
|
|
@ -16,27 +13,17 @@ render_template() {
|
|||
local content
|
||||
content=$(<"$template_file")
|
||||
|
||||
# HARDENING: collapse any “spaced” placeholders from a formatter
|
||||
# { { KEY } } -> {{KEY}} (KEY = UPPERCASE/underscore)
|
||||
# Defense in depth: if Prettier or a copy-paste breaks the syntax again,
|
||||
# the scaffold fixes itself. sed is a core utility (like tr/date already used here).
|
||||
content=$(sed -E 's/\{[[:space:]]*\{[[:space:]]*([A-Z_]+)[[:space:]]*\}[[:space:]]*\}/{{\1}}/g' <<<"$content")
|
||||
|
||||
# Defaults (:-) so master-repo templates render even when GENOME_* are unset
|
||||
# (scaffold_master runs before any genome; set -u would otherwise abort here).
|
||||
local genome_name_upper
|
||||
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME:-}")
|
||||
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME}")
|
||||
|
||||
# Placeholder replacement
|
||||
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME:-}}"
|
||||
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME}}"
|
||||
content="${content//\{\{GENOME_NAME_UPPER\}\}/${genome_name_upper}}"
|
||||
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC:-}}"
|
||||
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL:-}}"
|
||||
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER:-}}"
|
||||
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL:-}}"
|
||||
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO:-}}"
|
||||
# linked project reference (optional) — empty registry field renders as 'none'
|
||||
content="${content//\{\{LINKED_PROJECT\}\}/${GENOME_LINKED:-none}}"
|
||||
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC}}"
|
||||
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL}}"
|
||||
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER}}"
|
||||
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL}}"
|
||||
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO}}"
|
||||
content="${content//\{\{DATE\}\}/$(date +%Y-%m-%d)}"
|
||||
|
||||
mkdir -p "$(dirname "$output_file")"
|
||||
|
|
@ -45,9 +32,13 @@ render_template() {
|
|||
|
||||
scaffold_genome() {
|
||||
local base="$1"
|
||||
local dirs=(
|
||||
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
|
||||
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
|
||||
)
|
||||
|
||||
info "Building directory structure in ${base}..."
|
||||
for dir in "${GENOME_DIRS[@]}"; do
|
||||
for dir in "${dirs[@]}"; do
|
||||
mkdir -p "${base}/${dir}"
|
||||
touch "${base}/${dir}/.gitkeep"
|
||||
done
|
||||
|
|
@ -64,11 +55,8 @@ scaffold_genome() {
|
|||
|
||||
install_precommit_hook() {
|
||||
local repo_path="$1"
|
||||
local hooks_dir
|
||||
hooks_dir="$(git -C "$repo_path" rev-parse --git-path hooks)"
|
||||
local hook_path="${hooks_dir}/pre-commit"
|
||||
local hook_path="${repo_path}/.git/hooks/pre-commit"
|
||||
|
||||
mkdir -p "$hooks_dir"
|
||||
cp "${TEMPLATES_DIR}/pre-commit.sh" "$hook_path"
|
||||
chmod +x "$hook_path"
|
||||
success "Pre-commit security hook installed at: $hook_path"
|
||||
|
|
|
|||
|
|
@ -1,79 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/structure.sh
|
||||
# Single source of truth for the canonical genome directory layout, plus the
|
||||
# verify/sync helpers used by scripts/verify-genomes.sh.
|
||||
#
|
||||
# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources
|
||||
# this file and builds new genomes from GENOME_DIRS, so scaffolding and the
|
||||
# structure check can never drift apart.
|
||||
# =============================================================================
|
||||
|
||||
# NOTE — Return-code smell
|
||||
# Several functions in this file (and in lint.sh) use the return code as a
|
||||
# numeric counter (e.g. return $missing). This is a known smell: exit codes
|
||||
# wrap at 256 and conflate "count of problems" with "exit status". At the
|
||||
# current scale (<10 problems per run) the wrap-around risk is zero, so we
|
||||
# accept it pragmatically. If counts ever grow, switch to stdout counters
|
||||
# or dedicated global variables.
|
||||
|
||||
# Canonical directories every genome must have.
|
||||
# raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned,
|
||||
# contract-bound layout the lint, the index sections and the ingest skill depend on.
|
||||
GENOME_DIRS=(
|
||||
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
|
||||
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# structure_report <base>
|
||||
# Reports drift of <base> against GENOME_DIRS.
|
||||
# - missing canonical dir → counted as drift (returns non-zero)
|
||||
# - extra dir under raw/ or wiki/ → warning only (does not fail)
|
||||
# Returns the number of MISSING canonical directories.
|
||||
# ---------------------------------------------------------------------------
|
||||
structure_report() {
  # Args:    $1  genome checkout to inspect.
  # Returns: the number of canonical directories (GENOME_DIRS) missing under
  #          $1; 0 means no drift. Extra directories are reported via `info`
  #          only and never affect the return value.
  local base="$1"
  local missing=0
  # Loop variable and lookup string are local: this file is sourced, so an
  # undeclared `d` would overwrite any variable of that name in the caller.
  local d canon

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      warn "missing: ${d}"
      missing=$((missing + 1))
    fi
  done

  # Extra directories (drift the other way) — informational only.
  # Space-padding both sides lets a substring match act as a whole-entry match.
  canon=" ${GENOME_DIRS[*]} "
  while IFS= read -r d; do
    d="${d#"${base}/"}"
    [[ "$canon" == *" ${d} "* ]] && continue
    info "extra (not in canon): ${d}"
  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null)

  # The count doubles as the exit status; see the file header for the caveat.
  return $missing
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# structure_sync <base>
|
||||
# Creates any MISSING canonical directories (idempotent). Never deletes —
|
||||
# retiring a bucket is a deliberate, contract-aware change to GENOME_DIRS +
|
||||
# the templates, not an automatic prune.
|
||||
# ---------------------------------------------------------------------------
|
||||
structure_sync() {
  # Args:    $1  genome checkout to bring in line with GENOME_DIRS.
  # Effect:  creates each missing canonical directory plus a .gitkeep inside
  #          it; existing directories are left untouched and nothing is removed.
  # Returns: always 0.
  local base="$1"
  local added=0
  # Local loop variable: this file is sourced, so an undeclared `d` would
  # overwrite any variable of that name in the caller.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      mkdir -p "${base}/${d}"
      touch "${base}/${d}/.gitkeep"
      success "created: ${d}"
      added=$((added + 1))
    fi
  done

  if [[ $added -eq 0 ]]; then
    info "already in sync: ${base}"
  fi
  return 0
}
|
||||
|
|
@ -15,7 +15,6 @@ provider_create_repo() {
|
|||
local name="$1"
|
||||
local desc="$2"
|
||||
local private="$3"
|
||||
local auto_init="${4:-false}" # genomi: true (submodule add esige un branch). master: false (git init locale + push).
|
||||
|
||||
local http_code
|
||||
http_code=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
|
|
@ -26,7 +25,7 @@ provider_create_repo() {
|
|||
\"name\": \"${name}\",
|
||||
\"description\": \"${desc}\",
|
||||
\"private\": ${private},
|
||||
\"auto_init\": ${auto_init}
|
||||
\"auto_init\": false
|
||||
}")
|
||||
|
||||
case "$http_code" in
|
||||
|
|
|
|||
18
registry.sh
18
registry.sh
|
|
@ -12,24 +12,16 @@ _REGISTRY_LOADED=1
|
|||
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
# Dynamic paths
|
||||
WORK_DIR="${PROJECT_ROOT}"
|
||||
WORK_DIR="${HOME}/knowledge-genome-setup"
|
||||
KEYS_DIR="${WORK_DIR}/keys"
|
||||
TEMPLATES_DIR="${PROJECT_ROOT}/templates"
|
||||
LIB_DIR="${PROJECT_ROOT}/lib"
|
||||
PROVIDERS_DIR="${PROJECT_ROOT}/providers"
|
||||
|
||||
# --- GENOME REGISTRY ---
|
||||
# Format: "name|description|linked_repo|cross_source"
|
||||
# - linked_repo: OPTIONAL. Leave empty for knowledge-only genomes.
|
||||
# - cross_source: "yes" or "no" (default: no). Controls whether the collector
|
||||
# may read this genome as a source during cross-genome pulls.
|
||||
#
|
||||
# HOW TO CUSTOMIZE:
|
||||
# Replace the placeholder below with your actual genome domains.
|
||||
# Example: "genome-work|Work notes and architecture logs||no"
|
||||
# "genome-finance|Personal finance|user/repo-finance|no"
|
||||
# Format: "name|description"
|
||||
GENOMES=(
|
||||
# Disposable sandbox: exercise the full pipeline (ingest -> PR) end-to-end.
|
||||
# Created by `make setup`. Replace with real domains once the circle is validated.
|
||||
"genome-test|Disposable sandbox for pipeline testing||no"
|
||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
||||
"genome-finance|Personal finance, investments, market analysis"
|
||||
"genome-homelab|Keru infrastructure, network configs, architecture logs"
|
||||
)
|
||||
|
|
|
|||
25
scripts/add-genome.sh
Executable file → Normal file
25
scripts/add-genome.sh
Executable file → Normal file
|
|
@ -11,37 +11,16 @@ source "registry.sh"
|
|||
|
||||
GENOME_NAME="${1:-}"
|
||||
GENOME_DESC="${2:-}"
|
||||
GENOME_LINKED="${3:-}" # optional: linked project repo reference
|
||||
GENOME_CROSS_SOURCE="${4:-no}" # optional: cross_source flag (default: no)
|
||||
|
||||
# 1. Check mandatory arguments first
|
||||
if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
|
||||
error "Missing arguments."
|
||||
echo "Usage: $0 <genome-name> <description> [linked-repo] [cross_source]"
|
||||
echo " cross_source: yes|no (default: no)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 2. Then validate the flag if a non-default value was passed
|
||||
if [[ "$GENOME_CROSS_SOURCE" != "yes" && "$GENOME_CROSS_SOURCE" != "no" ]]; then
|
||||
error "Invalid cross_source value: $GENOME_CROSS_SOURCE"
|
||||
echo "cross_source must be 'yes' or 'no'"
|
||||
echo "Usage: $0 <genome-name> <description>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
step "Adding New Genome: ${GENOME_NAME}"
|
||||
|
||||
# Build a 4-field registry entry (linked_repo may be empty, cross_source defaults to no)
|
||||
GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}|${GENOME_CROSS_SOURCE}")
|
||||
|
||||
# NOTE — Maintenance smell
|
||||
# We source setup-genomes.sh as a library/orchestrator hybrid. This works because:
|
||||
# - registry.sh is guarded against double-source (idempotent guard)
|
||||
# - setup-genomes.sh checks WORK_DIR before re-sourcing registry.sh
|
||||
# - GENOMES is built locally just before the source, so it is not clobbered
|
||||
# However, sourcing an orchestration script as a library makes the control flow
|
||||
# harder to trace. If this grows, refactor into a shared function (e.g. setup_one_genome)
|
||||
# called by both add-genome.sh and setup-genomes.sh.
|
||||
GENOMES=("${GENOME_NAME}|${GENOME_DESC}")
|
||||
|
||||
source "scripts/setup-genomes.sh"
|
||||
|
||||
|
|
|
|||
0
scripts/lint-genomes.sh
Executable file → Normal file
0
scripts/lint-genomes.sh
Executable file → Normal file
18
scripts/setup-genomes.sh
Executable file → Normal file
18
scripts/setup-genomes.sh
Executable file → Normal file
|
|
@ -19,15 +19,13 @@ source "providers/${PROVIDER}.sh"
|
|||
step "Processing Genome Registry"
|
||||
|
||||
for entry in "${GENOMES[@]}"; do
|
||||
# 4-field format: name|description|linked_repo|cross_source. linked_repo is optional (may be empty); cross_source defaults to "no".
|
||||
IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE <<< "$entry"
|
||||
GENOME_CROSS_SOURCE="${GENOME_CROSS_SOURCE:-no}"
|
||||
export GENOME_NAME GENOME_DESC GENOME_LINKED GENOME_CROSS_SOURCE
|
||||
IFS='|' read -r GENOME_NAME GENOME_DESC <<< "$entry"
|
||||
export GENOME_NAME GENOME_DESC
|
||||
|
||||
info "Processing: ${GENOME_NAME} (cross_source: ${GENOME_CROSS_SOURCE})..."
|
||||
info "Processing: ${GENOME_NAME}..."
|
||||
|
||||
# 1. Remote Creation (Idempotent)
|
||||
provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true" "true"
|
||||
provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true"
|
||||
SSH_URL=$(provider_ssh_url "${GENOME_NAME}")
|
||||
|
||||
GENOME_PATH="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"
|
||||
|
|
@ -40,8 +38,6 @@ for entry in "${GENOMES[@]}"; do
|
|||
|
||||
cd "${GENOME_NAME}"
|
||||
|
||||
git switch -C main
|
||||
|
||||
# IMPORTANT: Initialize git-crypt BEFORE creating any files
|
||||
gcrypt_init
|
||||
|
||||
|
|
@ -51,18 +47,12 @@ for entry in "${GENOMES[@]}"; do
|
|||
# Initial genome push
|
||||
git add .
|
||||
git commit -m "feat: initial scaffold and git-crypt init for ${GENOME_NAME}"
|
||||
|
||||
git push -u origin main
|
||||
|
||||
# Key export and instructions
|
||||
gcrypt_export_key "${GENOME_NAME}"
|
||||
gcrypt_print_key_instructions "${GENOME_NAME}"
|
||||
|
||||
cd "${WORK_DIR}/${MASTER_REPO}"
|
||||
git add .gitmodules "${GENOME_NAME}"
|
||||
git diff --cached --quiet || git commit -m "chore: register submodule ${GENOME_NAME}"
|
||||
git push
|
||||
|
||||
# Commit the submodule reference in the master repo
|
||||
cd "${WORK_DIR}/${MASTER_REPO}"
|
||||
git commit -m "feat: add ${GENOME_NAME} as submodule"
|
||||
|
|
|
|||
2
scripts/setup-master.sh
Executable file → Normal file
2
scripts/setup-master.sh
Executable file → Normal file
|
|
@ -37,7 +37,5 @@ scaffold_master "."
|
|||
git add .
|
||||
git commit -m "chore: initialize master scaffold" || info "No changes to commit in master."
|
||||
|
||||
git branch -M main
|
||||
|
||||
# 3. Initial Push
|
||||
git push -u origin main
|
||||
|
|
|
|||
0
scripts/setup.sh
Executable file → Normal file
0
scripts/setup.sh
Executable file → Normal file
|
|
@ -1,50 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# scripts/verify-genomes.sh
|
||||
# Check (default) or --sync the directory structure of every registered genome
|
||||
# against the canonical layout in lib/structure.sh.
|
||||
#
|
||||
# bash scripts/verify-genomes.sh # report drift, non-zero exit on drift
|
||||
# bash scripts/verify-genomes.sh --sync # create missing dirs everywhere (safe)
|
||||
#
|
||||
# No hardware/LLM involved — pure structure check. Run anywhere.
|
||||
# =============================================================================
|
||||
set -euo pipefail

source "lib/output.sh"
source "globals.env"
source "registry.sh"
source "lib/structure.sh"

# Mode selection: "--sync" as the first argument creates missing directories;
# anything else (including no argument) only reports drift.
case "${1:-}" in
  --sync) MODE="sync" ;;
  *)      MODE="verify" ;;
esac

step "Genome structure: ${MODE}"

TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do
  # Only the first registry field (the genome name) is needed here.
  IFS='|' read -r GENOME_NAME _ <<< "$entry"
  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  if [[ ! -d "$genome_dir" ]]; then
    warn "not found locally, skipping: ${GENOME_NAME}"
    continue
  fi

  info "Genome: ${GENOME_NAME}"
  case "$MODE" in
    sync)
      structure_sync "$genome_dir"
      ;;
    *)
      # structure_report returns its missing-directory count as the exit
      # status; testing it in an `if` keeps `set -e` from aborting on drift.
      if structure_report "$genome_dir"; then
        m=0
      else
        m=$?
      fi
      TOTAL_MISSING=$((TOTAL_MISSING + m))
      ;;
  esac
done

echo ""
# Drift is only a failure in verify mode; sync mode always ends successfully.
if [[ "$MODE" != "sync" && $TOTAL_MISSING -ne 0 ]]; then
  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
  exit 1
fi

if [[ "$MODE" == "sync" ]]; then
  success "Structure sync complete."
else
  success "Structure verified: all genomes match the canonical layout."
fi
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
---
|
||||
name: ingest
|
||||
description: Semantic pass of a single raw source into the current genome's wiki. The model ONLY extracts structured semantic content (summary, entities, concepts, contradictions) and returns one JSON object — it does not write files, produce frontmatter, slugs, git, index, log or PRs. A deterministic conform script (ingest-semantic.py) turns that JSON into properly-structured wiki pages + a manifest; run-ingest.sh then does index/log/lint/PR.
|
||||
license: see repository
|
||||
compatibility: Driven by scripts/ingest-semantic.py (one schema-constrained call to a local model via Ollama /api/chat). NO agent tools are used — no read, no edit, no bash. The model never touches the filesystem. PRIVATE_CONTEXT must be disabled.
|
||||
metadata:
|
||||
framework: knowledge-genome
|
||||
phase: "1-ingest-semantic"
|
||||
mode: structured-json # lightweight agent + deterministic conform
|
||||
---
|
||||
|
||||
# Ingest — semantic pass (structured-JSON)
|
||||
|
||||
This is the **light** semantic pass. The model's only job is to read one source
|
||||
and return a single JSON object describing what the source contains. It does
|
||||
**not** write files, choose paths, produce frontmatter, pick slugs, or touch
|
||||
git / index / log / PRs. All structure is owned by `scripts/ingest-semantic.py`,
|
||||
which conforms the model's JSON into wiki pages with enforced kebab-case paths
|
||||
and frontmatter, and writes `.ingest-manifest.json` in the exact schema
|
||||
`run-ingest.sh` consumes. This keeps the agent minimal and makes the output
|
||||
impossible to mis-shape, regardless of how small or quirky the local model is.
|
||||
|
||||
Pipeline:
|
||||
|
||||
```bash
cd <genome checkout>
scripts/ingest-semantic.py <genome> raw/articles/<file>.md   # phase 1 (this)
scripts/run-ingest.sh <genome>                                # phase 2 (deterministic)
```
|
||||
|
||||
## Pre-flight (enforced by ingest-semantic.py, not by the model)
|
||||
|
||||
1. Refuse if the source path is under any `private/` directory.
|
||||
2. Refuse if `PRIVATE_CONTEXT` is not `disabled`.
|
||||
3. Confirm the file exists under `raw/` and is non-empty.
|
||||
|
||||
## What the model returns (the only contract)
|
||||
|
||||
A single JSON object, decoding-constrained to this shape via Ollama's `format`:
|
||||
|
||||
```json
|
||||
{
|
||||
"source_title": "Human title of the source",
|
||||
"source_summary": "Faithful, self-contained prose summary of the source.",
|
||||
"key_points": ["Concrete fact or claim worth indexing", "..."],
|
||||
"entities": [
|
||||
{ "name": "Acme", "kind": "org", "description": "Vendor referenced by the source." }
|
||||
],
|
||||
"concepts": [
|
||||
{ "name": "JWT RS256", "description": "Asymmetric token signing scheme the source uses." }
|
||||
],
|
||||
"contradictions": [
|
||||
{ "concept": "auth", "description": "Source claims X, contradicting the existing claim Y." }
|
||||
],
|
||||
"reasoning": "One sentence for the log: what this source adds.",
|
||||
"pr_summary": "One or two sentences describing this ingest for the PR."
|
||||
}
|
||||
```
|
||||
|
||||
Field rules (guidance for the model; the script enforces _structure_):
|
||||
|
||||
- `source_summary` is faithful and in the source's own language. No markdown
|
||||
headings inside any description field. No padding.
|
||||
- `entities` = every person, tool, org or product the source names. `kind` ∈
|
||||
`person|tool|org|product`. `description` = one or two factual sentences.
|
||||
- `concepts` = every pattern, theory, decision or named idea the source explains.
|
||||
- `contradictions` = only a claim that directly contradicts a widely-known fact
|
||||
or contradicts the source itself; otherwise an empty list.
|
||||
- Names are the natural name of the thing. The script normalises them to
|
||||
kebab-case and guarantees a single stable page per entity/concept.
|
||||
|
||||
## What the conform script guarantees (so the model cannot break it)
|
||||
|
||||
- **Paths:** `wiki/sources/<slug>.md`, `wiki/entities/<slug>.md`,
|
||||
`wiki/concepts/<slug>.md`, `wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`.
|
||||
- **Slugs:** minimal kebab-case (lowercase, digits, hyphens; no spaces /
|
||||
underscores / capitals).
|
||||
- **Frontmatter:** `type`, `domain: <genome>`, `maturity: draft`,
|
||||
`last_updated: <today>`, `private: false`, `tags`.
|
||||
- **Create-vs-update:** existing entity/concept pages are **appended to** (a
|
||||
section attributed to the new source), never overwritten. The source page is
|
||||
the canonical summary of that exact source and is (re)written.
|
||||
- **Manifest:** `.ingest-manifest.json` with `raw_source`, `reasoning`,
|
||||
`pr_summary`, `contradictions` (string), and `pages[]` (`path`, `summary`,
|
||||
`status`, plus `maturity` on created pages) — exactly what `run-ingest.sh`
|
||||
validates.
|
||||
|
||||
The model name is recorded by the orchestrator (`INGEST_MODEL`); the model does
|
||||
not self-report it. No `run_id`, branch, commit or PR is invented here — those
|
||||
belong to phase 2.
|
||||
|
||||
> Interactive use of `pi` (TUI) is unaffected and still available for manual
|
||||
> exploration. The **automated** ingest path no longer relies on `pi` or on
|
||||
> native tool-calling: it is the single schema-constrained call above.
|
||||
|
|
@ -1,152 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/index-append.py
|
||||
# Insert OR remove an entry line in wiki/index.md, keeping the target section
|
||||
# alphabetically ordered. Bumps frontmatter last_updated.
|
||||
#
|
||||
# index-append.py --section Sources \
|
||||
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
|
||||
# index-append.py --remove 'sources/foo' # delete the entry by wikilink
|
||||
# =============================================================================
|
||||
import argparse
|
||||
import datetime
|
||||
import re
|
||||
import sys
|
||||
|
||||
ENTRY_RE = re.compile(r"^- \[\[")
|
||||
LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
|
||||
HEADER_RE = re.compile(r"^## ")
|
||||
|
||||
|
||||
def bump_last_updated(lines, today):
    """Bump (or self-heal) ``last_updated`` inside the leading frontmatter block.

    ``lines`` is the file content as a list of lines (no newlines); it is
    modified in place and nothing is returned.

    - Frontmatter is recognised only when ``---`` is the first non-blank line,
      so a horizontal rule further down a page without frontmatter is never
      mistaken for an opening fence.
    - Every ``last_updated:`` line between the opening and closing ``---`` is
      rewritten to ``last_updated: <today>``.
    - If the block is closed and holds no such key, one is inserted right
      before the closing ``---`` and a notice is printed to stderr.
    - With no frontmatter at all, a warning is printed to stderr and ``lines``
      is left untouched.
    """
    # Locate the opening fence: it must be the first non-blank line.
    fm_start = None
    for i, ln in enumerate(lines):
        if ln.strip() == "":
            continue
        if ln.strip() == "---":
            fm_start = i
        break

    if fm_start is None:
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
        return

    bumped = False
    fm_close_idx = None
    for i in range(fm_start + 1, len(lines)):
        ln = lines[i]
        if ln.strip() == "---":
            fm_close_idx = i
            break
        if ln.startswith("last_updated:"):
            lines[i] = f"last_updated: {today}"
            bumped = True

    # Self-heal only when the block is properly closed; an unclosed block has
    # no safe insertion point.
    if not bumped and fm_close_idx is not None:
        lines.insert(fm_close_idx, f"last_updated: {today}")
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)
|
||||
|
||||
|
||||
def do_remove(lines, link, today):
    """Drop every index entry whose wikilink target equals ``link``.

    The frontmatter date is bumped first (whether or not anything ends up
    being removed), then a filtered copy of ``lines`` is returned. Asking to
    remove a link that is already absent is not an error: only the status
    message differs.
    """
    bump_last_updated(lines, today)

    def points_at_link(line):
        found = LINK_RE.match(line)
        return bool(found) and found.group(1) == link

    survivors = [line for line in lines if not points_at_link(line)]
    dropped = len(lines) - len(survivors)

    if dropped:
        print(f"index-append: removed [[{link}]] ({dropped} line(s))")
    else:
        # Idempotent: the goal state (entry absent) already holds.
        print(f"index-append: [[{link}]] not present, nothing to remove")
    return survivors
|
||||
|
||||
|
||||
def do_append(lines, section, entry, today):
    """Insert or refresh ``entry`` in the ``## <section>`` block of the index.

    The frontmatter date is bumped first. The section runs from its header
    (matched by prefix on the text after ``## ``) to the next ``## `` header or
    the end of the file. Inside it, non-entry lines are kept ahead of the
    entries, and the entries are re-sorted case-insensitively.

    Returns the new list of lines, the unchanged ``lines`` object when the exact
    entry is already present, or ``None`` when the section does not exist.
    """
    bump_last_updated(lines, today)

    # Section bounds: [head_idx, tail_idx)
    head_idx = next(
        (pos for pos, text in enumerate(lines)
         if HEADER_RE.match(text) and text[3:].startswith(section)),
        None,
    )
    if head_idx is None:
        print(f"index-append: section '{section}' not found", file=sys.stderr)
        return None

    tail_idx = next(
        (pos for pos in range(head_idx + 1, len(lines)) if HEADER_RE.match(lines[pos])),
        len(lines),
    )

    # Split the section body into free text and wikilink bullets.
    preamble, bullets = [], []
    for text in lines[head_idx + 1:tail_idx]:
        (bullets if ENTRY_RE.match(text) else preamble).append(text)

    def link_of(text):
        hit = LINK_RE.match(text)
        return hit.group(1) if hit else None

    target = link_of(entry)
    if target is None:
        # Entry without a parsable wikilink: de-duplicate on the whole line.
        if entry in bullets:
            print("index-append: entry already present, skipping")
            return lines
        bullets.append(entry)
    else:
        # Same wikilink already listed -> replace that line in place.
        slot = next((k for k, text in enumerate(bullets) if link_of(text) == target), None)
        if slot is None:
            bullets.append(entry)
        elif bullets[slot] == entry:
            print("index-append: entry already present, skipping")
            return lines
        else:
            bullets[slot] = entry

    bullets.sort(key=str.casefold)
    # Exactly one blank line between the intro text and the entries.
    while preamble and not preamble[-1].strip():
        preamble.pop()

    print(f"index-append: added to {section}")
    return lines[:head_idx + 1] + preamble + [""] + bullets + [""] + lines[tail_idx:]
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point.

    Exactly one of ``--entry`` (with ``--section``) or ``--remove`` must be
    given. Returns the process exit status: 0 on success, 1 when the index
    file or the target section is missing, 2 on a usage error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--section", help="Section name (required with --entry)")
    parser.add_argument("--entry", help="Full index line to insert")
    parser.add_argument("--remove", metavar="WIKILINK",
                        help="Remove the entry with this wikilink, e.g. sources/foo")
    parser.add_argument("--file", default="wiki/index.md")
    opts = parser.parse_args()

    removing = bool(opts.remove)
    inserting = bool(opts.entry)
    if removing == inserting:
        print("index-append: provide exactly one of --entry or --remove", file=sys.stderr)
        return 2
    if inserting and not opts.section:
        print("index-append: --entry requires --section", file=sys.stderr)
        return 2

    try:
        with open(opts.file, encoding="utf-8") as fh:
            lines = fh.read().splitlines()
    except FileNotFoundError:
        print(f"index-append: not found: {opts.file}", file=sys.stderr)
        return 1

    today = datetime.date.today().isoformat()
    if removing:
        out = do_remove(lines, opts.remove, today)
    else:
        out = do_append(lines, opts.section, opts.entry, today)
        if out is None:
            return 1

    with open(opts.file, "w", encoding="utf-8") as fh:
        fh.write("\n".join(out) + "\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
@ -1,374 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/ingest-semantic.py
|
||||
# Phase 1 (semantic) of the Knowledge Genome ingest — light agent + deterministic conform.
|
||||
#
|
||||
# - FIXED: Add 'title:' field to frontmatter (lint was complaining about missing title)
|
||||
# - NEW: Inject existing index (entity/concept names) into prompt to prevent duplicates
|
||||
# - NEW: Richer prompt asking for 2-4 sentences per description (not 1-2), with concrete details
|
||||
# - Enhanced schema to handle longer descriptions naturally
|
||||
#
|
||||
# The model does ONLY semantic extraction and returns ONE schema-constrained JSON
|
||||
# object (no tools, no file writing, no git, no frontmatter, no slugs). This script
|
||||
# then CONFORMS that output deterministically into wiki pages with enforced
|
||||
# frontmatter + kebab-case paths, and writes a .ingest-manifest.json in EXACTLY the
|
||||
# schema run-ingest.sh expects.
|
||||
#
|
||||
# cd <genome checkout>
|
||||
# ingest-semantic.py <genome> raw/articles/<file>.md # phase 1 (this)
|
||||
# run-ingest.sh <genome> # phase 2 (deterministic)
|
||||
#
|
||||
# Emits a single JSON status line on stdout (for n8n / logs).
|
||||
# =============================================================================
|
||||
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
|
||||
|
||||
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
||||
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
|
||||
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
|
||||
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
|
||||
# INGEST_THINK: "false" disables a reasoning model's thinking trace, so models like
|
||||
# gemma / qwq / qwen3 emit only the structured JSON (no truncation from long thinking).
|
||||
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
|
||||
THINK = os.environ.get("INGEST_THINK")
|
||||
TODAY = datetime.date.today().isoformat()
|
||||
FEEDBACK = os.environ.get("INGEST_FEEDBACK", "").strip()
|
||||
|
||||
|
||||
def die(stage, reason):
    """Print the one-line JSON error status on stdout and exit with code 1."""
    failure = {"status": "error", "stage": stage, "reason": reason}
    sys.stdout.write(json.dumps(failure) + "\n")
    sys.exit(1)
|
||||
|
||||
|
||||
# --- args + pre-flight (mirror the old skill's guards, enforced in code) ---
|
||||
if len(sys.argv) < 3:
    die("args", "usage: ingest-semantic.py <genome> <raw/rel/path.md>")
genome = sys.argv[1]
# Normalise the path rather than using str.lstrip("./"): lstrip removes a *set
# of characters*, so "../raw/x.md" and "/raw/x.md" were silently rewritten to
# "raw/x.md" and ".hidden/x.md" lost its dot. normpath drops a leading "./",
# collapses "a/../b" and duplicate slashes, and leaves "../" and absolute
# paths intact, so the guards below reject them (including "raw/../..."
# traversal). Paths are POSIX-style: the pipeline is driven by bash helpers.
raw_rel = os.path.normpath(sys.argv[2])

# Refuse anything private before touching the file.
if "private/" in raw_rel or raw_rel.startswith("private"):
    die("preflight", "refusing private source: " + raw_rel)
if os.environ.get("PRIVATE_CONTEXT", "disabled") != "disabled":
    die("preflight", "PRIVATE_CONTEXT must be disabled")
if not raw_rel.startswith("raw/"):
    die("preflight", "source must live under raw/: " + raw_rel)
if not os.path.isfile(raw_rel):
    die("preflight", "source not found in cwd: " + raw_rel)

with open(raw_rel, "r", encoding="utf-8") as fh:
    source_text = fh.read()
if not source_text.strip():
    die("preflight", "source is empty: " + raw_rel)
|
||||
|
||||
# --- pre-flight check: if the prompt exceeds context window, exit cleanly with stage:input ---
|
||||
# Conservative estimate: ~4 chars/token for mixed IT/EN text
|
||||
SAFETY_MARGIN = 4096 # room for system prompt + JSON response
|
||||
MAX_SOURCE_TOKENS = NUM_CTX - SAFETY_MARGIN
|
||||
MAX_SOURCE_CHARS = MAX_SOURCE_TOKENS * 4
|
||||
|
||||
if len(source_text) > MAX_SOURCE_CHARS:
|
||||
die("input", f"source too large ({len(source_text)} chars, limit ~{MAX_SOURCE_CHARS}). "
|
||||
f"Use the SPLIT directive or divide the document.")
|
||||
|
||||
# --- read existing index to avoid duplicate slugs ---
|
||||
# Slugs already listed in wiki/index.md, offered to the model so it reuses
# existing pages instead of inventing near-duplicate names.
existing_entities = set()
existing_concepts = set()
if os.path.isfile("wiki/index.md"):
    try:
        with open("wiki/index.md", "r", encoding="utf-8") as index_file:
            idx_text = index_file.read()
        # [[entities/<slug>]] and [[concepts/<slug>]] wikilinks
        existing_entities.update(re.findall(r"\[\[entities/([a-z0-9\-]+)\]\]", idx_text))
        existing_concepts.update(re.findall(r"\[\[concepts/([a-z0-9\-]+)\]\]", idx_text))
    except Exception:
        pass  # index not readable or not found; that's OK
|
||||
|
||||
|
||||
def slugify(s):
    """Reduce any text to a minimal kebab-case slug.

    Only runs of ``a-z`` / ``0-9`` survive (after lower-casing); they are joined
    with single hyphens. Returns ``"untitled"`` when nothing usable is left
    (``None``, empty text, or text without ASCII letters or digits).
    """
    lowered = (s or "").strip().lower()
    words = re.findall(r"[a-z0-9]+", lowered)
    return "-".join(words) or "untitled"
|
||||
|
||||
|
||||
def twords(s, n=20):
    """Collapse whitespace and cut the text after ``n`` words, adding "…" if cut.

    Used for the one-line summaries of index entries. ``None`` is treated as
    empty text.
    """
    tokens = (s or "").split()
    # Empty text counts as a single (empty) word, as a plain split(" ") would.
    word_count = len(tokens) or 1
    if word_count <= n:
        return " ".join(tokens)
    return " ".join(tokens[:n]) + "…"
|
||||
|
||||
|
||||
def yaml_dq(s):
    """Render a value as a single-line YAML double-quoted scalar.

    A bare scalar breaks on many title characters: a colon followed by a space
    turns 'Conflict: X' into a mapping, and '#', a leading '-' or '?', or
    quotes cause similar trouble. Wrapping the text in double quotes, with
    backslashes and double quotes escaped, keeps any title valid YAML. All
    whitespace runs, newlines included, collapse to one space so the scalar
    stays on one line. ``None`` yields an empty quoted scalar.
    """
    one_line = " ".join((s or "").split())
    escapes = {ord("\\"): "\\\\", ord('"'): '\\"'}
    return '"' + one_line.translate(escapes) + '"'
|
||||
|
||||
|
||||
def frontmatter(ptype, title, tags):
    """Build the enforced YAML frontmatter block for a wiki page.

    The title is double-quoted via ``yaml_dq``; tags are de-duplicated, sorted
    and emitted as a flow list, with falsy tags dropped. ``domain`` comes from
    the module-level ``genome`` and ``last_updated`` from ``TODAY``. The result
    ends with the closing ``---`` line and a newline.
    """
    unique_tags = sorted({tag for tag in tags if tag})
    fields = [
        "---",
        f"title: {yaml_dq(title)}",
        f"type: {ptype}",
        f"domain: {genome}",
        "maturity: draft",
        f"last_updated: {TODAY}",
        "private: false",
        "tags: [" + ", ".join(unique_tags) + "]",
        "---",
    ]
    return "\n".join(fields) + "\n"
|
||||
|
||||
|
||||
def write_new(path, ptype, title, body, tags):
    """Create (or overwrite) a wiki page: frontmatter, an H1 title, then ``body``.

    Parent directories are created on demand.
    """
    parent = os.path.dirname(path)
    os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.writelines([
            frontmatter(ptype, title, tags),
            f"\n# {title}\n\n{body}\n",
        ])
|
||||
|
||||
|
||||
def append_section(path, source_slug, body):
    """Append a section attributed to ``source_slug`` to an existing page.

    Existing content is never overwritten: the new text is added at the end
    under a ``## From [[sources/<slug>]]`` heading. Afterwards the first
    ``last_updated:`` line of the page is refreshed to today's date; that
    second step is best-effort and any failure in it is ignored.
    """
    addition = f"\n\n## From [[sources/{source_slug}]]\n\n{body}\n"
    with open(path, "a", encoding="utf-8") as page:
        page.write(addition)

    stamp = re.compile(r"^last_updated:.*$", re.MULTILINE)
    try:
        with open(path, "r+", encoding="utf-8") as page:
            current = page.read()
            refreshed = stamp.sub("last_updated: " + TODAY, current, count=1)
            page.seek(0)
            page.write(refreshed)
            page.truncate()
    except Exception:
        # best-effort bump only; the appended section is already on disk
        pass
|
||||
|
||||
|
||||
# --- the semantic contract ---
|
||||
SYSTEM_PROMPT = """You perform the SEMANTIC PASS of a single source into a knowledge wiki.
|
||||
Read the source and return ONLY structured data describing what it contains.
|
||||
You do not write files, you do not produce frontmatter, and you do not invent
|
||||
paths, slugs, branches, commits or PRs — a deterministic script does all of that.
|
||||
|
||||
Rules:
|
||||
- source_summary: a faithful, self-contained summary of the source, in the
|
||||
source's own language. Plain prose, NO markdown headings. 2-4 sentences,
|
||||
with concrete details. Preserve the essence and nuance of the source.
|
||||
- key_points: 3-5 concrete facts or claims worth indexing; no padding.
|
||||
- entities: every person, tool, organisation or product the source names.
|
||||
kind is one of person|tool|org|product. description is 2-3 factual sentences
|
||||
with specifics. No markdown headings inside the description.
|
||||
- concepts: every pattern, theory, decision or named idea the source explains.
|
||||
description is 2-3 factual sentences with concrete examples or context.
|
||||
- contradictions: ONLY when the source makes a claim that directly contradicts a
|
||||
widely-known fact or contradicts itself. Otherwise return an empty list.
|
||||
- Names must be the natural name of the thing; the script will normalise them.
|
||||
|
||||
If the source references an entity or concept already in the wiki (see the list below),
|
||||
use the EXACT name already present; do not invent a variant. This prevents duplicates.
|
||||
|
||||
Existing entities in this genome:
|
||||
{existing_entities}
|
||||
|
||||
Existing concepts in this genome:
|
||||
{existing_concepts}
|
||||
|
||||
Be faithful to the source. Be specific. Do not pad or improvise."""
|
||||
|
||||
|
||||
# --- JSON schema -> constrained decoding (Ollama structured outputs) ---
|
||||
SCHEMA = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"source_title": {"type": "string"},
|
||||
"source_summary": {"type": "string"},
|
||||
"key_points": {"type": "array", "items": {"type": "string"}},
|
||||
"entities": {"type": "array", "items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"kind": {"type": "string",
|
||||
"enum": ["person", "tool", "org", "product"]},
|
||||
"description": {"type": "string"},
|
||||
},
|
||||
"required": ["name", "description"],
|
||||
}},
|
||||
"concepts": {"type": "array", "items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
},
|
||||
"required": ["name", "description"],
|
||||
}},
|
||||
"contradictions": {"type": "array", "items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"concept": {"type": "string"},
|
||||
"description": {"type": "string"},
|
||||
},
|
||||
"required": ["concept", "description"],
|
||||
}},
|
||||
"reasoning": {"type": "string"},
|
||||
"pr_summary": {"type": "string"},
|
||||
},
|
||||
"required": ["source_title", "source_summary", "entities", "concepts"],
|
||||
}
|
||||
|
||||
|
||||
def call_model(max_retries=2, base_delay=2.0):
    """Make the single schema-constrained Ollama chat call and return its JSON object.

    The system prompt gets the existing entity/concept slugs injected; the user
    message carries the source text, preceded by the maintainer's feedback when
    INGEST_FEEDBACK is set. The request is POSTed to OLLAMA_URL with
    ``format=SCHEMA`` and ``stream=False``.

    Retry policy: up to ``max_retries`` extra attempts, with exponential backoff
    starting at ``base_delay`` seconds.
      - Retried: transport failures (any OSError, which covers URLError,
        HTTPError, socket timeouts while reading the body, and connection
        resets), an API envelope that is not a JSON object, and model content
        shorter than 10 characters that fails to parse (likely truncated).
      - Not retried: longer content that is not a valid JSON object. That is a
        model issue, and repeating the same call is unlikely to help.

    Returns the parsed dict. On final failure calls ``die("model", ...)``, which
    prints the JSON error status and exits.
    """
    existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
    existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
    prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents,
                                  existing_concepts=existing_conc)

    user_content = (
        ("REVISION REQUESTED BY THE MAINTAINER (address this explicitly):\n"
         + FEEDBACK + "\n\n") if FEEDBACK else ""
    ) + (
        "Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
        + source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."
    )

    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_content},
        ],
        "format": SCHEMA,
        "stream": False,
        "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
    }
    # Only send "think" when INGEST_THINK is set; plain instruct models get no flag.
    if THINK is not None:
        payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
    data = json.dumps(payload).encode("utf-8")

    last_error = None
    for attempt in range(max_retries + 1):
        if attempt > 0:
            delay = base_delay * (2 ** (attempt - 1))
            print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}",
                  file=sys.stderr)
            time.sleep(delay)

        req = urllib.request.Request(OLLAMA_URL, data=data,
                                     headers={"Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                resp = json.loads(r.read().decode("utf-8"))
        except OSError as e:
            # URLError/HTTPError are OSError subclasses, as are socket timeouts
            # raised while reading the body and connection resets.
            last_error = f"connection/transport error: {e}"
            continue
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueError.
            last_error = f"invalid JSON from Ollama API: {e}"
            continue
        if not isinstance(resp, dict):
            last_error = "invalid JSON from Ollama API: response is not an object"
            continue

        content = ((resp.get("message") or {}).get("content") or "").strip()
        # Tolerate a fenced reply (```json ... ```): drop the backticks and any
        # language tag that precedes the first brace.
        if content.startswith("```"):
            content = content.strip("`")
            brace = content.find("{")
            if brace >= 0:
                content = content[brace:]
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            last_error = f"model did not return valid JSON: {e}"
            if len(content) < 10:
                continue  # likely truncated -> retry
            break         # long but malformed -> model issue, stop
        if isinstance(parsed, dict):
            return parsed
        # Valid JSON but not an object: callers use .get() on the result.
        last_error = "model did not return valid JSON: top-level value is not an object"
        break

    die("model", last_error or "model call failed after retries")
|
||||
|
||||
# --- run the semantic pass ---
|
||||
sem = call_model()

# Source of truth: slug from slug.sh --raw (deterministic, path-aware, collision-proof)
# A failing helper must still yield the single JSON status line on stdout, not a
# Python traceback, so failures are routed through die().
slug_script = os.path.join(os.path.dirname(__file__), "slug.sh")
try:
    source_slug = subprocess.check_output(
        ["bash", slug_script, "--raw", raw_rel],
        text=True
    ).strip()
except (OSError, subprocess.CalledProcessError) as e:
    die("slug", f"slug.sh failed for {raw_rel}: {e}")
if not source_slug:
    # An empty slug would produce the path "wiki/sources/.md".
    die("slug", "slug.sh returned an empty slug for " + raw_rel)
|
||||
|
||||
# Hash of the raw bytes, stored in the source page frontmatter as source_sha256.
with open(raw_rel, "rb") as f:
    src_sha = hashlib.sha256(f.read()).hexdigest()

pages = []

# 1. source page — canonical summary of THIS source (re)written
src_path = f"wiki/sources/{source_slug}.md"
src_status = "modified" if os.path.exists(src_path) else "created"
kp_lines = "\n".join("- " + p for p in (sem.get("key_points") or []) if p.strip())
src_body = (sem.get("source_summary") or "").strip()
if kp_lines:
    src_body += "\n\n## Key points\n\n" + kp_lines
src_body += f"\n\n## Source\n\n- [[{raw_rel}]]\n"
src_title = sem.get('source_title') or source_slug
# Tags come from the named entities and concepts only. Nameless items are
# skipped here exactly as handle() skips them, so no "untitled" tag is
# emitted for a page that is never created.
tag_names = [it.get("name") for it in (sem.get("entities") or []) + (sem.get("concepts") or [])]
src_tags = [slugify(n) for n in tag_names if (n or "").strip()][:8]
os.makedirs("wiki/sources", exist_ok=True)
with open(src_path, "w", encoding="utf-8") as f:
    fm = frontmatter("source", src_title, src_tags)
    # Inject tracking fields before the closing '---'. The opening fence has no
    # newline before it, so the first "\n---\n" is always the closing one.
    fm = fm.replace("\n---\n", f"\nsource_path: {raw_rel}\nsource_sha256: {src_sha}\n---\n", 1)
    f.write(fm)
    f.write(f"\n# {src_title}\n\n{src_body}\n")
pages.append({"path": src_path,
              "summary": twords(src_title),
              "maturity": "draft", "status": src_status})
|
||||
|
||||
|
||||
def handle(kind_dir, ptype, items):
    """Conform extracted entities or concepts into pages under ``wiki/<kind_dir>/``.

    Items with a blank name are skipped. An existing page gets an attributed
    section appended and is recorded as "modified"; otherwise a new draft page
    is written and recorded as "created". One manifest record per item is
    appended to the module-level ``pages`` list.
    """
    for item in items or []:
        label = (item.get("name") or "").strip()
        if not label:
            continue
        target = "wiki/{}/{}.md".format(kind_dir, slugify(label))
        text = (item.get("description") or "").strip()
        record = {"path": target, "summary": twords(text)}
        if os.path.exists(target):
            append_section(target, source_slug, text)
            record["status"] = "modified"
        else:
            write_new(target, ptype, label,
                      text + f"\n\n## Sources\n\n- [[sources/{source_slug}]]\n",
                      [genome, ptype])
            record["maturity"] = "draft"
            record["status"] = "created"
        pages.append(record)
|
||||
|
||||
|
||||
# 2. entities, 3. concepts
|
||||
handle("entities", "entity", sem.get("entities", []))
|
||||
handle("concepts", "concept", sem.get("concepts", []))
|
||||
|
||||
# 4. contradictions -> conflict pages (run-ingest routes wiki/queries/conflict-*)
|
||||
conflicts = sem.get("contradictions") or []

# Group contradictions by slug first. The page path depends only on
# (slug, date), so two contradictions about the same concept used to overwrite
# each other and list the same path twice in the manifest. They now share one
# page, with their descriptions concatenated in order.
merged = {}  # cslug -> {"concept": first label seen, "notes": [descriptions]}
for c in conflicts:
    cslug = slugify(c.get("concept", "unknown"))
    entry = merged.setdefault(cslug, {"concept": c.get("concept", ""), "notes": []})
    note = (c.get("description") or "").strip()
    if note:
        entry["notes"].append(note)

conf_slugs = []
for cslug, entry in merged.items():
    conf_slugs.append(cslug)
    path = f"wiki/queries/conflict-{cslug}-{TODAY}.md"
    write_new(path, "query", f"Conflict: {entry['concept']}",
              "\n\n".join(entry["notes"])
              + f"\n\n## Source\n\n- [[sources/{source_slug}]]\n",
              [genome, "conflict"])
    pages.append({"path": path, "summary": "", "maturity": "draft",
                  "status": "created"})

# Human-readable summary string for the manifest's `contradictions` field.
contradictions_str = ("None" if not conf_slugs
                      else f"{len(conf_slugs)} conflict file(s) created — "
                      + ", ".join(conf_slugs))
|
||||
|
||||
# --- write the manifest in EXACTLY run-ingest.sh's schema ---
|
||||
manifest = {
|
||||
"raw_source": raw_rel,
|
||||
"reasoning": sem.get("reasoning") or ("Ingest of " + raw_rel),
|
||||
"pr_summary": sem.get("pr_summary") or ("Semantic ingest of " + raw_rel),
|
||||
"contradictions": contradictions_str,
|
||||
"pages": pages,
|
||||
}
|
||||
with open(".ingest-manifest.json", "w", encoding="utf-8") as f:
|
||||
json.dump(manifest, f, indent=2, ensure_ascii=False)
|
||||
|
||||
print(json.dumps({"status": "ok", "stage": "semantic",
|
||||
"pages": len(pages), "model": MODEL,
|
||||
"manifest": ".ingest-manifest.json"}))
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/log-append.sh
|
||||
# Append one entry to the append-only ledger wiki/log.md, in the exact format
|
||||
# defined by AGENTS.md / wiki-log.md. Generates run_id. Never edits prior entries.
|
||||
#
|
||||
# log-append.sh --type INGEST --subject "<slug>" --model "<model>" \
|
||||
# --context "[[raw/x]]" --output "[[sources/x]]" \
|
||||
# --reasoning "One sentence."
|
||||
# =============================================================================
|
||||
set -euo pipefail

LOG_FILE="${LOG_FILE:-wiki/log.md}"

# Every variable the option parser may set is declared up front, run_id_arg
# included, so nothing depends on ${var:-} fallbacks under `set -u`.
type="" subject="" model="" context="" output="" reasoning="" run_id_arg=""
while [[ $# -gt 0 ]]; do
  opt="$1"
  case "$opt" in
    --type|--subject|--model|--context|--output|--reasoning|--run-id)
      # Each option takes exactly one value; say so explicitly instead of
      # dying on bash's "$2: unbound variable".
      [[ $# -ge 2 ]] || { echo "log-append: ${opt} requires a value" >&2; exit 1; }
      val="$2"; shift 2 ;;
    *) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
  esac
  case "$opt" in
    --type)      type="$val" ;;
    --subject)   subject="$val" ;;
    --model)     model="$val" ;;
    --context)   context="$val" ;;
    --output)    output="$val" ;;
    --reasoning) reasoning="$val" ;;
    --run-id)    run_id_arg="$val" ;;
  esac
done

: "${type:?--type required}"
: "${subject:?--subject required}"

case "$type" in
  INGEST|LINT|QUERY|CONFLICT|CONFIG|SECURITY) ;;
  *) echo "log-append: invalid TYPE '${type}'" >&2; exit 1 ;;
esac

# The ledger must already exist: this script only ever appends to it.
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }

# Use the caller's run id if given, otherwise generate a UUID with the first
# tool available.
run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
today="$(date +%Y-%m-%d)"

# Idempotence: an entry with this run_id is never written twice.
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
  echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
  echo "run_id=${run_id}"
  exit 0
fi

{
  printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
  printf -- '- run_id: `%s`\n' "$run_id"
  printf -- '- model: `%s`\n' "${model:-unknown}"
  printf -- '- context_read: %s\n' "${context:-*(none)*}"
  printf -- '- output_written: %s\n' "${output:-*(none)*}"
  printf -- '- reasoning: %s\n' "${reasoning:-No reasoning provided.}"
} >> "$LOG_FILE"

# Only line on stdout: lets the caller capture the run id.
echo "run_id=${run_id}"
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/open-pr.sh
|
||||
# Branch, commit (conventional), push, and open a Forgejo PR for the wiki/ changes.
|
||||
# Mirrors the API conventions of providers/forgejo.sh (token auth + http_code).
|
||||
# Runs inside the genome checkout (cwd = genome root). Never touches main.
|
||||
#
|
||||
# open-pr.sh --slug <slug> --title "feat: ingest <slug>" --body-file <path> \
|
||||
# [--base main] [--label CONFLICT]
|
||||
#
|
||||
# Requires env: FORGEJO_URL, FORGEJO_USER, FORGEJO_TOKEN.
|
||||
# =============================================================================
|
||||
set -euo pipefail

: "${FORGEJO_URL:?missing FORGEJO_URL}"
: "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"

slug="" title="" body_file="" base="main" label="" branch="" number=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --slug)      slug="$2"; shift 2 ;;
    --branch)    branch="$2"; shift 2 ;;
    --title)     title="$2"; shift 2 ;;
    --body-file) body_file="$2"; shift 2 ;;
    --base)      base="$2"; shift 2 ;;
    --label)     label="$2"; shift 2 ;;
    *) echo "open-pr: unknown arg: $1" >&2; exit 1 ;;
  esac
done

: "${title:?--title required}"
: "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }

# --branch overrides the default; otherwise derive the ingest branch from --slug.
# (run-prune passes its own chore/prune-orphans-* branch; run-ingest passes --slug.)
if [[ -z "$branch" ]]; then
  : "${slug:?--slug or --branch required}"
  branch="feat/ai-ingest-${slug}"
fi
repo="$(basename -s .git "$(git config --get remote.origin.url)")"
api="${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}"

# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
# Rolling PR: -C force-resets the branch label to the current base (we are on it after
# clean_start) and CARRIES the freshly-written wiki/ changes, so a re-ingest of the same
# source rebuilds the branch cleanly instead of hitting a dirty-switch refusal.
git switch -C "$branch"
git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
if git diff --cached --quiet -- wiki/; then
  echo "open-pr: nothing staged under wiki/ — aborting" >&2
  exit 1
fi
git commit -m "$title" -- wiki/
# Try a normal push (new branch / fast-forward). If the branch was rebuilt from base and
# diverged, force-with-lease updates the open PR in place — the lease refuses to clobber if
# origin moved unexpectedly since our fetch, so concurrent work is never lost.
git push -u origin "$branch" 2>/dev/null || git push -u --force-with-lease origin "$branch"

# DRY_RUN: local git work done; skip the Forgejo API (offline tests).
if [[ -n "${DRY_RUN:-}" ]]; then
  echo "PR opened: DRY-RUN ${branch} -> ${base}"
  exit 0
fi

# 2. Open the PR via Forgejo API (jq builds the JSON safely)
# TODO: Forgejo-only. When registry.sh/globals.env sets PROVIDER=github, branch on
# $PROVIDER here and delegate to providers/github.sh (same token + http_code contract).
body="$(cat "$body_file")"
payload="$(jq -n --arg head "$branch" --arg base "$base" \
  --arg title "$title" --arg body "$body" \
  '{head:$head, base:$base, title:$title, body:$body}')"

# -sS: no progress meter, but transport errors still reach stderr. Without the explicit
# handler, `set -e` would end the script with curl's exit code and no message at all.
resp="$(curl --max-time 30 -sS -w '\n%{http_code}' \
  -H "Authorization: token ${FORGEJO_TOKEN}" \
  -H "Content-Type: application/json" \
  -X POST "${api}/pulls" \
  -d "$payload")" \
  || { echo "open-pr: Forgejo API request failed (curl exit $?)" >&2; exit 1; }

# curl -w appends '\n<code>' AFTER the body, so the code is always the final line and the
# body is everything before it. Parameter expansion (no subshells), robust to multi-line JSON.
code="${resp##*$'\n'}"
json="${resp%$'\n'*}"

case "$code" in
  201)
    url="$(printf '%s' "$json" | jq -r '.html_url')"
    number="$(printf '%s' "$json" | jq -r '.number')"
    echo "PR opened: ${url}"
    ;;
  409)
    # PR already exists — fetch it so the orchestrator still gets the URL.
    # The push already succeeded, so a failed lookup must not fail the run: any curl/jq
    # error falls back to an empty result. jq's first() replaces `| head -n1`, which can
    # SIGPIPE the producer and trip `pipefail`.
    existing="$(curl --max-time 15 -sS -H "Authorization: token ${FORGEJO_TOKEN}" \
        "${api}/pulls?state=open" \
      | jq -r --arg b "$branch" 'first(.[] | select(.head.ref==$b) | .html_url) // empty')" \
      || existing=""
    if [[ -n "$existing" && "$existing" != "null" ]]; then
      echo "PR opened: ${existing}"
    else
      echo "open-pr: a PR for '${branch}' already exists (push updated the branch)." >&2
    fi
    exit 0
    ;;
  401)
    echo "open-pr: unauthorized — check FORGEJO_TOKEN (n8n-bot)." >&2
    exit 1
    ;;
  *)
    echo "open-pr: Forgejo API HTTP ${code}: ${json}" >&2
    exit 1
    ;;
esac

# 3. Optional label (e.g. CONFLICT). Best-effort; non-fatal.
# Every failure below is downgraded to a message on stderr so the script still exits 0
# once the PR has been opened.
if [[ -n "$label" && -n "$number" ]]; then
  label_id="$(curl --max-time 15 -sS -H "Authorization: token ${FORGEJO_TOKEN}" \
      "${api}/labels" \
    | jq -r --arg n "$label" 'first(.[] | select(.name==$n) | .id) // empty')" \
    || label_id=""
  if [[ -n "$label_id" && "$label_id" != "null" ]]; then
    if curl --max-time 15 -sS -o /dev/null \
        -H "Authorization: token ${FORGEJO_TOKEN}" -H "Content-Type: application/json" \
        -X POST "${api}/issues/${number}/labels" \
        -d "{\"labels\":[${label_id}]}"; then
      echo "label '${label}' applied" >&2
    else
      echo "open-pr: could not apply label '${label}' — skipped." >&2
    fi
  else
    echo "open-pr: label '${label}' not found in repo — skipped." >&2
  fi
fi
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# orphan-wiki.sh — find source pages whose raw source no longer exists.
|
||||
# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
|
||||
# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
|
||||
# Read-only: no lock needed (same policy as pending-raw).
|
||||
# =============================================================================
|
||||
set -euo pipefail

genome="${1:?usage: orphan-wiki.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

# Collect every source page whose recorded raw file is no longer on disk.
declare -a ORPH=()
for page in wiki/sources/*.md; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$page" ]] || continue
  sp="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

if [[ ${#ORPH[@]} -eq 0 ]]; then
  # Built with jq like the non-empty branch, so the genome name is always
  # JSON-escaped instead of being spliced into a hand-written string.
  jq -cn --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  for x in "${ORPH[@]}"; do printf '%s\torphan\n' "$x"; done \
    | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" '{status:"ok", genome:$g, count:length, files:[.[].path], detail:.}'
fi
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# pending-raw.sh — deterministic "what needs ingesting" calculator.
|
||||
# Reads the clean base checkout and classifies each raw/articles/*.md as:
|
||||
# new -> no wiki/sources/<slug>.md
|
||||
# modified -> page exists but its source_sha256 != current file hash
|
||||
# Emits the same JSON envelope as changed-raw (drop-in), plus detail[] for ntfy.
|
||||
# =============================================================================
|
||||
set -euo pipefail

genome="${1:?usage: pending-raw.sh <genome>}"

# Resolve this script's own directory BEFORE changing directory. BASH_SOURCE[0]
# may be a relative path (e.g. `bash scripts/pending-raw.sh foo`); resolving it
# after the `cd` below would interpret it relative to the genome checkout and
# fail under `set -e`.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SLUG="${SCRIPT_DIR}/slug.sh"

# Enter the genome checkout under GENOMES_ROOT (default: ~/genomes); an unknown
# genome is reported as a JSON error envelope on stdout.
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
|
||||
|
||||
# Classify every raw/articles/**/*.md file:
#   NEW -> no wiki/sources/<slug>.md exists yet
#   MOD -> the page exists but its recorded source_sha256 differs from the file
declare -a NEW=()
declare -a MOD=()
declare -A SEEN_SLUG=()   # slug -> raw path that produced it (collision detection)

if [[ -d raw/articles ]]; then
  while IFS= read -r -d '' f; do
    rel="${f#./}"
    case "$rel" in
      */.stfolder/*|*/.stignore|*/.gitkeep) continue ;;
    esac
    # Run slug.sh through bash (as run-ingest.sh does) so a missing executable
    # bit cannot turn every file into a silent `continue` and a false "count 0".
    slug="$(bash "$SLUG" --raw "$rel")" || continue

    # Residual collision (two distinct raws -> same slug): warn, do not silence.
    if [[ -n "${SEEN_SLUG[$slug]:-}" && "${SEEN_SLUG[$slug]}" != "$rel" ]]; then
      logger -t pending-raw "warn: slug collision ${slug}: ${SEEN_SLUG[$slug]} <-> ${rel}"
    fi
    SEEN_SLUG[$slug]="$rel"

    page="wiki/sources/${slug}.md"
    if [[ ! -f "$page" ]]; then
      NEW+=("$rel")
    else
      # Compare the file's current hash with the one recorded in the page.
      cur="$(sha256sum "$rel" | cut -d' ' -f1)"
      rec="$(sed -n 's/^source_sha256:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
      if [[ "$cur" != "$rec" ]]; then
        MOD+=("$rel")
      fi
    fi
  # `find` returns entries in unspecified order; sort (byte-wise, NUL-delimited)
  # so the emitted lists and the collision warnings are stable between runs.
  done < <(find raw/articles -type f -name '*.md' -print0 2>/dev/null | LC_ALL=C sort -z)
fi
|
||||
|
||||
# Emit the result envelope {status, genome, count, files[], detail[]} on stdout.
# The genome name always goes through `jq --arg`, so it is JSON-escaped rather
# than spliced raw into the output.
if [[ ${#NEW[@]} -eq 0 && ${#MOD[@]} -eq 0 ]]; then
  jq -nc --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  {
    # ${arr[@]+...} keeps an EMPTY array from tripping `set -u` on bash < 4.4
    # (only one of NEW / MOD is guaranteed non-empty in this branch).
    for x in ${NEW[@]+"${NEW[@]}"}; do printf '%s\tnew\n' "$x"; done
    for x in ${MOD[@]+"${MOD[@]}"}; do printf '%s\tmodified\n' "$x"; done
  } | jq -R 'split("\t") | {path: .[0], reason: .[1]}' \
    | jq -s --arg g "$genome" \
        '{status: "ok", genome: $g, count: length, files: [.[].path], detail: .}'
fi
|
||||
|
|
@ -1,174 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/run-ingest.sh
|
||||
# Post-semantic orchestrator. Runs OUTSIDE the model, on vm101, in the genome
|
||||
# checkout. Consumes .ingest-manifest.json (written by ingest-semantic.py) and
|
||||
# performs every deterministic step — index, log, scoped lint, PR.
|
||||
#
|
||||
# run-ingest.sh <genome_name> [manifest_path]
|
||||
#
|
||||
# Emits a single JSON result line on stdout for n8n to parse.
|
||||
#
|
||||
# Precondition: every page listed in the manifest must exist on disk before we trust the run.
|
||||
# Everything else is unchanged: the manifest the semantic phase now produces is
|
||||
# already in this script's expected schema.
|
||||
# =============================================================================
|
||||
set -euo pipefail

# Arguments: genome name (required) and manifest path (defaults to
# .ingest-manifest.json in the current directory). SCRIPTS is the absolute
# directory holding this script and its sibling helpers.
genome="${1:?usage: run-ingest.sh <genome> [manifest]}"
manifest="${2:-.ingest-manifest.json}"
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# fail <stage> <reason>
# Print a compact JSON error object {status:"error", stage, reason} on stdout
# and terminate the script with exit status 1.
fail() {
  local stage="$1" reason="$2"
  jq -nc --arg stage "$stage" --arg reason "$reason" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# Preflight. The jq check cannot use fail() because fail() itself needs jq.
if ! command -v jq >/dev/null 2>&1; then
  echo '{"status":"error","reason":"jq missing"}'
  exit 1
fi
if ! command -v python3 >/dev/null 2>&1; then
  fail "deps" "python3 missing (needed by index-append.py)"
fi
if [[ ! -f "$manifest" ]]; then
  fail "manifest" "manifest not found: ${manifest}"
fi
|
||||
|
||||
# --- validate the manifest BEFORE trusting any field (LLM output is stochastic) ---
# Both checks are fail-closed: `jq -e` exits non-zero when the filter yields
# false/null AND when jq itself errors (malformed JSON, unexpected types), and
# every non-zero exit is treated as a validation failure.

# 1) well-formed JSON object with a string raw_source and an array of pages
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
  "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "invalid manifest: need object with string raw_source and array pages"

# 2) every page must be an object whose .path is a string, lives under wiki/,
#    and contains no '..' (no traversal). `and` short-circuits, so the string
#    functions are only applied to values already known to be strings.
jq -e '.pages | all(
         type=="object"
         and (.path | type=="string")
         and (.path | startswith("wiki/"))
         and (.path | contains("..") | not)
       )' "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
|
||||
|
||||
# --- read manifest scalars ---
# manifest_str <jq-filter> — print the filter's result from the manifest as raw text.
manifest_str() { jq -r "$1" "$manifest"; }

raw_source="$(manifest_str '.raw_source')"

# The model name comes from the orchestrator/wrapper (INGEST_MODEL): the agent
# cannot know its own tag, so a self-reported manifest field is only a fallback
# for when INGEST_MODEL is unset or empty.
if [[ -n "${INGEST_MODEL:-}" ]]; then
  model="$INGEST_MODEL"
else
  model="$(manifest_str '.model // "unknown"')"
fi

# Optional free-text fields, each with a default when absent.
reasoning="$(manifest_str '.reasoning // "Ingest."')"
pr_summary="$(manifest_str '.pr_summary // "Ingest."')"
contradictions="$(manifest_str '.contradictions // "None"')"

if [[ -z "$raw_source" || "$raw_source" == "null" ]]; then
  fail "manifest" "raw_source missing"
fi

# Derive the wiki slug from the raw source path using the shared slug helper.
if ! slug="$(bash "${SCRIPTS}/slug.sh" --raw "$raw_source")"; then
  fail "slug" "empty or invalid slug for ${raw_source}"
fi
|
||||
|
||||
# --- collect touched paths ---
# Pages are split by their manifest status; only "created" and "modified"
# entries are considered touched.
mapfile -t created_paths  < <(jq -r '.pages[] | select(.status=="created")  | .path' "$manifest")
mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .path' "$manifest")

# ${arr[@]+...} keeps an EMPTY array from tripping `set -u` on bash < 4.4
# (a run commonly has only created pages, or only modified ones).
all_paths=(
  ${created_paths[@]+"${created_paths[@]}"}
  ${modified_paths[@]+"${modified_paths[@]}"}
)
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"

# --- the semantic phase (ingest-semantic.py) writes the files; verify
#     every manifest page actually exists on disk before trusting the run. Catches any
#     drift between what the manifest claims and what was really written. ---
for _p in "${all_paths[@]}"; do
  [[ -f "$_p" ]] || fail "pages" "manifest lists a file not present on disk: ${_p}"
done

# Set to "CONFLICT" by the index step when a created page is a conflict page;
# consumed later as the PR label and in the final result line.
conflict_label=""
|
||||
|
||||
# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit).
|
||||
# All steps are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway,
|
||||
# nothing is committed (open-pr is the only committer) — the operator re-runs, or checks
|
||||
# wiki/ if log-append has already written a line. The manifest is removed only upon full success.
|
||||
# log-append is not idempotent: a re-run after a post-log failure produces
|
||||
# duplicate lines. This is accepted by design (append-only ledger, no rollback). If this
|
||||
# becomes a nuisance tomorrow, add a dedup check on run_id in log-append.sh
|
||||
# (grep for run_id before appending). Manual recovery: grep for run_id in wiki/log.md.
|
||||
|
||||
# --- 1. index entries (created pages only), inserted in order ---
# Fields travel from jq to `read` separated by US (0x1f), NOT by tab: tab is an
# IFS *whitespace* character, so `IFS=$'\t' read` collapses consecutive tabs and
# an empty summary would shift `maturity` into `summary`. With a non-whitespace
# separator empty fields are preserved. Tabs/CR/LF (and any stray US) inside a
# field are folded to a single space so one page is always exactly one line.
while IFS=$'\x1f' read -r path summary maturity; do
  [[ -z "$path" ]] && continue
  link="${path#wiki/}"; link="${link%.md}"          # e.g. sources/foo
  folder="${link%%/*}"

  # Route the page to its index section by top-level wiki folder.
  case "$folder" in
    sources)  section="Sources"  ;;
    entities) section="Entities" ;;
    concepts) section="Concepts" ;;
    queries)
      # Conflict pages live under queries/ with a "conflict-" prefix; seeing one
      # also flags the whole run so the PR gets the CONFLICT label.
      if [[ "$link" == queries/conflict-* ]]; then section="Conflicts"; conflict_label="CONFLICT"
      else section="Queries"; fi ;;
    # private/ is not routed here — ingest is public-only. Add when private ingest is built.
    *) section="Sources" ;;
  esac

  if [[ "$section" == "Conflicts" ]]; then
    entry="- [[${link}]]"                            # conflicts: slug only
  else
    entry="- [[${link}]] — ${summary} \`maturity: ${maturity}\`"
  fi

  python3 "${SCRIPTS}/index-append.py" --section "$section" --entry "$entry" \
    || fail "index" "index-append failed for ${path}"
done < <(jq -r '.pages[] | select(.status=="created")
          | [.path, (.summary // ""), (.maturity // "draft")]
          | map(tostring | gsub("[\\t\\r\\n\u001f]+"; " "))
          | join("\u001f")' "$manifest")
|
||||
|
||||
# --- 2. log entry ---
# run_id is derived only from the input (raw path + content hash), so it stays
# the same across wrapper re-runs of the same source. The original note pairs
# this with a dedup guard in log-append.sh — NOTE(review): that guard is not
# visible from this script; confirm it exists.
if ! src_sha="$(sha256sum "$raw_source" 2>/dev/null | cut -d' ' -f1)"; then
  src_sha="unknown"       # raw file could not be hashed
fi
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"

# Every manifest page as a wikilink ("wiki/" prefix and ".md" suffix removed),
# comma-separated.
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"

log_args=(
  --run-id "$run_id"
  --type INGEST
  --subject "$slug"
  --model "$model"
  --context "[[${raw_source}]]"
  --output "${out:-*(none)*}"
  --reasoning "$reasoning"
)
if ! bash "${SCRIPTS}/log-append.sh" "${log_args[@]}"; then
  fail "log" "log-append failed"
fi
|
||||
|
||||
# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
# scoped-lint reads INGEST_MANIFEST for its duplicate advisory, so export the
# manifest path we were actually given: a non-default path passed as $2 must be
# the file it inspects. (The dedup check lives inside lib/lint.sh and is invoked
# by scoped-lint — there is no separate check-duplicates.sh script.)
export INGEST_MANIFEST="$manifest"

# Capture stdout+stderr for the PR body and keep the exit status in lint_rc;
# the `if` form prevents a non-zero lint result from triggering `set -e`.
if lint_out="$(bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1)"; then
  lint_rc=0
else
  lint_rc=$?
fi
|
||||
|
||||
# --- 4. assemble the PR body (manifest tables + lint results) ---
body="$(mktemp)"
trap 'rm -f "$body"' EXIT   # auto-clean on any exit (success, fail(), or crash)

# All variable text is written with printf '%s\n' rather than echo: the summary,
# contradictions and lint output are free text (partly model-produced), and
# `echo` would swallow a value such as "-n" or "-e" as an option.
{
  # Marker for the rejection loop (an HTML comment, invisible in the render).
  printf '%s\n' "<!-- kg:raw=${raw_source} -->"
  printf '%s\n' "## Summary" "$pr_summary" ""
  printf '%s\n' "## Pages" "| Path | Status | Maturity |" "|------|--------|----------|"
  jq -r '.pages[] | "| `\(.path)` | \(.status) | \(.maturity // "draft") |"' "$manifest"
  printf '%s\n' "" "## Contradictions" "$contradictions" ""
  printf '%s\n' "## Scoped Lint (post-ingest)" '```' "$lint_out" '```'
} > "$body"
|
||||
|
||||
# --- 5. open the PR ---
pr_args=( --slug "$slug" --title "feat: ingest ${slug}" --body-file "$body" --base "${INGEST_BASE:-main}" )
if [[ -n "$conflict_label" ]]; then
  pr_args+=( --label "$conflict_label" )
fi

# Capture open-pr's combined output and exit status without tripping `set -e`.
if pr_out="$(bash "${SCRIPTS}/open-pr.sh" "${pr_args[@]}" 2>&1)"; then
  pr_rc=0
else
  pr_rc=$?
fi
# The PR URL is taken from the first "PR opened: <url>" line, if any.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- final result line for n8n ---
if [[ $pr_rc -eq 0 ]]; then status="ok"; else status="pr_failed"; fi
if [[ $lint_rc -eq 0 ]]; then lint_clean=true; else lint_clean=false; fi
if [[ -n "$conflict_label" ]]; then conflict=true; else conflict=false; fi

jq -nc \
  --arg status "$status" \
  --arg slug "$slug" \
  --arg pr_url "$pr_url" \
  --argjson lint_clean "$lint_clean" \
  --argjson conflict "$conflict" \
  --arg detail "$pr_out" \
  '{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'

# The manifest is a single file overwritten on each run; after a fully
# successful run it is removed so a stale manifest cannot be reprocessed by
# mistake. On PR failure it is kept and the script exits 1.
if [[ $pr_rc -eq 0 ]]; then
  rm -f "$manifest"
else
  exit 1
fi
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/run-prune.sh
|
||||
# Symmetric companion to run-ingest: prune source pages whose raw source no
|
||||
# longer exists. RE-DERIVES the orphan set itself (mirrors orphan-wiki.sh) — it
|
||||
# never trusts a list handed in by n8n, so there is no "detected-vs-pruned"
|
||||
# race. Removes ONLY the pages it derived plus their index entries, commits
|
||||
# ONLY wiki/ on chore/prune-orphans-<date>, and opens a GATED removal PR (the
|
||||
# operator approves the deletion; principle 2). Never deletes of its own accord.
|
||||
#
|
||||
# Runs OUTSIDE the model, on vm101, cwd = genome checkout. The wrapper (`pi
|
||||
# prune`) has already taken the per-genome lock and done clean_start, exactly
|
||||
# like `pi ingest` — so this script does neither.
|
||||
#
|
||||
# run-prune.sh <genome>
|
||||
#
|
||||
# Emits a single JSON result line on stdout for n8n to parse.
|
||||
# =============================================================================
|
||||
set -euo pipefail

# The genome argument is mandatory (usage check); SCRIPTS is the absolute
# directory holding this script and its sibling helpers.
genome="${1:?usage: run-prune.sh <genome>}"
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# fail <stage> <reason>
# Print a compact JSON error object {status:"error", stage, reason} on stdout
# and terminate the script with exit status 1.
fail() {
  local stage="$1" reason="$2"
  jq -nc --arg stage "$stage" --arg reason "$reason" '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# Preflight. The jq check cannot use fail() because fail() itself needs jq.
if ! command -v jq >/dev/null 2>&1; then
  echo '{"status":"error","reason":"jq missing"}'
  exit 1
fi
if ! command -v python3 >/dev/null 2>&1; then
  fail "deps" "python3 missing (needed by index-append.py)"
fi
|
||||
|
||||
# --- re-derive orphans (same rule as orphan-wiki.sh; computed fresh, here, now) ---
# A wiki/sources/*.md page is orphaned when the first `source_path:` line it
# contains names a raw file that no longer exists. Legacy pages without
# source_path are ignored.
declare -a ORPH=()
for page in wiki/sources/*.md; do
  # An unmatched glob stays literal; skip anything that is not a real entry.
  if [[ ! -e "$page" ]]; then
    continue
  fi
  src_path="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
  if [[ -n "$src_path" && ! -f "$src_path" ]]; then
    ORPH+=("$page")
  fi
done

# Nothing to prune: report success with an empty result and stop here.
if [[ ${#ORPH[@]} -eq 0 ]]; then
  jq -nc '{status:"ok", count:0, pruned:[], detail:"no orphans"}'
  exit 0
fi
|
||||
|
||||
# --- remove each orphan page + its index entry (anti-traversal, wiki/-only) ---
declare -a PRUNED=()
for page in "${ORPH[@]}"; do
  # Safety rails: only ever touch paths under wiki/ that contain no "..".
  if [[ "$page" != wiki/* ]]; then
    fail "prune" "refusing to remove outside wiki/: ${page}"
  fi
  if [[ "$page" == *..* ]]; then
    fail "prune" "path traversal in page: ${page}"
  fi

  # The page may have disappeared since the orphan set was derived.
  if [[ ! -f "$page" ]]; then
    continue
  fi

  rm -f "$page"

  # Index entries are keyed by the link form: no "wiki/" prefix, no ".md" suffix.
  link="${page#wiki/}"
  link="${link%.md}"                                # e.g. sources/foo
  if ! python3 "${SCRIPTS}/index-append.py" --remove "$link"; then
    fail "index" "index-append --remove failed for ${link}"
  fi
  PRUNED+=("$link")
done
|
||||
|
||||
# --- assemble the PR body ---
# date_tag (YYYY-MM-DD) is reused below to name the prune branch.
date_tag="$(date +%F)"
body="$(mktemp)"
trap 'rm -f "$body"' EXIT   # remove the temp file on any exit

{
  # Fixed explanatory header followed by a one-column table of removed pages.
  printf '%s\n' \
    "## Prune orphaned sources" \
    "" \
    "These source pages reference a \`source_path\` whose raw file no longer exists" \
    "in \`raw/\`. Removing them keeps the wiki in sync with git (the source of truth)." \
    "" \
    "| Removed page |" \
    "|--------------|"
  for pruned_link in "${PRUNED[@]}"; do
    echo "| \`wiki/${pruned_link}.md\` |"
  done
} > "$body"
|
||||
|
||||
# --- open the GATED removal PR on a chore/ branch (open-pr --branch override) ---
branch="chore/prune-orphans-${date_tag}"
pr_cmd=(
  bash "${SCRIPTS}/open-pr.sh"
  --branch "$branch"
  --title "chore: prune ${#PRUNED[@]} orphaned source(s)"
  --body-file "$body"
  --base "${INGEST_BASE:-main}"
)
# Capture open-pr's combined output and exit status without tripping `set -e`.
if pr_out="$("${pr_cmd[@]}" 2>&1)"; then
  pr_rc=0
else
  pr_rc=$?
fi
# The PR URL is taken from the first "PR opened: <url>" line, if any.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- result line for n8n ---
if [[ $pr_rc -eq 0 ]]; then status="ok"; else status="pr_failed"; fi

jq -nc \
  --arg status "$status" \
  --argjson count "${#PRUNED[@]}" \
  --arg pr_url "$pr_url" \
  --arg detail "$pr_out" \
  --argjson pruned "$(printf '%s\n' "${PRUNED[@]}" | jq -R . | jq -s .)" \
  '{status:$status, count:$count, pr_url:$pr_url, pruned:$pruned, detail:$detail}'

# A failed PR makes the whole run fail, after the result line has been printed.
if [[ $pr_rc -ne 0 ]]; then
  exit 1
fi
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/scoped-lint.sh
|
||||
# Run the framework's validation on ONLY the files touched this session.
|
||||
# Reuses lib/lint.sh + lib/output.sh — same checks as `make lint`, scoped.
|
||||
#
|
||||
# KG_LIB_DIR=/opt/knowledge-genome-orchestrator/lib \
|
||||
# scoped-lint.sh <genome_name> wiki/sources/x.md wiki/entities/y.md
|
||||
#
|
||||
# Exits non-zero if any hard error is found, so the agent notices.
|
||||
# Findings are printed (stderr from the lint functions + a summary on stdout).
|
||||
# =============================================================================
|
||||
set -euo pipefail

# KG_LIB_DIR must point at the framework's lib/ directory; abort if unset or empty.
: "${KG_LIB_DIR:?set KG_LIB_DIR to the framework lib/ dir (e.g. /opt/knowledge-genome-orchestrator/lib)}"

# Fail clearly if a lib file is missing, rather than with a raw `source: No such file`.
for lib_file in output.sh lint.sh; do
  if [[ ! -f "${KG_LIB_DIR}/${lib_file}" ]]; then
    echo "scoped-lint: missing ${KG_LIB_DIR}/${lib_file}" >&2
    exit 1
  fi
done

# shellcheck source=/dev/null
source "${KG_LIB_DIR}/output.sh"
# shellcheck source=/dev/null
source "${KG_LIB_DIR}/lint.sh"

# First argument is the genome name; every remaining argument is a file to lint.
genome="${1:?usage: scoped-lint.sh <genome> <file...>}"
shift
if [[ $# -eq 0 ]]; then
  echo "scoped-lint: no files given" >&2
  exit 1
fi
|
||||
|
||||
# Running totals: hard errors, stale pages, and how many files were requested.
errors=0
stale=0
count=$#

for f in "$@"; do
  if [[ ! -f "$f" ]]; then
    warn "scoped-lint: missing file (skipped): $f"
    continue
  fi

  # Each check's exit status is added to the totals below; the `if` form keeps
  # a non-zero status from aborting the script under `set -e`.
  if lint_markdown_file "$f" "$genome"; then fe=0; else fe=$?; fi
  if check_privacy_consistency "$f"; then pce=0; else pce=$?; fi
  if check_page_size "$f"; then pse=0; else pse=$?; fi
  errors=$(( errors + fe + pce + pse ))

  # Knowledge decay is tallied separately and does not count as a hard error.
  if check_knowledge_decay "$f"; then st=0; else st=$?; fi
  stale=$(( stale + st ))

  check_broken_links "$f" || true   # warnings only
done
|
||||
|
||||
# Cross-page duplicate advisory: runs ONCE over the whole manifest (not per
# file) — it compares this run's created slugs against the index, so repeating
# it for every file would only print the same warnings N times. INGEST_MANIFEST
# lets run-ingest.sh point us at a non-default manifest path; falls back to the
# conventional name otherwise.
# Warn-only: `|| true` guarantees a non-zero return can neither abort the script
# under `set -e` nor affect the exit status (same treatment as check_broken_links).
check_duplicates "${INGEST_MANIFEST:-.ingest-manifest.json}" || true

echo ""
echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)"

# Exit status of the script: success only when no hard error was counted.
[[ $errors -eq 0 ]]
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/slug.sh
|
||||
# Derive a wiki slug from a path, filename, or title string.
|
||||
# slug.sh "raw/articles/My Source.md" -> my-source
|
||||
# slug.sh "Some Concept Name" -> some-concept-name
|
||||
# =============================================================================
|
||||
set -euo pipefail

# --raw mode: slug.sh --raw <raw/bucket/rel/path>
# Builds the slug from the path RELATIVE to the bucket, so sub-directories
# become part of the slug:
#   raw/articles/sub/My Source.md -> sub-my-source
# Each path segment is sanitised on its own (runs of non-alphanumerics become a
# single "-", leading/trailing "-" removed) and the segments are joined by "-".
if [[ "${1:-}" == "--raw" ]]; then
  raw="${2:?usage: slug.sh --raw <raw/bucket/rel/path>}"
  rel="${raw#raw/}"; rel="${rel#*/}"       # strip "raw/" and the bucket name
  rel="${rel%.*}"                          # strip extension
  # `/^$/d` drops segments that sanitise to nothing (e.g. "!!!" or an empty
  # segment from "a//b"); otherwise the join would emit leading, trailing or
  # doubled dashes such as "-foo" or "a--b".
  slug="$(printf '%s\n' "$rel" | tr '/' '\n' \
    | sed -E 's/[^a-zA-Z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//; /^$/d' \
    | tr '[:upper:]' '[:lower:]' | paste -sd- -)"
  # Refuse to emit an empty slug; the caller treats a non-zero exit as failure.
  [[ -n "$slug" ]] || { echo "slug: empty result for input '${raw}'" >&2; exit 1; }
  printf '%s\n' "$slug"
  exit 0
fi
|
||||
|
||||
# Default mode: slug.sh <path-or-title>
input="${1:?usage: slug.sh <path-or-title>}"

# When given a path, keep only the final component and drop its last ".suffix".
stem="${input##*/}"
stem="${stem%.*}"

# Lower-case first, then turn every run of characters outside [a-z0-9] into a
# single "-" and trim dashes from both ends.
lowered="$(printf '%s\n' "$stem" | tr '[:upper:]' '[:lower:]')"
slug="$(printf '%s\n' "$lowered" \
  | sed -E 's/[^a-z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//')"

# An all-symbols input (e.g. "!!!.md") collapses to "" — refuse rather than emit
# a broken/empty slug that would produce an invalid branch name downstream.
if [[ -z "$slug" ]]; then
  echo "slug: empty result for input '${input}'" >&2
  exit 1
fi
printf '%s\n' "$slug"
|
||||
|
|
@ -2,11 +2,11 @@
|
|||
|
||||
## Identity
|
||||
|
||||
| Field | Value |
|
||||
| ------ | -------------------------------------------------- |
|
||||
| Genome | `{{GENOME_NAME}}` |
|
||||
| Domain | `{{GENOME_DESC}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Field | Value |
|
||||
|--------|-------|
|
||||
| Genome | `{{GENOME_NAME}}` |
|
||||
| Domain | `{{GENOME_DESC}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{GENOME_NAME}}` |
|
||||
|
||||
**Role:** Wiki maintainer for `{{GENOME_NAME}}`.
|
||||
|
|
@ -14,28 +14,14 @@
|
|||
|
||||
---
|
||||
|
||||
## Linked Project
|
||||
|
||||
| Field | Value |
|
||||
| --------------- | --------------------- |
|
||||
| Project repo | `{{LINKED_PROJECT}}` |
|
||||
| Branch | `main` |
|
||||
| Allowed tasks | `readme, tests, code` |
|
||||
| Preferred model | `auto` |
|
||||
|
||||
If `Project repo` is `none`, this genome is knowledge-only — phase-2 project work
|
||||
does not apply. When set, after a wiki PR is **merged**, the orchestrator may trigger
|
||||
work on this repo within _Allowed tasks_. The agent never touches the project repo
|
||||
during ingest.
|
||||
|
||||
## PRIVATE_CONTEXT
|
||||
|
||||
**Default: `disabled`** — never infer; require explicit operator declaration per session.
|
||||
|
||||
| State | Behavior |
|
||||
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
||||
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
||||
| State | Behavior |
|
||||
|-------|----------|
|
||||
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
||||
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
||||
|
||||
Pre-commit `PLAINTEXT LEAK DETECTED`: stop immediately. Do not use `--no-verify`. Ask operator to verify `.gitattributes` and encryption state.
|
||||
|
||||
|
|
@ -47,15 +33,14 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
|||
|
||||
1. `raw/` is read-only. Never create, modify, or delete files in `raw/`.
|
||||
2. `wiki/` is agent-owned. Create, update, and maintain all wiki pages here.
|
||||
3. Every operation → one log entry appended to `wiki/log.md` (§Log) (automated via manifest during Ingest).
|
||||
4. Every new page → one entry appended to `wiki/index.md` (§Index) (automated via manifest during Ingest).
|
||||
3. Every operation → one log entry appended to `wiki/log.md` (§Log).
|
||||
4. Every new page → one entry appended to `wiki/index.md` (§Index).
|
||||
5. Never commit to `main`. Branch per task; PR required; no self-merge.
|
||||
6. Contradict, don't overwrite. New evidence contradicts existing claim → §Conflict.
|
||||
7. Never commit plaintext to any path marked for encryption in `.gitattributes`.
|
||||
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary (automated via run-ingest.sh during Ingest).
|
||||
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary.
|
||||
|
||||
### NEVER
|
||||
|
||||
- Load `wiki/log.md` in full — read only the tail injected by the orchestrator.
|
||||
- Rewrite `wiki/index.md` to reorder entries — append only; sorting is automated.
|
||||
- Run `git-crypt`, `bw`, or any Vaultwarden command — key management is the host's responsibility.
|
||||
|
|
@ -63,7 +48,6 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
|||
- Merge PRs — human approval required.
|
||||
|
||||
### ASK FIRST
|
||||
|
||||
- Deleting any wiki page.
|
||||
- Changing `maturity` from `stable` to `deprecated`.
|
||||
- Writing to `wiki/private/` when PRIVATE_CONTEXT state is ambiguous.
|
||||
|
|
@ -86,32 +70,24 @@ Execute in this order before any file operation:
|
|||
## Workflows
|
||||
|
||||
### Ingest
|
||||
*Triggered by new file in `raw/`.*
|
||||
|
||||
_Triggered by new file in `raw/`._
|
||||
|
||||
**Phase 1 — Semantic Pass (Agent Skill)**
|
||||
1. Read source once.
|
||||
2. Create `wiki/sources/<slug>.md` — summary + key points.
|
||||
3. Per entity (person, tool, org): create or update `wiki/entities/<name>.md`.
|
||||
4. Per concept (pattern, theory, decision): create or update `wiki/concepts/<name>.md`.
|
||||
5. Check each touched page for contradictions → apply §Conflict if found.
|
||||
6. **Final action:** Write `.ingest-manifest.json` at the genome root.
|
||||
7. **STOP.** Do not proceed to index, log, lint, commit, or PR — these are Phase 2.
|
||||
6. Append entry to `wiki/index.md` (bottom of relevant section).
|
||||
7. Append log entry: `INGEST | <slug>`.
|
||||
8. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
|
||||
9. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
|
||||
|
||||
**Phase 2 — Deterministic Post-Processing (`run-ingest.sh`)**
|
||||
_Executed automatically by the orchestrator after Phase 1._
|
||||
8. Append entry to `wiki/index.md` (bottom of relevant section).
|
||||
9. Append log entry: `INGEST | <slug>`.
|
||||
10. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
|
||||
11. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
|
||||
|
||||
_Private source_ (`PRIVATE_CONTEXT: enabled` required):
|
||||
*Private source* (`PRIVATE_CONTEXT: enabled` required):
|
||||
- All output → `wiki/private/<slug>.md` only.
|
||||
- PR title: `[PRIVATE] ingest: <slug>`.
|
||||
|
||||
### Query
|
||||
|
||||
_Triggered by operator question._
|
||||
*Triggered by operator question.*
|
||||
|
||||
1. `qmd search "<query>"` → identify candidate pages.
|
||||
2. Read candidate pages directly.
|
||||
|
|
@ -120,11 +96,10 @@ _Triggered by operator question._
|
|||
5. Append entry to `wiki/index.md` under Queries.
|
||||
6. Append log entry: `QUERY | <subject>`.
|
||||
|
||||
_For general orientation without a specific query: read `wiki/index.md` directly._
|
||||
*For general orientation without a specific query: read `wiki/index.md` directly.*
|
||||
|
||||
### Lint
|
||||
|
||||
_Triggered by operator with bash pre-scan output._
|
||||
*Triggered by operator with bash pre-scan output.*
|
||||
|
||||
Pre-requisite: operator runs `bash scripts/lint-genomes.sh` and provides output to this session.
|
||||
The script handles deterministically: broken links, knowledge decay, page size, frontmatter validation.
|
||||
|
|
@ -144,14 +119,13 @@ Append log entry: `LINT | <summary of findings>`.
|
|||
## File Conventions
|
||||
|
||||
### Frontmatter
|
||||
|
||||
Required on every wiki page:
|
||||
|
||||
```yaml
|
||||
---
|
||||
title: "Strict String Title"
|
||||
type: source | entity | concept | query | conflict | private
|
||||
domain: {{ GENOME_NAME }}
|
||||
domain: {{GENOME_NAME}}
|
||||
tags: [lowercase, hyphen-separated]
|
||||
maturity: draft | stable | deprecated
|
||||
last_updated: YYYY-MM-DD
|
||||
|
|
@ -164,29 +138,19 @@ private: true | false
|
|||
- `deprecated` — superseded. Add `> **DEPRECATED:** <reason>` callout at top of body.
|
||||
|
||||
### Links
|
||||
|
||||
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
|
||||
- Cross-genome: NOT via wikilink (submodule pointers make relative paths brittle). A concept owned by another genome is pulled in by the navigation skill as a raw under `raw/articles/crossgen-<topic>-<date>.md`, then ingested here normally. See master `AGENTS.md` §Cross-Genome Pull.
|
||||
- Cross-genome: `[[../genome-target/wiki/folder/file]]`.
|
||||
- External: `[text](https://...)`.
|
||||
|
||||
### Index entries
|
||||
|
||||
> **Skill mode:** auto-generated by `run-ingest.sh` from manifest. Below applies to manual workflows only.
|
||||
|
||||
Append at bottom of relevant section in `wiki/index.md`:
|
||||
|
||||
```
|
||||
- [[folder/slug]] — One-line summary. `maturity: draft`
|
||||
```
|
||||
|
||||
Never reorder. Alphabetical sorting is handled by the post-processor (index-append.py); the pre-commit hook only enforces the security policy.
|
||||
Never reorder. Alphabetical sort is handled by the pre-commit hook.
|
||||
|
||||
### Log entries
|
||||
|
||||
> **Skill mode:** auto-generated by `run-ingest.sh` from manifest. Below applies to manual workflows only.
|
||||
|
||||
Append one entry per operation to `wiki/log.md`:
|
||||
|
||||
```markdown
|
||||
## [YYYY-MM-DD] TYPE | Subject
|
||||
|
||||
|
|
@ -196,7 +160,6 @@ Append one entry per operation to `wiki/log.md`:
|
|||
- output_written: `[[path/C]]`
|
||||
- reasoning: One sentence — what changed and why.
|
||||
```
|
||||
|
||||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||
|
||||
Parse: `grep "^## \[" wiki/log.md | tail -5`
|
||||
|
|
@ -214,26 +177,22 @@ When new evidence contradicts an existing wiki claim:
|
|||
---
|
||||
title: "Conflict: <concept>"
|
||||
type: conflict
|
||||
domain: {{ GENOME_NAME }}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: draft
|
||||
last_updated: YYYY-MM-DD
|
||||
private: false
|
||||
---
|
||||
```
|
||||
|
||||
```markdown
|
||||
## Conflict: <concept>
|
||||
|
||||
**Claim A (existing):** [[path/to/existing-page]]
|
||||
|
||||
> Summary of current wiki position.
|
||||
|
||||
**Claim B (new):** [[path/to/new-source]]
|
||||
|
||||
> Summary of contradicting evidence.
|
||||
|
||||
**Assessment:**
|
||||
|
||||
- Confidence A: high | medium | low — <reason>
|
||||
- Confidence B: high | medium | low — <reason>
|
||||
- Recommendation: `accept_b` | `keep_a` | `requires_human_review`
|
||||
|
|
@ -253,22 +212,20 @@ private: false
|
|||
- `maturity: draft` not updated in **90 days** → flag during lint.
|
||||
|
||||
Flagged pages: prepend to body:
|
||||
|
||||
```markdown
|
||||
> **⚠️ STALE:** Last validated {{last_updated}}. Re-validation required.
|
||||
```
|
||||
|
||||
Propose re-validation task. Do not change `maturity` without new source evidence.
|
||||
|
||||
---
|
||||
|
||||
## Collaboration
|
||||
|
||||
| Role | Access | Permitted |
|
||||
| -------------- | ----------------- | ------------------------------------------------------------------------------------ |
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Public only | `PRIVATE_CONTEXT` must be `disabled`; never send private files outside local network |
|
||||
| Role | Access | Permitted |
|
||||
|------|--------|-----------|
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Public only | `PRIVATE_CONTEXT` must be `disabled`; never send private files outside local network |
|
||||
|
||||
Grant collaborator: add as Forgejo contributor with Write role. Never share the git-crypt key.
|
||||
|
|
|
|||
|
|
@ -2,14 +2,14 @@
|
|||
|
||||
## Identity
|
||||
|
||||
| Field | Value |
|
||||
| ------ | -------------------------------------------------- |
|
||||
| Repo | `{{MASTER_REPO}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Field | Value |
|
||||
|--------|-------|
|
||||
| Repo | `{{MASTER_REPO}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||
|
||||
**Role:** Cross-genome coordinator for the Knowledge Genome network.
|
||||
**Metrics:** no cross-genome boundary violations · submodule pointers current · cross-genome discoveries routed to target raw/ · zero stale submodule-relative wikilinks.
|
||||
**Metrics:** no cross-genome boundary violations · submodule pointers current · cross-genome wikilinks valid · no private data outside local network.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -18,8 +18,10 @@
|
|||
```text
|
||||
{{MASTER_REPO}}/
|
||||
├── core-karpathy/ ← Reference pattern — read-only, never modify
|
||||
├── genome-example/ ← Submodule placeholder (replace with your domain)
|
||||
└── AGENTS.md
|
||||
├── genome-dev/ ← Submodule: web development, Angular, TUI
|
||||
├── genome-finance/ ← Submodule: personal finance (git-crypt on private/)
|
||||
├── genome-homelab/ ← Submodule: Keru infrastructure and network
|
||||
└── AGENTS.md ← This file (update diagram when adding a genome)
|
||||
```
|
||||
|
||||
Each genome has its own `AGENTS.md` with domain-specific rules.
|
||||
|
|
@ -30,17 +32,14 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
## Global Security Rules
|
||||
|
||||
### PRIVATE_CONTEXT scope
|
||||
|
||||
- Toggle is **per-genome and per-session**. Enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||
- Cloud LLM models: `PRIVATE_CONTEXT` must be `disabled` for all genomes. Private data never leaves the local network.
|
||||
|
||||
### Log sanitization
|
||||
|
||||
- Never print decrypted secrets, session tokens, or key contents to stdout or log files.
|
||||
- Document only `run_id` and genome name — never the key value.
|
||||
|
||||
### Key management
|
||||
|
||||
- Key injection is the host's responsibility — executed before this session starts.
|
||||
- Never write, suggest, or generate scripts that save `.key` files to disk.
|
||||
|
||||
|
|
@ -50,20 +49,17 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
|
||||
1. Operate within ONE genome at a time. No atomic commits across multiple genomes.
|
||||
2. `core-karpathy` is read-only. Never commit to it.
|
||||
3. Cross-genome references are NEVER expressed as wikilinks. When a concept belongs to another genome, use the navigation skill to emit a raw stub into that genome's `raw/articles/` and let its own ingest pipeline handle it asynchronously.
|
||||
3. Cross-genome references use relative wikilinks only: `[[../genome-target/wiki/folder/page]]`.
|
||||
4. Never commit to `main` in any genome. PRs required; no self-merge.
|
||||
5. Per-genome `AGENTS.md` governs all wiki operations within that genome. This file governs boundaries only.
|
||||
|
||||
### NEVER
|
||||
|
||||
- Load multiple `wiki/index.md` files simultaneously for cross-genome comparison — use qmd.
|
||||
- Run `git-crypt`, `bw`, or Vaultwarden commands — host responsibility.
|
||||
- Modify files in more than one genome in the same operation.
|
||||
- Create cross-genome wikilinks (e.g., `[[../genome-*/wiki/...]]`). All cross-domain connections must be routed via the navigation skill as raw stubs.
|
||||
- Modify `core-karpathy` in any way.
|
||||
|
||||
### ASK FIRST
|
||||
|
||||
- Any operation that touches two or more genomes.
|
||||
- Updating submodule pointers in master.
|
||||
- Any key rotation procedure.
|
||||
|
|
@ -80,55 +76,13 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
|
||||
---
|
||||
|
||||
## Cross-Genome Pull (Navigation Skill)
|
||||
## Cross-Genome Lint
|
||||
*Manual, monthly — requires operator initiation. Not automated.*
|
||||
|
||||
Cross-genome knowledge moves by **pull, never push**: the genome you are working in draws material *in*; nothing is ever written into another genome. The cross-genome reading is performed by a deterministic collector **outside any agent's context**, so the agent still operates within ONE genome (Immutable Rule 1 holds). The `cross_source` registry flag decides which genomes may be read as sources.
|
||||
|
||||
There is **no separate synthesis step**: retrieving and then distilling twice would only add LLM cost and lose information. The collector *retrieves* (like a search) and deposits the result as a raw; the working genome's own ingest *distills* it once, for this genome's needs.
|
||||
|
||||
### How it works
|
||||
|
||||
Two actors:
|
||||
|
||||
1. **Collector** (`collect-crossgen.sh`, deterministic, agent-free). Clones each genome flagged `cross_source: yes` **read-only at its remote HEAD** — a disposable checkout, for freshness; never the pinned submodule state. The clone is **keyless**, so `private/` stays an encrypted blob and is unreadable. It indexes the public wikis with `qmd`, runs `qmd search "<topic>"`, and assembles a **dossier**: the text of the matching pages plus per-excerpt provenance (source genome, page, HEAD short-sha, date), with every `[[wikilink]]` neutralized to plain text. It deposits the dossier as **one** raw in the working genome at `raw/articles/crossgen-<topic>-<YYYY-MM-DD>.md`, commits, and pushes. Nothing is written to any source genome.
|
||||
2. **Target ingest.** The working genome's standard ingest reads that raw as an ordinary source and distills it into wiki pages for the local domain — one semantic pass → PR → human gate. Same gate as any other source.
|
||||
|
||||
### When to pull
|
||||
|
||||
Pull is initiated deliberately (operator- or context-driven, never on a timer). Produce a crossgen raw ONLY when all three hold:
|
||||
|
||||
1. **Ownership elsewhere.** The concept, entity, or pattern is defined and maintained in another genome, and you need it framed for the working domain.
|
||||
2. **Structural relevance.** It influences decisions, patterns, or entities here — not a casual mention.
|
||||
3. **No fresh local coverage.** `qmd search "<concept>"` in the working genome returns nothing, or only a stub that needs enrichment.
|
||||
|
||||
If in doubt, do NOT pull. A missed cross-reference is cheaper than crossgen spam.
|
||||
|
||||
### Boundaries (enforced by the master)
|
||||
|
||||
- **Sources are restricted to `cross_source: yes` genomes.** A genome flagged `no` (e.g., a client / confidential file) is NEVER read as a source — the collector skips it physically. The wall is structural, not a matter of the agent's discipline.
|
||||
- **Keyless collection.** The collector holds no git-crypt key, so `private/` stays ciphertext and cannot be read — privacy does not depend on the agent behaving.
|
||||
- **Sources are read-only, at HEAD.** No write, commit, branch, or PR in any genome other than the one being worked on.
|
||||
- **NEVER `git submodule update --remote`.** Read other genomes via disposable read-only clones — never by moving this master's submodule pointers (that is ASK FIRST).
|
||||
- The deposited raw must contain **no wikilinks and no private data**; it is processed by the working genome's normal ingest + human gate.
|
||||
|
||||
### Output raw (the only artifact written)
|
||||
|
||||
**Path (in the working genome):** `raw/articles/crossgen-<topic>-<YYYY-MM-DD>.md`
|
||||
Plain text. No YAML frontmatter (raw is immutable input). **No wikilinks of any kind** — `[[...]]` from source pages are flattened to plain text so they never become broken cross-references here.
|
||||
|
||||
```markdown
|
||||
> Cross-genome pull | Into: genome-<working> | Query: "<topic>" | Date: YYYY-MM-DD
|
||||
|
||||
## From genome-<a> — wiki/concepts/<x>.md (HEAD <short-sha>)
|
||||
[retrieved page text — wikilinks flattened to plain text, no private data]
|
||||
|
||||
## From genome-<b> — wiki/entities/<y>.md (HEAD <short-sha>)
|
||||
[retrieved page text]
|
||||
```
|
||||
|
||||
**Rules:**
|
||||
|
||||
- **Deterministic deposit.** The raw is written by the collector (the skill's mechanical side), never edited by an agent — agents never create, modify, or delete files in any `raw/`. Each pull is a **new, dated** file (raw is immutable).
|
||||
- **Distillation happens at ingest, once.** The working genome's normal ingest turns the dossier into wiki pages and **deduplicates against existing pages** via its §Conflict procedure. There is no pre-summarization.
|
||||
- **Bound large retrievals deterministically** (top-N pages / relevant sections) rather than adding an LLM pass — keeps the dossier-raw and the ingest job reasonable at any scale.
|
||||
- *Optional (large + expensive-cloud deployments only):* a cheap **local** pre-distillation may be inserted before an expensive cloud ingest to shrink its input. This is an opt-in optimization; the default is no synthesis.
|
||||
1. Use `qmd search "<concept>"` to find pages covering the same concept across genomes.
|
||||
2. Identify:
|
||||
- Concepts defined in 2+ genomes with potentially conflicting definitions.
|
||||
- Entities referenced across genomes without a canonical cross-genome wikilink.
|
||||
- Concepts in genome-X that should link to genome-Y but don't.
|
||||
3. Report findings. Do not modify any files.
|
||||
4. For each finding: create a conflict note in the genome where resolution belongs, following that genome's §Conflict procedure.
|
||||
|
|
|
|||
|
|
@ -1,44 +1,25 @@
|
|||
## Summary
|
||||
|
||||
<!-- One sentence: goal of this session and source processed. -->
|
||||
|
||||
<!--
|
||||
REVIEW GUIDELINES (write the guideline as the FIRST word of your review):
|
||||
REWORK: <what to fix> -> same branch, guided retry
|
||||
RESTART: <why restart> -> close PR, start over from scratch
|
||||
SPLIT: <how to split> -> close PR, reopen as separate branches
|
||||
REJECT: <why not> -> close PR, no retry
|
||||
MERGE -> approve and merge
|
||||
Rules: one concern per directive; be specific to lines/pages; name the principle
|
||||
that was violated; describe the DESIRED STATE; avoid saying “do better.”
|
||||
-->
|
||||
|
||||
<!-- Translated with DeepL.com (free version) -->
|
||||
|
||||
## Pages Created
|
||||
|
||||
| Path | Type | Maturity |
|
||||
| ----------------- | --------------------------------- | -------- |
|
||||
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
||||
| Path | Type | Maturity |
|
||||
|------|------|----------|
|
||||
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
||||
|
||||
## Pages Modified
|
||||
|
||||
| Path | Change |
|
||||
| ----------------- | ----------------------------------------- |
|
||||
| Path | Change |
|
||||
|------|--------|
|
||||
| `[[folder/slug]]` | Added cross-reference to `[[other/page]]` |
|
||||
|
||||
## Contradictions Found
|
||||
|
||||
- [ ] None
|
||||
- [ ] `n` conflict file(s) created — listed below
|
||||
|
||||
## Private Data Accessed
|
||||
|
||||
- [ ] No — `PRIVATE_CONTEXT: disabled`
|
||||
- [ ] Yes — `PRIVATE_CONTEXT: enabled` · outputs in `wiki/private/` only
|
||||
|
||||
## Scoped Lint (post-ingest)
|
||||
|
||||
- [ ] Frontmatter valid on all touched pages
|
||||
- [ ] No broken wikilinks on touched pages
|
||||
- [ ] No issues found
|
||||
|
|
|
|||
|
|
@ -11,8 +11,9 @@ set -euo pipefail
|
|||
FAILED=0
|
||||
|
||||
# Verify git-crypt is initialized
|
||||
if ! git-crypt status >/dev/null 2>&1; then
|
||||
printf "\n[CRITICAL] git-crypt not initialized.\n"
|
||||
if [[ ! -d ".git-crypt" ]]; then
|
||||
printf "\n\033[0;31m[CRITICAL] git-crypt not initialized.\033[0m\n"
|
||||
printf "Run 'git-crypt init' and 'make setup' before committing.\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -1,45 +0,0 @@
|
|||
# {{MASTER_REPO}}
|
||||
|
||||
Master (umbrella) repository for the Knowledge Genome network.
|
||||
|
||||
| Field | Value |
|
||||
| ---------- | -------------------------------------------------- |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||
| Scaffolded | `{{DATE}}` |
|
||||
|
||||
## What this repo is
|
||||
|
||||
This repository does **not** hold knowledge itself. It is the orchestrator: each genome
|
||||
is a Git submodule, plus `core-karpathy` as a read-only reference pattern. Cross-genome
|
||||
coordination rules live in `AGENTS.md`.
|
||||
|
||||
```text
|
||||
{{MASTER_REPO}}/
|
||||
├── core-karpathy/ ← reference pattern — read-only, never modify
|
||||
├── genome-*/ ← one submodule per genome (own AGENTS.md, own git-crypt)
|
||||
└── AGENTS.md ← cross-genome coordinator (boundaries only)
|
||||
```
|
||||
|
||||
## Working with submodules
|
||||
|
||||
```bash
|
||||
# Clone with all genomes
|
||||
git clone --recurse-submodules {{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}.git
|
||||
|
||||
# Pull the latest pointers for every genome
|
||||
git submodule update --remote --merge
|
||||
|
||||
# Operate inside a single genome (one genome at a time — see AGENTS.md)
|
||||
cd genome-<name>
|
||||
```
|
||||
|
||||
## Rules of the road
|
||||
|
||||
- Operate within **one genome at a time**; no commits spanning multiple genomes.
|
||||
- `core-karpathy` is read-only.
|
||||
- Never commit to `main` in a genome — PRs only, no self-merge.
|
||||
- Private data (`**/private/**`) is git-crypt encrypted and never leaves the local network.
|
||||
|
||||
Genome-level operations are governed by each genome's own `AGENTS.md`. This README and the
|
||||
master `AGENTS.md` govern boundaries only.
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
---
|
||||
title: "Index — {{GENOME_NAME}}"
|
||||
type: index
|
||||
domain: {{ GENOME_NAME }}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: stable
|
||||
last_updated: {{ DATE }}
|
||||
last_updated: {{DATE}}
|
||||
private: false
|
||||
---
|
||||
|
||||
|
|
@ -12,35 +12,34 @@ private: false
|
|||
**[AGENT INSTRUCTION]**
|
||||
This is the primary navigation file. Read it first on every session before accessing individual pages.
|
||||
Append new entries at the bottom of the relevant section — do not reorder or rewrite sections.
|
||||
Alphabetical sorting is handled by the post-processor (index-append.py); the pre-commit hook only enforces the security policy.
|
||||
Alphabetical sorting is handled automatically by the pre-commit hook.
|
||||
Update `last_updated` in the YAML frontmatter on every edit.
|
||||
Entry format: `- [[folder/slug]] — One-line summary. \`maturity: <value>\``
|
||||
Entry format: `- [[folder/slug]] — One-line summary. \`maturity: <value>\``
|
||||
|
||||
---
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*Ingested raw materials. One entry per processed source.*
|
||||
|
||||
_Ingested raw materials. One entry per processed source._
|
||||
|
||||
## Entities (`wiki/entities/`)
|
||||
*People, organisations, tools, projects.*
|
||||
|
||||
_People, organisations, tools, projects._
|
||||
|
||||
## Concepts (`wiki/concepts/`)
|
||||
*Theories, methodologies, patterns, architectural decisions.*
|
||||
|
||||
_Theories, methodologies, patterns, architectural decisions._
|
||||
|
||||
## Queries (`wiki/queries/`)
|
||||
*Synthesised answers worth preserving. Archived explorations and analyses.*
|
||||
|
||||
_Synthesised answers worth preserving. Archived explorations and analyses._
|
||||
|
||||
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
||||
*Created automatically when the agent detects contradictions between sources.*
|
||||
*Do not summarise entries here — list slugs only to avoid surfacing unresolved claims.*
|
||||
*Remove entry once the operator has resolved and closed the corresponding PR.*
|
||||
|
||||
_Created automatically when the agent detects contradictions between sources._
|
||||
_Do not summarise entries here — list slugs only to avoid surfacing unresolved claims._
|
||||
_Remove entry once the operator has resolved and closed the corresponding PR._
|
||||
|
||||
## Private Synthesis (`wiki/private/`)
|
||||
|
||||
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
|
||||
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
|
||||
*Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo.*
|
||||
*List slug names ONLY. Do not append summaries — prevents metadata leakage.*
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
---
|
||||
title: "Operations Log — {{GENOME_NAME}}"
|
||||
type: log
|
||||
domain: {{ GENOME_NAME }}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: stable
|
||||
last_updated: {{ DATE }}
|
||||
last_updated: {{DATE}}
|
||||
private: false
|
||||
---
|
||||
|
||||
|
|
@ -22,13 +22,11 @@ Append new entries at the bottom using the format defined below.
|
|||
## Entry Format
|
||||
|
||||
### Required header (enables shell parsing):
|
||||
|
||||
```text
|
||||
## [YYYY-MM-DD] TYPE | Subject or title
|
||||
```
|
||||
|
||||
### Required metadata block for all agent-generated entries:
|
||||
|
||||
```markdown
|
||||
- run_id: `<short-uuid or session-identifier>`
|
||||
- model: `<model-name-and-version>`
|
||||
|
|
@ -40,7 +38,6 @@ Append new entries at the bottom using the format defined below.
|
|||
**Valid TYPEs:** `INGEST` | `LINT` | `QUERY` | `CONFLICT` | `CONFIG` | `SECURITY`
|
||||
|
||||
**Parse examples:**
|
||||
|
||||
```bash
|
||||
# Last 5 entries
|
||||
grep "^## \[" wiki/log.md | tail -5
|
||||
|
|
@ -57,7 +54,7 @@ grep "^## \[2026-05" wiki/log.md
|
|||
## [{{DATE}}] CONFIG | Genome scaffolded
|
||||
|
||||
- run_id: `system-init`
|
||||
- model: `scaffold.sh`
|
||||
- context_read: _(none — initial scaffold)_
|
||||
- model: `setup-knowledge-genome.sh`
|
||||
- context_read: *(none — initial scaffold)*
|
||||
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
|
||||
- reasoning: Initial directory structure and encryption layer initialized by setup script.
|
||||
|
|
|
|||
|
|
@ -1,56 +0,0 @@
|
|||
# Tests
|
||||
|
||||
Deterministic tests for the mechanical layer of the framework — **no LLM, no GPU, no
|
||||
network**. They simulate pi's output with fixtures and exercise the scripts directly, so
|
||||
they run anywhere (laptop, CI, a git hook). They do **not** belong on vm101 or in n8n.
|
||||
|
||||
## What's covered
|
||||
|
||||
| File | Covers |
|
||||
|------|--------|
|
||||
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse + duplicate-slug advisory (edit-distance math, self-match skip, once-per-run) |
|
||||
| `structure.bats` | `lib/structure.sh` report/sync |
|
||||
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||
|
||||
`run-ingest.bats` auto-`skip`s if `jq` is missing; everything else needs only bash + git
|
||||
(+ `python3` for the index tests).
|
||||
|
||||
## Install bats
|
||||
|
||||
```bash
|
||||
# Debian/Ubuntu
|
||||
sudo apt install bats
|
||||
# or pinned, as a vendored submodule
|
||||
git submodule add https://github.com/bats-core/bats-core.git test/bats
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
bats tests/ # whole suite
|
||||
bats tests/lint.bats # one file
|
||||
bats -f "sorted" tests/scripts.bats # filter by name
|
||||
```
|
||||
|
||||
Each test builds its own throwaway genome under `BATS_TEST_TMPDIR` (auto-cleaned) with a
|
||||
local bare git remote, so `open-pr.sh --DRY_RUN` can branch/commit/push without touching
|
||||
Forgejo.
|
||||
|
||||
## Makefile targets
|
||||
|
||||
```make
|
||||
# These targets run commands and never produce a file with the same name.
# Declaring them phony keeps them runnable even if a file or directory called
# `test`, `verify-structure` or `sync-structure` ever appears in the repo root.
.PHONY: test verify-structure sync-structure

# Run the whole bats suite.
test:
	@bats tests/

# Run scripts/verify-genomes.sh without arguments.
verify-structure:
	@bash scripts/verify-genomes.sh

# Run scripts/verify-genomes.sh with --sync.
sync-structure:
	@bash scripts/verify-genomes.sh --sync
|
||||
```
|
||||
|
||||
## Note on `helpers.bash`
|
||||
|
||||
`FIXTURE_DIRS` in `helpers.bash` must match `GENOME_DIRS` in `lib/structure.sh`. If you
|
||||
change the canonical layout, update both (the structure tests assume a clean baseline).
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# Per-test setup: load the shared helpers, then bring clean_start into scope.
setup() {
  load 'helpers'

  # Try the LIB_DIR copy first (errors silenced); if it cannot be sourced,
  # fall back to the copy under REPO_ROOT/lib.
  if ! source "${LIB_DIR}/clean-start.sh" 2>/dev/null; then
    source "${REPO_ROOT}/lib/clean-start.sh"
  fi
}
|
||||
@test "clean_start: aligns to origin/base, reverts tracked edits, removes untracked" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
echo "from origin" >> wiki/index.md
|
||||
git add -A && git commit -q -m "origin ahead" && git push -q
|
||||
git reset --hard HEAD~1 # local BEHIND origin/main
|
||||
echo "local junk" >> wiki/log.md # tracked edit, uncommitted
|
||||
echo "scratch" > scratch.txt # genuinely untracked
|
||||
INGEST_BASE="main" clean_start
|
||||
git diff --quiet origin/main # aligned to origin
|
||||
grep -q "from origin" wiki/index.md # forwarded to origin state
|
||||
! grep -q "local junk" wiki/log.md # tracked edit reverted
|
||||
[ ! -f scratch.txt ] # untracked removed
|
||||
}
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# tests/helpers.bash — shared helpers for the bats suite.
|
||||
|
||||
# Absolute locations used by the suite, all derived from the tests directory.
REPO_ROOT="$(cd "${BATS_TEST_DIRNAME}/.." && pwd)"
LIB_DIR="${REPO_ROOT}/lib"
SKILL_SCRIPTS="${REPO_ROOT}/skills/ingest/scripts"

# Directories every freshly scaffolded genome must contain.
# Keep this list in sync with lib/structure.sh.
FIXTURE_DIRS=(
  raw/articles
  raw/transcripts
  raw/code-packs
  raw/assets
  raw/private
  wiki/sources
  wiki/entities
  wiki/concepts
  wiki/queries
  wiki/private
)
|
||||
|
||||
# make_fixture_genome
#   Creates a disposable genome checkout and prints its path on stdout.
#   The checkout contains every directory in FIXTURE_DIRS (each with a
#   .gitkeep), a rendered wiki/index.md and wiki/log.md, and one raw article.
#   It is committed on `main` and pushed to a local bare remote created
#   alongside it. Everything lives under BATS_TEST_TMPDIR (or /tmp when unset).
make_fixture_genome() {
  local sandbox
  sandbox="$(mktemp -d "${BATS_TEST_TMPDIR:-/tmp}/genome.XXXXXX")"
  git init --bare -q "${sandbox}/origin.git"

  local genome="${sandbox}/genome"
  local rel
  for rel in "${FIXTURE_DIRS[@]}"; do
    mkdir -p "${genome}/${rel}"
    touch "${genome}/${rel}/.gitkeep"
  done

  cat > "${genome}/wiki/index.md" <<'EOF'
---
title: "Index — genome-test"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Master Index: genome-test

---

## Sources (`wiki/sources/`)
*Ingested raw materials.*


## Entities (`wiki/entities/`)
*People, tools.*


## Concepts (`wiki/concepts/`)
*Patterns.*


## Queries (`wiki/queries/`)
*Answers.*


## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*slugs only.*


## Private Synthesis (`wiki/private/`)
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
EOF

  cat > "${genome}/wiki/log.md" <<'EOF'
---
title: "Operations Log — genome-test"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Operations Log

---

## [2026-01-01] CONFIG | scaffolded
- run_id: `init`
EOF

  echo "raw test" > "${genome}/raw/articles/test.md"

  # Empty directory used as core.hooksPath below, so no hook can run.
  mkdir -p "${sandbox}/nohooks"

  # Subshell: the caller's working directory is left untouched.
  (
    cd "${genome}"
    git init -q

    # Hermetic repo-local config: a global commit.gpgsign or core.hooksPath
    # would otherwise make `git commit` fail inside the fixture.
    git config --local user.name "Framework Test"
    git config --local user.email "test@genome.local"
    git config --local commit.gpgsign false
    git config --local core.hooksPath "${sandbox}/nohooks"

    git branch -M main
    git remote add origin "${sandbox}/origin.git"
    git add .
    git commit -q -m "chore: initial scaffold"
    git push -q -u origin main
  )

  echo "${genome}"
}
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/index-remove.bats — index-append.py --remove mode.
|
||||
# Per-test setup: load the helpers and export a fresh fixture genome as $g.
setup() {
  load 'helpers'

  GENOMES_ROOT="${BATS_TEST_TMPDIR}"
  export GENOMES_ROOT

  g_src="$(make_fixture_genome)"
  g="$g_src"
  export g
}
|
||||
|
||||
@test "index --remove: deletes the matching entry, keeps the others" {
|
||||
cd "$g"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/b]] — B. `maturity: draft`'
|
||||
grep -q 'sources/a' wiki/index.md
|
||||
grep -q 'sources/b' wiki/index.md
|
||||
|
||||
run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
|
||||
[ "$status" -eq 0 ]
|
||||
! grep -q '\[\[sources/a\]\]' wiki/index.md
|
||||
grep -q 'sources/b' wiki/index.md
|
||||
}
|
||||
|
||||
@test "index --remove: idempotent when the entry is absent" {
|
||||
cd "$g"
|
||||
run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/does-not-exist'
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'nothing to remove'* ]]
|
||||
}
|
||||
|
||||
@test "index --remove: bumps last_updated" {
|
||||
cd "$g"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — A. `maturity: draft`'
|
||||
# set last_updated to an old date, then remove and check it moved
|
||||
sed -i 's/^last_updated:.*/last_updated: 2000-01-01/' wiki/index.md
|
||||
run python3 "$SKILL_SCRIPTS/index-append.py" --remove 'sources/a'
|
||||
[ "$status" -eq 0 ]
|
||||
! grep -q '2000-01-01' wiki/index.md
|
||||
grep -q "last_updated: $(date +%F)" wiki/index.md
|
||||
}
|
||||
|
||||
@test "index --remove: rejects passing both --entry and --remove" {
|
||||
cd "$g"
|
||||
run python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/a]] — x' --remove 'sources/a'
|
||||
[ "$status" -eq 2 ]
|
||||
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
|
||||
# Per-test setup: load the helpers, then source output.sh followed by lint.sh
# from LIB_DIR (same order as listed).
setup() {
  load 'helpers'

  local _kg_lib_name
  for _kg_lib_name in output lint; do
    source "$LIB_DIR/${_kg_lib_name}.sh"
  done
}
|
||||
|
||||
@test "lint tolerates source_path/source_sha256 in source frontmatter" {
|
||||
G="$(make_fixture_genome)"
|
||||
mkdir -p "$G/wiki/sources"
|
||||
cat > "$G/wiki/sources/test-source.md" <<'EOFMD'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
maturity: draft
|
||||
last_updated: 2026-06-25
|
||||
private: false
|
||||
tags: [test]
|
||||
source_path: raw/articles/test.md
|
||||
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
||||
---
|
||||
# Test Source
|
||||
body
|
||||
EOFMD
|
||||
run lint_markdown_file "$G/wiki/sources/test-source.md" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
148
tests/lint.bats
148
tests/lint.bats
|
|
@ -1,148 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/lint.bats — lib/lint.sh validators and the scoped-lint wrapper.
|
||||
load helpers
|
||||
|
||||
# Per-test setup: source output.sh followed by lint.sh from LIB_DIR
# (same order as listed).
setup() {
  local _kg_lib_name
  for _kg_lib_name in output lint; do
    source "$LIB_DIR/${_kg_lib_name}.sh"
  done
}
|
||||
|
||||
# write_page <path> <type> <domain>
#   Writes a minimal wiki page to <path>: YAML frontmatter with the given type
#   and domain, a fixed title/tags/maturity, today's date as last_updated,
#   `private: false`, followed by a one-line body.
write_page() {
  local page_path="$1"
  local page_type="$2"
  local page_domain="$3"
  local today
  today="$(date +%F)"

  printf '%s\n' \
    '---' \
    'title: "T"' \
    "type: ${page_type}" \
    "domain: ${page_domain}" \
    'tags: [x]' \
    'maturity: draft' \
    "last_updated: ${today}" \
    'private: false' \
    '---' \
    'body' \
    > "${page_path}"
}
|
||||
|
||||
@test "lint_markdown_file: a clean page passes (0 errors)" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/good.md" source genome-test
|
||||
run lint_markdown_file "$G/wiki/sources/good.md" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "lint_markdown_file: invalid type + wrong domain are caught" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||
run lint_markdown_file "$G/wiki/sources/bad.md" genome-test
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "check_privacy_consistency: a private/ file without 'private: true' fails" {
|
||||
G="$(make_fixture_genome)"
|
||||
# page sits in wiki/private/ but is flagged private: false → leak
|
||||
write_page "$G/wiki/private/p.md" private genome-test
|
||||
run check_privacy_consistency "$G/wiki/private/p.md"
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "check_page_size: a >800-line page errors" {
|
||||
G="$(make_fixture_genome)"
|
||||
{ write_page "$G/wiki/sources/big.md" source genome-test; yes "x" | head -n 850 >> "$G/wiki/sources/big.md"; }
|
||||
run check_page_size "$G/wiki/sources/big.md"
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "scoped-lint: aggregates findings and exits non-zero on errors" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||
cd "$G"
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/bad.md
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"error(s)"* ]]
|
||||
}
|
||||
|
||||
@test "scoped-lint: a clean page passes (exit 0)" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/good.md" source genome-test
|
||||
cd "$G"
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
# --- duplicate-slug advisory (check_duplicates + its distance helpers) --------
|
||||
# These guard the dedup feature: correct edit-distance math, the warn-only
|
||||
# contract, the exact-self-match skip (run-ingest appends new slugs to the
|
||||
# index before lint runs), and that the advisory fires once per run, not once
|
||||
# per file.
|
||||
|
||||
@test "levenshtein: identical strings have distance 0" {
|
||||
run levenshtein cat cat
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "levenshtein: kitten→sitting is 3 (textbook case)" {
|
||||
run levenshtein kitten sitting
|
||||
[ "$output" -eq 3 ]
|
||||
}
|
||||
|
||||
@test "similarity: identical strings score 100" {
|
||||
run similarity gpu-pricing gpu-pricing
|
||||
[ "$output" -eq 100 ]
|
||||
}
|
||||
|
||||
@test "check_duplicates: warns on a near-duplicate of an indexed concept" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
|
||||
cat > .ingest-manifest.json <<'JSON'
|
||||
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routings.md","status":"created"}]}
|
||||
JSON
|
||||
run check_duplicates .ingest-manifest.json
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"≈"* ]]
|
||||
[[ "$output" == *"llm-routings"* ]]
|
||||
}
|
||||
|
||||
@test "check_duplicates: silent when the new slug is unlike anything indexed" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
|
||||
cat > .ingest-manifest.json <<'JSON'
|
||||
{"raw_source":"src","pages":[{"path":"wiki/concepts/budget-hardware.md","status":"created"}]}
|
||||
JSON
|
||||
run check_duplicates .ingest-manifest.json
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" != *"≈"* ]]
|
||||
}
|
||||
|
||||
@test "check_duplicates: an exact self-match is not flagged (index already has the slug)" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
# run-ingest step 1 inserts this run's slug into the index BEFORE lint runs;
|
||||
# the slug must not be reported as a duplicate of itself.
|
||||
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
|
||||
cat > .ingest-manifest.json <<'JSON'
|
||||
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routing.md","status":"created"}]}
|
||||
JSON
|
||||
run check_duplicates .ingest-manifest.json
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" != *"≈"* ]]
|
||||
}
|
||||
|
||||
@test "scoped-lint: duplicate advisory fires once across multiple files, not per file" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/concepts/data-pipelines.md" concept genome-test
|
||||
write_page "$G/wiki/concepts/other-topic.md" concept genome-test
|
||||
printf -- '- [[concepts/data-pipeline]] — x\n' >> "$G/wiki/index.md"
|
||||
cat > "$G/.ingest-manifest.json" <<'JSON'
|
||||
{"raw_source":"src","pages":[
|
||||
{"path":"wiki/concepts/data-pipelines.md","status":"created"},
|
||||
{"path":"wiki/concepts/other-topic.md","status":"created"}
|
||||
]}
|
||||
JSON
|
||||
cd "$G"
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test \
|
||||
wiki/concepts/data-pipelines.md wiki/concepts/other-topic.md
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$(grep -c "≈" <<< "$output")" -eq 1 ]
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# open-pr-rolling.bats — a re-ingest of the same slug updates the OPEN PR's branch
|
||||
# (force-with-lease) instead of failing. Uses the local bare remote from make_fixture_genome.
|
||||
load helpers
|
||||
setup_file() { :; }
|
||||
|
||||
@test "open-pr: re-ingest of the same slug rolls the branch forward (force-with-lease)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
body="$(mktemp)"; echo body > "$body"
|
||||
|
||||
# first ingest of slug x (v1)
|
||||
mkdir -p wiki/sources; printf 'v1\n' > wiki/sources/x.md
|
||||
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
|
||||
[ "$status" -eq 0 ]
|
||||
git rev-parse --verify feat/ai-ingest-x
|
||||
first="$(git rev-parse feat/ai-ingest-x)"
|
||||
|
||||
# simulate clean_start back to base, then an edited re-ingest (v2)
|
||||
git switch -q main; git reset -q --hard origin/main; git clean -q -fd
|
||||
printf 'v2-edited\n' > wiki/sources/x.md
|
||||
run bash "$SKILL_SCRIPTS/open-pr.sh" --slug x --title "feat: ingest x" --body-file "$body" --base main
|
||||
[ "$status" -eq 0 ]
|
||||
second="$(git rev-parse feat/ai-ingest-x)"
|
||||
|
||||
# the branch was REBUILT from base (diverged), not appended: second is not a descendant of first
|
||||
run git merge-base --is-ancestor "$first" "$second"
|
||||
[ "$status" -ne 0 ]
|
||||
|
||||
# origin received the v2 content (force-with-lease pushed the rebuilt branch)
|
||||
git fetch -q origin
|
||||
run git show "origin/feat/ai-ingest-x:wiki/sources/x.md"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"v2-edited"* ]]
|
||||
}
|
||||
|
||||
@test "open-pr: prune branch override still works after the rolling change" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
body="$(mktemp)"; echo body > "$body"
|
||||
mkdir -p wiki/sources; printf 'p\n' > wiki/sources/p.md
|
||||
run bash "$SKILL_SCRIPTS/open-pr.sh" --branch "chore/prune-orphans-2026-06-30" \
|
||||
--title "chore: prune 1 orphaned source(s)" --body-file "$body" --base main
|
||||
[ "$status" -eq 0 ]
|
||||
git rev-parse --verify "chore/prune-orphans-2026-06-30"
|
||||
}
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
setup() {
|
||||
load 'helpers'
|
||||
export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
|
||||
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
||||
export INGEST_BASE="main"
|
||||
export KG_LIB_DIR="${LIB_DIR}" # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR
|
||||
g_src="$(make_fixture_genome)"
|
||||
export g_name="fixture-genome"
|
||||
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
|
||||
export g="${GENOMES_ROOT}/${g_name}"
|
||||
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
|
||||
}
|
||||
@test "orphan-wiki: no orphans when raw and source page match" {
|
||||
mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
|
||||
hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
|
||||
mkdir -p "${g}/wiki/sources"
|
||||
printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
|
||||
( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
|
||||
run bash "$ORPHAN" "$g_name"
|
||||
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
|
||||
}
|
||||
@test "orphan-wiki: detects orphaned source page" {
|
||||
mkdir -p "${g}/wiki/sources"
|
||||
printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
|
||||
( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
|
||||
run bash "$ORPHAN" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 1'
|
||||
echo "$output" | jq -e '.detail[0].reason == "orphan"'
|
||||
}
|
||||
@test "orphan-wiki: ignores legacy pages without source_path" {
|
||||
mkdir -p "${g}/wiki/sources"
|
||||
printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
|
||||
( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
|
||||
run bash "$ORPHAN" "$g_name"
|
||||
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
|
||||
}
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
|
||||
setup() {
|
||||
load 'helpers'
|
||||
export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
|
||||
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
||||
export INGEST_BASE="main"
|
||||
export KG_LIB_DIR="${LIB_DIR}"
|
||||
|
||||
g_src="$(make_fixture_genome)"
|
||||
export g_name="fixture-genome"
|
||||
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
|
||||
export g="${GENOMES_ROOT}/${g_name}"
|
||||
|
||||
# FIX: make_fixture_genome ships raw/articles/test.md with no source page, which would
|
||||
# otherwise count as a permanent 'new' and break every count assertion. Clear it so each
|
||||
# test controls exactly what is pending (verified: count base becomes 0).
|
||||
( cd "$g" && rm -f raw/articles/test.md && git add -A \
|
||||
&& git commit -q -m "test: clear default raw" && git push -q )
|
||||
}
|
||||
|
||||
@test "pending-raw: detects a brand new raw file" {
|
||||
echo "new content" > "${g}/raw/articles/new-file.md"
|
||||
( cd "$g" && git add . && git commit -q -m "add raw" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 1'
|
||||
echo "$output" | jq -e '.detail[0].path == "raw/articles/new-file.md"'
|
||||
echo "$output" | jq -e '.detail[0].reason == "new"'
|
||||
}
|
||||
|
||||
@test "pending-raw: skips up-to-date files" {
|
||||
echo "ok content" > "${g}/raw/articles/ok-file.md"
|
||||
hash_ok="$(sha256sum "${g}/raw/articles/ok-file.md" | cut -d' ' -f1)"
|
||||
cat > "${g}/wiki/sources/ok-file.md" <<FM
|
||||
---
|
||||
source_sha256: $hash_ok
|
||||
---
|
||||
FM
|
||||
( cd "$g" && git add . && git commit -q -m "add ok" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 0'
|
||||
}
|
||||
|
||||
@test "pending-raw: flags modified files" {
|
||||
echo "content v1" > "${g}/raw/articles/mod-file.md"
|
||||
hash_v1="$(sha256sum "${g}/raw/articles/mod-file.md" | cut -d' ' -f1)"
|
||||
cat > "${g}/wiki/sources/mod-file.md" <<FM
|
||||
---
|
||||
source_sha256: $hash_v1
|
||||
---
|
||||
FM
|
||||
( cd "$g" && git add . && git commit -q -m "v1" && git push -q )
|
||||
echo "content v2" > "${g}/raw/articles/mod-file.md"
|
||||
( cd "$g" && git add . && git commit -q -m "v2" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 1'
|
||||
echo "$output" | jq -e '.detail[0].reason == "modified"'
|
||||
}
|
||||
|
||||
@test "pending-raw: nested subdirectory yields prefixed slug" {
|
||||
mkdir -p "${g}/raw/articles/sub-b"
|
||||
echo "subdir content" > "${g}/raw/articles/sub-b/file.md"
|
||||
( cd "$g" && git add . && git commit -q -m "subdir" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 1'
|
||||
echo "$output" | jq -e '.files[0] == "raw/articles/sub-b/file.md"'
|
||||
}
|
||||
|
||||
@test "pending-raw: excludes noise (.stfolder, .gitkeep)" {
|
||||
touch "${g}/raw/articles/.gitkeep"
|
||||
mkdir -p "${g}/raw/articles/.stfolder"
|
||||
touch "${g}/raw/articles/.stfolder/sync.log"
|
||||
( cd "$g" && git add . && git commit -q -m "noise" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 0'
|
||||
}
|
||||
|
||||
@test "pending-raw: reports both files on a slug collision" {
|
||||
mkdir -p "${g}/raw/articles/cibo"
|
||||
echo "c1" > "${g}/raw/articles/cibo-pane.md"
|
||||
echo "c2" > "${g}/raw/articles/cibo/pane.md"
|
||||
( cd "$g" && git add . && git commit -q -m "collision" && git push -q )
|
||||
run bash "$PENDING" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.count == 2'
|
||||
}
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/permissions.bats
|
||||
# Locks down the repo's file permissions so that a `cp`/deploy preserves the executable bit and
|
||||
# the "Permission denied" failure does not recur (e.g. ingest-semantic.py launched directly).
|
||||
#
|
||||
# Principle:
|
||||
# - scripts with a shebang that are launched directly -> executable (git mode 100755)
|
||||
# - *sourced* libraries (lib/, providers/, registry.sh, globals.env) -> NOT executable (100644)
|
||||
|
||||
REPO="${BATS_TEST_DIRNAME}/.."
|
||||
|
||||
# Entry points / executable scripts (all have a shebang; some are also run by hand for debugging)
|
||||
EXECUTABLES=(
|
||||
skills/ingest/scripts/ingest-semantic.py
|
||||
skills/ingest/scripts/run-ingest.sh
|
||||
skills/ingest/scripts/scoped-lint.sh
|
||||
skills/ingest/scripts/open-pr.sh
|
||||
skills/ingest/scripts/log-append.sh
|
||||
skills/ingest/scripts/slug.sh
|
||||
skills/ingest/scripts/pending-raw.sh
|
||||
skills/ingest/scripts/orphan-wiki.sh
|
||||
skills/ingest/scripts/index-append.py
|
||||
scripts/add-genome.sh
|
||||
scripts/setup.sh
|
||||
scripts/setup-genomes.sh
|
||||
scripts/setup-master.sh
|
||||
scripts/lint-genomes.sh
|
||||
scripts/verify-genomes.sh
|
||||
)
|
||||
|
||||
# Sourced libraries: these must NOT be executable.
|
||||
LIBRARIES=(
|
||||
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/clean-start.sh
|
||||
providers/forgejo.sh providers/github.sh
|
||||
registry.sh globals.env
|
||||
)
|
||||
|
||||
git_mode() { git -C "$REPO" ls-files -s -- "$1" | awk '{print $1}'; }
|
||||
|
||||
@test "executable scripts have the +x bit on disk" {
|
||||
for f in "${EXECUTABLES[@]}"; do
|
||||
[ -x "${REPO}/${f}" ] || { echo "NON eseguibile su disco: $f"; return 1; }
|
||||
done
|
||||
}
|
||||
|
||||
@test "executable scripts are recorded 100755 in git" {
|
||||
for f in "${EXECUTABLES[@]}"; do
|
||||
mode="$(git_mode "$f")"
|
||||
[ -n "$mode" ] || { echo "non tracciato in git: $f"; return 1; }
|
||||
[ "$mode" = "100755" ] || { echo "git mode $mode (atteso 100755): $f"; return 1; }
|
||||
done
|
||||
}
|
||||
|
||||
@test "sourced libraries are NOT executable in git (100644)" {
|
||||
for f in "${LIBRARIES[@]}"; do
|
||||
mode="$(git_mode "$f")"
|
||||
[ -z "$mode" ] && continue # non tracciato/opzionale -> salta
|
||||
[ "$mode" = "100644" ] || { echo "git mode $mode (atteso 100644, e' sourced): $f"; return 1; }
|
||||
done
|
||||
}
|
||||
|
||||
@test "executable shell scripts pass bash -n (syntax)" {
|
||||
for f in "${EXECUTABLES[@]}"; do
|
||||
case "$f" in
|
||||
*.sh) bash -n "${REPO}/${f}" || { echo "syntax error: $f"; return 1; } ;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# raw-commit-quiet.bats — quiet-window behaviour of genome-raw-commit.sh.
|
||||
# No Syncthing (no API key -> default author); pushes to a local bare repo via GENOME_PUSH_URL.
|
||||
setup() {
|
||||
SCRIPT="${BATS_TEST_DIRNAME}/../deploy/nexus/genome-raw-commit.sh"
|
||||
export HOME="${BATS_TEST_TMPDIR}/home"; mkdir -p "$HOME/.config"
|
||||
root="${BATS_TEST_TMPDIR}/vaults"; mkdir -p "$root"
|
||||
bare="${BATS_TEST_TMPDIR}/origin.git"; git init -q --bare "$bare"
|
||||
cat > "$HOME/.config/knowledge-genome.env" <<EOF
|
||||
GENOME_VAULTS_ROOT=$root
|
||||
GENOME_BASE=main
|
||||
FORGEJO_USER=n8n-bot
|
||||
FORGEJO_HOST=127.0.0.1:3001
|
||||
FORGEJO_OWNER=Keru
|
||||
COMMITTER_NAME=n8n-bot
|
||||
COMMITTER_EMAIL=n8n-bot@homelab
|
||||
DEFAULT_AUTHOR_NAME=Tester
|
||||
DEFAULT_AUTHOR_EMAIL=tester@local
|
||||
EOF
|
||||
export g="genome-test"; export vault="$root/$g"
|
||||
git clone -q "$bare" "$vault" 2>/dev/null || mkdir -p "$vault"
|
||||
( cd "$vault"
|
||||
git init -q 2>/dev/null || true
|
||||
git config user.name n8n-bot; git config user.email n8n-bot@homelab; git config commit.gpgsign false
|
||||
git checkout -q -b main 2>/dev/null || git switch -q main
|
||||
mkdir -p raw/articles; echo seed > raw/articles/.gitkeep
|
||||
git add -A; git commit -q -m init
|
||||
git remote add origin "$bare" 2>/dev/null || git remote set-url origin "$bare"
|
||||
git push -q -u origin main )
|
||||
export GENOME_PUSH_URL="$bare" # test seam -> push to the local bare repo
|
||||
}
|
||||
files() { ( cd "$vault" && git ls-files raw/ ) > "${BATS_TEST_TMPDIR}/f.txt"; }
|
||||
|
||||
@test "raw-commit: holds a freshly-written raw, commits it once it settles" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
echo "still typing" > "$vault/raw/articles/hot.md" # fresh -> hot
|
||||
echo "finished" > "$vault/raw/articles/stable.md"
|
||||
touch -d "10 minutes ago" "$vault/raw/articles/stable.md" # settled
|
||||
|
||||
run bash "$SCRIPT" "$g"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.status=="ok"'
|
||||
files
|
||||
grep -q 'raw/articles/stable.md' "${BATS_TEST_TMPDIR}/f.txt" # committed
|
||||
! grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt" # held back
|
||||
|
||||
touch -d "10 minutes ago" "$vault/raw/articles/hot.md" # now it settles
|
||||
run bash "$SCRIPT" "$g"
|
||||
[ "$status" -eq 0 ]
|
||||
files
|
||||
grep -q 'raw/articles/hot.md' "${BATS_TEST_TMPDIR}/f.txt" # now committed
|
||||
}
|
||||
|
||||
@test "raw-commit: noop with held count while everything is still settling" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
echo "typing" > "$vault/raw/articles/wip.md" # fresh -> hot
|
||||
run bash "$SCRIPT" "$g"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.status=="noop"'
|
||||
echo "$output" | jq -e '.held==1'
|
||||
}
|
||||
|
||||
@test "raw-commit: a deletion is committed immediately (not subject to the quiet window)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
# commit a settled file first
|
||||
echo done > "$vault/raw/articles/old.md"; touch -d "10 minutes ago" "$vault/raw/articles/old.md"
|
||||
run bash "$SCRIPT" "$g"; [ "$status" -eq 0 ]
|
||||
files; grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
|
||||
# now delete it -> should commit the removal even though "just changed"
|
||||
rm "$vault/raw/articles/old.md"
|
||||
run bash "$SCRIPT" "$g"
|
||||
[ "$status" -eq 0 ]
|
||||
echo "$output" | jq -e '.status=="ok"'
|
||||
files; ! grep -q 'raw/articles/old.md' "${BATS_TEST_TMPDIR}/f.txt"
|
||||
}
|
||||
|
|
@ -1,211 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/run-ingest.bats — end-to-end orchestrator test (no LLM, no network).
|
||||
# Simulates pi's output (a source page + manifest) and runs the mechanical pass.
|
||||
load helpers
|
||||
|
||||
@test "run-ingest: DRY_RUN end-to-end updates index + log and opens a dry PR" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
# --- simulate the semantic pass that pi would have done ---
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-03
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"model": "qwen3.5-9b",
|
||||
"reasoning": "Ingested the test source.",
|
||||
"pr_summary": "Ingest of test: 1 source page.",
|
||||
"contradictions": "None",
|
||||
"pages": [
|
||||
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||
export DRY_RUN=1
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"status":"ok"'* ]]
|
||||
[[ "$output" == *'"lint_clean":true'* ]]
|
||||
[[ "$output" == *'"conflict":false'* ]]
|
||||
|
||||
# side effects on the working tree
|
||||
grep -q 'sources/test-source' wiki/index.md
|
||||
grep -q 'INGEST | test' wiki/log.md
|
||||
git rev-parse --verify feat/ai-ingest-test
|
||||
}
|
||||
|
||||
@test "run-ingest: a conflict page is labelled and lands in the Conflicts section" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
cat > wiki/queries/conflict-pricing-2026-06-03.md <<'EOF'
|
||||
---
|
||||
title: "Conflict: pricing"
|
||||
type: conflict
|
||||
domain: genome-test
|
||||
maturity: draft
|
||||
last_updated: 2026-06-03
|
||||
private: false
|
||||
---
|
||||
conflict body
|
||||
EOF
|
||||
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"model": "m",
|
||||
"reasoning": "Flagged a contradiction.",
|
||||
"pr_summary": "Conflict on pricing.",
|
||||
"contradictions": "1 conflict file created — pricing",
|
||||
"pages": [
|
||||
{"path": "wiki/queries/conflict-pricing-2026-06-03.md", "summary": "ignored", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||
export DRY_RUN=1
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"conflict":true'* ]]
|
||||
# listed by slug under the Conflicts section
|
||||
grep -q 'queries/conflict-pricing-2026-06-03' wiki/index.md
|
||||
}
|
||||
|
||||
@test "run-ingest: records INGEST_MODEL in the log (manifest carries no model field)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-04
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
|
||||
# New contract: NO "model" field — the orchestrator supplies it via INGEST_MODEL.
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"reasoning": "Ingested the test source.",
|
||||
"pr_summary": "Ingest of test: 1 source page.",
|
||||
"contradictions": "None",
|
||||
"pages": [
|
||||
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
|
||||
export INGEST_MODEL="qwen-test-tag"
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"status":"ok"'* ]]
|
||||
grep -q 'qwen-test-tag' wiki/log.md
|
||||
}
|
||||
|
||||
@test "run-ingest: rejects a manifest path that escapes wiki/ (traversal)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||
"pages":[{"path":"wiki/../etc/passwd","summary":"x","maturity":"draft","status":"created"}] }
|
||||
EOF
|
||||
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *'"status":"error"'* ]]
|
||||
}
|
||||
|
||||
@test "run-ingest: honours INGEST_BASE for the PR base" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-04
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||
"pages":[{"path":"wiki/sources/test-source.md","summary":"s","maturity":"draft","status":"created"}] }
|
||||
EOF
|
||||
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
export INGEST_BASE="develop"
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"develop"* ]]
|
||||
}
|
||||
|
||||
@test "run-ingest: branch name matches slug.sh --raw for nested raw paths" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
mkdir -p wiki/sources
|
||||
cat > wiki/sources/cibo-il-pane.md <<'EOFMD'
|
||||
---
|
||||
title: "Il Pane"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [cibo]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-25
|
||||
private: false
|
||||
source_path: raw/articles/cibo/il-pane.md
|
||||
source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
||||
---
|
||||
# Il Pane
|
||||
body
|
||||
EOFMD
|
||||
cat > .ingest-manifest.json <<'EOFJSON'
|
||||
{
|
||||
"raw_source": "raw/articles/cibo/il-pane.md",
|
||||
"model": "qwen3.5-9b",
|
||||
"reasoning": "Ingest.",
|
||||
"pr_summary": "Ingest summary.",
|
||||
"contradictions": "None",
|
||||
"pages": [
|
||||
{"path": "wiki/sources/cibo-il-pane.md", "summary": "Summary.", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOFJSON
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"cibo-il-pane"* ]]
|
||||
}
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/run-prune.bats — prune orphaned sources (no LLM, no network; DRY_RUN).
|
||||
setup() {
|
||||
load 'helpers'
|
||||
export PRUNE="${SKILL_SCRIPTS}/run-prune.sh"
|
||||
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
||||
export INGEST_BASE="main"
|
||||
export KG_LIB_DIR="${LIB_DIR}"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||
export DRY_RUN=1
|
||||
g_src="$(make_fixture_genome)"; export g_name="fixture-genome"
|
||||
mv "$g_src" "${GENOMES_ROOT}/${g_name}"; export g="${GENOMES_ROOT}/${g_name}"
|
||||
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m clear && git push -q )
|
||||
}
|
||||
|
||||
@test "run-prune: removes only the orphaned source + its index entry, opens a dry PR" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
cd "$g"
|
||||
# kept: raw exists. orphan: raw missing.
|
||||
echo content > raw/articles/kept.md
|
||||
h="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
|
||||
printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" > wiki/sources/kept.md
|
||||
printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/kept]] — kept. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/orphan]] — orphan. `maturity: draft`'
|
||||
git add -A && git commit -q -m setup && git push -q
|
||||
|
||||
run bash "$PRUNE" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"status":"ok"'* ]]
|
||||
[[ "$output" == *'"count":1'* ]]
|
||||
|
||||
# only the orphan page is gone
|
||||
[ ! -f wiki/sources/orphan.md ]
|
||||
[ -f wiki/sources/kept.md ]
|
||||
# index reflects the removal
|
||||
! grep -q 'sources/orphan' wiki/index.md
|
||||
grep -q 'sources/kept' wiki/index.md
|
||||
# committed on a chore/ branch (NOT feat/ai-ingest-*)
|
||||
git rev-parse --verify "chore/prune-orphans-$(date +%F)"
|
||||
}
|
||||
|
||||
@test "run-prune: no orphans -> count 0 and no PR/branch" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
cd "$g"
|
||||
echo content > raw/articles/kept.md
|
||||
h="$(sha256sum raw/articles/kept.md | cut -d' ' -f1)"
|
||||
printf -- '---\nsource_path: raw/articles/kept.md\nsource_sha256: %s\n---\nbody\n' "$h" > wiki/sources/kept.md
|
||||
git add -A && git commit -q -m setup && git push -q
|
||||
|
||||
run bash "$PRUNE" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"count":0'* ]]
|
||||
run git rev-parse --verify "chore/prune-orphans-$(date +%F)"
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "run-prune: refuses when an orphan path would escape wiki/ (defense in depth)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
cd "$g"
|
||||
# NOTE(review): the title announces a path-traversal refusal, but nothing below
# feeds run-prune a path that escapes wiki/. The body only checks the ordinary
# case: one orphaned page is counted and removed, exit status 0. Either add a
# real traversal fixture or retitle the test — TODO confirm the intent.
# legacy page without source_path is ignored; a page with a missing raw is the orphan.
|
||||
printf -- '---\nsource_path: raw/articles/gone.md\nsource_sha256: abc\n---\nbody\n' > wiki/sources/orphan.md
|
||||
git add -A && git commit -q -m setup && git push -q
|
||||
run bash "$PRUNE" "$g_name"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"count":1'* ]]
|
||||
[ ! -f wiki/sources/orphan.md ]
|
||||
}
|
||||
|
|
@ -1,102 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/scripts.bats — unit tests for the deterministic skill scripts.
|
||||
load helpers
|
||||
|
||||
@test "slug: path with extension and spaces" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "raw/articles/My Test Source.md"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" = "my-test-source" ]
|
||||
}
|
||||
|
||||
@test "slug: punctuation and repeats collapse to single hyphens" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "Qualche Concetto!! Strano"
|
||||
[ "$output" = "qualche-concetto-strano" ]
|
||||
}
|
||||
|
||||
@test "log-append: appends a well-formed INGEST entry with a run_id" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --type INGEST --subject foo --model m \
|
||||
--context "[[raw/x]]" --output "[[sources/foo]]" --reasoning "why"
|
||||
[ "$status" -eq 0 ]
|
||||
grep -q "INGEST | foo" wiki/log.md
|
||||
grep -q '^- run_id: `' wiki/log.md
|
||||
grep -q '^- model: `m`' wiki/log.md
|
||||
}
|
||||
|
||||
@test "log-append: rejects an invalid TYPE" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --type BOGUS --subject foo
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "index-append: inserts under the right section and keeps it sorted" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/zzz]] — z. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/aaa]] — a. `maturity: draft`'
|
||||
a=$(grep -n 'sources/aaa' wiki/index.md | cut -d: -f1)
|
||||
z=$(grep -n 'sources/zzz' wiki/index.md | cut -d: -f1)
|
||||
[ -n "$a" ] && [ -n "$z" ]
|
||||
[ "$a" -lt "$z" ]
|
||||
}
|
||||
|
||||
@test "index-append: bumps frontmatter last_updated to today" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Concepts --entry '- [[concepts/x]] — x. `maturity: draft`'
|
||||
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||
}
|
||||
|
||||
@test "index-append: is idempotent for the same entry" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||
[ "$(grep -c 'sources/dup' wiki/index.md)" -eq 1 ]
|
||||
}
|
||||
|
||||
@test "index-append: updates an existing entry by wikilink path (no duplicate)" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — old summary. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — new summary. `maturity: stable`'
|
||||
[ "$(grep -c 'sources/foo' wiki/index.md)" -eq 1 ]
|
||||
grep -q 'new summary' wiki/index.md
|
||||
! grep -q 'old summary' wiki/index.md
|
||||
}
|
||||
|
||||
@test "slug: refuses an all-symbols input (no empty slug)" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "!!!.md"
|
||||
[ "$status" -ne 0 ]
|
||||
[ -z "$output" ] || [[ "$output" != *"feat/ai-ingest-"* ]]
|
||||
}
|
||||
|
||||
@test "index-append: self-heals a frontmatter missing last_updated" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > wiki/index.md <<'EOF'
|
||||
---
|
||||
title: "Index"
|
||||
type: index
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
private: false
|
||||
---
|
||||
|
||||
# Index
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*x*
|
||||
EOF
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
|
||||
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||
}
|
||||
|
||||
@test "log-append: dedup on stable run_id prevents duplicate entries" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
stable_id="test-stable-run-id-001"
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
|
||||
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
|
||||
[ "$status" -eq 0 ]
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
|
||||
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"already present"* ]]
|
||||
count="$(grep -cF "run_id: \`${stable_id}\`" wiki/log.md || true)"
|
||||
[ "$count" -eq 1 ]
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
|
||||
setup() {
  # Load the shared helpers file, then record the path of the script under
  # test (presumably SKILL_SCRIPTS is provided by the helpers; confirm there).
  load helpers
  SLUG="$SKILL_SCRIPTS/slug.sh"
}
|
||||
|
||||
@test "slug --raw: flat file remains unchanged" {
|
||||
run bash "$SLUG" --raw "raw/articles/il-pane.md"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" = "il-pane" ]
|
||||
}
|
||||
|
||||
@test "slug --raw: nested file gets folder prefix" {
|
||||
run bash "$SLUG" --raw "raw/articles/cibo/il-pane.md"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" = "cibo-il-pane" ]
|
||||
}
|
||||
|
||||
@test "slug --raw: distinct subdirs avoid collision" {
|
||||
s1="$(bash "$SLUG" --raw "raw/articles/cibo/pane.md")"
|
||||
s2="$(bash "$SLUG" --raw "raw/articles/storia/pane.md")"
|
||||
[ "$s1" != "$s2" ]
|
||||
}
|
||||
|
||||
@test "slug --raw: Bash and Python-calling-bash agree (single implementation)" {
|
||||
b="$(bash "$SLUG" --raw "raw/articles/cibo/il-pane.md")"
|
||||
p="$(python3 -c "import subprocess;print(subprocess.check_output(['bash','$SLUG','--raw','raw/articles/cibo/il-pane.md'],text=True).strip())")"
|
||||
[ "$b" = "$p" ]
|
||||
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/structure.bats — canonical-structure verify/sync.
|
||||
load helpers
|
||||
|
||||
setup() {
  # Bring the output and structure libraries into the test shell so their
  # functions can be called directly by the tests.
  . "$LIB_DIR/output.sh"
  . "$LIB_DIR/structure.sh"
}
|
||||
|
||||
@test "structure_report: a full fixture has no drift" {
|
||||
G="$(make_fixture_genome)"
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "structure_report: flags a missing canonical dir" {
|
||||
G="$(make_fixture_genome)"
|
||||
rm -rf "$G/wiki/private"
|
||||
run structure_report "$G"
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"wiki/private"* ]]
|
||||
}
|
||||
|
||||
@test "structure_report: notes an extra dir but does not fail on it" {
|
||||
G="$(make_fixture_genome)"
|
||||
mkdir -p "$G/wiki/experiments"
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"experiments"* ]]
|
||||
}
|
||||
|
||||
@test "structure_sync: creates missing dirs and is idempotent" {
|
||||
G="$(make_fixture_genome)"
|
||||
rm -rf "$G/wiki/private" "$G/raw/transcripts"
|
||||
structure_sync "$G"
|
||||
[ -d "$G/wiki/private" ] && [ -d "$G/raw/transcripts" ]
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
structure_sync "$G" # second run: nothing to do
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue