Compare commits
32 commits
66346f5f4e
...
bcba29fc76
| Author | SHA1 | Date | |
|---|---|---|---|
| bcba29fc76 | |||
| 1d83af9639 | |||
| f3af62897f | |||
| ab1141e132 | |||
| 2426b09b50 | |||
| 93bc5bb007 | |||
| ddf34944e0 | |||
| 9a81bb2d6f | |||
| 3272450ec5 | |||
| 00fb74c76a | |||
| 99806b8b3d | |||
| e8980b5526 | |||
| 203fbadd63 | |||
| 6d1151fa5a | |||
| 50d3f39f51 | |||
| 39775398f7 | |||
| 76700cd2a6 | |||
| 13d34b4906 | |||
| 42c1302035 | |||
| 624bd5f8d5 | |||
| 35f476c2c7 | |||
| e0465b6f25 | |||
| b88468cc06 | |||
| 9b61e74821 | |||
| ff0828f5a7 | |||
| e531135bf3 | |||
| 33697e9b82 | |||
| 2e06d8f4e8 | |||
| 3005366cfd | |||
| ee4f5beacf | |||
| ea9283637b | |||
| 528e9c6c48 |
28 changed files with 1900 additions and 216 deletions
28
Makefile
28
Makefile
|
|
@ -1,19 +1,22 @@
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Knowledge Genome - Makefile v. 1.0.0
|
# Knowledge Genome - Makefile v. 1.1.1
|
||||||
# Orchestrates the setup and management of the knowledge base.
|
# Orchestrates the setup and management of the knowledge base.
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
include globals.env
|
include globals.env
|
||||||
export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
|
export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
|
||||||
|
|
||||||
.PHONY: setup add-genome status lint lock doctor sync help
|
.PHONY: setup add-genome status lint lock doctor sync test verify-structure sync-structure help
|
||||||
|
|
||||||
help:
|
help:
|
||||||
@echo "Available commands:"
|
@echo "Available commands:"
|
||||||
@echo " make setup - Full system initialization"
|
@echo " make setup - Full system initialization"
|
||||||
@echo " make add-genome - Register and scaffold a new genome"
|
@echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo]"
|
||||||
@echo " make status - Check submodule and encryption status"
|
@echo " make status - Check submodule and encryption status"
|
||||||
@echo " make lint - Verify schema, privacy flags, and metadata"
|
@echo " make lint - Verify schema, privacy flags, and metadata"
|
||||||
|
@echo " make verify-structure - Report directory drift across all genomes"
|
||||||
|
@echo " make sync-structure - Create any missing canonical dirs (safe)"
|
||||||
|
@echo " make test - Run the bats test suite (no LLM/GPU needed)"
|
||||||
@echo " make lock - Lock all encrypted files across all genomes"
|
@echo " make lock - Lock all encrypted files across all genomes"
|
||||||
@echo " make doctor - Verify all required tools are installed"
|
@echo " make doctor - Verify all required tools are installed"
|
||||||
@echo " make sync - Sync submodules and report unpushed commits"
|
@echo " make sync - Sync submodules and report unpushed commits"
|
||||||
|
|
@ -27,16 +30,26 @@ setup:
|
||||||
add-genome:
|
add-genome:
|
||||||
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
|
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
|
||||||
echo "Error: NAME and DESC are required."; \
|
echo "Error: NAME and DESC are required."; \
|
||||||
echo "Usage: make add-genome NAME=my-genome DESC='My description'"; \
|
echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo]"; \
|
||||||
exit 1; \
|
exit 1; \
|
||||||
fi
|
fi
|
||||||
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)"
|
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)"
|
||||||
|
|
||||||
status:
|
status:
|
||||||
@echo "--- Master Status ---"
|
@echo "--- Master Status ---"
|
||||||
@git submodule status
|
@git submodule status
|
||||||
@echo "--- Encryption Status (First 10 files) ---"
|
@echo "--- Encryption Status (per genome) ---"
|
||||||
@git-crypt status | head -n 10
|
@git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true'
|
||||||
|
|
||||||
|
verify-structure:
|
||||||
|
@bash scripts/verify-genomes.sh
|
||||||
|
|
||||||
|
sync-structure:
|
||||||
|
@bash scripts/verify-genomes.sh --sync
|
||||||
|
|
||||||
|
test:
|
||||||
|
@command -v bats >/dev/null 2>&1 || { echo " MISSING: bats (sudo apt install bats)"; exit 1; }
|
||||||
|
@bats tests/
|
||||||
|
|
||||||
doctor:
|
doctor:
|
||||||
@echo "Checking required tools..."
|
@echo "Checking required tools..."
|
||||||
|
|
@ -45,6 +58,7 @@ doctor:
|
||||||
@command -v curl >/dev/null 2>&1 || { echo " MISSING: curl"; exit 1; }
|
@command -v curl >/dev/null 2>&1 || { echo " MISSING: curl"; exit 1; }
|
||||||
@command -v jq >/dev/null 2>&1 || { echo " MISSING: jq"; exit 1; }
|
@command -v jq >/dev/null 2>&1 || { echo " MISSING: jq"; exit 1; }
|
||||||
@command -v bw >/dev/null 2>&1 || echo " OPTIONAL: bw (Bitwarden CLI) not found — key injection will be manual."
|
@command -v bw >/dev/null 2>&1 || echo " OPTIONAL: bw (Bitwarden CLI) not found — key injection will be manual."
|
||||||
|
@command -v python3 >/dev/null 2>&1 || echo " OPTIONAL: python3 not found — needed for 'make test' and the ingest skill (index-append.py), not for setup."
|
||||||
@echo "System ready."
|
@echo "System ready."
|
||||||
|
|
||||||
sync:
|
sync:
|
||||||
|
|
|
||||||
307
README.md
307
README.md
|
|
@ -19,16 +19,17 @@ and a human-in-the-loop Git Flow for quality control.
|
||||||
5. [Configuration](#configuration)
|
5. [Configuration](#configuration)
|
||||||
6. [Quick Start](#quick-start)
|
6. [Quick Start](#quick-start)
|
||||||
7. [Makefile Reference](#makefile-reference)
|
7. [Makefile Reference](#makefile-reference)
|
||||||
8. [Genome Lifecycle](#genome-lifecycle)
|
8. [Testing](#testing)
|
||||||
9. [Security Model](#security-model)
|
9. [Genome Lifecycle](#genome-lifecycle)
|
||||||
10. [Key Management](#key-management)
|
10. [Security Model](#security-model)
|
||||||
11. [Agent Sessions](#agent-sessions)
|
11. [Key Management](#key-management)
|
||||||
12. [Workflows](#workflows)
|
12. [Agent Sessions](#agent-sessions)
|
||||||
13. [Knowledge Quality](#knowledge-quality)
|
13. [Workflows](#workflows)
|
||||||
14. [Knowledge Schema](#knowledge-schema)
|
14. [Knowledge Quality](#knowledge-quality)
|
||||||
15. [Collaboration Model](#collaboration-model)
|
15. [Knowledge Schema](#knowledge-schema)
|
||||||
16. [Optional Extensions](#optional-extensions)
|
16. [Collaboration Model](#collaboration-model)
|
||||||
17. [Troubleshooting](#troubleshooting)
|
17. [Optional Extensions](#optional-extensions)
|
||||||
|
18. [Troubleshooting](#troubleshooting)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -49,6 +50,7 @@ evolving synthesis. Knowledge is compiled once and kept current.
|
||||||
Contradictions have been flagged. The synthesis already reflects everything ingested.
|
Contradictions have been flagged. The synthesis already reflects everything ingested.
|
||||||
|
|
||||||
This means:
|
This means:
|
||||||
|
|
||||||
- No vector database.
|
- No vector database.
|
||||||
- No embedding pipeline.
|
- No embedding pipeline.
|
||||||
- No external retrieval infrastructure.
|
- No external retrieval infrastructure.
|
||||||
|
|
@ -104,15 +106,23 @@ genome-{name}/
|
||||||
### Three layers
|
### Three layers
|
||||||
|
|
||||||
| Layer | Path | Owner | Rule |
|
| Layer | Path | Owner | Rule |
|
||||||
|-------|------|-------|------|
|
| ----------- | ----------- | ----------- | ----------------------------------------------------- |
|
||||||
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
||||||
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
||||||
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
||||||
|
|
||||||
|
### Linked projects (optional)
|
||||||
|
|
||||||
|
A genome can optionally declare a **linked project repository** — a separate repo where
|
||||||
|
the knowledge in that genome is meant to be applied (e.g. `genome-dev` linked to an app
|
||||||
|
repo). The link is recorded as a third field in the registry and rendered into the
|
||||||
|
genome's `AGENTS.md` (`## Linked Project`). A genome with no link is _knowledge-only_ and
|
||||||
|
behaves exactly as before. See [Configuration](#configuration).
|
||||||
|
|
||||||
### Framework structure
|
### Framework structure
|
||||||
|
|
||||||
```text
|
```text
|
||||||
knowledge-genome-setup/ ← This repository (setup tooling)
|
knowledge-genome-orchestrator/ ← This repository (setup tooling)
|
||||||
├── globals.env ← Static KEY=VALUE config (Make-includable)
|
├── globals.env ← Static KEY=VALUE config (Make-includable)
|
||||||
├── registry.sh ← Bash-only: GENOMES array + dynamic paths
|
├── registry.sh ← Bash-only: GENOMES array + dynamic paths
|
||||||
├── Makefile ← Entry point for all operations
|
├── Makefile ← Entry point for all operations
|
||||||
|
|
@ -120,6 +130,7 @@ knowledge-genome-setup/ ← This repository (setup tooling)
|
||||||
│ ├── output.sh ← Terminal helpers (colors, log levels)
|
│ ├── output.sh ← Terminal helpers (colors, log levels)
|
||||||
│ ├── deps.sh ← Dependency validation
|
│ ├── deps.sh ← Dependency validation
|
||||||
│ ├── scaffold.sh ← Template rendering engine
|
│ ├── scaffold.sh ← Template rendering engine
|
||||||
|
│ ├── structure.sh ← Canonical genome layout (single source of truth)
|
||||||
│ ├── lint.sh ← Per-file validation functions
|
│ ├── lint.sh ← Per-file validation functions
|
||||||
│ └── git-crypt.sh ← git-crypt lifecycle (init, export, verify, rotate)
|
│ └── git-crypt.sh ← git-crypt lifecycle (init, export, verify, rotate)
|
||||||
├── providers/
|
├── providers/
|
||||||
|
|
@ -130,18 +141,41 @@ knowledge-genome-setup/ ← This repository (setup tooling)
|
||||||
│ ├── setup-master.sh ← Master repo initialisation
|
│ ├── setup-master.sh ← Master repo initialisation
|
||||||
│ ├── setup-genomes.sh ← Genome provisioning loop
|
│ ├── setup-genomes.sh ← Genome provisioning loop
|
||||||
│ ├── add-genome.sh ← Add a single new genome
|
│ ├── add-genome.sh ← Add a single new genome
|
||||||
│ └── lint-genomes.sh ← Quality control across all genomes
|
│ ├── lint-genomes.sh ← Quality control across all genomes
|
||||||
└── templates/
|
│ └── verify-genomes.sh ← Structure verify / --sync across all genomes
|
||||||
├── agents-genome.md ← Per-genome agent contract template
|
├── templates/
|
||||||
├── agents-master.md ← Master coordination schema template
|
│ ├── agents-genome.md ← Per-genome agent contract template
|
||||||
├── wiki-index.md ← Index template (rendered per genome)
|
│ ├── agents-master.md ← Master coordination schema template
|
||||||
├── wiki-log.md ← Log template (rendered per genome)
|
│ ├── readme-master.md ← Master repo README template
|
||||||
├── pr-description.md ← PR review checklist template
|
│ ├── wiki-index.md ← Index template (rendered per genome)
|
||||||
├── pre-commit.sh ← Security hook template
|
│ ├── wiki-log.md ← Log template (rendered per genome)
|
||||||
├── gitattributes ← Git encryption rules template
|
│ ├── pr-description.md ← PR review checklist template
|
||||||
└── gitignore ← Git ignore template
|
│ ├── pre-commit.sh ← Security hook template
|
||||||
|
│ ├── gitattributes ← Git encryption rules template
|
||||||
|
│ └── gitignore ← Git ignore template
|
||||||
|
├── skills/
|
||||||
|
│ └── ingest/ ← pi skill: deployed to the AI node (vm101)
|
||||||
|
│ ├── SKILL.md ← Semantic-only contract (read/edit, emits manifest)
|
||||||
|
│ ├── references/ ← On-demand reference docs for the agent
|
||||||
|
│ └── scripts/ ← Deterministic post-processor (runs outside the agent)
|
||||||
|
│ ├── run-ingest.sh ← Orchestrator: consumes the manifest, emits one JSON line
|
||||||
|
│ ├── slug.sh ← Slug normalisation
|
||||||
|
│ ├── index-append.py ← Sorted insert into wiki/index.md + last_updated bump
|
||||||
|
│ ├── log-append.sh ← Append a wiki/log.md entry
|
||||||
|
│ ├── scoped-lint.sh ← Lint only the pages touched this run (reuses lib/lint.sh)
|
||||||
|
│ └── open-pr.sh ← Branch / commit / push / open PR (DRY_RUN seam for tests)
|
||||||
|
└── tests/ ← bats suite — deterministic, no LLM/GPU (see Testing)
|
||||||
|
├── helpers.bash
|
||||||
|
├── scripts.bats
|
||||||
|
├── lint.bats
|
||||||
|
├── structure.bats
|
||||||
|
└── run-ingest.bats
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> The `skills/ingest/` directory is version-controlled here but **deployed** to the AI
|
||||||
|
> node (vm101) under `~/.pi/agent/skills/ingest`. The agent (`pi`) does only semantic work
|
||||||
|
> and writes a manifest; `run-ingest.sh` does the mechanical steps. See [Workflows → Ingest](#ingest).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## System Requirements
|
## System Requirements
|
||||||
|
|
@ -154,7 +188,10 @@ All tools (git-crypt, bw, qmd) have native Linux binaries.
|
||||||
### macOS — full support
|
### macOS — full support
|
||||||
|
|
||||||
All scripts are compatible with macOS. Requirements:
|
All scripts are compatible with macOS. Requirements:
|
||||||
- bash 3.2+ (macOS default) — fully supported. All `bash 4+` constructs removed.
|
|
||||||
|
- bash 3.2+ (macOS default) — supported for the **setup scripts** (`make` targets, scaffolding).
|
||||||
|
The `ingest` skill uses bash 4+ constructs (`mapfile`), but it is deployed and run on the
|
||||||
|
Linux AI node, not on the macOS setup machine — so this is not a constraint in practice.
|
||||||
- GNU coreutils not required — BSD variants of `date`, `grep`, `sed` all handled.
|
- GNU coreutils not required — BSD variants of `date`, `grep`, `sed` all handled.
|
||||||
- `git-crypt`: install via Homebrew — `brew install git-crypt`
|
- `git-crypt`: install via Homebrew — `brew install git-crypt`
|
||||||
- `jq`, `curl`: pre-installed or via Homebrew
|
- `jq`, `curl`: pre-installed or via Homebrew
|
||||||
|
|
@ -166,6 +203,7 @@ If you use Homebrew bash (`brew install bash`), the scripts work identically to
|
||||||
**Git Bash and native Windows are not supported.**
|
**Git Bash and native Windows are not supported.**
|
||||||
|
|
||||||
Reasons:
|
Reasons:
|
||||||
|
|
||||||
- `git-crypt` has no native Windows binary.
|
- `git-crypt` has no native Windows binary.
|
||||||
- Process substitution `<(...)` used for runtime key injection is not available
|
- Process substitution `<(...)` used for runtime key injection is not available
|
||||||
in Git Bash or PowerShell.
|
in Git Bash or PowerShell.
|
||||||
|
|
@ -180,7 +218,7 @@ All setup and runtime operations work identically to native Linux inside WSL2.
|
||||||
The system is designed for a homelab architecture:
|
The system is designed for a homelab architecture:
|
||||||
|
|
||||||
| Component | Recommended | Role |
|
| Component | Recommended | Role |
|
||||||
|-----------|-------------|------|
|
| --------------- | ------------------------- | --------------------------------------------------------------- |
|
||||||
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
||||||
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
||||||
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
||||||
|
|
@ -192,6 +230,11 @@ The system is designed for a homelab architecture:
|
||||||
> the index, and the log tail is a cost. This is why all agent files are token-optimised
|
> the index, and the log tail is a cost. This is why all agent files are token-optimised
|
||||||
> and sessions are kept to one source at a time.
|
> and sessions are kept to one source at a time.
|
||||||
|
|
||||||
|
> **Reference deployment:** the table above is a target profile, not a hard requirement.
|
||||||
|
> The current setup runs a single 16GB GPU (RTX 5060 Ti) with a ~9B model for interactive
|
||||||
|
> ingest, and offloads heavy/async synthesis to a cloud model. Smaller models work — they
|
||||||
|
> just make the "one source per session" discipline and the token budget matter more.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
@ -199,7 +242,7 @@ The system is designed for a homelab architecture:
|
||||||
### Required
|
### Required
|
||||||
|
|
||||||
| Tool | Purpose |
|
| Tool | Purpose |
|
||||||
|------|---------|
|
| ----------- | -------------------------------- |
|
||||||
| `git` | Version control |
|
| `git` | Version control |
|
||||||
| `git-crypt` | Transparent file encryption |
|
| `git-crypt` | Transparent file encryption |
|
||||||
| `curl` | REST API calls to Forgejo/GitHub |
|
| `curl` | REST API calls to Forgejo/GitHub |
|
||||||
|
|
@ -208,7 +251,7 @@ The system is designed for a homelab architecture:
|
||||||
### Optional
|
### Optional
|
||||||
|
|
||||||
| Tool | Purpose |
|
| Tool | Purpose |
|
||||||
|------|---------|
|
| ----- | ----------------------------------------------------------------------- |
|
||||||
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
||||||
| `qmd` | Local BM25 + vector search for Markdown files with MCP server interface |
|
| `qmd` | Local BM25 + vector search for Markdown files with MCP server interface |
|
||||||
|
|
||||||
|
|
@ -282,14 +325,17 @@ resolution. Never included by Make.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Dynamic paths (resolved at source time)
|
# Dynamic paths (resolved at source time)
|
||||||
WORK_DIR="${HOME}/knowledge-genome-setup"
|
WORK_DIR="${HOME}/knowledge-genome-orchestrator"
|
||||||
KEYS_DIR="${WORK_DIR}/keys"
|
KEYS_DIR="${WORK_DIR}/keys"
|
||||||
|
|
||||||
# Genome registry — format: "name|description"
|
# Genome registry — format: "name|description|linked_repo"
|
||||||
|
# The third field is OPTIONAL:
|
||||||
|
# - leave it empty → knowledge-only genome (no linked project)
|
||||||
|
# - owner/repo → genome is linked to that project repository (rendered into AGENTS.md)
|
||||||
GENOMES=(
|
GENOMES=(
|
||||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
"genome-dev|Web development, TUI, Angular, software architecture|myorg/my-app"
|
||||||
"genome-finance|Personal finance, investments, market analysis"
|
"genome-finance|Personal finance, investments, market analysis|"
|
||||||
"genome-homelab|Infrastructure, network configs, architecture logs"
|
"genome-homelab|Infrastructure, network configs, architecture logs|"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -312,8 +358,8 @@ export GITHUB_TOKEN="your_github_token"
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1. Clone the setup framework
|
# 1. Clone the setup framework
|
||||||
git clone <setup-repo-url> knowledge-genome-setup
|
git clone <setup-repo-url> knowledge-genome-orchestrator
|
||||||
cd knowledge-genome-setup
|
cd knowledge-genome-orchestrator
|
||||||
|
|
||||||
# 2. Configure your environment
|
# 2. Configure your environment
|
||||||
cp globals.env.example globals.env # edit with your values
|
cp globals.env.example globals.env # edit with your values
|
||||||
|
|
@ -347,6 +393,7 @@ make setup
|
||||||
- Commits submodule pointer in master repo
|
- Commits submodule pointer in master repo
|
||||||
|
|
||||||
After setup completes:
|
After setup completes:
|
||||||
|
|
||||||
- Upload all files in `keys/` to Vaultwarden (see Key Management)
|
- Upload all files in `keys/` to Vaultwarden (see Key Management)
|
||||||
- Delete key files from disk: `rm keys/*.key`
|
- Delete key files from disk: `rm keys/*.key`
|
||||||
|
|
||||||
|
|
@ -355,11 +402,14 @@ After setup completes:
|
||||||
## Makefile Reference
|
## Makefile Reference
|
||||||
|
|
||||||
| Target | Description |
|
| Target | Description |
|
||||||
|--------|-------------|
|
| ----------------------------------------------------- | ------------------------------------------------------------------------------------- |
|
||||||
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
||||||
| `make add-genome NAME=x DESC="y"` | Scaffold and register a single new genome |
|
| `make add-genome NAME=x DESC="y" [LINKED=owner/repo]` | Scaffold and register a single new genome (optional linked project) |
|
||||||
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
||||||
| `make status` | Show submodule status and first 10 git-crypt encryption states |
|
| `make verify-structure` | Report directory drift of each genome vs the canonical layout (`lib/structure.sh`) |
|
||||||
|
| `make sync-structure` | Create any missing canonical directories across all genomes (safe, idempotent) |
|
||||||
|
| `make test` | Run the bats test suite (deterministic; no LLM/GPU/network) — see [Testing](#testing) |
|
||||||
|
| `make status` | Show submodule status and per-genome git-crypt encryption state |
|
||||||
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
||||||
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if bw missing |
|
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if optional bw or python3 is missing |
|
||||||
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
||||||
|
|
@ -374,6 +424,12 @@ make doctor
|
||||||
# Add a new genome after initial setup
|
# Add a new genome after initial setup
|
||||||
make add-genome NAME=genome-research DESC="Academic papers and deep research"
|
make add-genome NAME=genome-research DESC="Academic papers and deep research"
|
||||||
|
|
||||||
|
# Add a genome linked to a project repository
|
||||||
|
make add-genome NAME=genome-dev DESC="Web development" LINKED=myorg/my-app
|
||||||
|
|
||||||
|
# Check every genome against the canonical directory layout
|
||||||
|
make verify-structure
|
||||||
|
|
||||||
# Run full lint pass (bash deterministic checks)
|
# Run full lint pass (bash deterministic checks)
|
||||||
make lint
|
make lint
|
||||||
|
|
||||||
|
|
@ -386,6 +442,38 @@ make lock
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
The mechanical layer (slug, index, log, lint, structure, the ingest orchestrator) is
|
||||||
|
covered by a [bats](https://github.com/bats-core/bats-core) suite. The tests are
|
||||||
|
**deterministic and have zero dependency on the LLM, the GPU, or the network** — they
|
||||||
|
simulate the agent's output with fixtures and exercise the scripts directly, so they run
|
||||||
|
anywhere git, bash, bats, and python3 are installed (laptop, CI, a git hook). They are **not** meant to run on the AI
|
||||||
|
node or via n8n.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt install bats # once
|
||||||
|
make test # or: bats tests/
|
||||||
|
```
|
||||||
|
|
||||||
|
| File | Covers |
|
||||||
|
| ----------------- | ------------------------------------------------------------------------------ |
|
||||||
|
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||||
|
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` |
|
||||||
|
| `structure.bats` | `lib/structure.sh` report / sync |
|
||||||
|
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||||
|
|
||||||
|
Each test builds its own throwaway genome with a local bare remote, configured to ignore
|
||||||
|
the operator's global git settings (signing, global hooks) so the suite is hermetic. The
|
||||||
|
`run-ingest` tests auto-`skip` if `jq` is absent. If you change the canonical layout in
|
||||||
|
`lib/structure.sh`, update `FIXTURE_DIRS` in `tests/helpers.bash` to match.
|
||||||
|
|
||||||
|
> Why this matters: the only non-deterministic part of the system is the model. Pinning
|
||||||
|
> the mechanical layer with tests means that when an ingest misbehaves, you know it's the
|
||||||
|
> model or the prompt — not the plumbing.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Genome Lifecycle
|
## Genome Lifecycle
|
||||||
|
|
||||||
### Initial setup
|
### Initial setup
|
||||||
|
|
@ -407,6 +495,7 @@ After adding: upload the new key to Vaultwarden and delete the key file.
|
||||||
### Removing a genome
|
### Removing a genome
|
||||||
|
|
||||||
Manual process:
|
Manual process:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# In master repo
|
# In master repo
|
||||||
git submodule deinit genome-name
|
git submodule deinit genome-name
|
||||||
|
|
@ -422,10 +511,11 @@ When a genome is scaffolded, `render_template` replaces these placeholders in al
|
||||||
template files:
|
template files:
|
||||||
|
|
||||||
| Placeholder | Source | Example |
|
| Placeholder | Source | Example |
|
||||||
|-------------|--------|---------|
|
| ----------------------- | ----------- | ------------------------------ |
|
||||||
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
||||||
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
||||||
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
||||||
|
| `{{LINKED_PROJECT}}` | registry.sh | `myorg/my-app` (or `none`) |
|
||||||
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
||||||
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
||||||
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
||||||
|
|
@ -442,7 +532,7 @@ Each genome uses a unique symmetric AES-256-CTR key managed by git-crypt.
|
||||||
Two directories in every genome are always encrypted:
|
Two directories in every genome are always encrypted:
|
||||||
|
|
||||||
| Directory | Contents | On remote |
|
| Directory | Contents | On remote |
|
||||||
|-----------|----------|-----------|
|
| --------------- | --------------------------- | ------------------ |
|
||||||
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
||||||
| `wiki/private/` | Private synthesis and notes | Opaque binary blob |
|
| `wiki/private/` | Private synthesis and notes | Opaque binary blob |
|
||||||
|
|
||||||
|
|
@ -490,6 +580,17 @@ This means: any file matching `**/private/**` in `.gitattributes` is protected,
|
||||||
including future `private/` directories created anywhere in the repo.
|
including future `private/` directories created anywhere in the repo.
|
||||||
The hook never needs updating when the encryption rules change.
|
The hook never needs updating when the encryption rules change.
|
||||||
|
|
||||||
|
### Untrusted agent output — manifest validation
|
||||||
|
|
||||||
|
The ingest agent's output is stochastic: a hallucinated manifest could carry a missing field,
|
||||||
|
a wrong type, or a malicious path such as `wiki/../../etc/passwd`. `run-ingest.sh` therefore
|
||||||
|
**validates the manifest before trusting any field** — it must be well-formed JSON with a
|
||||||
|
string `raw_source` and an array `pages`, and **every `path` must be a string under `wiki/`
|
||||||
|
with no `..`**. Anything else fails fast with a structured `{"status":"error"}` and no
|
||||||
|
filesystem access outside the wiki, so a bad path can't drive a read or a lint outside the
|
||||||
|
knowledge tree. This is the trust boundary between the (stochastic) model and the
|
||||||
|
(deterministic, tested) post-processor.
|
||||||
|
|
||||||
### PRIVATE_CONTEXT toggle
|
### PRIVATE_CONTEXT toggle
|
||||||
|
|
||||||
The `PRIVATE_CONTEXT` toggle in `AGENTS.md` controls whether the LLM agent
|
The `PRIVATE_CONTEXT` toggle in `AGENTS.md` controls whether the LLM agent
|
||||||
|
|
@ -502,6 +603,7 @@ PRIVATE_CONTEXT: enabled ← Agent may read/write private/. Requires git-cryp
|
||||||
```
|
```
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
|
|
||||||
- Never inferred. Never carried over from a previous session.
|
- Never inferred. Never carried over from a previous session.
|
||||||
- `enabled` requires the operator to confirm that `git-crypt unlock` has run on the host.
|
- `enabled` requires the operator to confirm that `git-crypt unlock` has run on the host.
|
||||||
- Per-genome, per-session: enabling for `genome-finance` does NOT enable for `genome-dev`.
|
- Per-genome, per-session: enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||||
|
|
@ -530,6 +632,7 @@ The key flows: Vaultwarden → `bw get notes` → `base64 -d` → kernel pipe
|
||||||
At no point is the key written to any file on disk.
|
At no point is the key written to any file on disk.
|
||||||
|
|
||||||
Lock a genome when the session ends:
|
Lock a genome when the session ends:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git-crypt lock
|
git-crypt lock
|
||||||
```
|
```
|
||||||
|
|
@ -545,7 +648,7 @@ git-crypt lock
|
||||||
Each genome key is stored as a base64-encoded Secure Note in Vaultwarden:
|
Each genome key is stored as a base64-encoded Secure Note in Vaultwarden:
|
||||||
|
|
||||||
| Genome | Vaultwarden Note Name |
|
| Genome | Vaultwarden Note Name |
|
||||||
|--------|----------------------|
|
| ---------------- | --------------------- |
|
||||||
| `genome-dev` | `genome-dev key` |
|
| `genome-dev` | `genome-dev key` |
|
||||||
| `genome-finance` | `genome-finance key` |
|
| `genome-finance` | `genome-finance key` |
|
||||||
| `genome-homelab` | `genome-homelab key` |
|
| `genome-homelab` | `genome-homelab key` |
|
||||||
|
|
@ -586,13 +689,14 @@ git clone https://git.yourserver.com/yourusername/genome-dev.git
|
||||||
If a key is lost or compromised:
|
If a key is lost or compromised:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# From the knowledge-genome-setup/ directory
|
# From the knowledge-genome-orchestrator/ directory
|
||||||
source lib/git-crypt.sh
|
source lib/git-crypt.sh
|
||||||
cd ~/knowledge-genome-setup/genome-dev
|
cd ~/knowledge-genome-orchestrator/genome-dev
|
||||||
gcrypt_rotate_key "genome-dev"
|
gcrypt_rotate_key "genome-dev"
|
||||||
```
|
```
|
||||||
|
|
||||||
`gcrypt_rotate_key` performs:
|
`gcrypt_rotate_key` performs:
|
||||||
|
|
||||||
1. Unlocks repo with existing key
|
1. Unlocks repo with existing key
|
||||||
2. Removes old key material
|
2. Removes old key material
|
||||||
3. Generates new symmetric key via `git-crypt init`
|
3. Generates new symmetric key via `git-crypt init`
|
||||||
|
|
@ -603,13 +707,16 @@ gcrypt_rotate_key "genome-dev"
|
||||||
> **Limitation:** git history still contains blobs encrypted with the old key.
|
> **Limitation:** git history still contains blobs encrypted with the old key.
|
||||||
> Anyone with the old key and git history access can decrypt them. To purge old
|
> Anyone with the old key and git history access can decrypt them. To purge old
|
||||||
> encrypted blobs from history:
|
> encrypted blobs from history:
|
||||||
|
>
|
||||||
> ```bash
|
> ```bash
|
||||||
> git filter-repo --invert-paths --path raw/private --path wiki/private
|
> git filter-repo --invert-paths --path raw/private --path wiki/private
|
||||||
> git push --force origin main
|
> git push --force origin main
|
||||||
> ```
|
> ```
|
||||||
|
>
|
||||||
> This rewrites all commit hashes — coordinate with any collaborators first.
|
> This rewrites all commit hashes — coordinate with any collaborators first.
|
||||||
|
|
||||||
After rotation:
|
After rotation:
|
||||||
|
|
||||||
- Upload new key to Vaultwarden (replace existing note)
|
- Upload new key to Vaultwarden (replace existing note)
|
||||||
- Delete both `keys/genome-dev.key` and `keys/genome-dev-rotated-*.key` from disk
|
- Delete both `keys/genome-dev.key` and `keys/genome-dev-rotated-*.key` from disk
|
||||||
- Revoke access from previous key holders
|
- Revoke access from previous key holders
|
||||||
|
|
@ -621,6 +728,7 @@ After rotation:
|
||||||
### Prerequisites for every session
|
### Prerequisites for every session
|
||||||
|
|
||||||
Before starting an LLM agent session on a genome:
|
Before starting an LLM agent session on a genome:
|
||||||
|
|
||||||
1. The host (AI server) runs `git-crypt unlock` for the required genomes
|
1. The host (AI server) runs `git-crypt unlock` for the required genomes
|
||||||
2. The orchestrator prepares context: `tail -n 20 wiki/log.md`
|
2. The orchestrator prepares context: `tail -n 20 wiki/log.md`
|
||||||
3. Declare `PRIVATE_CONTEXT` state explicitly in the opening prompt
|
3. Declare `PRIVATE_CONTEXT` state explicitly in the opening prompt
|
||||||
|
|
@ -631,7 +739,8 @@ The agent executes in this order at the start of every session:
|
||||||
|
|
||||||
1. Read `wiki/index.md` — primary catalog of all pages and maturity
|
1. Read `wiki/index.md` — primary catalog of all pages and maturity
|
||||||
2. Read last 20 log entries (injected by orchestrator — does NOT open `wiki/log.md` directly)
|
2. Read last 20 log entries (injected by orchestrator — does NOT open `wiki/log.md` directly)
|
||||||
3. For tasks involving related pages: `qmd search "<query>"` before opening any files
|
3. For tasks involving related pages: if the optional `qmd` extension is installed,
|
||||||
|
`qmd search "<query>"` before opening files; otherwise navigate from `wiki/index.md`
|
||||||
4. Operate on individual files — never scan entire directories
|
4. Operate on individual files — never scan entire directories
|
||||||
|
|
||||||
### One source per session
|
### One source per session
|
||||||
|
|
@ -651,12 +760,13 @@ sequentially — not one session with 5 files.
|
||||||
### n8n automation
|
### n8n automation
|
||||||
|
|
||||||
For Forgejo webhook → automated ingest:
|
For Forgejo webhook → automated ingest:
|
||||||
|
|
||||||
1. Forgejo sends webhook on push to `raw/`
|
1. Forgejo sends webhook on push to `raw/`
|
||||||
2. n8n receives webhook, identifies new files
|
2. n8n receives webhook, identifies new files
|
||||||
3. n8n starts one agent session per new file (sequential, not parallel)
|
3. n8n starts one agent session per new file (sequential, not parallel)
|
||||||
4. Each session: inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
4. Each session: realign the checkout to the base (`git fetch && git switch <base> && git reset --hard origin/<base>`), then inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||||
5. Agent ingest workflow runs, opens PR
|
5. Phase 1 agent (`/skill:ingest`) writes the manifest; Phase 2 `run-ingest.sh` opens the PR, then **stops**
|
||||||
6. Human reviews and merges PR
|
6. Human reviews — **merge to accept**, or close the PR + delete the `feat` branch to reject
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -664,27 +774,76 @@ For Forgejo webhook → automated ingest:
|
||||||
|
|
||||||
### Ingest
|
### Ingest
|
||||||
|
|
||||||
Triggered by a new file in `raw/` (manual or via webhook).
|
Triggered by a new file in `raw/` (manual or via webhook). Ingest is split into two
|
||||||
|
phases so that the small local model spends its limited context only on judgement, and
|
||||||
|
all the deterministic bookkeeping happens outside the model's loop.
|
||||||
|
|
||||||
1. Read source once
|
**Phase 1 — agent (semantic only).** The `ingest` skill gives the agent read/edit tools
|
||||||
2. Create `wiki/sources/<slug>.md` — summary and key points
|
only (no shell). It:
|
||||||
3. Per entity (person, tool, organisation): create or update `wiki/entities/<name>.md`
|
|
||||||
4. Per concept (pattern, theory, decision): create or update `wiki/concepts/<name>.md`
|
1. Reads the source once
|
||||||
5. Check each touched page for contradictions → apply Conflict Resolution if found
|
2. Creates `wiki/sources/<slug>.md` — summary and key points
|
||||||
6. Append entry to `wiki/index.md` (bottom of relevant section — do not reorder)
|
3. Per entity (person, tool, organisation): creates or updates `wiki/entities/<name>.md`
|
||||||
7. Append log entry: `INGEST | <slug>`
|
4. Per concept (pattern, theory, decision): creates or updates `wiki/concepts/<name>.md`
|
||||||
8. Run scoped lint on pages created or modified in this session; report in PR
|
5. Checks each touched page for contradictions → applies Conflict Resolution if found
|
||||||
9. Commit on `feat/ai-ingest-<slug>`; open PR using `templates/pr-description.md`
|
6. Writes `.ingest-manifest.json` (the list of pages it created/modified, the model name,
|
||||||
|
a one-line reasoning, the PR summary, and any contradictions) — then **stops**
|
||||||
|
|
||||||
|
**Phase 2 — `run-ingest.sh` (deterministic, outside the agent).** The post-processor first
|
||||||
|
**validates the manifest** — well-formed JSON, expected shape, and every page path confined to
|
||||||
|
`wiki/` with no `..` (see [Security Model](#security-model)) — then does the mechanical work the
|
||||||
|
model must not waste context on:
|
||||||
|
|
||||||
|
7. Inserts each page into the correct `wiki/index.md` section **in alphabetical order**,
|
||||||
|
deduplicated by wikilink (a re-ingest updates the entry, never duplicates it), and bumps the
|
||||||
|
index `last_updated` (`index-append.py`)
|
||||||
|
8. Appends the `INGEST | <slug>` entry to `wiki/log.md` (the model name comes from the
|
||||||
|
orchestrator via `INGEST_MODEL` — the agent cannot reliably know its own tag)
|
||||||
|
9. Runs scoped lint on exactly the pages touched this run (`scoped-lint.sh`, reusing
|
||||||
|
`lib/lint.sh`)
|
||||||
|
10. Commits **only `wiki/`** on `feat/ai-ingest-<slug>` and opens a PR against the integration
|
||||||
|
base (`INGEST_BASE`, default `main`); the body matches the `templates/pr-description.md`
|
||||||
|
structure (Summary / Pages / Contradictions / Scoped Lint)
|
||||||
|
11. Emits a single compact JSON line (status, slug, PR url, lint_clean, conflict) for n8n
|
||||||
|
|
||||||
|
The agent never runs git, never edits the index/log mechanically, and never lints — those
|
||||||
|
are deterministic and tested (see [Testing](#testing)). Invocation on the AI node:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pi --mode json -p "/skill:ingest raw/articles/<file>.md" # phase 1 → writes manifest
|
||||||
|
run-ingest.sh <genome> # phase 2 → index/log/lint/PR
|
||||||
|
```
|
||||||
|
|
||||||
For private sources (`PRIVATE_CONTEXT: enabled` required):
|
For private sources (`PRIVATE_CONTEXT: enabled` required):
|
||||||
|
|
||||||
- All output goes to `wiki/private/<slug>.md` only
|
- All output goes to `wiki/private/<slug>.md` only
|
||||||
- PR title: `[PRIVATE] ingest: <slug>`
|
- PR title: `[PRIVATE] ingest: <slug>`
|
||||||
|
|
||||||
|
**Branch lifecycle & the manual gate.** `run-ingest.sh` / `open-pr.sh` are deliberately
|
||||||
|
"dumb": they create the `feat/ai-ingest-<slug>` branch, commit only `wiki/`, open the PR, and
|
||||||
|
stop. They never reset, revert, or touch the integration branch — that lifecycle belongs to
|
||||||
|
the orchestrator, around the human gate:
|
||||||
|
|
||||||
|
- **Before each session** the orchestrator realigns the checkout to the base
|
||||||
|
(`git fetch && git switch <base> && git reset --hard origin/<base>`) — a reset of the _local_
|
||||||
|
checkout to match the remote, never a force-push to the shared branch.
|
||||||
|
- **After the PR opens, everything stops** until a human approves or rejects it: one source per session,
|
||||||
|
sequential, no new ingest until the pending PR is closed.
|
||||||
|
- **Approve = merge. Reject = close the PR and delete the remote `feat` branch.** To undo an
|
||||||
|
already-merged ingest, open a _revert PR_ against the base — never rewrite history on a
|
||||||
|
shared branch.
|
||||||
|
|
||||||
|
The PR base is configurable via `INGEST_BASE` (default `main`). Per-page `maturity` already
|
||||||
|
encodes stability and tags/releases mark versioned snapshots, so `main` is the integration
|
||||||
|
branch today. If a linked project later _consumes_ a genome, set `INGEST_BASE=develop` to
|
||||||
|
buffer ingests on `develop` and cut manual `develop → main` releases — no code change.
|
||||||
|
|
||||||
### Query
|
### Query
|
||||||
|
|
||||||
Triggered by an operator question.
|
Triggered by an operator question.
|
||||||
|
|
||||||
1. `qmd search "<query>"` → identify candidate pages
|
1. `qmd search "<query>"` (if the optional qmd extension is installed) → identify
|
||||||
|
candidate pages; otherwise start from `wiki/index.md`
|
||||||
2. Read candidate pages directly (qmd already returns file paths — no intermediate index lookup)
|
2. Read candidate pages directly (qmd already returns file paths — no intermediate index lookup)
|
||||||
3. Synthesise answer with `[[wikilink]]` citations
|
3. Synthesise answer with `[[wikilink]]` citations
|
||||||
4. If answer is non-trivial: save as `wiki/queries/<slug>.md` and append to index
|
4. If answer is non-trivial: save as `wiki/queries/<slug>.md` and append to index
|
||||||
|
|
@ -697,11 +856,13 @@ For general orientation without a specific query: read `wiki/index.md` directly.
|
||||||
The lint workflow is split between deterministic bash checks and semantic LLM judgment.
|
The lint workflow is split between deterministic bash checks and semantic LLM judgment.
|
||||||
|
|
||||||
**Step 1 — operator runs bash linter:**
|
**Step 1 — operator runs bash linter:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make lint
|
make lint
|
||||||
```
|
```
|
||||||
|
|
||||||
The bash linter checks automatically:
|
The bash linter checks automatically:
|
||||||
|
|
||||||
- YAML frontmatter validity (all mandatory fields present)
|
- YAML frontmatter validity (all mandatory fields present)
|
||||||
- Domain consistency (domain field matches genome name)
|
- Domain consistency (domain field matches genome name)
|
||||||
- Type validity (value from allowed list)
|
- Type validity (value from allowed list)
|
||||||
|
|
@ -713,6 +874,7 @@ The bash linter checks automatically:
|
||||||
**Step 2 — operator provides bash output to LLM agent:**
|
**Step 2 — operator provides bash output to LLM agent:**
|
||||||
|
|
||||||
The agent applies semantic judgment to findings the bash linter cannot make:
|
The agent applies semantic judgment to findings the bash linter cannot make:
|
||||||
|
|
||||||
- **Orphan pages** (from bash list): for each orphan, identify 1-3 existing pages
|
- **Orphan pages** (from bash list): for each orphan, identify 1-3 existing pages
|
||||||
that should link to it; propose specific additions
|
that should link to it; propose specific additions
|
||||||
- **Implicit concepts** (from bash term frequency list): determine if a candidate
|
- **Implicit concepts** (from bash term frequency list): determine if a candidate
|
||||||
|
|
@ -735,21 +897,27 @@ The PR description uses `templates/pr-description.md`:
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
One sentence: goal of this session and source processed.
|
One sentence: goal of this session and source processed.
|
||||||
|
|
||||||
## Pages Created
|
## Pages Created
|
||||||
|
|
||||||
| Path | Type | Maturity |
|
| Path | Type | Maturity |
|
||||||
|
|
||||||
## Pages Modified
|
## Pages Modified
|
||||||
|
|
||||||
| Path | Change |
|
| Path | Change |
|
||||||
|
|
||||||
## Contradictions Found
|
## Contradictions Found
|
||||||
|
|
||||||
[ ] None / [ ] n conflict file(s) created
|
[ ] None / [ ] n conflict file(s) created
|
||||||
|
|
||||||
## Private Data Accessed
|
## Private Data Accessed
|
||||||
|
|
||||||
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
||||||
|
|
||||||
## Scoped Lint (post-ingest)
|
## Scoped Lint (post-ingest)
|
||||||
|
|
||||||
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -777,7 +945,7 @@ The operator resolves the conflict, updates relevant pages, closes the PR.
|
||||||
Pages have a `last_updated` field in frontmatter. During lint passes:
|
Pages have a `last_updated` field in frontmatter. During lint passes:
|
||||||
|
|
||||||
| Maturity | Threshold | Action |
|
| Maturity | Threshold | Action |
|
||||||
|----------|-----------|--------|
|
| -------- | --------- | -------------------------------------- |
|
||||||
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
||||||
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
||||||
|
|
||||||
|
|
@ -817,7 +985,7 @@ private: true | false
|
||||||
```
|
```
|
||||||
|
|
||||||
| Field | Rules |
|
| Field | Rules |
|
||||||
|-------|-------|
|
| ---------------------- | ------------------------------------------------------------------------ |
|
||||||
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
||||||
| `maturity: draft` | Single source or unvalidated |
|
| `maturity: draft` | Single source or unvalidated |
|
||||||
| `maturity: stable` | Confirmed by 2+ independent sources |
|
| `maturity: stable` | Confirmed by 2+ independent sources |
|
||||||
|
|
@ -830,7 +998,7 @@ Do not use semantic versioning for content. Git history tracks every change.
|
||||||
### Page types and directories
|
### Page types and directories
|
||||||
|
|
||||||
| Type | Directory | Description |
|
| Type | Directory | Description |
|
||||||
|------|-----------|-------------|
|
| ---------- | ---------------------------- | -------------------------------------------- |
|
||||||
| `source` | `wiki/sources/` | One page per processed raw source |
|
| `source` | `wiki/sources/` | One page per processed raw source |
|
||||||
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
||||||
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
||||||
|
|
@ -843,7 +1011,7 @@ Do not use semantic versioning for content. Git history tracks every change.
|
||||||
### Page size limits
|
### Page size limits
|
||||||
|
|
||||||
| Limit | Lines | Action |
|
| Limit | Lines | Action |
|
||||||
|-------|-------|--------|
|
| -------- | ----- | ----------------------------------- |
|
||||||
| Soft cap | 400 | Bash linter warns |
|
| Soft cap | 400 | Bash linter warns |
|
||||||
| Hard cap | 800 | Bash linter errors — split the page |
|
| Hard cap | 800 | Bash linter errors — split the page |
|
||||||
|
|
||||||
|
|
@ -853,7 +1021,7 @@ and keep the wiki atomically navigable.
|
||||||
### Linking conventions
|
### Linking conventions
|
||||||
|
|
||||||
| Type | Format |
|
| Type | Format |
|
||||||
|------|--------|
|
| ---------------------- | ------------------------------------------- |
|
||||||
| Internal (same genome) | `[[folder/slug]]` — Obsidian wikilinks only |
|
| Internal (same genome) | `[[folder/slug]]` — Obsidian wikilinks only |
|
||||||
| Cross-genome | `[[../genome-target/wiki/folder/slug]]` |
|
| Cross-genome | `[[../genome-target/wiki/folder/slug]]` |
|
||||||
| External | `[text](https://url)` — standard Markdown |
|
| External | `[text](https://url)` — standard Markdown |
|
||||||
|
|
@ -878,6 +1046,7 @@ Every operation appends one entry to `wiki/log.md`:
|
||||||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||||
|
|
||||||
Parse examples:
|
Parse examples:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
grep "^## \[" wiki/log.md | tail -5 # Last 5 entries
|
grep "^## \[" wiki/log.md | tail -5 # Last 5 entries
|
||||||
grep "^## \[" wiki/log.md | grep "CONFLICT" # All conflicts
|
grep "^## \[" wiki/log.md | grep "CONFLICT" # All conflicts
|
||||||
|
|
@ -892,7 +1061,7 @@ The LLM never loads the full log.
|
||||||
## Collaboration Model
|
## Collaboration Model
|
||||||
|
|
||||||
| Role | Key access | Permitted operations |
|
| Role | Key access | Permitted operations |
|
||||||
|------|-----------|----------------------|
|
| -------------- | ----------------- | ----------------------------------------------------------------------------- |
|
||||||
| Owner | Full — key holder | Read/write everywhere |
|
| Owner | Full — key holder | Read/write everywhere |
|
||||||
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
||||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||||
|
|
@ -930,6 +1099,7 @@ qmd serve --port 3333
|
||||||
Obsidian is the recommended wiki browser. Open any genome directory as an Obsidian vault.
|
Obsidian is the recommended wiki browser. Open any genome directory as an Obsidian vault.
|
||||||
|
|
||||||
Recommended setup:
|
Recommended setup:
|
||||||
|
|
||||||
- **Graph view** — visualise page connections; spot orphans and hubs instantly
|
- **Graph view** — visualise page connections; spot orphans and hubs instantly
|
||||||
- **Obsidian Web Clipper** — browser extension to clip articles directly to `raw/articles/`
|
- **Obsidian Web Clipper** — browser extension to clip articles directly to `raw/articles/`
|
||||||
as Markdown
|
as Markdown
|
||||||
|
|
@ -949,7 +1119,8 @@ n8n (running on the storage node) can automate the ingest pipeline:
|
||||||
2. n8n flow identifies new files
|
2. n8n flow identifies new files
|
||||||
3. For each new file: starts one agent session (sequential — never parallel)
|
3. For each new file: starts one agent session (sequential — never parallel)
|
||||||
4. Each session receives: `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
4. Each session receives: `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||||
5. Agent runs ingest workflow and opens PR
|
5. Phase 1 — agent runs `/skill:ingest` (semantic → writes manifest); Phase 2 —
|
||||||
|
`run-ingest.sh` does index/log/lint and opens the PR, returning one JSON line to n8n
|
||||||
6. Human reviews the PR
|
6. Human reviews the PR
|
||||||
|
|
||||||
Key constraint: one source per session, sessions sequential.
|
Key constraint: one source per session, sessions sequential.
|
||||||
|
|
@ -959,11 +1130,13 @@ Never batch multiple sources into one agent session.
|
||||||
|
|
||||||
If the AI compute node has an Intel NPU (e.g. Core Ultra series):
|
If the AI compute node has an Intel NPU (e.g. Core Ultra series):
|
||||||
|
|
||||||
- Background tasks (embedding updates, index refresh) → Intel NPU via OpenVINO
|
- Background/auxiliary tasks (OCR of `raw/assets/`, async summarisation, or qmd
|
||||||
|
re-indexing **if** the optional qmd extension is in use) → Intel NPU via OpenVINO
|
||||||
- Active reasoning sessions (ingest, query, synthesis) → GPU
|
- Active reasoning sessions (ingest, query, synthesis) → GPU
|
||||||
|
|
||||||
This keeps the GPU's KV cache free for interactive work and reduces power consumption
|
Note: the core system has no embedding pipeline (see [Core Philosophy](#core-philosophy)),
|
||||||
for background operations.
|
so there is nothing to embed here — the NPU is only for auxiliary work. This keeps the
|
||||||
|
GPU's KV cache free for interactive sessions and lowers power draw for background jobs.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -991,6 +1164,7 @@ sudo apt install git git-crypt curl jq
|
||||||
The staged file is in a path matching `**/private/**` but is not encrypted.
|
The staged file is in a path matching `**/private/**` but is not encrypted.
|
||||||
|
|
||||||
Fix options:
|
Fix options:
|
||||||
|
|
||||||
1. Verify `.gitattributes` contains `**/private/** filter=git-crypt diff=git-crypt -text`
|
1. Verify `.gitattributes` contains `**/private/** filter=git-crypt diff=git-crypt -text`
|
||||||
2. Run `git-crypt init` if git-crypt is not initialised in this repo
|
2. Run `git-crypt init` if git-crypt is not initialised in this repo
|
||||||
3. Run `git-crypt status` to check the encryption state of all files
|
3. Run `git-crypt status` to check the encryption state of all files
|
||||||
|
|
@ -1011,6 +1185,7 @@ git commit -m "fix: re-stage private files for encryption"
|
||||||
### Agent returns stale or missing cross-references
|
### Agent returns stale or missing cross-references
|
||||||
|
|
||||||
Likely causes:
|
Likely causes:
|
||||||
|
|
||||||
1. Session was too long — KV cache degraded. Use one source per session.
|
1. Session was too long — KV cache degraded. Use one source per session.
|
||||||
2. `wiki/index.md` was not read at session start — agent lacked the page catalog.
|
2. `wiki/index.md` was not read at session start — agent lacked the page catalog.
|
||||||
3. qmd index is stale — re-index: `qmd index <genome>/wiki/`
|
3. qmd index is stale — re-index: `qmd index <genome>/wiki/`
|
||||||
|
|
|
||||||
130
diagnose-run-ingest.sh
Normal file
130
diagnose-run-ingest.sh
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
#!/usr/bin/env bash
# diagnose-run-ingest.sh
# Run from the repo root: bash diagnose-run-ingest.sh
# Builds the same fixture the bats test uses and runs run-ingest under `bash -x`
# so we can see exactly which command makes it exit non-zero.
#
# Flow:
#   1. Print tool versions (bash, git, jq, python3).
#   2. Grep run-ingest.sh for its helper invocations and its jq result emitter.
#   3. Build a throw-away genome repo plus a bare "origin" under a mktemp dir.
#   4. Drop in one source page and an .ingest-manifest.json that references it.
#   5. Run run-ingest.sh with DRY_RUN=1 under `bash -x`, capturing stdout and
#      the xtrace to files, then print the exit code, stdout and trace tail.
#
# Nothing in this script removes the temp dir, so out.txt / trace.txt remain
# available for inspection after the run.

# Deliberately no `-e`: this is a diagnostic, later sections should still run
# and report even when an earlier probe (e.g. a grep with no match) fails.
set -uo pipefail

REPO="$(pwd)"
RI="${REPO}/skills/ingest/scripts/run-ingest.sh"

echo "==================== ENV ===================="
echo "bash: $(bash --version | head -1)"
echo "git : $(git --version)"
echo "jq : $(jq --version 2>/dev/null || echo MISSING)"
echo "py : $(python3 --version 2>/dev/null || echo MISSING)"
echo

echo "============ run-ingest.sh on disk ============"
if [[ ! -f "$RI" ]]; then
  echo "NOT FOUND: $RI (run me from the repo root)"
  exit 1
fi
echo "-- helper invocations (want 'bash ...'): --"
grep -nE 'log-append\.sh|scoped-lint\.sh|open-pr\.sh' "$RI"
echo "-- result emitter (want 'jq -nc'): --"
grep -nE 'jq -nc?|jq -n ' "$RI"
echo

echo "============ build hermetic fixture ============"
# Without a usable temp dir every path below would be rooted at "/", so stop
# here rather than continue with an empty $T.
T="$(mktemp -d)" || { echo "mktemp -d failed; cannot build fixture"; exit 1; }
mkdir -p "$T/nohooks"
git init --bare -q "$T/origin.git"
g="$T/g"
mkdir -p "$g"/{raw/articles,wiki/sources,wiki/entities,wiki/concepts,wiki/queries,wiki/private}

cat > "$g/wiki/index.md" <<'EOF'
---
title: "Index"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Index

---

## Sources (`wiki/sources/`)
*x*


## Entities (`wiki/entities/`)
*x*


## Concepts (`wiki/concepts/`)
*x*


## Queries (`wiki/queries/`)
*x*


## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*x*
EOF

cat > "$g/wiki/log.md" <<'EOF'
---
title: "Log"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Log

---

## [2026-01-01] CONFIG | init
- run_id: `init`
EOF

echo raw > "$g/raw/articles/test.md"

# Initialise the fixture repo and push it to the bare origin.
# The subshell runs standalone (not as part of an `&&`/`||` list) so that its
# `set -e` is honoured: if `cd "$g"` or any git step fails, the remaining git
# commands are NOT executed - in particular they can never run against the
# caller's real repository in $REPO.
(
  set -e
  cd "$g"
  git init -q
  git config commit.gpgsign false
  git config core.hooksPath "$T/nohooks"   # empty dir => no hooks fire in the fixture
  git config user.email t@t
  git config user.name t
  git add .
  git commit -qm init
  git branch -M main
  git remote add origin "$T/origin.git"
  git push -q -u origin main
)
fixture_rc=$?
if [[ $fixture_rc -eq 0 ]]; then
  echo "fixture commit+push OK"
else
  echo "FIXTURE SETUP FAILED (look above)"
fi

# Created after the initial commit, so this page is an uncommitted change in
# the fixture working tree when run-ingest starts.
cat > "$g/wiki/sources/test-source.md" <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-04
private: false
---
body
EOF

cat > "$g/.ingest-manifest.json" <<'EOF'
{ "raw_source":"raw/articles/test.md","model":"m","reasoning":"r","pr_summary":"s","contradictions":"None",
"pages":[{"path":"wiki/sources/test-source.md","summary":"a source","maturity":"draft","status":"created"}] }
EOF

echo
echo "============ run-ingest (bash -x) ============"
# run-ingest must execute inside the fixture, never in the real repo.
cd "$g" || { echo "cannot cd into fixture: $g"; exit 1; }
export KG_LIB_DIR="${REPO}/lib" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
bash -x "$RI" genome-test >"$T/out.txt" 2>"$T/trace.txt"
rc=$?
echo "EXIT=$rc"
echo "-- run-ingest stdout (final JSON should be here): --"
cat "$T/out.txt"
echo "-- last 25 lines of the trace (the failing command is near the end): --"
tail -n 25 "$T/trace.txt"
|
||||||
|
|
@ -4,6 +4,9 @@
|
||||||
# Directory structure creation and template rendering engine.
|
# Directory structure creation and template rendering engine.
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
# Canonical directory layout lives in one place (lib/structure.sh).
|
||||||
|
source "$(dirname "${BASH_SOURCE[0]}")/structure.sh"
|
||||||
|
|
||||||
render_template() {
|
render_template() {
|
||||||
local template_file="$1"
|
local template_file="$1"
|
||||||
local output_file="$2"
|
local output_file="$2"
|
||||||
|
|
@ -13,17 +16,21 @@ render_template() {
|
||||||
local content
|
local content
|
||||||
content=$(<"$template_file")
|
content=$(<"$template_file")
|
||||||
|
|
||||||
|
# Defaults (:-) so master-repo templates render even when GENOME_* are unset
|
||||||
|
# (scaffold_master runs before any genome; set -u would otherwise abort here).
|
||||||
local genome_name_upper
|
local genome_name_upper
|
||||||
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME}")
|
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME:-}")
|
||||||
|
|
||||||
# Placeholder replacement
|
# Placeholder replacement
|
||||||
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME}}"
|
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME:-}}"
|
||||||
content="${content//\{\{GENOME_NAME_UPPER\}\}/${genome_name_upper}}"
|
content="${content//\{\{GENOME_NAME_UPPER\}\}/${genome_name_upper}}"
|
||||||
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC}}"
|
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC:-}}"
|
||||||
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL}}"
|
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL:-}}"
|
||||||
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER}}"
|
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER:-}}"
|
||||||
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL}}"
|
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL:-}}"
|
||||||
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO}}"
|
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO:-}}"
|
||||||
|
# linked project reference (optional) — empty registry field renders as 'none'
|
||||||
|
content="${content//\{\{LINKED_PROJECT\}\}/${GENOME_LINKED:-none}}"
|
||||||
content="${content//\{\{DATE\}\}/$(date +%Y-%m-%d)}"
|
content="${content//\{\{DATE\}\}/$(date +%Y-%m-%d)}"
|
||||||
|
|
||||||
mkdir -p "$(dirname "$output_file")"
|
mkdir -p "$(dirname "$output_file")"
|
||||||
|
|
@ -32,13 +39,9 @@ render_template() {
|
||||||
|
|
||||||
scaffold_genome() {
|
scaffold_genome() {
|
||||||
local base="$1"
|
local base="$1"
|
||||||
local dirs=(
|
|
||||||
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
|
|
||||||
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
|
|
||||||
)
|
|
||||||
|
|
||||||
info "Building directory structure in ${base}..."
|
info "Building directory structure in ${base}..."
|
||||||
for dir in "${dirs[@]}"; do
|
for dir in "${GENOME_DIRS[@]}"; do
|
||||||
mkdir -p "${base}/${dir}"
|
mkdir -p "${base}/${dir}"
|
||||||
touch "${base}/${dir}/.gitkeep"
|
touch "${base}/${dir}/.gitkeep"
|
||||||
done
|
done
|
||||||
|
|
|
||||||
70
lib/structure.sh
Normal file
70
lib/structure.sh
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/structure.sh
|
||||||
|
# Single source of truth for the canonical genome directory layout, plus the
|
||||||
|
# verify/sync helpers used by scripts/verify-genomes.sh.
|
||||||
|
#
|
||||||
|
# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources
|
||||||
|
# this file and builds new genomes from GENOME_DIRS, so scaffolding and the
|
||||||
|
# structure check can never drift apart.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Canonical directories every genome must have.
|
||||||
|
# raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned,
|
||||||
|
# contract-bound layout the lint, the index sections and the ingest skill depend on.
|
||||||
|
# Canonical layout, one bucket per line: raw/* first, then wiki/*.
# Order is preserved because callers iterate the array as-is.
GENOME_DIRS=(
  "raw/articles"
  "raw/transcripts"
  "raw/code-packs"
  "raw/assets"
  "raw/private"
  "wiki/sources"
  "wiki/entities"
  "wiki/concepts"
  "wiki/queries"
  "wiki/private"
)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# structure_report <base>
|
||||||
|
# Reports drift of <base> against GENOME_DIRS.
|
||||||
|
# - missing canonical dir → counted as drift (returns non-zero)
|
||||||
|
# - extra dir under raw/ or wiki/ → warning only (does not fail)
|
||||||
|
# Returns the number of MISSING canonical directories.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Compare the directory tree under <base> with GENOME_DIRS.
#   $1  base  - root directory of one genome checkout
# Emits one warn line per missing canonical directory and one info line per
# directory found under <base>/raw or <base>/wiki that is not in GENOME_DIRS.
# Return status = number of missing canonical directories (0 means no drift).
structure_report() {
  local base="$1"
  local missing=0
  # Keep the loop variable local: this file is sourced, so an undeclared `d`
  # would overwrite a same-named variable in the calling script.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      warn "missing: ${d}"
      missing=$((missing + 1))
    fi
  done

  # Extra directories (drift the other way) — informational only.
  # The canonical list is flattened into one space-delimited string so a
  # membership test is a simple substring match on " <dir> ".
  local canon=" ${GENOME_DIRS[*]} "
  while IFS= read -r d; do
    d="${d#"${base}/"}"                        # make the path relative to <base>
    [[ "$canon" == *" ${d} "* ]] && continue   # canonical → not an extra
    info "extra (not in canon): ${d}"
  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null)

  return $missing
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# structure_sync <base>
|
||||||
|
# Creates any MISSING canonical directories (idempotent). Never deletes —
|
||||||
|
# retiring a bucket is a deliberate, contract-aware change to GENOME_DIRS +
|
||||||
|
# the templates, not an automatic prune.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Create every directory from GENOME_DIRS that does not exist under <base>.
#   $1  base  - root directory of one genome checkout
# Each newly created directory receives a .gitkeep file and a success line.
# Existing directories are left untouched and nothing is ever removed.
# Always returns 0 when it reaches the end.
structure_sync() {
  local base="$1"
  local added=0
  # Keep the loop variable local: this file is sourced, so an undeclared `d`
  # would overwrite a same-named variable in the calling script.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      mkdir -p "${base}/${d}"
      touch "${base}/${d}/.gitkeep"
      success "created: ${d}"
      added=$((added + 1))
    fi
  done

  [[ $added -eq 0 ]] && info "already in sync: ${base}"
  # Explicit status: the test above is false whenever something was created.
  return 0
}
|
||||||
12
registry.sh
12
registry.sh
|
|
@ -19,9 +19,13 @@ LIB_DIR="${PROJECT_ROOT}/lib"
|
||||||
PROVIDERS_DIR="${PROJECT_ROOT}/providers"
|
PROVIDERS_DIR="${PROJECT_ROOT}/providers"
|
||||||
|
|
||||||
# --- GENOME REGISTRY ---
|
# --- GENOME REGISTRY ---
|
||||||
# Format: "name|description"
|
# Format: "name|description|linked_repo"
|
||||||
|
# - linked_repo is OPTIONAL. Leave empty (trailing pipe) for knowledge-only genomes.
|
||||||
|
# - It is an opaque reference rendered verbatim into the genome's AGENTS.md
|
||||||
|
# (phase-2 project work is parked, so the framework does not act on it yet).
|
||||||
|
# - Example with a project: "genome-homelab|Keru infrastructure...|keru/homelab-infra"
|
||||||
GENOMES=(
|
GENOMES=(
|
||||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
"genome-dev|Web development, TUI, Angular, software architecture|"
|
||||||
"genome-finance|Personal finance, investments, market analysis"
|
"genome-finance|Personal finance, investments, market analysis|"
|
||||||
"genome-homelab|Keru infrastructure, network configs, architecture logs"
|
"genome-homelab|Keru infrastructure, network configs, architecture logs|"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -11,16 +11,18 @@ source "registry.sh"
|
||||||
|
|
||||||
GENOME_NAME="${1:-}"
|
GENOME_NAME="${1:-}"
|
||||||
GENOME_DESC="${2:-}"
|
GENOME_DESC="${2:-}"
|
||||||
|
GENOME_LINKED="${3:-}" # optional: linked project repo reference
|
||||||
|
|
||||||
if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
|
if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
|
||||||
error "Missing arguments."
|
error "Missing arguments."
|
||||||
echo "Usage: $0 <genome-name> <description>"
|
echo "Usage: $0 <genome-name> <description> [linked-repo]"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
step "Adding New Genome: ${GENOME_NAME}"
|
step "Adding New Genome: ${GENOME_NAME}"
|
||||||
|
|
||||||
GENOMES=("${GENOME_NAME}|${GENOME_DESC}")
|
# Build a 3-field registry entry (linked_repo may be empty)
|
||||||
|
GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}")
|
||||||
|
|
||||||
source "scripts/setup-genomes.sh"
|
source "scripts/setup-genomes.sh"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,9 @@ source "providers/${PROVIDER}.sh"
|
||||||
step "Processing Genome Registry"
|
step "Processing Genome Registry"
|
||||||
|
|
||||||
for entry in "${GENOMES[@]}"; do
|
for entry in "${GENOMES[@]}"; do
|
||||||
IFS='|' read -r GENOME_NAME GENOME_DESC <<< "$entry"
|
# 3-field format: name|description|linked_repo (linked_repo optional → may be empty)
|
||||||
export GENOME_NAME GENOME_DESC
|
IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED <<< "$entry"
|
||||||
|
export GENOME_NAME GENOME_DESC GENOME_LINKED
|
||||||
|
|
||||||
info "Processing: ${GENOME_NAME}..."
|
info "Processing: ${GENOME_NAME}..."
|
||||||
|
|
||||||
|
|
|
||||||
50
scripts/verify-genomes.sh
Normal file
50
scripts/verify-genomes.sh
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# scripts/verify-genomes.sh
|
||||||
|
# Check (default) or --sync the directory structure of every registered genome
|
||||||
|
# against the canonical layout in lib/structure.sh.
|
||||||
|
#
|
||||||
|
# bash scripts/verify-genomes.sh # report drift, non-zero exit on drift
|
||||||
|
# bash scripts/verify-genomes.sh --sync # create missing dirs everywhere (safe)
|
||||||
|
#
|
||||||
|
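# No hardware/LLM involved — pure structure check. Runs on any machine, but must be started from the project root (lib/, globals.env and registry.sh are sourced by relative path).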
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail

# Helpers, configuration, registry and the canonical layout (relative paths).
source "lib/output.sh"
source "globals.env"
source "registry.sh"
source "lib/structure.sh"

# Mode selection: only a literal --sync as first argument switches to sync.
MODE="verify"
if [[ "${1:-}" == "--sync" ]]; then
  MODE="sync"
fi

step "Genome structure: ${MODE}"

TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do
  # 3-field registry; ignore desc + linked
  IFS='|' read -r GENOME_NAME _ _ <<< "$entry"
  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  if [[ -d "$genome_dir" ]]; then
    info "Genome: ${GENOME_NAME}"
    case "$MODE" in
      sync)
        structure_sync "$genome_dir"
        ;;
      *)
        # structure_report's exit status is the count of missing directories;
        # capture it through || so errexit does not stop the script.
        m=0
        structure_report "$genome_dir" || m=$?
        TOTAL_MISSING=$((TOTAL_MISSING + m))
        ;;
    esac
  else
    warn "not found locally, skipping: ${GENOME_NAME}"
  fi
done

echo ""
# Only verify mode can fail, and only when something was counted as missing.
if [[ "$MODE" != "sync" && $TOTAL_MISSING -ne 0 ]]; then
  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
  exit 1
fi

if [[ "$MODE" == "sync" ]]; then
  success "Structure sync complete."
else
  success "Structure verified: all genomes match the canonical layout."
fi
|
||||||
93
skills/ingest/SKILL.md
Normal file
93
skills/ingest/SKILL.md
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
---
|
||||||
|
name: ingest
|
||||||
|
description: Semantic pass of a single raw source into the current genome's wiki — read the source, write sources/entities/concepts, handle contradictions, then emit a manifest and STOP. Use when a new file lands in raw/. Does NOT do git, log, index, lint, or PRs (a post-processor handles those), and does NOT handle private sources or project repos.
|
||||||
|
license: see repository
|
||||||
|
compatibility: Runs inside one genome checkout (cwd = genome root). Tools needed — read, edit only. NO bash, NO git. The deterministic steps (index, log, scoped lint, PR) run AFTER you exit, via run-ingest.sh. PRIVATE_CONTEXT must be disabled.
|
||||||
|
allowed-tools: read edit
|
||||||
|
metadata:
|
||||||
|
framework: knowledge-genome
|
||||||
|
phase: "1-ingest-semantic"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Ingest — semantic pass
|
||||||
|
|
||||||
|
You run inside ONE genome checkout. `AGENTS.md` (already in your context) is the
|
||||||
|
authoritative contract. Your job is the **semantic pass only**: read the source, write
|
||||||
|
the wiki pages, handle contradictions. You do **not** touch git, the log, the index, the
|
||||||
|
linter, or PRs — a post-processor (`run-ingest.sh`) does all of that _after you stop_,
|
||||||
|
from the manifest you leave behind. This keeps your context clean and your turns few,
|
||||||
|
which matters on a small local model.
|
||||||
|
|
||||||
|
**Argument:** the relative path of the single raw source to ingest
|
||||||
|
(e.g. `raw/articles/foo.md`). Process only this one.
|
||||||
|
|
||||||
|
## Pre-flight — stop the session if any check fails
|
||||||
|
|
||||||
|
1. Refuse if the argument path is under any `private/` directory.
|
||||||
|
2. Refuse if `PRIVATE_CONTEXT` is not `disabled`.
|
||||||
|
3. Confirm the file exists under `raw/`.
|
||||||
|
|
||||||
|
## Semantic work (your only job)
|
||||||
|
|
||||||
|
1. Read the source once.
|
||||||
|
2. Write `wiki/sources/<kebab-slug>.md` — faithful summary + key points, with the required
|
||||||
|
frontmatter (`type: source`, `domain: <genome>`, `maturity: draft`,
|
||||||
|
`last_updated: <today>`, `private: false`, sensible `tags`).
|
||||||
|
3. For each entity (person, tool, org) → create or update `wiki/entities/<kebab-name>.md`.
|
||||||
|
4. For each concept (pattern, theory, decision) → create or update
|
||||||
|
`wiki/concepts/<kebab-name>.md`.
|
||||||
|
5. On a real contradiction with an existing claim, follow `AGENTS.md` §Conflict: create
|
||||||
|
`wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`. Never overwrite the existing page.
|
||||||
|
|
||||||
|
**Naming — you are the sole author of these names; nothing renames your files.** Use
|
||||||
|
minimal kebab-case: lowercase letters, digits and hyphens only — no spaces, no underscores,
|
||||||
|
no capitals. Pick stable names so the same entity is never created twice (always `acme`,
|
||||||
|
never also `acme-corp`). The path you write a file to MUST be byte-for-byte the path you
|
||||||
|
list in the manifest.
|
||||||
|
|
||||||
|
**Deciding create-vs-update and spotting contradictions — mind the context budget.** Use
|
||||||
|
`wiki/index.md` to locate existing pages, then read **only** the handful that _this source
|
||||||
|
actually names_ — the entities and concepts in the source's title and opening paragraphs —
|
||||||
|
not everything the index lists. When in doubt, read fewer: a missed cross-link is far
|
||||||
|
cheaper than a saturated context. Never scan whole directories.
|
||||||
|
|
||||||
|
## Finish: write the manifest, then STOP
|
||||||
|
|
||||||
|
As your **final action**, write `.ingest-manifest.json` at the genome root
|
||||||
|
(NOT under `wiki/`) describing exactly what you did. Then stop — do not commit, lint,
|
||||||
|
append to the log/index, or open anything.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"raw_source": "raw/articles/foo.md",
|
||||||
|
"reasoning": "One sentence for the log: what changed and why.",
|
||||||
|
"pr_summary": "One or two sentences describing this ingest for the PR.",
|
||||||
|
"contradictions": "None (or: 1 conflict file created — <concept>)",
|
||||||
|
"pages": [
|
||||||
|
{
|
||||||
|
"path": "wiki/sources/foo.md",
|
||||||
|
"summary": "One-line index summary.",
|
||||||
|
"maturity": "draft",
|
||||||
|
"status": "created"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "wiki/entities/acme.md",
|
||||||
|
"summary": "Acme — vendor.",
|
||||||
|
"status": "modified"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Manifest rules:
|
||||||
|
|
||||||
|
- List every page you created or modified, with `status` `created` or `modified`.
|
||||||
|
- `summary` is the one-line index description (≈12 words max). For conflict pages the
|
||||||
|
summary is ignored — the index lists conflicts by slug only.
|
||||||
|
- `maturity` is required only on `created` pages (it seeds the new index entry). It is
|
||||||
|
ignored for `modified` pages, so omit it there.
|
||||||
|
- Do NOT add a `model` field — the orchestrator records which model produced this run; you
|
||||||
|
cannot know your own model name reliably, so do not guess one.
|
||||||
|
- Do not invent a `run_id`, branch, commit, or PR — those belong to the post-processor.
|
||||||
|
|
||||||
|
One source per session. After writing the manifest, stop.
|
||||||
129
skills/ingest/scripts/index-append.py
Normal file
129
skills/ingest/scripts/index-append.py
Normal file
|
|
@ -0,0 +1,129 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/index-append.py
|
||||||
|
# Insert an entry line into the correct section of wiki/index.md and keep that
|
||||||
|
# section's entries alphabetically ordered. Bumps frontmatter last_updated.
|
||||||
|
#
|
||||||
|
# NOTE: agents-genome.md and wiki-index.md claim the pre-commit hook sorts the
|
||||||
|
# index. The actual pre-commit.sh only runs the plaintext-leak check — it does
|
||||||
|
# NOT sort. This script owns the ordering instead. (If you later move sorting
|
||||||
|
# into the hook, reduce this to a plain append.)
|
||||||
|
#
|
||||||
|
# index-append.py --section Sources \
|
||||||
|
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
|
||||||
|
# =============================================================================
|
||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# An index entry line: a list item that starts with a wikilink.
ENTRY_RE = re.compile(r"^- \[\[")
# Same, capturing the wikilink target (used as the identity of an entry).
LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
# A level-2 heading; these delimit the index sections.
HEADER_RE = re.compile(r"^## ")


def main() -> int:
    """Insert or refresh one entry in a section of the index file.

    Command-line arguments (parsed from ``sys.argv``):
        --section  Heading text the target ``## `` section starts with.
        --entry    Full index line to insert.
        --file     Index file to edit (default ``wiki/index.md``).

    Behaviour:
        * ``last_updated:`` in the leading frontmatter block is set to today;
          if the block exists but lacks the key, the key is inserted.
        * An entry whose wikilink target already exists in the section is
          replaced; otherwise the entry is added. Entries are then sorted
          case-insensitively.
        * Non-entry lines of the section are kept together above the entries.

    Returns:
        0 on success (including "already present"), 1 when the file or the
        section cannot be found.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--section", required=True,
                    help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts")
    ap.add_argument("--entry", required=True, help="Full index line to insert")
    ap.add_argument("--file", default="wiki/index.md")
    args = ap.parse_args()

    # An entry must occupy exactly one physical line. The file is re-read with
    # splitlines(), so any embedded line break would split the entry and the
    # continuation would be hoisted into the section intro on the next run.
    # Fold such input into a single line instead of writing it verbatim.
    entry = args.entry
    if entry.splitlines() != [entry]:
        entry = " ".join(part.strip() for part in entry.splitlines() if part.strip())

    try:
        with open(args.file, encoding="utf-8") as fh:
            lines = fh.read().splitlines()
    except FileNotFoundError:
        print(f"index-append: not found: {args.file}", file=sys.stderr)
        return 1

    today = datetime.date.today().isoformat()

    # 1. Bump last_updated inside the leading frontmatter block.
    #    Frontmatter only counts when '---' is the first non-blank line; a
    #    '---' further down is a horizontal rule and must not be touched.
    fm_open = False
    fm_close_idx = None
    bumped = False
    for i, ln in enumerate(lines):
        stripped = ln.strip()
        if not fm_open:
            if stripped == "---":
                fm_open = True
            elif stripped:
                break  # real content before any '---' → no frontmatter
            continue
        if stripped == "---":
            fm_close_idx = i  # the closing ---
            break
        if ln.startswith("last_updated:"):
            lines[i] = f"last_updated: {today}"
            bumped = True

    if not fm_open:
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
    elif not bumped and fm_close_idx is not None:
        # self-heal: frontmatter present but missing the key — insert it before the close
        lines.insert(fm_close_idx, f"last_updated: {today}")
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)

    # 2. Locate the target section [start, end)
    start = None
    for i, ln in enumerate(lines):
        if HEADER_RE.match(ln) and ln[3:].startswith(args.section):
            start = i
            break
    if start is None:
        print(f"index-append: section '{args.section}' not found in {args.file}",
              file=sys.stderr)
        return 1

    end = len(lines)
    for i in range(start + 1, len(lines)):
        if HEADER_RE.match(lines[i]):
            end = i
            break

    # 3. Split the section body into intro (non-entry) and entries
    body = lines[start + 1:end]
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    entries = [ln for ln in body if ENTRY_RE.match(ln)]

    # Deduplicate by wikilink PATH, not by exact line: a re-ingest with a changed
    # summary/maturity should UPDATE the existing entry, not add a duplicate line.
    new_m = LINK_RE.match(entry)
    new_link = new_m.group(1) if new_m else None

    if new_link is not None:
        replaced = False
        for idx, ln in enumerate(entries):
            m = LINK_RE.match(ln)
            if m and m.group(1) == new_link:
                if ln == entry:
                    # Nothing to change; the file is deliberately not rewritten.
                    print("index-append: entry already present, skipping")
                    return 0
                entries[idx] = entry  # same page, refreshed text
                replaced = True
                break
        if not replaced:
            entries.append(entry)
    else:
        # No parseable wikilink — fall back to exact-line dedup.
        if entry in entries:
            print("index-append: entry already present, skipping")
            return 0
        entries.append(entry)

    entries.sort(key=str.casefold)

    # Normalise intro: drop trailing blanks, keep header + comment(s)
    while intro and intro[-1].strip() == "":
        intro.pop()

    new_section = intro + [""] + entries + [""]
    lines = lines[:start + 1] + new_section + lines[end:]

    with open(args.file, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines) + "\n")

    print(f"index-append: added to {args.section}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
50
skills/ingest/scripts/log-append.sh
Normal file
50
skills/ingest/scripts/log-append.sh
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/log-append.sh
|
||||||
|
# Append one entry to the append-only ledger wiki/log.md, in the exact format
|
||||||
|
# defined by AGENTS.md / wiki-log.md. Generates run_id. Never edits prior entries.
|
||||||
|
#
|
||||||
|
# log-append.sh --type INGEST --subject "<slug>" --model "<model>" \
|
||||||
|
# --context "[[raw/x]]" --output "[[sources/x]]" \
|
||||||
|
# --reasoning "One sentence."
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail

# Ledger file; overridable through the environment.
LOG_FILE="${LOG_FILE:-wiki/log.md}"

# Replace CR and LF with spaces so a value occupies exactly one ledger line.
# A raw line break in free text could start a fake "## [date] ..." entry.
one_line() {
  local text="$1"
  text="${text//$'\r'/ }"
  text="${text//$'\n'/ }"
  printf '%s' "$text"
}

# --- argument parsing: every option takes exactly one value ------------------
type="" subject="" model="" context="" output="" reasoning=""
while [[ $# -gt 0 ]]; do
  opt="$1"
  case "$opt" in
    --type|--subject|--model|--context|--output|--reasoning) ;;
    *) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
  esac
  # Without this check a trailing option dies with a bare "unbound variable".
  [[ $# -ge 2 ]] || { echo "log-append: missing value for ${opt}" >&2; exit 1; }
  val="$2"
  shift 2
  case "$opt" in
    --type)      type="$val" ;;
    --subject)   subject="$val" ;;
    --model)     model="$val" ;;
    --context)   context="$val" ;;
    --output)    output="$val" ;;
    --reasoning) reasoning="$val" ;;
  esac
done

: "${type:?--type required}"
: "${subject:?--subject required}"

case "$type" in
  INGEST|LINT|QUERY|CONFLICT|CONFIG|SECURITY) ;;
  *) echo "log-append: invalid TYPE '${type}'" >&2; exit 1 ;;
esac

# The ledger must already exist; this script only ever appends to it.
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }

# Single-line fields: heading text, model name and the one-sentence reasoning.
# context/output are passed through unchanged (the caller controls their shape).
subject="$(one_line "$subject")"
model="$(one_line "$model")"
reasoning="$(one_line "$reasoning")"

# run_id: first available of uuidgen, the kernel's random uuid file, python3.
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')"
today="$(date +%Y-%m-%d)"

# One grouped redirect so the whole entry is appended through a single open.
{
  printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
  printf -- '- run_id: `%s`\n' "$run_id"
  printf -- '- model: `%s`\n' "${model:-unknown}"
  printf -- '- context_read: %s\n' "${context:-*(none)*}"
  printf -- '- output_written: %s\n' "${output:-*(none)*}"
  printf -- '- reasoning: %s\n' "${reasoning:-No reasoning provided.}"
} >> "$LOG_FILE"

# Machine-readable result for the caller.
echo "run_id=${run_id}"
|
||||||
118
skills/ingest/scripts/open-pr.sh
Normal file
118
skills/ingest/scripts/open-pr.sh
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/open-pr.sh
|
||||||
|
# Branch, commit (conventional), push, and open a Forgejo PR for the wiki/ changes.
|
||||||
|
# Mirrors the API conventions of providers/forgejo.sh (token auth + http_code).
|
||||||
|
# Runs inside the genome checkout (cwd = genome root). Never touches main.
|
||||||
|
#
|
||||||
|
# open-pr.sh --slug <slug> --title "feat: ingest <slug>" --body-file <path> \
|
||||||
|
# [--base main] [--label CONFLICT]
|
||||||
|
#
|
||||||
|
# Requires env: FORGEJO_URL, FORGEJO_USER, FORGEJO_TOKEN.
|
||||||
|
# =============================================================================
|
||||||
|
set -euo pipefail

# Required connection settings; abort immediately when one is absent.
: "${FORGEJO_URL:?missing FORGEJO_URL}"
: "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"

# --- argument parsing ---------------------------------------------------------
slug="" title="" body_file="" base="main" label=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --slug)      slug="$2";      shift 2 ;;
    --title)     title="$2";     shift 2 ;;
    --body-file) body_file="$2"; shift 2 ;;
    --base)      base="$2";      shift 2 ;;
    --label)     label="$2";     shift 2 ;;
    *) echo "open-pr: unknown arg: $1" >&2; exit 1 ;;
  esac
done

: "${slug:?--slug required}"
: "${title:?--title required}"
: "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }

branch="feat/ai-ingest-${slug}"
# Repository name = last path component of the origin URL, without ".git".
repo="$(basename -s .git "$(git config --get remote.origin.url)")"
api="${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}"

# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
#    Create the branch, or switch to it when it already exists.
git switch -c "$branch" 2>/dev/null || git switch "$branch"
git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
if git diff --cached --quiet -- wiki/; then
  echo "open-pr: nothing staged under wiki/ — aborting" >&2
  exit 1
fi
git commit -m "$title" -- wiki/
git push -u origin "$branch"

# DRY_RUN: local git work done; skip the Forgejo API (offline tests).
if [[ -n "${DRY_RUN:-}" ]]; then
  echo "PR opened: DRY-RUN ${branch} -> ${base}"
  exit 0
fi

# 2. Open the PR via Forgejo API (jq builds the JSON safely)
# TODO: Forgejo-only. When registry.sh/globals.env sets PROVIDER=github, branch on
#       $PROVIDER here and delegate to providers/github.sh (same token + http_code contract).
body="$(cat "$body_file")"
payload="$(jq -n --arg head "$branch" --arg base "$base" \
                 --arg title "$title" --arg body "$body" \
                 '{head:$head, base:$base, title:$title, body:$body}')"

resp="$(curl --max-time 30 -s -w '\n%{http_code}' \
  -H "Authorization: token ${FORGEJO_TOKEN}" \
  -H "Content-Type: application/json" \
  -X POST "${api}/pulls" \
  -d "$payload")"

# curl -w appends '\n<code>' AFTER the body, so the code is always the final line and the
# body is everything before it. Parameter expansion (no subshells), robust to multi-line JSON.
code="${resp##*$'\n'}"
json="${resp%$'\n'*}"

case "$code" in
  201)
    url="$(printf '%s' "$json" | jq -r '.html_url')"
    number="$(printf '%s' "$json" | jq -r '.number')"
    echo "PR opened: ${url}"
    ;;
  409)
    # PR already exists — fetch it so the orchestrator still gets the URL.
    # The lookup is a convenience: with errexit + pipefail a failing curl/jq
    # would otherwise abort here even though the push already succeeded.
    existing="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
      "${api}/pulls?state=open" \
      | jq -r --arg b "$branch" '.[] | select(.head.ref==$b) | .html_url' | head -n1)" \
      || existing=""
    if [[ -n "$existing" && "$existing" != "null" ]]; then
      echo "PR opened: ${existing}"
    else
      echo "open-pr: a PR for '${branch}' already exists (push updated the branch)." >&2
    fi
    exit 0
    ;;
  401)
    echo "open-pr: unauthorized — check FORGEJO_TOKEN (n8n-bot)." >&2
    exit 1
    ;;
  *)
    echo "open-pr: Forgejo API HTTP ${code}: ${json}" >&2
    exit 1
    ;;
esac

# 3. Optional label (e.g. CONFLICT). Best-effort; non-fatal.
#    Every step is guarded so a label problem can never turn a successfully
#    opened PR into a failing exit status.
if [[ -n "$label" && -n "${number:-}" ]]; then
  label_id="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
    "${api}/labels" \
    | jq -r --arg n "$label" '.[] | select(.name==$n) | .id' | head -n1)" \
    || label_id=""
  if [[ -n "$label_id" && "$label_id" != "null" ]]; then
    # -f makes curl fail on HTTP errors, so "applied" is only reported when
    # the server actually accepted the request.
    if curl --max-time 15 -s -f -o /dev/null \
         -H "Authorization: token ${FORGEJO_TOKEN}" -H "Content-Type: application/json" \
         -X POST "${api}/issues/${number}/labels" \
         -d "{\"labels\":[${label_id}]}"; then
      echo "label '${label}' applied" >&2
    else
      echo "open-pr: could not apply label '${label}' — skipped." >&2
    fi
  else
    echo "open-pr: label '${label}' not found in repo — skipped." >&2
  fi
fi

# The PR exists at this point; report success regardless of the label outcome.
exit 0
|
||||||
146
skills/ingest/scripts/run-ingest.sh
Normal file
146
skills/ingest/scripts/run-ingest.sh
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/run-ingest.sh
|
||||||
|
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout.
|
||||||
|
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every
|
||||||
|
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean.
|
||||||
|
#
|
||||||
|
# run-ingest.sh <genome_name> [manifest_path]
|
||||||
|
#
|
||||||
|
# Emits a single JSON result line on stdout for n8n to parse.
|
||||||
|
# =============================================================================
|
||||||
|
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Positional arguments: the genome name is mandatory; the manifest path is
# optional and defaults to .ingest-manifest.json in the current directory.
genome="${1:?usage: run-ingest.sh <genome> [manifest]}"
manifest="${2:-.ingest-manifest.json}"

# Absolute directory containing this script.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
|
||||||
|
# fail <stage> <reason>
#   Print a single-line JSON error object {status:"error", stage, reason} on
#   stdout, then terminate the script with exit status 1.
fail() {
  local stage="$1" reason="$2"
  jq --null-input --compact-output \
    --arg stage "$stage" \
    --arg reason "$reason" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}
|
||||||
|
|
||||||
|
# --- dependency and input checks ---
# jq is checked first and reported with a literal JSON string because fail()
# itself needs jq to build its output. The object carries the same keys that
# fail() emits (status, stage, reason).
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","stage":"deps","reason":"jq missing"}'; exit 1; }
command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"
[[ -f "$manifest" ]] || fail "manifest" "manifest not found: ${manifest}"

# --- validate the manifest BEFORE trusting any field (LLM output is stochastic) ---
# 1) well-formed JSON object with a string raw_source and an array of pages
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
  "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "invalid manifest: need object with string raw_source and array pages"

# 2) every page must be an object whose .path is a string, lives under wiki/,
#    and contains no '..' (no traversal).
#    The check is fail-closed: `jq -e` exits non-zero both when the predicate is
#    false AND when jq raises an error (for example a page entry that is not an
#    object), so a malformed manifest can never slip through as "safe".
#    An empty pages array passes here and is rejected later as "no pages".
jq -e 'all(.pages[];
           type=="object"
           and (.path|type=="string")
           and (.path|startswith("wiki/"))
           and (.path|contains("..")|not))' \
  "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
|
||||||
|
|
||||||
|
# --- read manifest scalars ---
# manifest_get <jq-filter>: print the filter's result from the manifest as raw text.
manifest_get() { jq -r "$1" "$manifest"; }

raw_source="$(manifest_get '.raw_source')"

# The model name comes from the orchestrator/wrapper (INGEST_MODEL); the agent
# cannot know its own tag, so a self-reported manifest field is only a fallback
# for when the variable is unset or empty.
model="${INGEST_MODEL:-$(manifest_get '.model // "unknown"')}"

# Optional free-text fields, each with a default when absent or null.
reasoning="$(manifest_get '.reasoning // "Ingest."')"
pr_summary="$(manifest_get '.pr_summary // "Ingest."')"
contradictions="$(manifest_get '.contradictions // "None"')"

if [[ -z "$raw_source" || "$raw_source" == "null" ]]; then
  fail "manifest" "raw_source missing"
fi

# Derive the slug from the raw source path; slug.sh exits non-zero when it
# cannot produce one.
if ! slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")"; then
  fail "slug" "empty or invalid slug for ${raw_source}"
fi
|
||||||
|
|
||||||
|
# --- collect touched paths ---
# One array per page status reported in the manifest.
mapfile -t created_paths  < <(jq -r '.pages[] | select(.status=="created")  | .path' "$manifest")
mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .path' "$manifest")

# Either array may legitimately be empty (e.g. an ingest that only creates
# pages). Under `set -u`, bash releases before 4.4 treat "${arr[@]}" on an
# empty array as an unbound variable and abort, so the ${arr[@]+...} guard is
# used: it expands to nothing when the array has no elements and to the quoted
# element list otherwise.
all_paths=(
  ${created_paths[@]+"${created_paths[@]}"}
  ${modified_paths[@]+"${modified_paths[@]}"}
)
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"

# Set to "CONFLICT" later if any created page is a queries/conflict-* page.
conflict_label=""
|
||||||
|
|
||||||
|
# NOTE: no rollback. Steps below mutate the working tree in order (index → log → commit).
|
||||||
|
# All are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway,
|
||||||
|
# nothing is committed (open-pr is the only committer) — the operator re-runs, or inspects
|
||||||
|
# wiki/ if log-append already wrote a line. The manifest is removed only on full success.
|
||||||
|
|
||||||
|
# --- 1. index entries (created pages only), inserted in order ---
# Each created page is emitted by jq as one record: path, summary, maturity.
# Fields are separated by the ASCII unit separator (0x1f) rather than a tab:
# tab is an IFS *whitespace* character, so `read` would collapse consecutive
# tabs and an empty summary would shift maturity into the summary slot.
# Tabs, newlines, carriage returns and stray separators inside a field are
# replaced by a space so that every record stays on exactly one line.
while IFS=$'\037' read -r path summary maturity; do
  [[ -z "$path" ]] && continue

  link="${path#wiki/}"; link="${link%.md}"      # e.g. sources/foo
  folder="${link%%/*}"

  # Map the top-level wiki folder to its index section. Conflict pages live
  # under queries/ but get their own section and flag the PR for labelling.
  case "$folder" in
    sources)  section="Sources" ;;
    entities) section="Entities" ;;
    concepts) section="Concepts" ;;
    queries)
      if [[ "$link" == queries/conflict-* ]]; then
        section="Conflicts"; conflict_label="CONFLICT"
      else
        section="Queries"
      fi ;;
    *)        section="Sources" ;;
  esac

  if [[ "$section" == "Conflicts" ]]; then
    entry="- [[${link}]]"                                   # conflicts: slug only
  else
    entry="- [[${link}]] — ${summary} \`maturity: ${maturity}\`"
  fi

  python3 "${SCRIPTS}/index-append.py" --section "$section" --entry "$entry" \
    || fail "index" "index-append failed for ${path}"
done < <(jq -r '.pages[] | select(.status=="created")
                | [.path, (.summary // ""), (.maturity // "draft")]
                | map(tostring | gsub("[\t\n\r\u001f]"; " "))
                | join("\u001f")' "$manifest")
|
||||||
|
|
||||||
|
# --- 2. log entry ---
# Comma-separated wikilinks for every page in the manifest, with the leading
# "wiki/" and trailing ".md" stripped.
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"

log_args=(
  --type INGEST
  --subject "$slug"
  --model "$model"
  --context "[[${raw_source}]]"
  --output "${out:-*(none)*}"
  --reasoning "$reasoning"
)
if ! bash "${SCRIPTS}/log-append.sh" "${log_args[@]}"; then
  fail "log" "log-append failed"
fi

# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
# stdout and stderr are captured together; the exit status is kept in lint_rc
# instead of being allowed to trip `set -e`.
lint_rc=0
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" || lint_rc=$?
|
||||||
|
|
||||||
|
# --- 4. assemble the PR body (manifest tables + lint results) ---
# The Markdown body is written to a temporary file.
body="$(mktemp)"
trap 'rm -f "$body"' EXIT   # auto-clean on any exit (success, fail(), or crash)

{
  printf '%s\n' "## Summary"
  echo "$pr_summary"

  # One table row per manifest page; maturity defaults to "draft".
  printf '%s\n' "" "## Pages" "| Path | Status | Maturity |" "|------|--------|----------|"
  jq -r '.pages[] | "| `\(.path)` | \(.status) | \(.maturity // "draft") |"' "$manifest"

  printf '%s\n' "" "## Contradictions"
  echo "$contradictions"

  # Raw linter output inside a fenced code block.
  printf '%s\n' "" "## Scoped Lint (post-ingest)" '```'
  echo "$lint_out"
  printf '%s\n' '```'
} > "$body"
|
||||||
|
|
||||||
|
# --- 5. open the PR ---
pr_args=( --slug "$slug" --title "feat: ingest ${slug}" --body-file "$body" --base "${INGEST_BASE:-main}" )
if [[ -n "$conflict_label" ]]; then
  pr_args+=( --label "$conflict_label" )
fi

# Capture open-pr.sh's combined output and keep its exit status in pr_rc so a
# failure is reported in the result line instead of aborting here.
pr_rc=0
pr_out="$( bash "${SCRIPTS}/open-pr.sh" "${pr_args[@]}" 2>&1 )" || pr_rc=$?

# The URL is whatever follows the first "PR opened: " line, if there is one.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- final result line for n8n ---
if [[ $pr_rc -eq 0 ]]; then result_status="ok"; else result_status="pr_failed"; fi
if [[ $lint_rc -eq 0 ]]; then lint_clean=true; else lint_clean=false; fi
if [[ -n "$conflict_label" ]]; then conflict=true; else conflict=false; fi

jq -nc \
  --arg status "$result_status" \
  --arg slug "$slug" \
  --arg pr_url "$pr_url" \
  --argjson lint_clean "$lint_clean" \
  --argjson conflict "$conflict" \
  --arg detail "$pr_out" \
  '{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'

# A failed PR step leaves the manifest in place and exits non-zero. On full
# success the manifest is removed so that an outdated manifest cannot be
# reprocessed by mistake.
if [[ $pr_rc -ne 0 ]]; then
  exit 1
fi
rm -f "$manifest"
|
||||||
55
skills/ingest/scripts/scoped-lint.sh
Normal file
55
skills/ingest/scripts/scoped-lint.sh
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/scoped-lint.sh
|
||||||
|
# Run the framework's validation on ONLY the files touched this session.
|
||||||
|
# Reuses lib/lint.sh + lib/output.sh — same checks as `make lint`, scoped.
|
||||||
|
#
|
||||||
|
# KG_LIB_DIR=/opt/knowledge-genome-setup/lib \
|
||||||
|
# scoped-lint.sh <genome_name> wiki/sources/x.md wiki/entities/y.md
|
||||||
|
#
|
||||||
|
# Exits non-zero if any hard error is found, so the agent notices.
|
||||||
|
# Findings are printed (stderr from the lint functions + a summary on stdout).
|
||||||
|
# =============================================================================
|
||||||
|
set -o errexit -o nounset -o pipefail

# KG_LIB_DIR must be set and non-empty; otherwise the script stops here with the hint below.
: "${KG_LIB_DIR:?set KG_LIB_DIR to the framework lib/ dir (e.g. /opt/knowledge-genome-orchestrator/lib)}"

# Fail clearly if the lib files are missing, rather than a raw `source: No such file`.
for _f in output.sh lint.sh; do
  if [[ ! -f "${KG_LIB_DIR}/${_f}" ]]; then
    echo "scoped-lint: missing ${KG_LIB_DIR}/${_f}" >&2
    exit 1
  fi
done

# shellcheck source=/dev/null
source "${KG_LIB_DIR}/output.sh"
# shellcheck source=/dev/null
source "${KG_LIB_DIR}/lint.sh"

# First argument is the genome name; everything after it is a file to lint.
genome="${1:?usage: scoped-lint.sh <genome> <file...>}"
shift
if [[ $# -eq 0 ]]; then
  echo "scoped-lint: no files given" >&2
  exit 1
fi

errors=0
stale=0
count=$#   # number of files passed in, including any that turn out to be missing

for f in "$@"; do
  if [[ ! -f "$f" ]]; then
    warn "scoped-lint: missing file (skipped): $f"
    continue
  fi

  # Each check's exit status is captured (0 on success) and added to the
  # error total; `|| var=$?` keeps a non-zero status from tripping errexit.
  fe=0;  lint_markdown_file "$f" "$genome" || fe=$?
  pce=0; check_privacy_consistency "$f"    || pce=$?
  pse=0; check_page_size "$f"              || pse=$?
  errors=$(( errors + fe + pce + pse ))

  # Knowledge-decay status is tallied separately and does not affect the exit code.
  st=0; check_knowledge_decay "$f" || st=$?
  stale=$(( stale + st ))

  check_broken_links "$f" || true   # warnings only
done

echo ""
echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)"

# Exit status: 0 when no errors were counted, 1 otherwise.
if [[ $errors -ne 0 ]]; then
  exit 1
fi
|
||||||
23
skills/ingest/scripts/slug.sh
Normal file
23
skills/ingest/scripts/slug.sh
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# skills/ingest/scripts/slug.sh
|
||||||
|
# Derive a wiki slug from a path, filename, or title string.
|
||||||
|
# slug.sh "raw/articles/My Source.md" -> my-source
|
||||||
|
# slug.sh "Some Concept Name" -> some-concept-name
|
||||||
|
# =============================================================================
|
||||||
|
set -o errexit -o nounset -o pipefail

input="${1:?usage: slug.sh <path-or-title>}"

# Keep only the last path component, then drop everything from the final dot on.
base="${input##*/}"
base="${base%.*}"

# Lower-case, turn every run of characters outside [a-z0-9] into a single
# hyphen, then trim hyphens from both ends.
slug="$(
  tr '[:upper:]' '[:lower:]' <<<"$base" \
    | sed -E -e 's/[^a-z0-9]+/-/g' -e 's/-{2,}/-/g' -e 's/^-+//' -e 's/-+$//'
)"

# An all-symbols input (e.g. "!!!.md") collapses to "" — refuse rather than emit a
# broken/empty slug that would produce an invalid branch name downstream.
if [[ -z "$slug" ]]; then
  echo "slug: empty result for input '${input}'" >&2
  exit 1
fi

printf '%s\n' "$slug"
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
## Identity
|
## Identity
|
||||||
|
|
||||||
| Field | Value |
|
| Field | Value |
|
||||||
|--------|-------|
|
| ------ | -------------------------------------------------- |
|
||||||
| Genome | `{{GENOME_NAME}}` |
|
| Genome | `{{GENOME_NAME}}` |
|
||||||
| Domain | `{{GENOME_DESC}}` |
|
| Domain | `{{GENOME_DESC}}` |
|
||||||
| Owner | `{{FORGEJO_USER}}` |
|
| Owner | `{{FORGEJO_USER}}` |
|
||||||
|
|
@ -14,12 +14,26 @@
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Linked Project
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
| --------------- | --------------------- |
|
||||||
|
| Project repo | `{{LINKED_PROJECT}}` |
|
||||||
|
| Branch | `main` |
|
||||||
|
| Allowed tasks | `readme, tests, code` |
|
||||||
|
| Preferred model | `auto` |
|
||||||
|
|
||||||
|
If `Project repo` is `none`, this genome is knowledge-only — phase-2 project work
|
||||||
|
does not apply. When set, after a wiki PR is **merged**, the orchestrator may trigger
|
||||||
|
work on this repo within _Allowed tasks_. The agent never touches the project repo
|
||||||
|
during ingest.
|
||||||
|
|
||||||
## PRIVATE_CONTEXT
|
## PRIVATE_CONTEXT
|
||||||
|
|
||||||
**Default: `disabled`** — never infer; require explicit operator declaration per session.
|
**Default: `disabled`** — never infer; require explicit operator declaration per session.
|
||||||
|
|
||||||
| State | Behavior |
|
| State | Behavior |
|
||||||
|-------|----------|
|
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
||||||
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
||||||
|
|
||||||
|
|
@ -41,6 +55,7 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
||||||
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary.
|
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary.
|
||||||
|
|
||||||
### NEVER
|
### NEVER
|
||||||
|
|
||||||
- Load `wiki/log.md` in full — read only the tail injected by the orchestrator.
|
- Load `wiki/log.md` in full — read only the tail injected by the orchestrator.
|
||||||
- Rewrite `wiki/index.md` to reorder entries — append only; sorting is automated.
|
- Rewrite `wiki/index.md` to reorder entries — append only; sorting is automated.
|
||||||
- Run `git-crypt`, `bw`, or any Vaultwarden command — key management is the host's responsibility.
|
- Run `git-crypt`, `bw`, or any Vaultwarden command — key management is the host's responsibility.
|
||||||
|
|
@ -48,6 +63,7 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
||||||
- Merge PRs — human approval required.
|
- Merge PRs — human approval required.
|
||||||
|
|
||||||
### ASK FIRST
|
### ASK FIRST
|
||||||
|
|
||||||
- Deleting any wiki page.
|
- Deleting any wiki page.
|
||||||
- Changing `maturity` from `stable` to `deprecated`.
|
- Changing `maturity` from `stable` to `deprecated`.
|
||||||
- Writing to `wiki/private/` when PRIVATE_CONTEXT state is ambiguous.
|
- Writing to `wiki/private/` when PRIVATE_CONTEXT state is ambiguous.
|
||||||
|
|
@ -70,7 +86,8 @@ Execute in this order before any file operation:
|
||||||
## Workflows
|
## Workflows
|
||||||
|
|
||||||
### Ingest
|
### Ingest
|
||||||
*Triggered by new file in `raw/`.*
|
|
||||||
|
_Triggered by new file in `raw/`._
|
||||||
|
|
||||||
1. Read source once.
|
1. Read source once.
|
||||||
2. Create `wiki/sources/<slug>.md` — summary + key points.
|
2. Create `wiki/sources/<slug>.md` — summary + key points.
|
||||||
|
|
@ -82,12 +99,14 @@ Execute in this order before any file operation:
|
||||||
8. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
|
8. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
|
||||||
9. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
|
9. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
|
||||||
|
|
||||||
*Private source* (`PRIVATE_CONTEXT: enabled` required):
|
_Private source_ (`PRIVATE_CONTEXT: enabled` required):
|
||||||
|
|
||||||
- All output → `wiki/private/<slug>.md` only.
|
- All output → `wiki/private/<slug>.md` only.
|
||||||
- PR title: `[PRIVATE] ingest: <slug>`.
|
- PR title: `[PRIVATE] ingest: <slug>`.
|
||||||
|
|
||||||
### Query
|
### Query
|
||||||
*Triggered by operator question.*
|
|
||||||
|
_Triggered by operator question._
|
||||||
|
|
||||||
1. `qmd search "<query>"` → identify candidate pages.
|
1. `qmd search "<query>"` → identify candidate pages.
|
||||||
2. Read candidate pages directly.
|
2. Read candidate pages directly.
|
||||||
|
|
@ -96,10 +115,11 @@ Execute in this order before any file operation:
|
||||||
5. Append entry to `wiki/index.md` under Queries.
|
5. Append entry to `wiki/index.md` under Queries.
|
||||||
6. Append log entry: `QUERY | <subject>`.
|
6. Append log entry: `QUERY | <subject>`.
|
||||||
|
|
||||||
*For general orientation without a specific query: read `wiki/index.md` directly.*
|
_For general orientation without a specific query: read `wiki/index.md` directly._
|
||||||
|
|
||||||
### Lint
|
### Lint
|
||||||
*Triggered by operator with bash pre-scan output.*
|
|
||||||
|
_Triggered by operator with bash pre-scan output._
|
||||||
|
|
||||||
Pre-requisite: operator runs `bash scripts/lint-genomes.sh` and provides output to this session.
|
Pre-requisite: operator runs `bash scripts/lint-genomes.sh` and provides output to this session.
|
||||||
The script handles deterministically: broken links, knowledge decay, page size, frontmatter validation.
|
The script handles deterministically: broken links, knowledge decay, page size, frontmatter validation.
|
||||||
|
|
@ -119,6 +139,7 @@ Append log entry: `LINT | <summary of findings>`.
|
||||||
## File Conventions
|
## File Conventions
|
||||||
|
|
||||||
### Frontmatter
|
### Frontmatter
|
||||||
|
|
||||||
Required on every wiki page:
|
Required on every wiki page:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
|
@ -138,19 +159,25 @@ private: true | false
|
||||||
- `deprecated` — superseded. Add `> **DEPRECATED:** <reason>` callout at top of body.
|
- `deprecated` — superseded. Add `> **DEPRECATED:** <reason>` callout at top of body.
|
||||||
|
|
||||||
### Links
|
### Links
|
||||||
|
|
||||||
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
|
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
|
||||||
- Cross-genome: `[[../genome-target/wiki/folder/file]]`.
|
- Cross-genome: `[[../genome-target/wiki/folder/file]]`.
|
||||||
- External: `[text](https://...)`.
|
- External: `[text](https://...)`.
|
||||||
|
|
||||||
### Index entries
|
### Index entries
|
||||||
|
|
||||||
Append at bottom of relevant section in `wiki/index.md`:
|
Append at bottom of relevant section in `wiki/index.md`:
|
||||||
|
|
||||||
```
|
```
|
||||||
- [[folder/slug]] — One-line summary. `maturity: draft`
|
- [[folder/slug]] — One-line summary. `maturity: draft`
|
||||||
```
|
```
|
||||||
|
|
||||||
Never reorder. Alphabetical sort is handled by the pre-commit hook.
|
Never reorder. Alphabetical sort is handled by the pre-commit hook.
|
||||||
|
|
||||||
### Log entries
|
### Log entries
|
||||||
|
|
||||||
Append one entry per operation to `wiki/log.md`:
|
Append one entry per operation to `wiki/log.md`:
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
## [YYYY-MM-DD] TYPE | Subject
|
## [YYYY-MM-DD] TYPE | Subject
|
||||||
|
|
||||||
|
|
@ -160,6 +187,7 @@ Append one entry per operation to `wiki/log.md`:
|
||||||
- output_written: `[[path/C]]`
|
- output_written: `[[path/C]]`
|
||||||
- reasoning: One sentence — what changed and why.
|
- reasoning: One sentence — what changed and why.
|
||||||
```
|
```
|
||||||
|
|
||||||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||||
|
|
||||||
Parse: `grep "^## \[" wiki/log.md | tail -5`
|
Parse: `grep "^## \[" wiki/log.md | tail -5`
|
||||||
|
|
@ -183,16 +211,20 @@ last_updated: YYYY-MM-DD
|
||||||
private: false
|
private: false
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
## Conflict: <concept>
|
## Conflict: <concept>
|
||||||
|
|
||||||
**Claim A (existing):** [[path/to/existing-page]]
|
**Claim A (existing):** [[path/to/existing-page]]
|
||||||
|
|
||||||
> Summary of current wiki position.
|
> Summary of current wiki position.
|
||||||
|
|
||||||
**Claim B (new):** [[path/to/new-source]]
|
**Claim B (new):** [[path/to/new-source]]
|
||||||
|
|
||||||
> Summary of contradicting evidence.
|
> Summary of contradicting evidence.
|
||||||
|
|
||||||
**Assessment:**
|
**Assessment:**
|
||||||
|
|
||||||
- Confidence A: high | medium | low — <reason>
|
- Confidence A: high | medium | low — <reason>
|
||||||
- Confidence B: high | medium | low — <reason>
|
- Confidence B: high | medium | low — <reason>
|
||||||
- Recommendation: `accept_b` | `keep_a` | `requires_human_review`
|
- Recommendation: `accept_b` | `keep_a` | `requires_human_review`
|
||||||
|
|
@ -212,9 +244,11 @@ private: false
|
||||||
- `maturity: draft` not updated in **90 days** → flag during lint.
|
- `maturity: draft` not updated in **90 days** → flag during lint.
|
||||||
|
|
||||||
Flagged pages: prepend to body:
|
Flagged pages: prepend to body:
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
> **⚠️ STALE:** Last validated {{last_updated}}. Re-validation required.
|
> **⚠️ STALE:** Last validated {{last_updated}}. Re-validation required.
|
||||||
```
|
```
|
||||||
|
|
||||||
Propose re-validation task. Do not change `maturity` without new source evidence.
|
Propose re-validation task. Do not change `maturity` without new source evidence.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -222,7 +256,7 @@ Propose re-validation task. Do not change `maturity` without new source evidence
|
||||||
## Collaboration
|
## Collaboration
|
||||||
|
|
||||||
| Role | Access | Permitted |
|
| Role | Access | Permitted |
|
||||||
|------|--------|-----------|
|
| -------------- | ----------------- | ------------------------------------------------------------------------------------ |
|
||||||
| Owner | Full — key holder | Read/write everywhere |
|
| Owner | Full — key holder | Read/write everywhere |
|
||||||
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
||||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
## Identity
|
## Identity
|
||||||
|
|
||||||
| Field | Value |
|
| Field | Value |
|
||||||
|--------|-------|
|
| ------ | -------------------------------------------------- |
|
||||||
| Repo | `{{MASTER_REPO}}` |
|
| Repo | `{{MASTER_REPO}}` |
|
||||||
| Owner | `{{FORGEJO_USER}}` |
|
| Owner | `{{FORGEJO_USER}}` |
|
||||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||||
|
|
@ -32,14 +32,17 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
||||||
## Global Security Rules
|
## Global Security Rules
|
||||||
|
|
||||||
### PRIVATE_CONTEXT scope
|
### PRIVATE_CONTEXT scope
|
||||||
|
|
||||||
- Toggle is **per-genome and per-session**. Enabling for `genome-finance` does NOT enable for `genome-dev`.
|
- Toggle is **per-genome and per-session**. Enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||||
- Cloud LLM models: `PRIVATE_CONTEXT` must be `disabled` for all genomes. Private data never leaves the local network.
|
- Cloud LLM models: `PRIVATE_CONTEXT` must be `disabled` for all genomes. Private data never leaves the local network.
|
||||||
|
|
||||||
### Log sanitization
|
### Log sanitization
|
||||||
|
|
||||||
- Never print decrypted secrets, session tokens, or key contents to stdout or log files.
|
- Never print decrypted secrets, session tokens, or key contents to stdout or log files.
|
||||||
- Document only `run_id` and genome name — never the key value.
|
- Document only `run_id` and genome name — never the key value.
|
||||||
|
|
||||||
### Key management
|
### Key management
|
||||||
|
|
||||||
- Key injection is the host's responsibility — executed before this session starts.
|
- Key injection is the host's responsibility — executed before this session starts.
|
||||||
- Never write, suggest, or generate scripts that save `.key` files to disk.
|
- Never write, suggest, or generate scripts that save `.key` files to disk.
|
||||||
|
|
||||||
|
|
@ -54,12 +57,14 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
||||||
5. Per-genome `AGENTS.md` governs all wiki operations within that genome. This file governs boundaries only.
|
5. Per-genome `AGENTS.md` governs all wiki operations within that genome. This file governs boundaries only.
|
||||||
|
|
||||||
### NEVER
|
### NEVER
|
||||||
|
|
||||||
- Load multiple `wiki/index.md` files simultaneously for cross-genome comparison — use qmd.
|
- Load multiple `wiki/index.md` files simultaneously for cross-genome comparison — use qmd.
|
||||||
- Run `git-crypt`, `bw`, or Vaultwarden commands — host responsibility.
|
- Run `git-crypt`, `bw`, or Vaultwarden commands — host responsibility.
|
||||||
- Modify files in more than one genome in the same operation.
|
- Modify files in more than one genome in the same operation.
|
||||||
- Modify `core-karpathy` in any way.
|
- Modify `core-karpathy` in any way.
|
||||||
|
|
||||||
### ASK FIRST
|
### ASK FIRST
|
||||||
|
|
||||||
- Any operation that touches two or more genomes.
|
- Any operation that touches two or more genomes.
|
||||||
- Updating submodule pointers in master.
|
- Updating submodule pointers in master.
|
||||||
- Any key rotation procedure.
|
- Any key rotation procedure.
|
||||||
|
|
@ -77,7 +82,8 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
||||||
---
|
---
|
||||||
|
|
||||||
## Cross-Genome Lint
|
## Cross-Genome Lint
|
||||||
*Manual, monthly — requires operator initiation. Not automated.*
|
|
||||||
|
_Manual, monthly — requires operator initiation. Not automated._
|
||||||
|
|
||||||
1. Use `qmd search "<concept>"` to find pages covering the same concept across genomes.
|
1. Use `qmd search "<concept>"` to find pages covering the same concept across genomes.
|
||||||
2. Identify:
|
2. Identify:
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,31 @@
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
<!-- One sentence: goal of this session and source processed. -->
|
<!-- One sentence: goal of this session and source processed. -->
|
||||||
|
|
||||||
## Pages Created
|
## Pages Created
|
||||||
|
|
||||||
| Path | Type | Maturity |
|
| Path | Type | Maturity |
|
||||||
|------|------|----------|
|
| ----------------- | --------------------------------- | -------- |
|
||||||
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
||||||
|
|
||||||
## Pages Modified
|
## Pages Modified
|
||||||
|
|
||||||
| Path | Change |
|
| Path | Change |
|
||||||
|------|--------|
|
| ----------------- | ----------------------------------------- |
|
||||||
| `[[folder/slug]]` | Added cross-reference to `[[other/page]]` |
|
| `[[folder/slug]]` | Added cross-reference to `[[other/page]]` |
|
||||||
|
|
||||||
## Contradictions Found
|
## Contradictions Found
|
||||||
|
|
||||||
- [ ] None
|
- [ ] None
|
||||||
- [ ] `n` conflict file(s) created — listed below
|
- [ ] `n` conflict file(s) created — listed below
|
||||||
|
|
||||||
## Private Data Accessed
|
## Private Data Accessed
|
||||||
|
|
||||||
- [ ] No — `PRIVATE_CONTEXT: disabled`
|
- [ ] No — `PRIVATE_CONTEXT: disabled`
|
||||||
- [ ] Yes — `PRIVATE_CONTEXT: enabled` · outputs in `wiki/private/` only
|
- [ ] Yes — `PRIVATE_CONTEXT: enabled` · outputs in `wiki/private/` only
|
||||||
|
|
||||||
## Scoped Lint (post-ingest)
|
## Scoped Lint (post-ingest)
|
||||||
|
|
||||||
- [ ] Frontmatter valid on all touched pages
|
- [ ] Frontmatter valid on all touched pages
|
||||||
- [ ] No broken wikilinks on touched pages
|
- [ ] No broken wikilinks on touched pages
|
||||||
- [ ] No issues found
|
- [ ] No issues found
|
||||||
|
|
|
||||||
45
templates/readme-master.md
Normal file
45
templates/readme-master.md
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
# {{MASTER_REPO}}
|
||||||
|
|
||||||
|
Master (umbrella) repository for the Knowledge Genome network.
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
| ---------- | -------------------------------------------------- |
|
||||||
|
| Owner | `{{FORGEJO_USER}}` |
|
||||||
|
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||||
|
| Scaffolded | `{{DATE}}` |
|
||||||
|
|
||||||
|
## What this repo is
|
||||||
|
|
||||||
|
This repository does **not** hold knowledge itself. It is the orchestrator: each genome
|
||||||
|
is a Git submodule, plus `core-karpathy` as a read-only reference pattern. Cross-genome
|
||||||
|
coordination rules live in `AGENTS.md`.
|
||||||
|
|
||||||
|
```text
|
||||||
|
{{MASTER_REPO}}/
|
||||||
|
├── core-karpathy/ ← reference pattern — read-only, never modify
|
||||||
|
├── genome-*/ ← one submodule per genome (own AGENTS.md, own git-crypt)
|
||||||
|
└── AGENTS.md ← cross-genome coordinator (boundaries only)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Working with submodules
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone with all genomes
|
||||||
|
git clone --recurse-submodules {{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}.git
|
||||||
|
|
||||||
|
# Pull the latest pointers for every genome
|
||||||
|
git submodule update --remote --merge
|
||||||
|
|
||||||
|
# Operate inside a single genome (one genome at a time — see AGENTS.md)
|
||||||
|
cd genome-<name>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rules of the road
|
||||||
|
|
||||||
|
- Operate within **one genome at a time**; no commits spanning multiple genomes.
|
||||||
|
- `core-karpathy` is read-only.
|
||||||
|
- Never commit to `main` in a genome — PRs only, no self-merge.
|
||||||
|
- Private data (`**/private/**`) is git-crypt encrypted and never leaves the local network.
|
||||||
|
|
||||||
|
Genome-level operations are governed by each genome's own `AGENTS.md`. This README and the
|
||||||
|
master `AGENTS.md` govern boundaries only.
|
||||||
|
|
@ -19,27 +19,28 @@ Entry format: `- [[folder/slug]] — One-line summary. \`maturity: <value>\``
|
||||||
---
|
---
|
||||||
|
|
||||||
## Sources (`wiki/sources/`)
|
## Sources (`wiki/sources/`)
|
||||||
*Ingested raw materials. One entry per processed source.*
|
|
||||||
|
|
||||||
|
_Ingested raw materials. One entry per processed source._
|
||||||
|
|
||||||
## Entities (`wiki/entities/`)
|
## Entities (`wiki/entities/`)
|
||||||
*People, organisations, tools, projects.*
|
|
||||||
|
|
||||||
|
_People, organisations, tools, projects._
|
||||||
|
|
||||||
## Concepts (`wiki/concepts/`)
|
## Concepts (`wiki/concepts/`)
|
||||||
*Theories, methodologies, patterns, architectural decisions.*
|
|
||||||
|
|
||||||
|
_Theories, methodologies, patterns, architectural decisions._
|
||||||
|
|
||||||
## Queries (`wiki/queries/`)
|
## Queries (`wiki/queries/`)
|
||||||
*Synthesised answers worth preserving. Archived explorations and analyses.*
|
|
||||||
|
|
||||||
|
_Synthesised answers worth preserving. Archived explorations and analyses._
|
||||||
|
|
||||||
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
||||||
*Created automatically when the agent detects contradictions between sources.*
|
|
||||||
*Do not summarise entries here — list slugs only to avoid surfacing unresolved claims.*
|
|
||||||
*Remove entry once the operator has resolved and closed the corresponding PR.*
|
|
||||||
|
|
||||||
|
_Created automatically when the agent detects contradictions between sources._
|
||||||
|
_Do not summarise entries here — list slugs only to avoid surfacing unresolved claims._
|
||||||
|
_Remove entry once the operator has resolved and closed the corresponding PR._
|
||||||
|
|
||||||
## Private Synthesis (`wiki/private/`)
|
## Private Synthesis (`wiki/private/`)
|
||||||
*Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo.*
|
|
||||||
*List slug names ONLY. Do not append summaries — prevents metadata leakage.*
|
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
|
||||||
|
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
|
||||||
|
|
|
||||||
|
|
@ -22,11 +22,13 @@ Append new entries at the bottom using the format defined below.
|
||||||
## Entry Format
|
## Entry Format
|
||||||
|
|
||||||
### Required header (enables shell parsing):
|
### Required header (enables shell parsing):
|
||||||
|
|
||||||
```text
|
```text
|
||||||
## [YYYY-MM-DD] TYPE | Subject or title
|
## [YYYY-MM-DD] TYPE | Subject or title
|
||||||
```
|
```
|
||||||
|
|
||||||
### Required metadata block for all agent-generated entries:
|
### Required metadata block for all agent-generated entries:
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
- run_id: `<short-uuid or session-identifier>`
|
- run_id: `<short-uuid or session-identifier>`
|
||||||
- model: `<model-name-and-version>`
|
- model: `<model-name-and-version>`
|
||||||
|
|
@ -38,6 +40,7 @@ Append new entries at the bottom using the format defined below.
|
||||||
**Valid TYPEs:** `INGEST` | `LINT` | `QUERY` | `CONFLICT` | `CONFIG` | `SECURITY`
|
**Valid TYPEs:** `INGEST` | `LINT` | `QUERY` | `CONFLICT` | `CONFIG` | `SECURITY`
|
||||||
|
|
||||||
**Parse examples:**
|
**Parse examples:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Last 5 entries
|
# Last 5 entries
|
||||||
grep "^## \[" wiki/log.md | tail -5
|
grep "^## \[" wiki/log.md | tail -5
|
||||||
|
|
@ -55,6 +58,6 @@ grep "^## \[2026-05" wiki/log.md
|
||||||
|
|
||||||
- run_id: `system-init`
|
- run_id: `system-init`
|
||||||
- model: `setup-knowledge-genome.sh`
|
- model: `setup-knowledge-genome.sh`
|
||||||
- context_read: *(none — initial scaffold)*
|
- context_read: _(none — initial scaffold)_
|
||||||
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
|
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
|
||||||
- reasoning: Initial directory structure and encryption layer initialized by setup script.
|
- reasoning: Initial directory structure and encryption layer initialized by setup script.
|
||||||
|
|
|
||||||
56
tests/README.md
Normal file
56
tests/README.md
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
# Tests
|
||||||
|
|
||||||
|
Deterministic tests for the mechanical layer of the framework — **no LLM, no GPU, no
|
||||||
|
network**. They simulate pi's output with fixtures and exercise the scripts directly, so
|
||||||
|
they run anywhere (laptop, CI, a git hook). They do **not** belong on vm101 or in n8n.
|
||||||
|
|
||||||
|
## What's covered
|
||||||
|
|
||||||
|
| File | Covers |
|
||||||
|
|------|--------|
|
||||||
|
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||||
|
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse |
|
||||||
|
| `structure.bats` | `lib/structure.sh` report/sync |
|
||||||
|
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||||
|
|
||||||
|
`run-ingest.bats` auto-`skip`s if `jq` is missing; everything else needs only bash + git
|
||||||
|
(+ `python3` for the index tests).
|
||||||
|
|
||||||
|
## Install bats
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Debian/Ubuntu
|
||||||
|
sudo apt install bats
|
||||||
|
# or pinned, as a vendored submodule
|
||||||
|
git submodule add https://github.com/bats-core/bats-core.git tests/bats
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bats tests/ # whole suite
|
||||||
|
bats tests/lint.bats # one file
|
||||||
|
bats -f "sorted" tests/scripts.bats # filter by name
|
||||||
|
```
|
||||||
|
|
||||||
|
Each test builds its own throwaway genome under `BATS_TEST_TMPDIR` (auto-cleaned) with a
|
||||||
|
local bare git remote, so `open-pr.sh` (run with `DRY_RUN=1`) can branch/commit/push without touching
|
||||||
|
Forgejo.
|
||||||
|
|
||||||
|
## Makefile targets
|
||||||
|
|
||||||
|
```make
|
||||||
|
test:
|
||||||
|
@bats tests/
|
||||||
|
|
||||||
|
verify-structure:
|
||||||
|
@bash scripts/verify-genomes.sh
|
||||||
|
|
||||||
|
sync-structure:
|
||||||
|
@bash scripts/verify-genomes.sh --sync
|
||||||
|
```
|
||||||
|
|
||||||
|
## Note on `helpers.bash`
|
||||||
|
|
||||||
|
`FIXTURE_DIRS` in `helpers.bash` must match `GENOME_DIRS` in `lib/structure.sh`. If you
|
||||||
|
change the canonical layout, update both (the structure tests assume a clean baseline).
|
||||||
98
tests/helpers.bash
Normal file
98
tests/helpers.bash
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# tests/helpers.bash — shared helpers for the bats suite.
|
||||||
|
|
||||||
|
# Paths every test file needs. BATS_TEST_DIRNAME is the directory of the running
# .bats file, so its parent is taken as the repository root.
REPO_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
LIB_DIR="$REPO_ROOT/lib"
SKILL_SCRIPTS="$REPO_ROOT/skills/ingest/scripts"

# Directories a freshly scaffolded genome is expected to contain.
# Must mirror the canonical list in lib/structure.sh.
FIXTURE_DIRS=(
  raw/articles
  raw/transcripts
  raw/code-packs
  raw/assets
  raw/private
  wiki/sources
  wiki/entities
  wiki/concepts
  wiki/queries
  wiki/private
)
|
||||||
|
|
||||||
|
# make_fixture_genome → echoes the path to a throwaway genome checkout with a
# local bare remote, the full canonical structure, and rendered index/log.
#
# Layout created under a fresh temp dir:
#   <base>/origin.git   bare repository used as the `origin` remote
#   <base>/genome       working tree (every FIXTURE_DIRS entry + .gitkeep,
#                       wiki/index.md, wiki/log.md, raw/articles/test.md)
#   <base>/nohooks      empty directory used as core.hooksPath
#
# The temp dir lives under BATS_TEST_TMPDIR when bats sets it (bats removes
# that directory itself). Outside bats it falls back to /tmp and is NOT
# cleaned up automatically.
#
# Output:  the genome working-tree path on stdout, and nothing else.
# Returns: 0 on success, non-zero as soon as any setup step fails.
#
# Why the explicit `|| return 1` / `&&` chains: this function is normally
# called as G="$(make_fixture_genome)". Bash clears errexit inside command
# substitutions, so without explicit checks a failed `git commit` or
# `git push` would still print a path and the test would run against a
# half-built fixture.
make_fixture_genome() {
  local base g d

  base="$(mktemp -d "${BATS_TEST_TMPDIR:-/tmp}/genome.XXXXXX")" || return 1
  git init --bare -q "${base}/origin.git" || return 1

  g="${base}/genome"
  for d in "${FIXTURE_DIRS[@]}"; do
    mkdir -p "${g}/${d}" || return 1
    touch "${g}/${d}/.gitkeep" || return 1
  done

  cat > "${g}/wiki/index.md" <<'EOF' || return 1
---
title: "Index — genome-test"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Master Index: genome-test

---

## Sources (`wiki/sources/`)
*Ingested raw materials.*


## Entities (`wiki/entities/`)
*People, tools.*


## Concepts (`wiki/concepts/`)
*Patterns.*


## Queries (`wiki/queries/`)
*Answers.*


## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*slugs only.*
EOF

  cat > "${g}/wiki/log.md" <<'EOF' || return 1
---
title: "Operations Log — genome-test"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Operations Log

---

## [2026-01-01] CONFIG | scaffolded
- run_id: `init`
EOF

  echo "raw test" > "${g}/raw/articles/test.md" || return 1

  mkdir -p "${base}/nohooks" || return 1

  # Hermetic: override the user's global git config (signing, global hooks);
  # otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here.
  # Steps are chained with && because `set -e` would be ignored inside a
  # subshell that sits on the left-hand side of `||`.
  (
    cd "${g}" &&
      git init -q &&
      git config commit.gpgsign false &&
      git config core.hooksPath "${base}/nohooks" &&
      git config user.email t@t &&
      git config user.name tester &&
      git add . &&
      git commit -qm init &&
      git branch -M main &&
      git remote add origin "${base}/origin.git" &&
      git push -q -u origin main
  ) >/dev/null || return 1

  echo "${g}"
}
|
||||||
71
tests/lint.bats
Normal file
71
tests/lint.bats
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# tests/lint.bats — lib/lint.sh validators and the scoped-lint wrapper.
|
||||||
|
load helpers
|
||||||
|
|
||||||
|
# Per-test setup: bring the shared output helpers into scope first, then the
# lint validators that these tests call directly.
setup() {
  . "${LIB_DIR}/output.sh"
  . "${LIB_DIR}/lint.sh"
}
|
||||||
|
|
||||||
|
# write_page <path> <type> <domain>
# Writes a minimal wiki page to <path>: a frontmatter block with the given
# type and domain, today's date as last_updated, `private: false`, and a
# one-line body. An existing file at <path> is overwritten.
write_page() {
  local target="$1" page_type="$2" page_domain="$3"
  local today
  today="$(date +%F)"

  printf '%s\n' \
    '---' \
    'title: "T"' \
    "type: ${page_type}" \
    "domain: ${page_domain}" \
    'tags: [x]' \
    'maturity: draft' \
    "last_updated: ${today}" \
    'private: false' \
    '---' \
    'body' > "${target}"
}
|
||||||
|
|
||||||
|
@test "lint_markdown_file: a clean page passes (0 errors)" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
write_page "$G/wiki/sources/good.md" source genome-test
|
||||||
|
run lint_markdown_file "$G/wiki/sources/good.md" genome-test
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "lint_markdown_file: invalid type + wrong domain are caught" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||||
|
run lint_markdown_file "$G/wiki/sources/bad.md" genome-test
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "check_privacy_consistency: a private/ file without 'private: true' fails" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
# page sits in wiki/private/ but is flagged private: false → leak
|
||||||
|
write_page "$G/wiki/private/p.md" private genome-test
|
||||||
|
run check_privacy_consistency "$G/wiki/private/p.md"
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "check_page_size: a >800-line page errors" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
{ write_page "$G/wiki/sources/big.md" source genome-test; yes "x" | head -n 850 >> "$G/wiki/sources/big.md"; }
|
||||||
|
run check_page_size "$G/wiki/sources/big.md"
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "scoped-lint: aggregates findings and exits non-zero on errors" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||||
|
cd "$G"
|
||||||
|
export KG_LIB_DIR="$LIB_DIR"
|
||||||
|
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/bad.md
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"error(s)"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "scoped-lint: a clean page passes (exit 0)" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
write_page "$G/wiki/sources/good.md" source genome-test
|
||||||
|
cd "$G"
|
||||||
|
export KG_LIB_DIR="$LIB_DIR"
|
||||||
|
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
}
|
||||||
173
tests/run-ingest.bats
Normal file
173
tests/run-ingest.bats
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# tests/run-ingest.bats — end-to-end orchestrator test (no LLM, no network).
|
||||||
|
# Simulates pi's output (a source page + manifest) and runs the mechanical pass.
|
||||||
|
load helpers
|
||||||
|
|
||||||
|
@test "run-ingest: DRY_RUN end-to-end updates index + log and opens a dry PR" {
|
||||||
|
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||||
|
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
|
||||||
|
# --- simulate the semantic pass that pi would have done ---
|
||||||
|
cat > wiki/sources/test-source.md <<'EOF'
|
||||||
|
---
|
||||||
|
title: "Test Source"
|
||||||
|
type: source
|
||||||
|
domain: genome-test
|
||||||
|
tags: [t]
|
||||||
|
maturity: draft
|
||||||
|
last_updated: 2026-06-03
|
||||||
|
private: false
|
||||||
|
---
|
||||||
|
body
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat > .ingest-manifest.json <<'EOF'
|
||||||
|
{
|
||||||
|
"raw_source": "raw/articles/test.md",
|
||||||
|
"model": "qwen3.5-9b",
|
||||||
|
"reasoning": "Ingested the test source.",
|
||||||
|
"pr_summary": "Ingest of test: 1 source page.",
|
||||||
|
"contradictions": "None",
|
||||||
|
"pages": [
|
||||||
|
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
export KG_LIB_DIR="$LIB_DIR"
|
||||||
|
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||||
|
export DRY_RUN=1
|
||||||
|
|
||||||
|
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *'"status":"ok"'* ]]
|
||||||
|
[[ "$output" == *'"lint_clean":true'* ]]
|
||||||
|
[[ "$output" == *'"conflict":false'* ]]
|
||||||
|
|
||||||
|
# side effects on the working tree
|
||||||
|
grep -q 'sources/test-source' wiki/index.md
|
||||||
|
grep -q 'INGEST | test' wiki/log.md
|
||||||
|
git rev-parse --verify feat/ai-ingest-test
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "run-ingest: a conflict page is labelled and lands in the Conflicts section" {
|
||||||
|
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||||
|
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
|
||||||
|
cat > wiki/queries/conflict-pricing-2026-06-03.md <<'EOF'
|
||||||
|
---
|
||||||
|
title: "Conflict: pricing"
|
||||||
|
type: conflict
|
||||||
|
domain: genome-test
|
||||||
|
maturity: draft
|
||||||
|
last_updated: 2026-06-03
|
||||||
|
private: false
|
||||||
|
---
|
||||||
|
conflict body
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat > .ingest-manifest.json <<'EOF'
|
||||||
|
{
|
||||||
|
"raw_source": "raw/articles/test.md",
|
||||||
|
"model": "m",
|
||||||
|
"reasoning": "Flagged a contradiction.",
|
||||||
|
"pr_summary": "Conflict on pricing.",
|
||||||
|
"contradictions": "1 conflict file created — pricing",
|
||||||
|
"pages": [
|
||||||
|
{"path": "wiki/queries/conflict-pricing-2026-06-03.md", "summary": "ignored", "maturity": "draft", "status": "created"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
export KG_LIB_DIR="$LIB_DIR"
|
||||||
|
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||||
|
export DRY_RUN=1
|
||||||
|
|
||||||
|
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *'"conflict":true'* ]]
|
||||||
|
# listed by slug under the Conflicts section
|
||||||
|
grep -q 'queries/conflict-pricing-2026-06-03' wiki/index.md
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "run-ingest: records INGEST_MODEL in the log (manifest carries no model field)" {
|
||||||
|
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||||
|
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
|
||||||
|
cat > wiki/sources/test-source.md <<'EOF'
|
||||||
|
---
|
||||||
|
title: "Test Source"
|
||||||
|
type: source
|
||||||
|
domain: genome-test
|
||||||
|
tags: [t]
|
||||||
|
maturity: draft
|
||||||
|
last_updated: 2026-06-04
|
||||||
|
private: false
|
||||||
|
---
|
||||||
|
body
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# New contract: NO "model" field — the orchestrator supplies it via INGEST_MODEL.
|
||||||
|
cat > .ingest-manifest.json <<'EOF'
|
||||||
|
{
|
||||||
|
"raw_source": "raw/articles/test.md",
|
||||||
|
"reasoning": "Ingested the test source.",
|
||||||
|
"pr_summary": "Ingest of test: 1 source page.",
|
||||||
|
"contradictions": "None",
|
||||||
|
"pages": [
|
||||||
|
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
export KG_LIB_DIR="$LIB_DIR"
|
||||||
|
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
|
||||||
|
export INGEST_MODEL="qwen-test-tag"
|
||||||
|
|
||||||
|
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *'"status":"ok"'* ]]
|
||||||
|
grep -q 'qwen-test-tag' wiki/log.md
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "run-ingest: rejects a manifest path that escapes wiki/ (traversal)" {
|
||||||
|
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
cat > .ingest-manifest.json <<'EOF'
|
||||||
|
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||||
|
"pages":[{"path":"wiki/../etc/passwd","summary":"x","maturity":"draft","status":"created"}] }
|
||||||
|
EOF
|
||||||
|
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||||
|
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *'"status":"error"'* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "run-ingest: honours INGEST_BASE for the PR base" {
|
||||||
|
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
cat > wiki/sources/test-source.md <<'EOF'
|
||||||
|
---
|
||||||
|
title: "Test Source"
|
||||||
|
type: source
|
||||||
|
domain: genome-test
|
||||||
|
tags: [t]
|
||||||
|
maturity: draft
|
||||||
|
last_updated: 2026-06-04
|
||||||
|
private: false
|
||||||
|
---
|
||||||
|
body
|
||||||
|
EOF
|
||||||
|
cat > .ingest-manifest.json <<'EOF'
|
||||||
|
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||||
|
"pages":[{"path":"wiki/sources/test-source.md","summary":"s","maturity":"draft","status":"created"}] }
|
||||||
|
EOF
|
||||||
|
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||||
|
export INGEST_BASE="develop"
|
||||||
|
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"develop"* ]]
|
||||||
|
}
|
||||||
88
tests/scripts.bats
Normal file
88
tests/scripts.bats
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# tests/scripts.bats — unit tests for the deterministic skill scripts.
|
||||||
|
load helpers
|
||||||
|
|
||||||
|
@test "slug: path with extension and spaces" {
|
||||||
|
run bash "$SKILL_SCRIPTS/slug.sh" "raw/articles/My Test Source.md"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[ "$output" = "my-test-source" ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "slug: punctuation and repeats collapse to single hyphens" {
|
||||||
|
run bash "$SKILL_SCRIPTS/slug.sh" "Qualche Concetto!! Strano"
|
||||||
|
[ "$output" = "qualche-concetto-strano" ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "log-append: appends a well-formed INGEST entry with a run_id" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
run bash "$SKILL_SCRIPTS/log-append.sh" --type INGEST --subject foo --model m \
|
||||||
|
--context "[[raw/x]]" --output "[[sources/foo]]" --reasoning "why"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
grep -q "INGEST | foo" wiki/log.md
|
||||||
|
grep -q '^- run_id: `' wiki/log.md
|
||||||
|
grep -q '^- model: `m`' wiki/log.md
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "log-append: rejects an invalid TYPE" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
run bash "$SKILL_SCRIPTS/log-append.sh" --type BOGUS --subject foo
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "index-append: inserts under the right section and keeps it sorted" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/zzz]] — z. `maturity: draft`'
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/aaa]] — a. `maturity: draft`'
|
||||||
|
a=$(grep -n 'sources/aaa' wiki/index.md | cut -d: -f1)
|
||||||
|
z=$(grep -n 'sources/zzz' wiki/index.md | cut -d: -f1)
|
||||||
|
[ -n "$a" ] && [ -n "$z" ]
|
||||||
|
[ "$a" -lt "$z" ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "index-append: bumps frontmatter last_updated to today" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Concepts --entry '- [[concepts/x]] — x. `maturity: draft`'
|
||||||
|
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "index-append: is idempotent for the same entry" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||||
|
[ "$(grep -c 'sources/dup' wiki/index.md)" -eq 1 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "index-append: updates an existing entry by wikilink path (no duplicate)" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — old summary. `maturity: draft`'
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — new summary. `maturity: stable`'
|
||||||
|
[ "$(grep -c 'sources/foo' wiki/index.md)" -eq 1 ]
|
||||||
|
grep -q 'new summary' wiki/index.md
|
||||||
|
! grep -q 'old summary' wiki/index.md
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "slug: refuses an all-symbols input (no empty slug)" {
|
||||||
|
run bash "$SKILL_SCRIPTS/slug.sh" "!!!.md"
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[ -z "$output" ] || [[ "$output" != *"feat/ai-ingest-"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "index-append: self-heals a frontmatter missing last_updated" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
cat > wiki/index.md <<'EOF'
|
||||||
|
---
|
||||||
|
title: "Index"
|
||||||
|
type: index
|
||||||
|
domain: genome-test
|
||||||
|
maturity: stable
|
||||||
|
private: false
|
||||||
|
---
|
||||||
|
|
||||||
|
# Index
|
||||||
|
|
||||||
|
## Sources (`wiki/sources/`)
|
||||||
|
*x*
|
||||||
|
EOF
|
||||||
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
|
||||||
|
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||||
|
}
|
||||||
40
tests/structure.bats
Normal file
40
tests/structure.bats
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# tests/structure.bats — canonical-structure verify/sync.
|
||||||
|
load helpers
|
||||||
|
|
||||||
|
# Per-test setup: bring the shared output helpers into scope first, then the
# structure report/sync functions that these tests call directly.
setup() {
  . "${LIB_DIR}/output.sh"
  . "${LIB_DIR}/structure.sh"
}
|
||||||
|
|
||||||
|
@test "structure_report: a full fixture has no drift" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
run structure_report "$G"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "structure_report: flags a missing canonical dir" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
rm -rf "$G/wiki/private"
|
||||||
|
run structure_report "$G"
|
||||||
|
[ "$status" -ne 0 ]
|
||||||
|
[[ "$output" == *"wiki/private"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "structure_report: notes an extra dir but does not fail on it" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
mkdir -p "$G/wiki/experiments"
|
||||||
|
run structure_report "$G"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"experiments"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "structure_sync: creates missing dirs and is idempotent" {
|
||||||
|
G="$(make_fixture_genome)"
|
||||||
|
rm -rf "$G/wiki/private" "$G/raw/transcripts"
|
||||||
|
structure_sync "$G"
|
||||||
|
[ -d "$G/wiki/private" ] && [ -d "$G/raw/transcripts" ]
|
||||||
|
run structure_report "$G"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
structure_sync "$G" # second run: nothing to do
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue