Merge branch 'release/1.1.1' into main
This commit is contained in:
commit
bcba29fc76
28 changed files with 1900 additions and 216 deletions
40
Makefile
40
Makefile
|
|
@ -1,22 +1,25 @@
|
|||
# =============================================================================
|
||||
# Knowledge Genome - Makefile v. 1.0.0
|
||||
# Knowledge Genome - Makefile v. 1.1.1
|
||||
# Orchestrates the setup and management of the knowledge base.
|
||||
# =============================================================================
|
||||
|
||||
include globals.env
|
||||
export $(shell grep -v '^[#[:space:]]' globals.env | sed 's/=.*//')
|
||||
|
||||
.PHONY: setup add-genome status lint lock doctor sync help
|
||||
.PHONY: setup add-genome status lint lock doctor sync test verify-structure sync-structure help
|
||||
|
||||
help:
|
||||
@echo "Available commands:"
|
||||
@echo " make setup - Full system initialization"
|
||||
@echo " make add-genome - Register and scaffold a new genome"
|
||||
@echo " make status - Check submodule and encryption status"
|
||||
@echo " make lint - Verify schema, privacy flags, and metadata"
|
||||
@echo " make lock - Lock all encrypted files across all genomes"
|
||||
@echo " make doctor - Verify all required tools are installed"
|
||||
@echo " make sync - Sync submodules and report unpushed commits"
|
||||
@echo " make setup - Full system initialization"
|
||||
@echo " make add-genome - Register and scaffold a new genome [LINKED=owner/repo]"
|
||||
@echo " make status - Check submodule and encryption status"
|
||||
@echo " make lint - Verify schema, privacy flags, and metadata"
|
||||
@echo " make verify-structure - Report directory drift across all genomes"
|
||||
@echo " make sync-structure - Create any missing canonical dirs (safe)"
|
||||
@echo " make test - Run the bats test suite (no LLM/GPU needed)"
|
||||
@echo " make lock - Lock all encrypted files across all genomes"
|
||||
@echo " make doctor - Verify all required tools are installed"
|
||||
@echo " make sync - Sync submodules and report unpushed commits"
|
||||
|
||||
lint:
|
||||
@bash scripts/lint-genomes.sh
|
||||
|
|
@ -27,16 +30,26 @@ setup:
|
|||
add-genome:
|
||||
@if [ -z "$(NAME)" ] || [ -z "$(DESC)" ]; then \
|
||||
echo "Error: NAME and DESC are required."; \
|
||||
echo "Usage: make add-genome NAME=my-genome DESC='My description'"; \
|
||||
echo "Usage: make add-genome NAME=my-genome DESC='My description' [LINKED=owner/project-repo]"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)"
|
||||
@bash scripts/add-genome.sh "$(NAME)" "$(DESC)" "$(LINKED)"
|
||||
|
||||
status:
|
||||
@echo "--- Master Status ---"
|
||||
@git submodule status
|
||||
@echo "--- Encryption Status (First 10 files) ---"
|
||||
@git-crypt status | head -n 10
|
||||
@echo "--- Encryption Status (per genome) ---"
|
||||
@git submodule foreach 'git-crypt status 2>/dev/null | head -n 10 || true'
|
||||
|
||||
verify-structure:
|
||||
@bash scripts/verify-genomes.sh
|
||||
|
||||
sync-structure:
|
||||
@bash scripts/verify-genomes.sh --sync
|
||||
|
||||
test:
|
||||
@command -v bats >/dev/null 2>&1 || { echo " MISSING: bats (sudo apt install bats)"; exit 1; }
|
||||
@bats tests/
|
||||
|
||||
doctor:
|
||||
@echo "Checking required tools..."
|
||||
|
|
@ -45,6 +58,7 @@ doctor:
|
|||
@command -v curl >/dev/null 2>&1 || { echo " MISSING: curl"; exit 1; }
|
||||
@command -v jq >/dev/null 2>&1 || { echo " MISSING: jq"; exit 1; }
|
||||
@command -v bw >/dev/null 2>&1 || echo " OPTIONAL: bw (Bitwarden CLI) not found — key injection will be manual."
|
||||
@command -v python3 >/dev/null 2>&1 || echo " OPTIONAL: python3 not found — needed for 'make test' and the ingest skill (index-append.py), not for setup."
|
||||
@echo "System ready."
|
||||
|
||||
sync:
|
||||
|
|
|
|||
449
README.md
449
README.md
|
|
@ -19,16 +19,17 @@ and a human-in-the-loop Git Flow for quality control.
|
|||
5. [Configuration](#configuration)
|
||||
6. [Quick Start](#quick-start)
|
||||
7. [Makefile Reference](#makefile-reference)
|
||||
8. [Genome Lifecycle](#genome-lifecycle)
|
||||
9. [Security Model](#security-model)
|
||||
10. [Key Management](#key-management)
|
||||
11. [Agent Sessions](#agent-sessions)
|
||||
12. [Workflows](#workflows)
|
||||
13. [Knowledge Quality](#knowledge-quality)
|
||||
14. [Knowledge Schema](#knowledge-schema)
|
||||
15. [Collaboration Model](#collaboration-model)
|
||||
16. [Optional Extensions](#optional-extensions)
|
||||
17. [Troubleshooting](#troubleshooting)
|
||||
8. [Testing](#testing)
|
||||
9. [Genome Lifecycle](#genome-lifecycle)
|
||||
10. [Security Model](#security-model)
|
||||
11. [Key Management](#key-management)
|
||||
12. [Agent Sessions](#agent-sessions)
|
||||
13. [Workflows](#workflows)
|
||||
14. [Knowledge Quality](#knowledge-quality)
|
||||
15. [Knowledge Schema](#knowledge-schema)
|
||||
16. [Collaboration Model](#collaboration-model)
|
||||
17. [Optional Extensions](#optional-extensions)
|
||||
18. [Troubleshooting](#troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -49,6 +50,7 @@ evolving synthesis. Knowledge is compiled once and kept current.
|
|||
Contradictions have been flagged. The synthesis already reflects everything ingested.
|
||||
|
||||
This means:
|
||||
|
||||
- No vector database.
|
||||
- No embedding pipeline.
|
||||
- No external retrieval infrastructure.
|
||||
|
|
@ -103,16 +105,24 @@ genome-{name}/
|
|||
|
||||
### Three layers
|
||||
|
||||
| Layer | Path | Owner | Rule |
|
||||
|-------|------|-------|------|
|
||||
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
||||
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
||||
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
||||
| Layer | Path | Owner | Rule |
|
||||
| ----------- | ----------- | ----------- | ----------------------------------------------------- |
|
||||
| Raw sources | `raw/` | Human | Immutable. LLM reads only. Never modified. |
|
||||
| Wiki | `wiki/` | LLM | Agent creates, updates, cross-links, maintains. |
|
||||
| Schema | `AGENTS.md` | Human + LLM | Co-evolved contract defining structure and workflows. |
|
||||
|
||||
### Linked projects (optional)
|
||||
|
||||
A genome can optionally declare a **linked project repository** — a separate repo where
|
||||
the knowledge in that genome is meant to be applied (e.g. `genome-dev` linked to an app
|
||||
repo). The link is recorded as a third field in the registry and rendered into the
|
||||
genome's `AGENTS.md` (`## Linked Project`). A genome with no link is _knowledge-only_ and
|
||||
behaves exactly as before. See [Configuration](#configuration).
|
||||
|
||||
### Framework structure
|
||||
|
||||
```text
|
||||
knowledge-genome-setup/ ← This repository (setup tooling)
|
||||
knowledge-genome-orchestrator/ ← This repository (setup tooling)
|
||||
├── globals.env ← Static KEY=VALUE config (Make-includable)
|
||||
├── registry.sh ← Bash-only: GENOMES array + dynamic paths
|
||||
├── Makefile ← Entry point for all operations
|
||||
|
|
@ -120,6 +130,7 @@ knowledge-genome-setup/ ← This repository (setup tooling)
|
|||
│ ├── output.sh ← Terminal helpers (colors, log levels)
|
||||
│ ├── deps.sh ← Dependency validation
|
||||
│ ├── scaffold.sh ← Template rendering engine
|
||||
│ ├── structure.sh ← Canonical genome layout (single source of truth)
|
||||
│ ├── lint.sh ← Per-file validation functions
|
||||
│ └── git-crypt.sh ← git-crypt lifecycle (init, export, verify, rotate)
|
||||
├── providers/
|
||||
|
|
@ -130,18 +141,41 @@ knowledge-genome-setup/ ← This repository (setup tooling)
|
|||
│ ├── setup-master.sh ← Master repo initialisation
|
||||
│ ├── setup-genomes.sh ← Genome provisioning loop
|
||||
│ ├── add-genome.sh ← Add a single new genome
|
||||
│ └── lint-genomes.sh ← Quality control across all genomes
|
||||
└── templates/
|
||||
├── agents-genome.md ← Per-genome agent contract template
|
||||
├── agents-master.md ← Master coordination schema template
|
||||
├── wiki-index.md ← Index template (rendered per genome)
|
||||
├── wiki-log.md ← Log template (rendered per genome)
|
||||
├── pr-description.md ← PR review checklist template
|
||||
├── pre-commit.sh ← Security hook template
|
||||
├── gitattributes ← Git encryption rules template
|
||||
└── gitignore ← Git ignore template
|
||||
│ ├── lint-genomes.sh ← Quality control across all genomes
|
||||
│ └── verify-genomes.sh ← Structure verify / --sync across all genomes
|
||||
├── templates/
|
||||
│ ├── agents-genome.md ← Per-genome agent contract template
|
||||
│ ├── agents-master.md ← Master coordination schema template
|
||||
│ ├── readme-master.md ← Master repo README template
|
||||
│ ├── wiki-index.md ← Index template (rendered per genome)
|
||||
│ ├── wiki-log.md ← Log template (rendered per genome)
|
||||
│ ├── pr-description.md ← PR review checklist template
|
||||
│ ├── pre-commit.sh ← Security hook template
|
||||
│ ├── gitattributes ← Git encryption rules template
|
||||
│ └── gitignore ← Git ignore template
|
||||
├── skills/
|
||||
│ └── ingest/ ← pi skill: deployed to the AI node (vm101)
|
||||
│ ├── SKILL.md ← Semantic-only contract (read/edit, emits manifest)
|
||||
│ ├── references/ ← On-demand reference docs for the agent
|
||||
│ └── scripts/ ← Deterministic post-processor (runs outside the agent)
|
||||
│ ├── run-ingest.sh ← Orchestrator: consumes the manifest, emits one JSON line
|
||||
│ ├── slug.sh ← Slug normalisation
|
||||
│ ├── index-append.py ← Sorted insert into wiki/index.md + last_updated bump
|
||||
│ ├── log-append.sh ← Append a wiki/log.md entry
|
||||
│ ├── scoped-lint.sh ← Lint only the pages touched this run (reuses lib/lint.sh)
|
||||
│ └── open-pr.sh ← Branch / commit / push / open PR (DRY_RUN seam for tests)
|
||||
└── tests/ ← bats suite — deterministic, no LLM/GPU (see Testing)
|
||||
├── helpers.bash
|
||||
├── scripts.bats
|
||||
├── lint.bats
|
||||
├── structure.bats
|
||||
└── run-ingest.bats
|
||||
```
|
||||
|
||||
> The `skills/ingest/` directory is version-controlled here but **deployed** to the AI
|
||||
> node (vm101) under `~/.pi/agent/skills/ingest`. The agent (`pi`) does only semantic work
|
||||
> and writes a manifest; `run-ingest.sh` does the mechanical steps. See [Workflows → Ingest](#ingest).
|
||||
|
||||
---
|
||||
|
||||
## System Requirements
|
||||
|
|
@ -154,7 +188,10 @@ All tools (git-crypt, bw, qmd) have native Linux binaries.
|
|||
### macOS — full support
|
||||
|
||||
All scripts are compatible with macOS. Requirements:
|
||||
- bash 3.2+ (macOS default) — fully supported. All `bash 4+` constructs removed.
|
||||
|
||||
- bash 3.2+ (macOS default) — supported for the **setup scripts** (`make` targets, scaffolding).
|
||||
The `ingest` skill uses bash 4+ constructs (`mapfile`), but it is deployed and run on the
|
||||
Linux AI node, not on the macOS setup machine — so this is not a constraint in practice.
|
||||
- GNU coreutils not required — BSD variants of `date`, `grep`, `sed` all handled.
|
||||
- `git-crypt`: install via Homebrew — `brew install git-crypt`
|
||||
- `jq`, `curl`: pre-installed or via Homebrew
|
||||
|
|
@ -166,6 +203,7 @@ If you use Homebrew bash (`brew install bash`), the scripts work identically to
|
|||
**Git Bash and native Windows are not supported.**
|
||||
|
||||
Reasons:
|
||||
|
||||
- `git-crypt` has no native Windows binary.
|
||||
- Process substitution `<(...)` used for runtime key injection is not available
|
||||
in Git Bash or PowerShell.
|
||||
|
|
@ -179,37 +217,42 @@ All setup and runtime operations work identically to native Linux inside WSL2.
|
|||
|
||||
The system is designed for a homelab architecture:
|
||||
|
||||
| Component | Recommended | Role |
|
||||
|-----------|-------------|------|
|
||||
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
||||
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
||||
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
||||
| Local LLM | 14B–32B quantised | Active wiki maintenance sessions |
|
||||
| Large LLM | 70B (async) | Deep reflection, complex synthesis (scheduled, not interactive) |
|
||||
| Component | Recommended | Role |
|
||||
| --------------- | ------------------------- | --------------------------------------------------------------- |
|
||||
| Storage node | Any Linux server with NFS | Hosts Forgejo, stores genome repos |
|
||||
| AI compute node | GPU server (16GB+ VRAM) | Runs local LLM agent sessions |
|
||||
| VRAM | 16GB minimum | 14B model at Q5_K_M ≈ 10GB weights; ~6GB for KV cache |
|
||||
| Local LLM | 14B–32B quantised | Active wiki maintenance sessions |
|
||||
| Large LLM | 70B (async) | Deep reflection, complex synthesis (scheduled, not interactive) |
|
||||
|
||||
> **On VRAM constraints:** with a 16GB card and a 14B model, the KV cache budget
|
||||
> is ~6GB — approximately 32k tokens of effective context. Every token in `AGENTS.md`,
|
||||
> the index, and the log tail is a cost. This is why all agent files are token-optimised
|
||||
> and sessions are kept to one source at a time.
|
||||
|
||||
> **Reference deployment:** the table above is a target profile, not a hard requirement.
|
||||
> The current setup runs a single 16GB GPU (RTX 5060 Ti) with a ~9B model for interactive
|
||||
> ingest, and offloads heavy/async synthesis to a cloud model. Smaller models work — they
|
||||
> just make the "one source per session" discipline and the token budget matter more.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Required
|
||||
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `git` | Version control |
|
||||
| `git-crypt` | Transparent file encryption |
|
||||
| `curl` | REST API calls to Forgejo/GitHub |
|
||||
| `jq` | JSON parsing |
|
||||
| Tool | Purpose |
|
||||
| ----------- | -------------------------------- |
|
||||
| `git` | Version control |
|
||||
| `git-crypt` | Transparent file encryption |
|
||||
| `curl` | REST API calls to Forgejo/GitHub |
|
||||
| `jq` | JSON parsing |
|
||||
|
||||
### Optional
|
||||
|
||||
| Tool | Purpose |
|
||||
|------|---------|
|
||||
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
||||
| Tool | Purpose |
|
||||
| ----- | ----------------------------------------------------------------------- |
|
||||
| `bw` | Bitwarden CLI — runtime key injection from Vaultwarden (no key on disk) |
|
||||
| `qmd` | Local BM25 + vector search for Markdown files with MCP server interface |
|
||||
|
||||
> **`bw` vs `bws`:** Use `bw` (standard Bitwarden CLI). `bws` is the Bitwarden
|
||||
|
|
@ -282,14 +325,17 @@ resolution. Never included by Make.
|
|||
|
||||
```bash
|
||||
# Dynamic paths (resolved at source time)
|
||||
WORK_DIR="${HOME}/knowledge-genome-setup"
|
||||
WORK_DIR="${HOME}/knowledge-genome-orchestrator"
|
||||
KEYS_DIR="${WORK_DIR}/keys"
|
||||
|
||||
# Genome registry — format: "name|description"
|
||||
# Genome registry — format: "name|description|linked_repo"
|
||||
# The third field is OPTIONAL:
|
||||
# - leave it empty → knowledge-only genome (no linked project)
|
||||
# - owner/repo → genome is linked to that project repository (rendered into AGENTS.md)
|
||||
GENOMES=(
|
||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
||||
"genome-finance|Personal finance, investments, market analysis"
|
||||
"genome-homelab|Infrastructure, network configs, architecture logs"
|
||||
"genome-dev|Web development, TUI, Angular, software architecture|myorg/my-app"
|
||||
"genome-finance|Personal finance, investments, market analysis|"
|
||||
"genome-homelab|Infrastructure, network configs, architecture logs|"
|
||||
)
|
||||
```
|
||||
|
||||
|
|
@ -312,8 +358,8 @@ export GITHUB_TOKEN="your_github_token"
|
|||
|
||||
```bash
|
||||
# 1. Clone the setup framework
|
||||
git clone <setup-repo-url> knowledge-genome-setup
|
||||
cd knowledge-genome-setup
|
||||
git clone <setup-repo-url> knowledge-genome-orchestrator
|
||||
cd knowledge-genome-orchestrator
|
||||
|
||||
# 2. Configure your environment
|
||||
cp globals.env.example globals.env # edit with your values
|
||||
|
|
@ -347,6 +393,7 @@ make setup
|
|||
- Commits submodule pointer in master repo
|
||||
|
||||
After setup completes:
|
||||
|
||||
- Upload all files in `keys/` to Vaultwarden (see Key Management)
|
||||
- Delete key files from disk: `rm keys/*.key`
|
||||
|
||||
|
|
@ -354,16 +401,19 @@ After setup completes:
|
|||
|
||||
## Makefile Reference
|
||||
|
||||
| Target | Description |
|
||||
|--------|-------------|
|
||||
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
||||
| `make add-genome NAME=x DESC="y"` | Scaffold and register a single new genome |
|
||||
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
||||
| `make status` | Show submodule status and first 10 git-crypt encryption states |
|
||||
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
||||
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if bw missing |
|
||||
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
||||
| `make help` | Print all available targets |
|
||||
| Target | Description |
|
||||
| ----------------------------------------------------- | ------------------------------------------------------------------------------------- |
|
||||
| `make setup` | Full system initialisation — master repo + all genomes in `registry.sh` |
|
||||
| `make add-genome NAME=x DESC="y" [LINKED=owner/repo]` | Scaffold and register a single new genome (optional linked project) |
|
||||
| `make lint` | Run quality checks across all genomes (schema, privacy, decay, page size) |
|
||||
| `make verify-structure` | Report directory drift of each genome vs the canonical layout (`lib/structure.sh`) |
|
||||
| `make sync-structure` | Create any missing canonical directories across all genomes (safe, idempotent) |
|
||||
| `make test` | Run the bats test suite (deterministic; no LLM/GPU/network) — see [Testing](#testing) |
|
||||
| `make status` | Show submodule status and per-genome git-crypt encryption state |
|
||||
| `make lock` | Lock all encrypted repos (master + all genome submodules) |
|
||||
| `make doctor` | Verify required tools: git, git-crypt, curl, jq; warn if the optional bw or python3 is missing |
|
||||
| `make sync` | `git submodule update --init --recursive` + report unpushed commits per genome |
|
||||
| `make help` | Print all available targets |
|
||||
|
||||
### Examples
|
||||
|
||||
|
|
@ -374,6 +424,12 @@ make doctor
|
|||
# Add a new genome after initial setup
|
||||
make add-genome NAME=genome-research DESC="Academic papers and deep research"
|
||||
|
||||
# Add a genome linked to a project repository
|
||||
make add-genome NAME=genome-dev DESC="Web development" LINKED=myorg/my-app
|
||||
|
||||
# Check every genome against the canonical directory layout
|
||||
make verify-structure
|
||||
|
||||
# Run full lint pass (bash deterministic checks)
|
||||
make lint
|
||||
|
||||
|
|
@ -386,6 +442,38 @@ make lock
|
|||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
The mechanical layer (slug, index, log, lint, structure, the ingest orchestrator) is
|
||||
covered by a [bats](https://github.com/bats-core/bats-core) suite. The tests are
|
||||
**deterministic and have zero dependency on the LLM, the GPU, or the network** — they
|
||||
simulate the agent's output with fixtures and exercise the scripts directly, so they run
|
||||
anywhere git, bash, and python3 are installed (laptop, CI, a git hook). They are **not** meant to run on the AI
|
||||
node or via n8n.
|
||||
|
||||
```bash
|
||||
sudo apt install bats # once
|
||||
make test # or: bats tests/
|
||||
```
|
||||
|
||||
| File | Covers |
|
||||
| ----------------- | ------------------------------------------------------------------------------ |
|
||||
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` |
|
||||
| `structure.bats` | `lib/structure.sh` report / sync |
|
||||
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||
|
||||
Each test builds its own throwaway genome with a local bare remote, configured to ignore
|
||||
the operator's global git settings (signing, global hooks) so the suite is hermetic. The
|
||||
`run-ingest` tests auto-`skip` if `jq` is absent. If you change the canonical layout in
|
||||
`lib/structure.sh`, update `FIXTURE_DIRS` in `tests/helpers.bash` to match.
|
||||
|
||||
> Why this matters: the only non-deterministic part of the system is the model. Pinning
|
||||
> the mechanical layer with tests means that when an ingest misbehaves, you know it's the
|
||||
> model or the prompt — not the plumbing.
|
||||
|
||||
---
|
||||
|
||||
## Genome Lifecycle
|
||||
|
||||
### Initial setup
|
||||
|
|
@ -407,6 +495,7 @@ After adding: upload the new key to Vaultwarden and delete the key file.
|
|||
### Removing a genome
|
||||
|
||||
Manual process:
|
||||
|
||||
```bash
|
||||
# In master repo
|
||||
git submodule deinit genome-name
|
||||
|
|
@ -421,16 +510,17 @@ git push
|
|||
When a genome is scaffolded, `render_template` replaces these placeholders in all
|
||||
template files:
|
||||
|
||||
| Placeholder | Source | Example |
|
||||
|-------------|--------|---------|
|
||||
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
||||
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
||||
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
||||
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
||||
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
||||
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
||||
| `{{MASTER_REPO}}` | globals.env | `master-knowledge-genome` |
|
||||
| `{{DATE}}` | runtime | `2026-05-11` |
|
||||
| Placeholder | Source | Example |
|
||||
| ----------------------- | ----------- | ------------------------------ |
|
||||
| `{{GENOME_NAME}}` | registry.sh | `genome-dev` |
|
||||
| `{{GENOME_NAME_UPPER}}` | derived | `GENOME-DEV` |
|
||||
| `{{GENOME_DESC}}` | registry.sh | `Web development...` |
|
||||
| `{{LINKED_PROJECT}}` | registry.sh | `myorg/my-app` (or `none`) |
|
||||
| `{{FORGEJO_URL}}` | globals.env | `https://git.yourserver.com` |
|
||||
| `{{FORGEJO_USER}}` | globals.env | `yourusername` |
|
||||
| `{{VAULTWARDEN_URL}}` | globals.env | `https://vault.yourserver.com` |
|
||||
| `{{MASTER_REPO}}` | globals.env | `master-knowledge-genome` |
|
||||
| `{{DATE}}` | runtime | `2026-05-11` |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -441,9 +531,9 @@ template files:
|
|||
Each genome uses a unique symmetric AES-256-CTR key managed by git-crypt.
|
||||
Two directories in every genome are always encrypted:
|
||||
|
||||
| Directory | Contents | On remote |
|
||||
|-----------|----------|-----------|
|
||||
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
||||
| Directory | Contents | On remote |
|
||||
| --------------- | --------------------------- | ------------------ |
|
||||
| `raw/private/` | Sensitive source material | Opaque binary blob |
|
||||
| `wiki/private/` | Private synthesis and notes | Opaque binary blob |
|
||||
|
||||
All other directories (`raw/articles/`, `wiki/sources/`, etc.) are plaintext.
|
||||
|
|
@ -490,6 +580,17 @@ This means: any file matching `**/private/**` in `.gitattributes` is protected,
|
|||
including future `private/` directories created anywhere in the repo.
|
||||
The hook never needs updating when the encryption rules change.
|
||||
|
||||
### Untrusted agent output — manifest validation
|
||||
|
||||
The ingest agent's output is stochastic: a hallucinated manifest could carry a missing field,
|
||||
a wrong type, or a malicious path such as `wiki/../../etc/passwd`. `run-ingest.sh` therefore
|
||||
**validates the manifest before trusting any field** — it must be well-formed JSON with a
|
||||
string `raw_source` and an array `pages`, and **every `path` must be a string under `wiki/`
|
||||
with no `..`**. Anything else fails fast with a structured `{"status":"error"}` and no
|
||||
filesystem access outside the wiki, so a bad path can't drive a read or a lint outside the
|
||||
knowledge tree. This is the trust boundary between the (stochastic) model and the
|
||||
(deterministic, tested) post-processor.
|
||||
|
||||
### PRIVATE_CONTEXT toggle
|
||||
|
||||
The `PRIVATE_CONTEXT` toggle in `AGENTS.md` controls whether the LLM agent
|
||||
|
|
@ -502,6 +603,7 @@ PRIVATE_CONTEXT: enabled ← Agent may read/write private/. Requires git-cryp
|
|||
```
|
||||
|
||||
Rules:
|
||||
|
||||
- Never inferred. Never carried over from a previous session.
|
||||
- `enabled` requires the operator to confirm that `git-crypt unlock` has run on the host.
|
||||
- Per-genome, per-session: enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||
|
|
@ -530,6 +632,7 @@ The key flows: Vaultwarden → `bw get notes` → `base64 -d` → kernel pipe
|
|||
At no point is the key written to any file on disk.
|
||||
|
||||
Lock a genome when the session ends:
|
||||
|
||||
```bash
|
||||
git-crypt lock
|
||||
```
|
||||
|
|
@ -544,11 +647,11 @@ git-crypt lock
|
|||
|
||||
Each genome key is stored as a base64-encoded Secure Note in Vaultwarden:
|
||||
|
||||
| Genome | Vaultwarden Note Name |
|
||||
|--------|----------------------|
|
||||
| `genome-dev` | `genome-dev key` |
|
||||
| `genome-finance` | `genome-finance key` |
|
||||
| `genome-homelab` | `genome-homelab key` |
|
||||
| Genome | Vaultwarden Note Name |
|
||||
| ---------------- | --------------------- |
|
||||
| `genome-dev` | `genome-dev key` |
|
||||
| `genome-finance` | `genome-finance key` |
|
||||
| `genome-homelab` | `genome-homelab key` |
|
||||
|
||||
After `make setup` or `make add-genome`, key files are exported to `keys/`.
|
||||
Upload procedure:
|
||||
|
|
@ -586,13 +689,14 @@ git clone https://git.yourserver.com/yourusername/genome-dev.git
|
|||
If a key is lost or compromised:
|
||||
|
||||
```bash
|
||||
# From the knowledge-genome-setup/ directory
|
||||
# From the knowledge-genome-orchestrator/ directory
|
||||
source lib/git-crypt.sh
|
||||
cd ~/knowledge-genome-setup/genome-dev
|
||||
cd ~/knowledge-genome-orchestrator/genome-dev
|
||||
gcrypt_rotate_key "genome-dev"
|
||||
```
|
||||
|
||||
`gcrypt_rotate_key` performs:
|
||||
|
||||
1. Unlocks repo with existing key
|
||||
2. Removes old key material
|
||||
3. Generates new symmetric key via `git-crypt init`
|
||||
|
|
@ -603,13 +707,16 @@ gcrypt_rotate_key "genome-dev"
|
|||
> **Limitation:** git history still contains blobs encrypted with the old key.
|
||||
> Anyone with the old key and git history access can decrypt them. To purge old
|
||||
> encrypted blobs from history:
|
||||
>
|
||||
> ```bash
|
||||
> git filter-repo --invert-paths --path raw/private --path wiki/private
|
||||
> git push --force origin main
|
||||
> ```
|
||||
>
|
||||
> This rewrites all commit hashes — coordinate with any collaborators first.
|
||||
|
||||
After rotation:
|
||||
|
||||
- Upload new key to Vaultwarden (replace existing note)
|
||||
- Delete both `keys/genome-dev.key` and `keys/genome-dev-rotated-*.key` from disk
|
||||
- Revoke access from previous key holders
|
||||
|
|
@ -621,6 +728,7 @@ After rotation:
|
|||
### Prerequisites for every session
|
||||
|
||||
Before starting an LLM agent session on a genome:
|
||||
|
||||
1. The host (AI server) runs `git-crypt unlock` for the required genomes
|
||||
2. The orchestrator prepares context: `tail -n 20 wiki/log.md`
|
||||
3. Declare `PRIVATE_CONTEXT` state explicitly in the opening prompt
|
||||
|
|
@ -631,7 +739,8 @@ The agent executes in this order at the start of every session:
|
|||
|
||||
1. Read `wiki/index.md` — primary catalog of all pages and maturity
|
||||
2. Read the last 20 log entries (injected by the orchestrator — the agent does NOT open `wiki/log.md` directly)
|
||||
3. For tasks involving related pages: `qmd search "<query>"` before opening any files
|
||||
3. For tasks involving related pages: if the optional `qmd` extension is installed,
|
||||
`qmd search "<query>"` before opening files; otherwise navigate from `wiki/index.md`
|
||||
4. Operate on individual files — never scan entire directories
|
||||
|
||||
### One source per session
|
||||
|
|
@ -651,12 +760,13 @@ sequentially — not one session with 5 files.
|
|||
### n8n automation
|
||||
|
||||
For Forgejo webhook → automated ingest:
|
||||
|
||||
1. Forgejo sends webhook on push to `raw/`
|
||||
2. n8n receives webhook, identifies new files
|
||||
3. n8n starts one agent session per new file (sequential, not parallel)
|
||||
4. Each session: inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Agent ingest workflow runs, opens PR
|
||||
6. Human reviews and merges PR
|
||||
4. Each session: realign the checkout to the base (`git switch <base> && git reset --hard origin/<base>`), then inject `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Phase 1 agent (`/skill:ingest`) writes the manifest; Phase 2 `run-ingest.sh` opens the PR, then **stops**
|
||||
6. Human reviews — **merge to accept**, or close the PR + delete the `feat` branch to reject
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -664,27 +774,76 @@ For Forgejo webhook → automated ingest:
|
|||
|
||||
### Ingest
|
||||
|
||||
Triggered by a new file in `raw/` (manual or via webhook).
|
||||
Triggered by a new file in `raw/` (manual or via webhook). Ingest is split into two
|
||||
phases so that the small local model spends its limited context only on judgement, and
|
||||
all the deterministic bookkeeping happens outside the model's loop.
|
||||
|
||||
1. Read source once
|
||||
2. Create `wiki/sources/<slug>.md` — summary and key points
|
||||
3. Per entity (person, tool, organisation): create or update `wiki/entities/<name>.md`
|
||||
4. Per concept (pattern, theory, decision): create or update `wiki/concepts/<name>.md`
|
||||
5. Check each touched page for contradictions → apply Conflict Resolution if found
|
||||
6. Append entry to `wiki/index.md` (bottom of relevant section — do not reorder)
|
||||
7. Append log entry: `INGEST | <slug>`
|
||||
8. Run scoped lint on pages created or modified in this session; report in PR
|
||||
9. Commit on `feat/ai-ingest-<slug>`; open PR using `templates/pr-description.md`
|
||||
**Phase 1 — agent (semantic only).** The `ingest` skill gives the agent read/edit tools
|
||||
only (no shell). It:
|
||||
|
||||
1. Reads the source once
|
||||
2. Creates `wiki/sources/<slug>.md` — summary and key points
|
||||
3. Per entity (person, tool, organisation): creates or updates `wiki/entities/<name>.md`
|
||||
4. Per concept (pattern, theory, decision): creates or updates `wiki/concepts/<name>.md`
|
||||
5. Checks each touched page for contradictions → applies Conflict Resolution if found
|
||||
6. Writes `.ingest-manifest.json` (the list of pages it created/modified, the model name,
|
||||
a one-line reasoning, the PR summary, and any contradictions) — then **stops**
|
||||
|
||||
**Phase 2 — `run-ingest.sh` (deterministic, outside the agent).** The post-processor first
|
||||
**validates the manifest** — well-formed JSON, expected shape, and every page path confined to
|
||||
`wiki/` with no `..` (see [Security Model](#security-model)) — then does the mechanical work the
|
||||
model must not waste context on:
|
||||
|
||||
7. Inserts each page into the correct `wiki/index.md` section **in alphabetical order**,
|
||||
deduplicated by wikilink (a re-ingest updates the entry, never duplicates it), and bumps the
|
||||
index `last_updated` (`index-append.py`)
|
||||
8. Appends the `INGEST | <slug>` entry to `wiki/log.md` (the model name comes from the
|
||||
orchestrator via `INGEST_MODEL` — the agent cannot reliably know its own tag)
|
||||
9. Runs scoped lint on exactly the pages touched this run (`scoped-lint.sh`, reusing
|
||||
`lib/lint.sh`)
|
||||
10. Commits **only `wiki/`** on `feat/ai-ingest-<slug>` and opens a PR against the integration
|
||||
base (`INGEST_BASE`, default `main`); the body matches the `templates/pr-description.md`
|
||||
structure (Summary / Pages / Contradictions / Scoped Lint)
|
||||
11. Emits a single compact JSON line (status, slug, PR url, lint_clean, conflict) for n8n
|
||||
|
||||
The agent never runs git, never edits the index/log mechanically, and never lints — those
|
||||
are deterministic and tested (see [Testing](#testing)). Invocation on the AI node:
|
||||
|
||||
```bash
|
||||
pi --mode json -p "/skill:ingest raw/articles/<file>.md" # phase 1 → writes manifest
|
||||
run-ingest.sh <genome> # phase 2 → index/log/lint/PR
|
||||
```
|
||||
|
||||
For private sources (`PRIVATE_CONTEXT: enabled` required):
|
||||
|
||||
- All output goes to `wiki/private/<slug>.md` only
|
||||
- PR title: `[PRIVATE] ingest: <slug>`
|
||||
|
||||
**Branch lifecycle & the manual gate.** `run-ingest.sh` / `open-pr.sh` are deliberately
|
||||
"dumb": they create the `feat/ai-ingest-<slug>` branch, commit only `wiki/`, open the PR, and
|
||||
stop. They never reset, revert, or touch the integration branch — that lifecycle belongs to
|
||||
the orchestrator, around the human gate:
|
||||
|
||||
- **Before each session** the orchestrator realigns the checkout to the base
|
||||
(`git fetch && git switch <base> && git reset --hard origin/<base>`) — a reset of the _local_
|
||||
checkout to match the remote, never a force-push to the shared branch.
|
||||
- **After the PR opens, everything stops** until a human decides (merge or reject): one source per session,
|
||||
sequential, and no new ingest starts until the pending PR has been merged or closed.
|
||||
- **Approve = merge. Reject = close the PR and delete the remote `feat` branch.** To undo an
|
||||
already-merged ingest, open a _revert PR_ against the base — never rewrite history on a
|
||||
shared branch.
|
||||
|
||||
The PR base is configurable via `INGEST_BASE` (default `main`). Per-page `maturity` already
|
||||
encodes stability and tags/releases mark versioned snapshots, so `main` is the integration
|
||||
branch today. If a linked project later _consumes_ a genome, set `INGEST_BASE=develop` to
|
||||
buffer ingests on `develop` and cut manual `develop → main` releases — no code change.
|
||||
|
||||
### Query
|
||||
|
||||
Triggered by an operator question.
|
||||
|
||||
1. `qmd search "<query>"` → identify candidate pages
|
||||
1. `qmd search "<query>"` (if the optional qmd extension is installed) → identify
|
||||
candidate pages; otherwise start from `wiki/index.md`
|
||||
2. Read candidate pages directly (when qmd is used it already returns file paths — no intermediate index lookup is needed)
|
||||
3. Synthesise answer with `[[wikilink]]` citations
|
||||
4. If answer is non-trivial: save as `wiki/queries/<slug>.md` and append to index
|
||||
|
|
@ -697,11 +856,13 @@ For general orientation without a specific query: read `wiki/index.md` directly.
|
|||
The lint workflow is split between deterministic bash checks and semantic LLM judgment.
|
||||
|
||||
**Step 1 — operator runs bash linter:**
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
The bash linter checks automatically:
|
||||
|
||||
- YAML frontmatter validity (all mandatory fields present)
|
||||
- Domain consistency (domain field matches genome name)
|
||||
- Type validity (value from allowed list)
|
||||
|
|
@ -713,6 +874,7 @@ The bash linter checks automatically:
|
|||
**Step 2 — operator provides bash output to LLM agent:**
|
||||
|
||||
The agent makes the semantic judgments that the bash linter cannot, working from the linter's findings:
|
||||
|
||||
- **Orphan pages** (from bash list): for each orphan, identify 1-3 existing pages
|
||||
that should link to it; propose specific additions
|
||||
- **Implicit concepts** (from bash term frequency list): determine if a candidate
|
||||
|
|
@ -735,22 +897,28 @@ The PR description uses `templates/pr-description.md`:
|
|||
|
||||
```markdown
|
||||
## Summary
|
||||
|
||||
One sentence: goal of this session and source processed.
|
||||
|
||||
## Pages Created
|
||||
|
||||
| Path | Type | Maturity |
|
||||
|
||||
## Pages Modified
|
||||
|
||||
| Path | Change |
|
||||
|
||||
## Contradictions Found
|
||||
[ ] None / [ ] n conflict file(s) created
|
||||
|
||||
[ ] None / [ ] n conflict file(s) created
|
||||
|
||||
## Private Data Accessed
|
||||
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
||||
|
||||
[ ] No (PRIVATE_CONTEXT: disabled) / [ ] Yes
|
||||
|
||||
## Scoped Lint (post-ingest)
|
||||
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
||||
|
||||
[ ] Frontmatter valid [ ] No broken links [ ] No issues found
|
||||
```
|
||||
|
||||
This makes human review fast and structured: read the table, scan the diff,
|
||||
|
|
@ -776,10 +944,10 @@ The operator resolves the conflict, updates relevant pages, closes the PR.
|
|||
|
||||
Pages have a `last_updated` field in frontmatter. During lint passes:
|
||||
|
||||
| Maturity | Threshold | Action |
|
||||
|----------|-----------|--------|
|
||||
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| Maturity | Threshold | Action |
|
||||
| -------- | --------- | -------------------------------------- |
|
||||
| `stable` | 180 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
| `draft` | 90 days | Flag as stale — add `⚠️ STALE` callout |
|
||||
|
||||
The agent proposes re-validation but does not change `maturity` without new source evidence.
|
||||
|
||||
|
|
@ -816,47 +984,47 @@ private: true | false
|
|||
---
|
||||
```
|
||||
|
||||
| Field | Rules |
|
||||
|-------|-------|
|
||||
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
||||
| `maturity: draft` | Single source or unvalidated |
|
||||
| `maturity: stable` | Confirmed by 2+ independent sources |
|
||||
| `maturity: deprecated` | Superseded — add `> **DEPRECATED:** <reason>` callout at top |
|
||||
| `private: true` | Required on all pages in `wiki/private/` and `raw/private/` |
|
||||
| Field | Rules |
|
||||
| ---------------------- | ------------------------------------------------------------------------ |
|
||||
| `type` | Must be one of: `source entity concept query conflict private index log` |
|
||||
| `maturity: draft` | Single source or unvalidated |
|
||||
| `maturity: stable` | Confirmed by 2+ independent sources |
|
||||
| `maturity: deprecated` | Superseded — add `> **DEPRECATED:** <reason>` callout at top |
|
||||
| `private: true` | Required on all pages in `wiki/private/` and `raw/private/` |
|
||||
|
||||
Do not use semantic versioning for content. Git history tracks every change.
|
||||
`maturity` captures epistemic state; `last_updated` tracks recency.
|
||||
|
||||
### Page types and directories
|
||||
|
||||
| Type | Directory | Description |
|
||||
|------|-----------|-------------|
|
||||
| `source` | `wiki/sources/` | One page per processed raw source |
|
||||
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
||||
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
||||
| `query` | `wiki/queries/` | Preserved answers and analyses |
|
||||
| `conflict` | `wiki/queries/conflict-*.md` | Unresolved contradictions |
|
||||
| `private` | `wiki/private/` | Private synthesis (PRIVATE_CONTEXT: enabled) |
|
||||
| `index` | `wiki/index.md` | Primary navigation catalog (singleton) |
|
||||
| `log` | `wiki/log.md` | Operations ledger (singleton) |
|
||||
| Type | Directory | Description |
|
||||
| ---------- | ---------------------------- | -------------------------------------------- |
|
||||
| `source` | `wiki/sources/` | One page per processed raw source |
|
||||
| `entity` | `wiki/entities/` | People, tools, organisations, projects |
|
||||
| `concept` | `wiki/concepts/` | Patterns, theories, architectural decisions |
|
||||
| `query` | `wiki/queries/` | Preserved answers and analyses |
|
||||
| `conflict` | `wiki/queries/conflict-*.md` | Unresolved contradictions |
|
||||
| `private` | `wiki/private/` | Private synthesis (PRIVATE_CONTEXT: enabled) |
|
||||
| `index` | `wiki/index.md` | Primary navigation catalog (singleton) |
|
||||
| `log` | `wiki/log.md` | Operations ledger (singleton) |
|
||||
|
||||
### Page size limits
|
||||
|
||||
| Limit | Lines | Action |
|
||||
|-------|-------|--------|
|
||||
| Soft cap | 400 | Bash linter warns |
|
||||
| Hard cap | 800 | Bash linter errors — split the page |
|
||||
| Limit | Lines | Action |
|
||||
| -------- | ----- | ----------------------------------- |
|
||||
| Soft cap | 400 | Bash linter warns |
|
||||
| Hard cap | 800 | Bash linter errors — split the page |
|
||||
|
||||
These limits ensure pages fit within the LLM context window without attention degradation
|
||||
and keep the wiki atomically navigable.
|
||||
|
||||
### Linking conventions
|
||||
|
||||
| Type | Format |
|
||||
|------|--------|
|
||||
| Type | Format |
|
||||
| ---------------------- | ------------------------------------------- |
|
||||
| Internal (same genome) | `[[folder/slug]]` — Obsidian wikilinks only |
|
||||
| Cross-genome | `[[../genome-target/wiki/folder/slug]]` |
|
||||
| External | `[text](https://url)` — standard Markdown |
|
||||
| Cross-genome | `[[../genome-target/wiki/folder/slug]]` |
|
||||
| External | `[text](https://url)` — standard Markdown |
|
||||
|
||||
Never use `[text](relative/path)` for internal references. Obsidian wikilinks are
|
||||
bidirectional and appear in the graph view.
|
||||
|
|
@ -878,6 +1046,7 @@ Every operation appends one entry to `wiki/log.md`:
|
|||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||
|
||||
Parse examples:
|
||||
|
||||
```bash
|
||||
grep "^## \[" wiki/log.md | tail -5 # Last 5 entries
|
||||
grep "^## \[" wiki/log.md | grep "CONFLICT" # All conflicts
|
||||
|
|
@ -891,12 +1060,12 @@ The LLM never loads the full log.
|
|||
|
||||
## Collaboration Model
|
||||
|
||||
| Role | Key access | Permitted operations |
|
||||
|------|-----------|----------------------|
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Never | `PRIVATE_CONTEXT` must be `disabled`; private data stays on local network |
|
||||
| Role | Key access | Permitted operations |
|
||||
| -------------- | ----------------- | ----------------------------------------------------------------------------- |
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | None | Push to `raw/articles/`, `raw/transcripts/`, `raw/code-packs/`, `raw/assets/` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Never | `PRIVATE_CONTEXT` must be `disabled`; private data stays on local network |
|
||||
|
||||
Grant collaborator access: add as Forgejo contributor with Write role.
|
||||
Never share the git-crypt key — collaborators operate exclusively in public directories.
|
||||
|
|
@ -930,6 +1099,7 @@ qmd serve --port 3333
|
|||
Obsidian is the recommended wiki browser. Open any genome directory as an Obsidian vault.
|
||||
|
||||
Recommended setup:
|
||||
|
||||
- **Graph view** — visualise page connections; spot orphans and hubs instantly
|
||||
- **Obsidian Web Clipper** — browser extension to clip articles directly to `raw/articles/`
|
||||
as Markdown
|
||||
|
|
@ -949,7 +1119,8 @@ n8n (running on the storage node) can automate the ingest pipeline:
|
|||
2. n8n flow identifies new files
|
||||
3. For each new file: starts one agent session (sequential — never parallel)
|
||||
4. Each session receives: `tail -n 20 wiki/log.md` + `PRIVATE_CONTEXT` state + source path
|
||||
5. Agent runs ingest workflow and opens PR
|
||||
5. Phase 1 — agent runs `/skill:ingest` (semantic → writes manifest); Phase 2 —
|
||||
`run-ingest.sh` does index/log/lint and opens the PR, returning one JSON line to n8n
|
||||
6. Human reviews the PR
|
||||
|
||||
Key constraint: one source per session, sessions sequential.
|
||||
|
|
@ -959,11 +1130,13 @@ Never batch multiple sources into one agent session.
|
|||
|
||||
If the AI compute node has an Intel NPU (e.g. Core Ultra series):
|
||||
|
||||
- Background tasks (embedding updates, index refresh) → Intel NPU via OpenVINO
|
||||
- Background/auxiliary tasks (OCR of `raw/assets/`, async summarisation, or qmd
|
||||
re-indexing **if** the optional qmd extension is in use) → Intel NPU via OpenVINO
|
||||
- Active reasoning sessions (ingest, query, synthesis) → GPU
|
||||
|
||||
This keeps the GPU's KV cache free for interactive work and reduces power consumption
|
||||
for background operations.
|
||||
Note: the core system has no embedding pipeline (see [Core Philosophy](#core-philosophy)),
|
||||
so there is nothing to embed here — the NPU is only for auxiliary work. This keeps the
|
||||
GPU's KV cache free for interactive sessions and lowers power draw for background jobs.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -991,6 +1164,7 @@ sudo apt install git git-crypt curl jq
|
|||
The staged file is in a path matching `**/private/**` but is not encrypted.
|
||||
|
||||
Fix options:
|
||||
|
||||
1. Verify `.gitattributes` contains `**/private/** filter=git-crypt diff=git-crypt -text`
|
||||
2. Run `git-crypt init` if git-crypt is not initialised in this repo
|
||||
3. Run `git-crypt status` to check the encryption state of all files
|
||||
|
|
@ -1011,6 +1185,7 @@ git commit -m "fix: re-stage private files for encryption"
|
|||
### Agent returns stale or missing cross-references
|
||||
|
||||
Likely causes:
|
||||
|
||||
1. Session was too long — KV cache degraded. Use one source per session.
|
||||
2. `wiki/index.md` was not read at session start — agent lacked the page catalog.
|
||||
3. The qmd index is stale (only relevant if the optional qmd extension is in use) — re-index: `qmd index <genome>/wiki/`
|
||||
|
|
|
|||
130
diagnose-run-ingest.sh
Normal file
130
diagnose-run-ingest.sh
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
#!/usr/bin/env bash
|
||||
# diagnose-run-ingest.sh
|
||||
# Run from the repo root: bash diagnose-run-ingest.sh
|
||||
# Builds the same fixture the bats test uses and runs run-ingest under `bash -x`
|
||||
# so we can see exactly which command makes it exit non-zero.
|
||||
|
||||
# nounset + pipefail only: errexit is not enabled, so the script keeps going
# after a failing command and can still print the diagnostics further down.
set -uo pipefail

# The script must be started from the repo root; every path hangs off the cwd.
REPO="$(pwd)"
RI="${REPO}/skills/ingest/scripts/run-ingest.sh"

# --- Tool versions -----------------------------------------------------------
# jq and python3 are reported as MISSING instead of failing when absent.
printf '%s\n' "==================== ENV ===================="
printf '%s\n' "bash: $(bash --version | head -1)"
printf '%s\n' "git : $(git --version)"
printf '%s\n' "jq : $(jq --version 2>/dev/null || echo MISSING)"
printf '%s\n' "py : $(python3 --version 2>/dev/null || echo MISSING)"
printf '\n'

# --- What run-ingest.sh looks like on disk -----------------------------------
printf '%s\n' "============ run-ingest.sh on disk ============"
[[ -f "$RI" ]] || {
  printf '%s\n' "NOT FOUND: $RI (run me from the repo root)"
  exit 1
}
# Show how the helper scripts are invoked and how the final result is emitted,
# with line numbers, so the reader can compare against the expected forms.
printf '%s\n' "-- helper invocations (want 'bash ...'): --"
grep -nE 'log-append\.sh|scoped-lint\.sh|open-pr\.sh' "$RI"
printf '%s\n' "-- result emitter (want 'jq -nc'): --"
grep -nE 'jq -nc?|jq -n ' "$RI"
printf '\n'
|
||||
|
||||
echo "============ build hermetic fixture ============"
# Everything the fixture needs lives under one throw-away directory:
#   $T/origin.git  bare repo acting as the remote
#   $T/nohooks     empty directory used as core.hooksPath (no hooks will run)
#   $T/g           the genome checkout run-ingest.sh is executed in
# Abort if the temp dir cannot be created: with an empty $T every path below
# would resolve under "/" (e.g. "/g", "/origin.git").
T="$(mktemp -d)" || { echo "mktemp -d failed — cannot build fixture" >&2; exit 1; }
mkdir -p "$T/nohooks"
git init --bare -q "$T/origin.git"
g="$T/g"
mkdir -p "$g"/{raw/articles,wiki/sources,wiki/entities,wiki/concepts,wiki/queries,wiki/private}

# Minimal index with one (placeholder) entry per section.
cat > "$g/wiki/index.md" <<'EOF'
---
title: "Index"
type: index
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Index

---

## Sources (`wiki/sources/`)
*x*


## Entities (`wiki/entities/`)
*x*


## Concepts (`wiki/concepts/`)
*x*


## Queries (`wiki/queries/`)
*x*


## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
*x*
EOF

# Minimal log with a single seed entry.
cat > "$g/wiki/log.md" <<'EOF'
---
title: "Log"
type: log
domain: genome-test
maturity: stable
last_updated: 2026-01-01
private: false
---

# Log

---

## [2026-01-01] CONFIG | init
- run_id: `init`
EOF

echo raw > "$g/raw/articles/test.md"

# Initialise the checkout, commit the baseline and push it to the bare remote.
# The steps are chained with && and start with the cd: if entering the fixture
# fails, none of the git commands may run, because they would otherwise act on
# the caller's real repository (git config / add / commit / branch -M). The
# chain also makes the OK/FAILED line reflect every step, not just the push.
(
  cd "$g" &&
  git init -q &&
  git config commit.gpgsign false &&
  git config core.hooksPath "$T/nohooks" &&
  git config user.email t@t &&
  git config user.name t &&
  git add . &&
  git commit -qm init &&
  git branch -M main &&
  git remote add origin "$T/origin.git" &&
  git push -q -u origin main
) && echo "fixture commit+push OK" || echo "FIXTURE SETUP FAILED (look above)"

# Written AFTER the baseline commit, so it is an uncommitted change in the
# checkout when run-ingest.sh starts.
cat > "$g/wiki/sources/test-source.md" <<'EOF'
---
title: "Test Source"
type: source
domain: genome-test
tags: [t]
maturity: draft
last_updated: 2026-06-04
private: false
---
body
EOF

# Manifest listing the single page created above.
cat > "$g/.ingest-manifest.json" <<'EOF'
{ "raw_source":"raw/articles/test.md","model":"m","reasoning":"r","pr_summary":"s","contradictions":"None",
  "pages":[{"path":"wiki/sources/test-source.md","summary":"a source","maturity":"draft","status":"created"}] }
EOF
|
||||
|
||||
echo
echo "============ run-ingest (bash -x) ============"
# run-ingest.sh is executed from inside the fixture checkout. Stop if we cannot
# get there: otherwise it would run against whatever directory we are in
# (normally the real repo root).
cd "$g" || { echo "cannot cd into fixture: $g" >&2; exit 1; }

# Dummy forge settings plus DRY_RUN=1.
# NOTE(review): presumably DRY_RUN keeps run-ingest.sh from contacting a real
# forge — confirm against run-ingest.sh / open-pr.sh.
export KG_LIB_DIR="${REPO}/lib" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1

# stdout (expected to hold the final JSON line) and the xtrace on stderr are
# captured separately so the trace does not drown the result.
bash -x "$RI" genome-test >"$T/out.txt" 2>"$T/trace.txt"
rc=$?

echo "EXIT=$rc"
echo "-- run-ingest stdout (final JSON should be here): --"
cat "$T/out.txt"
echo "-- last 25 lines of the trace (the failing command is near the end): --"
tail -n 25 "$T/trace.txt"
|
||||
|
|
@ -4,6 +4,9 @@
|
|||
# Directory structure creation and template rendering engine.
|
||||
# =============================================================================
|
||||
|
||||
# Canonical directory layout lives in one place (lib/structure.sh).
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/structure.sh"
|
||||
|
||||
render_template() {
|
||||
local template_file="$1"
|
||||
local output_file="$2"
|
||||
|
|
@ -13,17 +16,21 @@ render_template() {
|
|||
local content
|
||||
content=$(<"$template_file")
|
||||
|
||||
# Defaults (:-) so master-repo templates render even when GENOME_* are unset
|
||||
# (scaffold_master runs before any genome; set -u would otherwise abort here).
|
||||
local genome_name_upper
|
||||
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME}")
|
||||
genome_name_upper=$(tr '[:lower:]' '[:upper:]' <<< "${GENOME_NAME:-}")
|
||||
|
||||
# Placeholder replacement
|
||||
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME}}"
|
||||
content="${content//\{\{GENOME_NAME\}\}/${GENOME_NAME:-}}"
|
||||
content="${content//\{\{GENOME_NAME_UPPER\}\}/${genome_name_upper}}"
|
||||
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC}}"
|
||||
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL}}"
|
||||
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER}}"
|
||||
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL}}"
|
||||
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO}}"
|
||||
content="${content//\{\{GENOME_DESC\}\}/${GENOME_DESC:-}}"
|
||||
content="${content//\{\{FORGEJO_URL\}\}/${FORGEJO_URL:-}}"
|
||||
content="${content//\{\{FORGEJO_USER\}\}/${FORGEJO_USER:-}}"
|
||||
content="${content//\{\{VAULTWARDEN_URL\}\}/${VAULTWARDEN_URL:-}}"
|
||||
content="${content//\{\{MASTER_REPO\}\}/${MASTER_REPO:-}}"
|
||||
# linked project reference (optional) — empty registry field renders as 'none'
|
||||
content="${content//\{\{LINKED_PROJECT\}\}/${GENOME_LINKED:-none}}"
|
||||
content="${content//\{\{DATE\}\}/$(date +%Y-%m-%d)}"
|
||||
|
||||
mkdir -p "$(dirname "$output_file")"
|
||||
|
|
@ -32,13 +39,9 @@ render_template() {
|
|||
|
||||
scaffold_genome() {
|
||||
local base="$1"
|
||||
local dirs=(
|
||||
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
|
||||
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
|
||||
)
|
||||
|
||||
info "Building directory structure in ${base}..."
|
||||
for dir in "${dirs[@]}"; do
|
||||
for dir in "${GENOME_DIRS[@]}"; do
|
||||
mkdir -p "${base}/${dir}"
|
||||
touch "${base}/${dir}/.gitkeep"
|
||||
done
|
||||
|
|
|
|||
70
lib/structure.sh
Normal file
70
lib/structure.sh
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# lib/structure.sh
|
||||
# Single source of truth for the canonical genome directory layout, plus the
|
||||
# verify/sync helpers used by scripts/verify-genomes.sh.
|
||||
#
|
||||
# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources
|
||||
# this file and builds new genomes from GENOME_DIRS, so scaffolding and the
|
||||
# structure check can never drift apart.
|
||||
# =============================================================================
|
||||
|
||||
# The canonical set of directories each genome is required to contain.
#   raw/*  — input buckets (collaborator-writable)
#   wiki/* — agent-owned, contract-bound layout that the lint, the index
#            sections and the ingest skill depend on
GENOME_DIRS=(
  "raw/articles"
  "raw/transcripts"
  "raw/code-packs"
  "raw/assets"
  "raw/private"
  "wiki/sources"
  "wiki/entities"
  "wiki/concepts"
  "wiki/queries"
  "wiki/private"
)
|
||||
|
||||
# ---------------------------------------------------------------------------
# structure_report <base>
#   Reports drift of <base> against GENOME_DIRS.
#     - missing canonical dir          → logged with warn, counted as drift
#     - extra dir under raw/ or wiki/  → logged with info only (does not fail)
#   Arguments:
#     $1  base — path of the genome checkout to inspect
#   Returns the number of MISSING canonical directories (0 = no drift), so a
#   caller can both test the status and add it to a running total.
# ---------------------------------------------------------------------------
structure_report() {
  local base="$1"
  local missing=0
  # Loop variable is local: this file is sourced, so an un-scoped `d` would
  # overwrite a variable of the same name in the calling script.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      warn "missing: ${d}"
      missing=$((missing + 1))
    fi
  done

  # Extra directories (drift the other way) — informational only.
  # The canonical list is flattened into one space-delimited string (padded
  # with a space at both ends) so membership is a whole-word substring match.
  local canon=" ${GENOME_DIRS[*]} "
  while IFS= read -r d; do
    d="${d#"${base}/"}"   # make the path relative to <base>, like GENOME_DIRS
    [[ "$canon" == *" ${d} "* ]] && continue
    info "extra (not in canon): ${d}"
  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null)

  return "$missing"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# structure_sync <base>
#   Creates any MISSING canonical directories under <base> (idempotent) and
#   drops a .gitkeep placeholder into each one it creates. Directories that
#   already exist are left untouched. Never deletes — retiring a bucket is a
#   deliberate, contract-aware change to GENOME_DIRS + the templates, not an
#   automatic prune.
#   Arguments:
#     $1  base — path of the genome checkout to bring in sync
#   Returns 0 (explicitly, so the "already in sync" guard cannot leak a
#   non-zero status to the caller).
# ---------------------------------------------------------------------------
structure_sync() {
  local base="$1"
  local added=0
  # Loop variable is local: this file is sourced, so an un-scoped `d` would
  # overwrite a variable of the same name in the calling script.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      mkdir -p "${base}/${d}"
      touch "${base}/${d}/.gitkeep"
      success "created: ${d}"
      added=$((added + 1))
    fi
  done

  [[ $added -eq 0 ]] && info "already in sync: ${base}"
  return 0
}
|
||||
12
registry.sh
12
registry.sh
|
|
@ -19,9 +19,13 @@ LIB_DIR="${PROJECT_ROOT}/lib"
|
|||
PROVIDERS_DIR="${PROJECT_ROOT}/providers"
|
||||
|
||||
# --- GENOME REGISTRY ---
|
||||
# Format: "name|description"
|
||||
# Format: "name|description|linked_repo"
|
||||
# - linked_repo is OPTIONAL. Leave empty (trailing pipe) for knowledge-only genomes.
|
||||
# - It is an opaque reference rendered verbatim into the genome's AGENTS.md
|
||||
# (phase-2 project work is parked, so the framework does not act on it yet).
|
||||
# - Example with a project: "genome-homelab|Keru infrastructure...|keru/homelab-infra"
|
||||
GENOMES=(
|
||||
"genome-dev|Web development, TUI, Angular, software architecture"
|
||||
"genome-finance|Personal finance, investments, market analysis"
|
||||
"genome-homelab|Keru infrastructure, network configs, architecture logs"
|
||||
"genome-dev|Web development, TUI, Angular, software architecture|"
|
||||
"genome-finance|Personal finance, investments, market analysis|"
|
||||
"genome-homelab|Keru infrastructure, network configs, architecture logs|"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,16 +11,18 @@ source "registry.sh"
|
|||
|
||||
GENOME_NAME="${1:-}"
|
||||
GENOME_DESC="${2:-}"
|
||||
GENOME_LINKED="${3:-}" # optional: linked project repo reference
|
||||
|
||||
if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
|
||||
error "Missing arguments."
|
||||
echo "Usage: $0 <genome-name> <description>"
|
||||
echo "Usage: $0 <genome-name> <description> [linked-repo]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
step "Adding New Genome: ${GENOME_NAME}"
|
||||
|
||||
GENOMES=("${GENOME_NAME}|${GENOME_DESC}")
|
||||
# Build a 3-field registry entry (linked_repo may be empty)
|
||||
GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}")
|
||||
|
||||
source "scripts/setup-genomes.sh"
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,9 @@ source "providers/${PROVIDER}.sh"
|
|||
step "Processing Genome Registry"
|
||||
|
||||
for entry in "${GENOMES[@]}"; do
|
||||
IFS='|' read -r GENOME_NAME GENOME_DESC <<< "$entry"
|
||||
export GENOME_NAME GENOME_DESC
|
||||
# 3-field format: name|description|linked_repo (linked_repo optional → may be empty)
|
||||
IFS='|' read -r GENOME_NAME GENOME_DESC GENOME_LINKED <<< "$entry"
|
||||
export GENOME_NAME GENOME_DESC GENOME_LINKED
|
||||
|
||||
info "Processing: ${GENOME_NAME}..."
|
||||
|
||||
|
|
|
|||
50
scripts/verify-genomes.sh
Normal file
50
scripts/verify-genomes.sh
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# scripts/verify-genomes.sh
|
||||
# Check (default) or --sync the directory structure of every registered genome
|
||||
# against the canonical layout in lib/structure.sh.
|
||||
#
|
||||
# bash scripts/verify-genomes.sh # report drift, non-zero exit on drift
|
||||
# bash scripts/verify-genomes.sh --sync # create missing dirs everywhere (safe)
|
||||
#
|
||||
# No hardware/LLM involved — pure structure check. Run anywhere.
|
||||
# =============================================================================
|
||||
set -euo pipefail

# Helpers and configuration are sourced relative to the current directory.
# NOTE(review): step/info/warn/success/error presumably come from lib/output.sh
# and WORK_DIR/MASTER_REPO from globals.env — confirm.
source "lib/output.sh"
source "globals.env"
source "registry.sh"
source "lib/structure.sh"

# "--sync" as the first argument selects sync mode; anything else verifies.
case "${1:-}" in
  --sync) MODE="sync" ;;
  *)      MODE="verify" ;;
esac

step "Genome structure: ${MODE}"

TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do
  # Registry entries are "name|description|linked_repo"; only the name matters here.
  IFS='|' read -r GENOME_NAME _ _ <<< "$entry"
  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  # Genomes that are not checked out locally are skipped, not counted as drift.
  if [[ ! -d "$genome_dir" ]]; then
    warn "not found locally, skipping: ${GENOME_NAME}"
    continue
  fi

  info "Genome: ${GENOME_NAME}"
  case "$MODE" in
    sync)
      structure_sync "$genome_dir"
      ;;
    *)
      # structure_report returns its missing-directory count as the exit
      # status; the `||` keeps errexit from aborting on a non-zero count.
      genome_missing=0
      structure_report "$genome_dir" || genome_missing=$?
      TOTAL_MISSING=$((TOTAL_MISSING + genome_missing))
      ;;
  esac
done

echo ""
if [[ "$MODE" == "sync" ]]; then
  success "Structure sync complete."
elif (( TOTAL_MISSING == 0 )); then
  success "Structure verified: all genomes match the canonical layout."
else
  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
  exit 1
fi
|
||||
93
skills/ingest/SKILL.md
Normal file
93
skills/ingest/SKILL.md
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
---
|
||||
name: ingest
|
||||
description: Semantic pass of a single raw source into the current genome's wiki — read the source, write sources/entities/concepts, handle contradictions, then emit a manifest and STOP. Use when a new file lands in raw/. Does NOT do git, log, index, lint, or PRs (a post-processor handles those), and does NOT handle private sources or project repos.
|
||||
license: see repository
|
||||
compatibility: Runs inside one genome checkout (cwd = genome root). Tools needed — read, edit only. NO bash, NO git. The deterministic steps (index, log, scoped lint, PR) run AFTER you exit, via run-ingest.sh. PRIVATE_CONTEXT must be disabled.
|
||||
allowed-tools: read edit
|
||||
metadata:
|
||||
framework: knowledge-genome
|
||||
phase: "1-ingest-semantic"
|
||||
---
|
||||
|
||||
# Ingest — semantic pass
|
||||
|
||||
You run inside ONE genome checkout. `AGENTS.md` (already in your context) is the
|
||||
authoritative contract. Your job is the **semantic pass only**: read the source, write
|
||||
the wiki pages, handle contradictions. You do **not** touch git, the log, the index, the
|
||||
linter, or PRs — a post-processor (`run-ingest.sh`) does all of that _after you stop_,
|
||||
from the manifest you leave behind. This keeps your context clean and your turns few,
|
||||
which matters on a small local model.
|
||||
|
||||
**Argument:** the relative path of the single raw source to ingest
|
||||
(e.g. `raw/articles/foo.md`). Process only this one.
|
||||
|
||||
## Pre-flight — stop the session if any check fails
|
||||
|
||||
1. Refuse if the argument path is under any `private/` directory.
|
||||
2. Refuse if `PRIVATE_CONTEXT` is not `disabled`.
|
||||
3. Confirm the file exists under `raw/`.
|
||||
|
||||
## Semantic work (your only job)
|
||||
|
||||
1. Read the source once.
|
||||
2. Write `wiki/sources/<kebab-slug>.md` — faithful summary + key points, with the required
|
||||
frontmatter (`type: source`, `domain: <genome>`, `maturity: draft`,
|
||||
`last_updated: <today>`, `private: false`, sensible `tags`).
|
||||
3. For each entity (person, tool, org) → create or update `wiki/entities/<kebab-name>.md`.
|
||||
4. For each concept (pattern, theory, decision) → create or update
|
||||
`wiki/concepts/<kebab-name>.md`.
|
||||
5. On a real contradiction with an existing claim, follow `AGENTS.md` §Conflict: create
|
||||
`wiki/queries/conflict-<concept>-<YYYY-MM-DD>.md`. Never overwrite the existing page.
|
||||
|
||||
**Naming — you are the sole author of these names; nothing renames your files.** Use
|
||||
minimal kebab-case: lowercase letters, digits and hyphens only — no spaces, no underscores,
|
||||
no capitals. Pick stable names so the same entity is never created twice (always `acme`,
|
||||
never also `acme-corp`). The path you write a file to MUST be byte-for-byte the path you
|
||||
list in the manifest.
|
||||
|
||||
**Deciding create-vs-update and spotting contradictions — mind the context budget.** Use
|
||||
`wiki/index.md` to locate existing pages, then read **only** the handful that _this source
|
||||
actually names_ — the entities and concepts in the source's title and opening paragraphs —
|
||||
not everything the index lists. When in doubt, read fewer: a missed cross-link is far
|
||||
cheaper than a saturated context. Never scan whole directories.
|
||||
|
||||
## Finish: write the manifest, then STOP
|
||||
|
||||
As your **final action**, write `.ingest-manifest.json` at the genome root
|
||||
(NOT under `wiki/`) describing exactly what you did. Then stop — do not commit, lint,
|
||||
append to the log/index, or open anything.
|
||||
|
||||
```json
|
||||
{
|
||||
"raw_source": "raw/articles/foo.md",
|
||||
"reasoning": "One sentence for the log: what changed and why.",
|
||||
"pr_summary": "One or two sentences describing this ingest for the PR.",
|
||||
"contradictions": "None (or: 1 conflict file created — <concept>)",
|
||||
"pages": [
|
||||
{
|
||||
"path": "wiki/sources/foo.md",
|
||||
"summary": "One-line index summary.",
|
||||
"maturity": "draft",
|
||||
"status": "created"
|
||||
},
|
||||
{
|
||||
"path": "wiki/entities/acme.md",
|
||||
"summary": "Acme — vendor.",
|
||||
"status": "modified"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Manifest rules:
|
||||
|
||||
- List every page you created or modified, with `status` `created` or `modified`.
|
||||
- `summary` is the one-line index description (≈12 words max). For conflict pages the
|
||||
summary is ignored — the index lists conflicts by slug only.
|
||||
- `maturity` is required only on `created` pages (it seeds the new index entry). It is
|
||||
ignored for `modified` pages, so omit it there.
|
||||
- Do NOT add a `model` field — the orchestrator records which model produced this run; you
|
||||
cannot know your own model name reliably, so do not guess one.
|
||||
- Do not invent a `run_id`, branch, commit, or PR — those belong to the post-processor.
|
||||
|
||||
One source per session. After writing the manifest, stop.
|
||||
129
skills/ingest/scripts/index-append.py
Normal file
129
skills/ingest/scripts/index-append.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env python3
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/index-append.py
|
||||
# Insert an entry line into the correct section of wiki/index.md and keep that
|
||||
# section's entries alphabetically ordered. Bumps frontmatter last_updated.
|
||||
#
|
||||
# NOTE: agents-genome.md and wiki-index.md claim the pre-commit hook sorts the
|
||||
# index. The actual pre-commit.sh only runs the plaintext-leak check — it does
|
||||
# NOT sort. This script owns the ordering instead. (If you later move sorting
|
||||
# into the hook, reduce this to a plain append.)
|
||||
#
|
||||
# index-append.py --section Sources \
|
||||
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
|
||||
# =============================================================================
|
||||
import argparse
|
||||
import datetime
|
||||
import re
|
||||
import sys
|
||||
|
||||
# An index entry line: a top-level bullet whose text starts with a wikilink.
ENTRY_RE = re.compile(r"^- \[\[")
# Captures the wikilink target (e.g. "sources/foo") of an entry line.
LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
# A level-2 Markdown heading, i.e. the start of an index section.
HEADER_RE = re.compile(r"^## ")


def _bump_last_updated(lines, today):
    """Set ``last_updated`` in the leading frontmatter block of ``lines``, in place.

    Frontmatter is recognised only when the very first line is ``---`` and a
    closing ``---`` exists. A ``---`` that appears later in the document is a
    horizontal rule, not frontmatter, and is left alone.

    Args:
        lines: The file content as a list of lines (mutated in place).
        today: ISO date string to write.
    """
    if not lines or lines[0].strip() != "---":
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
        return

    close_idx = next(
        (i for i in range(1, len(lines)) if lines[i].strip() == "---"), None)
    if close_idx is None:
        # Never scan the document body for the key when the block is unterminated.
        print("index-append: warning: frontmatter is not terminated, "
              "last_updated not bumped", file=sys.stderr)
        return

    bumped = False
    for i in range(1, close_idx):
        if lines[i].startswith("last_updated:"):
            lines[i] = f"last_updated: {today}"
            bumped = True

    if not bumped:
        # self-heal: frontmatter present but missing the key — insert it before the close
        lines.insert(close_idx, f"last_updated: {today}")
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)


def main() -> int:
    """Insert or refresh one entry in a section of the wiki index.

    Reads ``--file`` (default ``wiki/index.md``), bumps the frontmatter
    ``last_updated`` to today, then rewrites the section whose ``## `` heading
    starts with ``--section`` so that it holds its non-entry lines first,
    followed by the entries sorted case-insensitively.

    An existing entry with the same wikilink target is replaced rather than
    duplicated. If the identical line is already present, nothing is written.

    Returns:
        0 on success (including "already present"), 1 when the file or the
        section cannot be found.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--section", required=True,
                    help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts")
    ap.add_argument("--entry", required=True, help="Full index line to insert")
    ap.add_argument("--file", default="wiki/index.md")
    args = ap.parse_args()

    try:
        with open(args.file, encoding="utf-8") as fh:
            lines = fh.read().splitlines()
    except FileNotFoundError:
        print(f"index-append: not found: {args.file}", file=sys.stderr)
        return 1

    today = datetime.date.today().isoformat()

    # 1. Bump last_updated inside the leading frontmatter block
    _bump_last_updated(lines, today)

    # 2. Locate the target section [start, end). The heading is matched by
    #    prefix, so "## Sources (12)" still matches --section Sources.
    start = None
    for i, ln in enumerate(lines):
        if HEADER_RE.match(ln) and ln[3:].startswith(args.section):
            start = i
            break
    if start is None:
        print(f"index-append: section '{args.section}' not found in {args.file}",
              file=sys.stderr)
        return 1

    end = len(lines)
    for i in range(start + 1, len(lines)):
        if HEADER_RE.match(lines[i]):
            end = i
            break

    # 3. Split the section body into intro (non-entry) and entries.
    #    Every non-entry line of the section ends up ahead of the entries.
    body = lines[start + 1:end]
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    entries = [ln for ln in body if ENTRY_RE.match(ln)]

    # Deduplicate by wikilink PATH, not by exact line: a re-ingest with a changed
    # summary/maturity should UPDATE the existing entry, not add a duplicate line.
    new_m = LINK_RE.match(args.entry)
    new_link = new_m.group(1) if new_m else None
    action = "added to"

    if new_link is not None:
        replaced = False
        for idx, ln in enumerate(entries):
            m = LINK_RE.match(ln)
            if m and m.group(1) == new_link:
                if ln == args.entry:
                    print("index-append: entry already present, skipping")
                    return 0
                entries[idx] = args.entry  # same page, refreshed text
                replaced = True
                action = "updated in"
                break
        if not replaced:
            entries.append(args.entry)
    else:
        # No parseable wikilink — fall back to exact-line dedup.
        if args.entry in entries:
            print("index-append: entry already present, skipping")
            return 0
        entries.append(args.entry)

    entries.sort(key=str.casefold)

    # Normalise intro: drop trailing blanks, keep header + comment(s)
    while intro and intro[-1].strip() == "":
        intro.pop()

    new_section = intro + [""] + entries
    if end < len(lines):
        # Blank separator before the next heading. Omitted when this is the last
        # section so the file ends with exactly one newline.
        new_section.append("")
    lines = lines[:start + 1] + new_section + lines[end:]

    with open(args.file, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines) + "\n")

    print(f"index-append: {action} {args.section}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
50
skills/ingest/scripts/log-append.sh
Normal file
50
skills/ingest/scripts/log-append.sh
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/log-append.sh
|
||||
# Append one entry to the append-only ledger wiki/log.md, in the exact format
|
||||
# defined by AGENTS.md / wiki-log.md. Generates run_id. Never edits prior entries.
|
||||
#
|
||||
# log-append.sh --type INGEST --subject "<slug>" --model "<model>" \
|
||||
# --context "[[raw/x]]" --output "[[sources/x]]" \
|
||||
# --reasoning "One sentence."
|
||||
# =============================================================================
|
||||
set -euo pipefail

# Ledger file to append to; overridable through the environment.
LOG_FILE="${LOG_FILE:-wiki/log.md}"

# Print an error prefixed with the script name and abort.
die() { echo "log-append: $*" >&2; exit 1; }

type="" subject="" model="" context="" output="" reasoning=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --type|--subject|--model|--context|--output|--reasoning)
      # Without this guard a trailing option dies with "unbound variable" under set -u.
      [[ $# -ge 2 ]] || die "missing value for $1"
      # The option name minus its leading "--" is the variable to assign.
      printf -v "${1#--}" '%s' "$2"
      shift 2
      ;;
    *) die "unknown arg: $1" ;;
  esac
done

: "${type:?--type required}"
: "${subject:?--subject required}"

case "$type" in
  INGEST|LINT|QUERY|CONFLICT|CONFIG|SECURITY) ;;
  *) die "invalid TYPE '${type}'" ;;
esac

[[ -f "$LOG_FILE" ]] || die "not found: $LOG_FILE"

# Each field occupies exactly one line of the ledger. Values can come from model
# output (e.g. reasoning), so collapse CR/LF/TAB to spaces: an embedded newline
# would otherwise split the entry or forge a "## [...]" heading.
for _field in subject model context output reasoning; do
  _val="${!_field}"
  _val="${_val//$'\r'/ }"
  _val="${_val//$'\n'/ }"
  _val="${_val//$'\t'/ }"
  printf -v "$_field" '%s' "$_val"
done

# Try each UUID source in turn.
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')"
today="$(date +%Y-%m-%d)"

# Append only — earlier entries are never rewritten.
{
  printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
  printf -- '- run_id: `%s`\n' "$run_id"
  printf -- '- model: `%s`\n' "${model:-unknown}"
  printf -- '- context_read: %s\n' "${context:-*(none)*}"
  printf -- '- output_written: %s\n' "${output:-*(none)*}"
  printf -- '- reasoning: %s\n' "${reasoning:-No reasoning provided.}"
} >> "$LOG_FILE"

# Report the generated id on stdout.
echo "run_id=${run_id}"
|
||||
118
skills/ingest/scripts/open-pr.sh
Normal file
118
skills/ingest/scripts/open-pr.sh
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/open-pr.sh
|
||||
# Branch, commit (conventional), push, and open a Forgejo PR for the wiki/ changes.
|
||||
# Mirrors the API conventions of providers/forgejo.sh (token auth + http_code).
|
||||
# Runs inside the genome checkout (cwd = genome root). Never touches main.
|
||||
#
|
||||
# open-pr.sh --slug <slug> --title "feat: ingest <slug>" --body-file <path> \
|
||||
# [--base main] [--label CONFLICT]
|
||||
#
|
||||
# Requires env: FORGEJO_URL, FORGEJO_USER, FORGEJO_TOKEN.
|
||||
# =============================================================================
|
||||
set -euo pipefail

: "${FORGEJO_URL:?missing FORGEJO_URL}"
: "${FORGEJO_USER:?missing FORGEJO_USER}"
: "${FORGEJO_TOKEN:?missing FORGEJO_TOKEN}"

slug="" title="" body_file="" base="main" label=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --slug)      slug="$2";      shift 2 ;;
    --title)     title="$2";     shift 2 ;;
    --body-file) body_file="$2"; shift 2 ;;
    --base)      base="$2";      shift 2 ;;
    --label)     label="$2";     shift 2 ;;
    *) echo "open-pr: unknown arg: $1" >&2; exit 1 ;;
  esac
done

: "${slug:?--slug required}"
: "${title:?--title required}"
: "${body_file:?--body-file required}"
[[ -f "$body_file" ]] || { echo "open-pr: body file not found: $body_file" >&2; exit 1; }

branch="feat/ai-ingest-${slug}"
# Repository name = last path component of the origin URL, minus a ".git" suffix.
repo="$(basename -s .git "$(git config --get remote.origin.url)")"

# 1. Branch + commit + push (AGENTS.md rule 5: never commit to main)
# Create the branch, or switch to it if it already exists (re-run).
git switch -c "$branch" 2>/dev/null || git switch "$branch"
git add wiki/
# Scope BOTH the emptiness check and the commit to wiki/ — never commit anything that
# happened to be staged outside wiki/ (a stray hook, an aborted prior run, etc.).
if git diff --cached --quiet -- wiki/; then
  echo "open-pr: nothing staged under wiki/ — aborting" >&2
  exit 1
fi
git commit -m "$title" -- wiki/
git push -u origin "$branch"

# DRY_RUN: local git work done; skip the Forgejo API (offline tests).
if [[ -n "${DRY_RUN:-}" ]]; then
  echo "PR opened: DRY-RUN ${branch} -> ${base}"
  exit 0
fi

# 2. Open the PR via Forgejo API (jq builds the JSON safely)
# TODO: Forgejo-only. When registry.sh/globals.env sets PROVIDER=github, branch on
#       $PROVIDER here and delegate to providers/github.sh (same token + http_code contract).
body="$(cat "$body_file")"
payload="$(jq -n --arg head "$branch" --arg base "$base" \
                 --arg title "$title" --arg body "$body" \
                 '{head:$head, base:$base, title:$title, body:$body}')"

resp="$(curl --max-time 30 -s -w '\n%{http_code}' \
  -H "Authorization: token ${FORGEJO_TOKEN}" \
  -H "Content-Type: application/json" \
  -X POST "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/pulls" \
  -d "$payload")"

# curl -w appends '\n<code>' AFTER the body, so the code is always the final line and the
# body is everything before it. Parameter expansion (no subshells), robust to multi-line JSON.
code="${resp##*$'\n'}"
json="${resp%$'\n'*}"

case "$code" in
  201)
    url="$(printf '%s' "$json" | jq -r '.html_url')"
    number="$(printf '%s' "$json" | jq -r '.number')"
    echo "PR opened: ${url}"
    ;;
  409)
    # PR already exists — fetch it so the orchestrator still gets the URL.
    # The lookup is a convenience: under `set -e -o pipefail` a curl timeout or a
    # non-array response would otherwise abort the script even though the push
    # succeeded, so a failed pipeline degrades to "URL unknown".
    existing="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
      "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/pulls?state=open" \
      | jq -r --arg b "$branch" '.[] | select(.head.ref==$b) | .html_url' | head -n1)" \
      || existing=""
    if [[ -n "$existing" && "$existing" != "null" ]]; then
      echo "PR opened: ${existing}"
    else
      echo "open-pr: a PR for '${branch}' already exists (push updated the branch)." >&2
    fi
    exit 0
    ;;
  401)
    echo "open-pr: unauthorized — check FORGEJO_TOKEN (n8n-bot)." >&2
    exit 1
    ;;
  *)
    echo "open-pr: Forgejo API HTTP ${code}: ${json}" >&2
    exit 1
    ;;
esac

# 3. Optional label (e.g. CONFLICT). Best-effort; non-fatal.
#    The PR exists at this point, so nothing below may change the exit status:
#    every failure is reported on stderr and swallowed.
if [[ -n "$label" && -n "${number:-}" && "${number:-}" != "null" ]]; then
  label_id="$(curl --max-time 15 -s -H "Authorization: token ${FORGEJO_TOKEN}" \
    "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/labels" \
    | jq -r --arg n "$label" '.[] | select(.name==$n) | .id' | head -n1)" \
    || label_id=""
  if [[ -n "$label_id" && "$label_id" != "null" ]]; then
    # -f makes curl fail on HTTP >= 400, so "applied" is printed only on success.
    if curl --max-time 15 -s -f -o /dev/null \
         -H "Authorization: token ${FORGEJO_TOKEN}" -H "Content-Type: application/json" \
         -X POST "${FORGEJO_URL}/api/v1/repos/${FORGEJO_USER}/${repo}/issues/${number}/labels" \
         -d "{\"labels\":[${label_id}]}"; then
      echo "label '${label}' applied" >&2
    else
      echo "open-pr: could not apply label '${label}' — skipped." >&2
    fi
  else
    echo "open-pr: label '${label}' not found in repo — skipped." >&2
  fi
fi

# Explicit success: the exit status must not be inherited from the label step.
exit 0
|
||||
146
skills/ingest/scripts/run-ingest.sh
Normal file
146
skills/ingest/scripts/run-ingest.sh
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/run-ingest.sh
|
||||
# Post-pi orchestrator. Runs OUTSIDE pi's loop, on vm101, in the genome checkout.
|
||||
# Consumes .ingest-manifest.json (written by the ingest skill) and performs every
|
||||
# deterministic step — index, log, scoped lint, PR — so pi's context stays clean.
|
||||
#
|
||||
# run-ingest.sh <genome_name> [manifest_path]
|
||||
#
|
||||
# Emits a single JSON result line on stdout for n8n to parse.
|
||||
# =============================================================================
|
||||
set -euo pipefail

genome="${1:?usage: run-ingest.sh <genome> [manifest]}"
manifest="${2:-.ingest-manifest.json}"
# Directory holding this script and its sibling helpers.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Emit the machine-readable error result on stdout and abort.
#   $1 = stage that failed, $2 = human-readable reason
fail() {
  jq -nc --arg stage "$1" --arg reason "$2" \
    '{status:"error", stage:$stage, reason:$reason}'
  exit 1
}

# jq is checked with a hand-written JSON line because fail() itself needs jq.
command -v jq >/dev/null 2>&1 || { echo '{"status":"error","reason":"jq missing"}'; exit 1; }
command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by index-append.py)"
[[ -f "$manifest" ]] || fail "manifest" "manifest not found: ${manifest}"

# --- validate the manifest BEFORE trusting any field (LLM output is stochastic) ---
# 1) well-formed JSON object with a string raw_source and an array of pages
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
   "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "invalid manifest: need object with string raw_source and array pages"

# 2) every page must be an object whose path is a string, lives under wiki/, and contains
#    no '..' (no traversal). Fail-closed: the check passes only when jq positively answers
#    "true", so a jq error (e.g. a page that is not an object) rejects the manifest.
jq -e 'all(.pages[];
           type=="object"
           and (.path|type=="string")
           and (.path|startswith("wiki/"))
           and (.path|contains("..")|not))' "$manifest" >/dev/null 2>&1 \
  || fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"

# --- read manifest scalars ---
raw_source="$(jq -r '.raw_source' "$manifest")"
# model name comes from the orchestrator/wrapper (INGEST_MODEL); the agent cannot know its
# own tag, so we do not trust a self-reported manifest field. Fall back only if unset.
model="${INGEST_MODEL:-$(jq -r '.model // "unknown"' "$manifest")}"
reasoning="$(jq -r '.reasoning // "Ingest."' "$manifest")"
pr_summary="$(jq -r '.pr_summary // "Ingest."' "$manifest")"
contradictions="$(jq -r '.contradictions // "None"' "$manifest")"

[[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"

slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"

# --- collect touched paths ---
mapfile -t created_paths  < <(jq -r '.pages[] | select(.status=="created")  | .path' "$manifest")
mapfile -t modified_paths < <(jq -r '.pages[] | select(.status=="modified") | .path' "$manifest")
# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on bash < 4.4.
all_paths=( ${created_paths[@]+"${created_paths[@]}"} ${modified_paths[@]+"${modified_paths[@]}"} )
[[ ${#all_paths[@]} -gt 0 ]] || fail "manifest" "no pages reported"

conflict_label=""

# NOTE: no rollback. Steps below mutate the working tree in order (index → log → commit).
# All are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway,
# nothing is committed (open-pr is the only committer) — the operator re-runs, or inspects
# wiki/ if log-append already wrote a line. The manifest is removed only on full success.
#
# stdout of this script is reserved for the single JSON result line, so the helpers'
# own stdout chatter ("index-append: …", "run_id=…") is redirected to stderr.

# --- 1. index entries (created pages only), inserted in order ---
# Fields are joined with the ASCII unit separator (0x1f), not a tab: tab is IFS
# *whitespace*, so `read` collapses consecutive tabs and an empty summary would shift
# maturity into the summary column. Control characters inside a field are flattened to
# a space so one page is always exactly one input line.
while IFS=$'\x1f' read -r path summary maturity; do
  [[ -z "$path" ]] && continue
  link="${path#wiki/}"; link="${link%.md}"          # e.g. sources/foo
  folder="${link%%/*}"
  case "$folder" in
    sources)  section="Sources"  ;;
    entities) section="Entities" ;;
    concepts) section="Concepts" ;;
    queries)
      if [[ "$link" == queries/conflict-* ]]; then section="Conflicts"; conflict_label="CONFLICT"
      else section="Queries"; fi ;;
    *) section="Sources" ;;
  esac

  if [[ "$section" == "Conflicts" ]]; then
    entry="- [[${link}]]"                           # conflicts: slug only
  else
    entry="- [[${link}]] — ${summary} \`maturity: ${maturity}\`"
  fi

  python3 "${SCRIPTS}/index-append.py" --section "$section" --entry "$entry" >&2 \
    || fail "index" "index-append failed for ${path}"
done < <(jq -r '.pages[] | select(.status=="created")
                | [.path, (.summary // ""), (.maturity // "draft")]
                | map(tostring | gsub("[\r\n\t\u001f]+"; " "))
                | join("\u001f")' "$manifest")

# --- 2. log entry ---
# Every reported page as a wikilink, e.g. "[[sources/foo]], [[entities/acme]]".
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \
  --context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" >&2 \
  || fail "log" "log-append failed"

# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" && lint_rc=0 || lint_rc=$?

# --- 4. assemble the PR body (manifest tables + lint results) ---
body="$(mktemp)"
trap 'rm -f "$body"' EXIT          # auto-clean on any exit (success, fail(), or crash)

{
  echo "## Summary"
  echo "$pr_summary"
  echo ""
  echo "## Pages"
  echo "| Path | Status | Maturity |"
  echo "|------|--------|----------|"
  jq -r '.pages[] | "| `\(.path)` | \(.status) | \(.maturity // "draft") |"' "$manifest"
  echo ""
  echo "## Contradictions"
  echo "$contradictions"
  echo ""
  echo "## Scoped Lint (post-ingest)"
  echo '```'
  echo "$lint_out"
  echo '```'
} > "$body"

# --- 5. open the PR ---
pr_args=( --slug "$slug" --title "feat: ingest ${slug}" --body-file "$body" --base "${INGEST_BASE:-main}" )
[[ -n "$conflict_label" ]] && pr_args+=( --label "$conflict_label" )
pr_out="$( bash "${SCRIPTS}/open-pr.sh" "${pr_args[@]}" 2>&1 )" && pr_rc=0 || pr_rc=$?
# open-pr.sh announces the PR as "PR opened: <url>"; take the first such line.
pr_url="$(printf '%s\n' "$pr_out" | sed -n 's/^PR opened: //p' | head -n1)"

# --- final result line for n8n ---
jq -nc \
  --arg status "$([[ $pr_rc -eq 0 ]] && echo ok || echo pr_failed)" \
  --arg slug "$slug" \
  --arg pr_url "$pr_url" \
  --argjson lint_clean "$([[ $lint_rc -eq 0 ]] && echo true || echo false)" \
  --argjson conflict "$([[ -n "$conflict_label" ]] && echo true || echo false)" \
  --arg detail "$pr_out" \
  '{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'

# The manifest is a single file that is overwritten on each run. On full success it is
# removed so that a stale manifest cannot be reprocessed by mistake; on failure it is kept
# for the re-run and the script exits non-zero.
if [[ $pr_rc -eq 0 ]]; then
  rm -f "$manifest"
else
  exit 1
fi
|
||||
55
skills/ingest/scripts/scoped-lint.sh
Normal file
55
skills/ingest/scripts/scoped-lint.sh
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/scoped-lint.sh
|
||||
# Run the framework's validation on ONLY the files touched this session.
|
||||
# Reuses lib/lint.sh + lib/output.sh — same checks as `make lint`, scoped.
|
||||
#
|
||||
# KG_LIB_DIR=/opt/knowledge-genome-setup/lib \
|
||||
# scoped-lint.sh <genome_name> wiki/sources/x.md wiki/entities/y.md
|
||||
#
|
||||
# Exits non-zero if any hard error is found, so the agent notices.
|
||||
# Findings are printed (stderr from the lint functions + a summary on stdout).
|
||||
# =============================================================================
|
||||
set -euo pipefail

: "${KG_LIB_DIR:?set KG_LIB_DIR to the framework lib/ dir (e.g. /opt/knowledge-genome-orchestrator/lib)}"

# Verify both library files up front so a bad KG_LIB_DIR yields a readable
# message instead of a bare `source` failure.
for _f in output.sh lint.sh; do
  if [[ ! -f "${KG_LIB_DIR}/${_f}" ]]; then
    echo "scoped-lint: missing ${KG_LIB_DIR}/${_f}" >&2
    exit 1
  fi
done

# shellcheck source=/dev/null
source "${KG_LIB_DIR}/output.sh"
# shellcheck source=/dev/null
source "${KG_LIB_DIR}/lint.sh"

# First argument is the genome name; everything after it is a file to lint.
genome="${1:?usage: scoped-lint.sh <genome> <file...>}"
shift
if [[ $# -eq 0 ]]; then
  echo "scoped-lint: no files given" >&2
  exit 1
fi

# Running totals. `count` is the number of files requested, including any
# that turn out to be missing.
count=$#
errors=0
stale=0

for f in "$@"; do
  if [[ ! -f "$f" ]]; then
    warn "scoped-lint: missing file (skipped): $f"
    continue
  fi

  # Each check's exit status is added to the error total. The `|| var=$?`
  # form records a non-zero status without tripping `set -e`.
  fe=0
  lint_markdown_file "$f" "$genome" || fe=$?
  pce=0
  check_privacy_consistency "$f" || pce=$?
  pse=0
  check_page_size "$f" || pse=$?
  errors=$(( errors + fe + pce + pse ))

  # Staleness is tallied separately and does not affect the exit status.
  st=0
  check_knowledge_decay "$f" || st=$?
  stale=$(( stale + st ))

  check_broken_links "$f" || true   # warnings only
done

echo ""
echo "scoped-lint: ${errors} error(s), ${stale} stale across ${count} file(s)"

# Exit 1 when at least one error was counted, 0 otherwise.
if [[ $errors -ne 0 ]]; then
  exit 1
fi
|
||||
23
skills/ingest/scripts/slug.sh
Normal file
23
skills/ingest/scripts/slug.sh
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# skills/ingest/scripts/slug.sh
|
||||
# Derive a wiki slug from a path, filename, or title string.
|
||||
# slug.sh "raw/articles/My Source.md" -> my-source
|
||||
# slug.sh "Some Concept Name" -> some-concept-name
|
||||
# =============================================================================
|
||||
set -euo pipefail

input="${1:?usage: slug.sh <path-or-title>}"

# Strip directory and extension when given a path.
# NOTE: "%.*" removes everything after the LAST dot, so a title that contains a
# dot (e.g. "Node.js") loses that tail as well.
base="${input##*/}"
base="${base%.*}"

# LC_ALL=C pins both tools to byte/ASCII semantics. In other locales the
# [:upper:] class and the a-z range are locale-defined and may let non-ASCII or
# even uppercase characters through. Under C every byte outside [a-z0-9]
# (including each byte of a multi-byte character) becomes part of a hyphen run,
# so the result is always lowercase ASCII letters, digits and hyphens.
slug="$(printf '%s\n' "$base" \
  | LC_ALL=C tr '[:upper:]' '[:lower:]' \
  | LC_ALL=C sed -E 's/[^a-z0-9]+/-/g; s/-{2,}/-/g; s/^-+//; s/-+$//')"

# An all-symbols input (e.g. "!!!.md") collapses to "" — refuse rather than emit a
# broken/empty slug that would produce an invalid branch name downstream.
[[ -n "$slug" ]] || { echo "slug: empty result for input '${input}'" >&2; exit 1; }
printf '%s\n' "$slug"
|
||||
|
|
@ -2,11 +2,11 @@
|
|||
|
||||
## Identity
|
||||
|
||||
| Field | Value |
|
||||
|--------|-------|
|
||||
| Genome | `{{GENOME_NAME}}` |
|
||||
| Domain | `{{GENOME_DESC}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Field | Value |
|
||||
| ------ | -------------------------------------------------- |
|
||||
| Genome | `{{GENOME_NAME}}` |
|
||||
| Domain | `{{GENOME_DESC}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{GENOME_NAME}}` |
|
||||
|
||||
**Role:** Wiki maintainer for `{{GENOME_NAME}}`.
|
||||
|
|
@ -14,14 +14,28 @@
|
|||
|
||||
---
|
||||
|
||||
## Linked Project
|
||||
|
||||
| Field | Value |
|
||||
| --------------- | --------------------- |
|
||||
| Project repo | `{{LINKED_PROJECT}}` |
|
||||
| Branch | `main` |
|
||||
| Allowed tasks | `readme, tests, code` |
|
||||
| Preferred model | `auto` |
|
||||
|
||||
If `Project repo` is `none`, this genome is knowledge-only — phase-2 project work
|
||||
does not apply. When set, after a wiki PR is **merged**, the orchestrator may trigger
|
||||
work on this repo within _Allowed tasks_. The agent never touches the project repo
|
||||
during ingest.
|
||||
|
||||
## PRIVATE_CONTEXT
|
||||
|
||||
**Default: `disabled`** — never infer; require explicit operator declaration per session.
|
||||
|
||||
| State | Behavior |
|
||||
|-------|----------|
|
||||
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
||||
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
||||
| State | Behavior |
|
||||
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `disabled` | `raw/private/` and `wiki/private/` do not exist. No read, list, grep, or summary on private paths. All outputs safe for collaborators. |
|
||||
| `enabled` | Operator has confirmed `git-crypt unlock` ran on host. Read/write `private/` authorized. All outputs from private data go exclusively to `wiki/private/`. Prefix every response drawing on private data: `[PRIVATE DATA INCLUDED]`. Never leak private synthesis into public wiki paths. |
|
||||
|
||||
Pre-commit `PLAINTEXT LEAK DETECTED`: stop immediately. Do not use `--no-verify`. Ask operator to verify `.gitattributes` and encryption state.
|
||||
|
||||
|
|
@ -41,6 +55,7 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
|||
8. Every PR must use `templates/pr-description.md`. Do not omit the tabular summary.
|
||||
|
||||
### NEVER
|
||||
|
||||
- Load `wiki/log.md` in full — read only the tail injected by the orchestrator.
|
||||
- Rewrite `wiki/index.md` to reorder entries — append only; sorting is automated.
|
||||
- Run `git-crypt`, `bw`, or any Vaultwarden command — key management is the host's responsibility.
|
||||
|
|
@ -48,6 +63,7 @@ Session end or return to `disabled`: remind operator to run `git-crypt lock` on
|
|||
- Merge PRs — human approval required.
|
||||
|
||||
### ASK FIRST
|
||||
|
||||
- Deleting any wiki page.
|
||||
- Changing `maturity` from `stable` to `deprecated`.
|
||||
- Writing to `wiki/private/` when PRIVATE_CONTEXT state is ambiguous.
|
||||
|
|
@ -70,7 +86,8 @@ Execute in this order before any file operation:
|
|||
## Workflows
|
||||
|
||||
### Ingest
|
||||
*Triggered by new file in `raw/`.*
|
||||
|
||||
_Triggered by new file in `raw/`._
|
||||
|
||||
1. Read source once.
|
||||
2. Create `wiki/sources/<slug>.md` — summary + key points.
|
||||
|
|
@ -82,12 +99,14 @@ Execute in this order before any file operation:
|
|||
8. Run scoped lint on pages created or modified in this session. Report issues in PR description. Do not auto-fix.
|
||||
9. Commit on `feat/ai-ingest-<slug>`. Open PR using `templates/pr-description.md`.
|
||||
|
||||
*Private source* (`PRIVATE_CONTEXT: enabled` required):
|
||||
_Private source_ (`PRIVATE_CONTEXT: enabled` required):
|
||||
|
||||
- All output → `wiki/private/<slug>.md` only.
|
||||
- PR title: `[PRIVATE] ingest: <slug>`.
|
||||
|
||||
### Query
|
||||
*Triggered by operator question.*
|
||||
|
||||
_Triggered by operator question._
|
||||
|
||||
1. `qmd search "<query>"` → identify candidate pages.
|
||||
2. Read candidate pages directly.
|
||||
|
|
@ -96,10 +115,11 @@ Execute in this order before any file operation:
|
|||
5. Append entry to `wiki/index.md` under Queries.
|
||||
6. Append log entry: `QUERY | <subject>`.
|
||||
|
||||
*For general orientation without a specific query: read `wiki/index.md` directly.*
|
||||
_For general orientation without a specific query: read `wiki/index.md` directly._
|
||||
|
||||
### Lint
|
||||
*Triggered by operator with bash pre-scan output.*
|
||||
|
||||
_Triggered by operator with bash pre-scan output._
|
||||
|
||||
Pre-requisite: operator runs `bash scripts/lint-genomes.sh` and provides output to this session.
|
||||
The script handles deterministically: broken links, knowledge decay, page size, frontmatter validation.
|
||||
|
|
@ -119,13 +139,14 @@ Append log entry: `LINT | <summary of findings>`.
|
|||
## File Conventions
|
||||
|
||||
### Frontmatter
|
||||
|
||||
Required on every wiki page:
|
||||
|
||||
```yaml
|
||||
---
|
||||
title: "Strict String Title"
|
||||
type: source | entity | concept | query | conflict | private
|
||||
domain: {{GENOME_NAME}}
|
||||
domain: {{GENOME_NAME}}
|
||||
tags: [lowercase, hyphen-separated]
|
||||
maturity: draft | stable | deprecated
|
||||
last_updated: YYYY-MM-DD
|
||||
|
|
@ -138,19 +159,25 @@ private: true | false
|
|||
- `deprecated` — superseded. Add `> **DEPRECATED:** <reason>` callout at top of body.
|
||||
|
||||
### Links
|
||||
|
||||
- Internal: `[[folder/file]]` — Obsidian wikilinks only. Never `[text](url)` for internal refs.
|
||||
- Cross-genome: `[[../genome-target/wiki/folder/file]]`.
|
||||
- External: `[text](https://...)`.
|
||||
|
||||
### Index entries
|
||||
|
||||
Append at bottom of relevant section in `wiki/index.md`:
|
||||
|
||||
```
|
||||
- [[folder/slug]] — One-line summary. `maturity: draft`
|
||||
```
|
||||
|
||||
Never reorder. Alphabetical sort is handled by the pre-commit hook.
|
||||
|
||||
### Log entries
|
||||
|
||||
Append one entry per operation to `wiki/log.md`:
|
||||
|
||||
```markdown
|
||||
## [YYYY-MM-DD] TYPE | Subject
|
||||
|
||||
|
|
@ -160,6 +187,7 @@ Append one entry per operation to `wiki/log.md`:
|
|||
- output_written: `[[path/C]]`
|
||||
- reasoning: One sentence — what changed and why.
|
||||
```
|
||||
|
||||
Valid TYPEs: `INGEST` `LINT` `QUERY` `CONFLICT` `CONFIG` `SECURITY`
|
||||
|
||||
Parse: `grep "^## \[" wiki/log.md | tail -5`
|
||||
|
|
@ -177,22 +205,26 @@ When new evidence contradicts an existing wiki claim:
|
|||
---
|
||||
title: "Conflict: <concept>"
|
||||
type: conflict
|
||||
domain: {{GENOME_NAME}}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: draft
|
||||
last_updated: YYYY-MM-DD
|
||||
private: false
|
||||
---
|
||||
```
|
||||
|
||||
```markdown
|
||||
## Conflict: <concept>
|
||||
|
||||
**Claim A (existing):** [[path/to/existing-page]]
|
||||
|
||||
> Summary of current wiki position.
|
||||
|
||||
**Claim B (new):** [[path/to/new-source]]
|
||||
|
||||
> Summary of contradicting evidence.
|
||||
|
||||
**Assessment:**
|
||||
|
||||
- Confidence A: high | medium | low — <reason>
|
||||
- Confidence B: high | medium | low — <reason>
|
||||
- Recommendation: `accept_b` | `keep_a` | `requires_human_review`
|
||||
|
|
@ -212,20 +244,22 @@ private: false
|
|||
- `maturity: draft` not updated in **90 days** → flag during lint.
|
||||
|
||||
Flagged pages: prepend to body:
|
||||
|
||||
```markdown
|
||||
> **⚠️ STALE:** Last validated {{last_updated}}. Re-validation required.
|
||||
```
|
||||
|
||||
Propose re-validation task. Do not change `maturity` without new source evidence.
|
||||
|
||||
---
|
||||
|
||||
## Collaboration
|
||||
|
||||
| Role | Access | Permitted |
|
||||
|------|--------|-----------|
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Public only | `PRIVATE_CONTEXT` must be `disabled`; never send private files outside local network |
|
||||
| Role | Access | Permitted |
|
||||
| -------------- | ----------------- | ------------------------------------------------------------------------------------ |
|
||||
| Owner | Full — key holder | Read/write everywhere |
|
||||
| Collaborator | No key | Push to `raw/articles`, `raw/transcripts`, `raw/code-packs`, `raw/assets` |
|
||||
| Local AI agent | Conditional | `private/` only when `PRIVATE_CONTEXT: enabled` |
|
||||
| Cloud AI model | Public only | `PRIVATE_CONTEXT` must be `disabled`; never send private files outside local network |
|
||||
|
||||
Grant collaborator: add as Forgejo contributor with Write role. Never share the git-crypt key.
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
## Identity
|
||||
|
||||
| Field | Value |
|
||||
|--------|-------|
|
||||
| Repo | `{{MASTER_REPO}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Field | Value |
|
||||
| ------ | -------------------------------------------------- |
|
||||
| Repo | `{{MASTER_REPO}}` |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||
|
||||
**Role:** Cross-genome coordinator for the Knowledge Genome network.
|
||||
|
|
@ -32,14 +32,17 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
## Global Security Rules
|
||||
|
||||
### PRIVATE_CONTEXT scope
|
||||
|
||||
- Toggle is **per-genome and per-session**. Enabling for `genome-finance` does NOT enable for `genome-dev`.
|
||||
- Cloud LLM models: `PRIVATE_CONTEXT` must be `disabled` for all genomes. Private data never leaves the local network.
|
||||
|
||||
### Log sanitization
|
||||
|
||||
- Never print decrypted secrets, session tokens, or key contents to stdout or log files.
|
||||
- Document only `run_id` and genome name — never the key value.
|
||||
|
||||
### Key management
|
||||
|
||||
- Key injection is the host's responsibility — executed before this session starts.
|
||||
- Never write, suggest, or generate scripts that save `.key` files to disk.
|
||||
|
||||
|
|
@ -54,12 +57,14 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
5. Per-genome `AGENTS.md` governs all wiki operations within that genome. This file governs boundaries only.
|
||||
|
||||
### NEVER
|
||||
|
||||
- Load multiple `wiki/index.md` files simultaneously for cross-genome comparison — use qmd.
|
||||
- Run `git-crypt`, `bw`, or Vaultwarden commands — host responsibility.
|
||||
- Modify files in more than one genome in the same operation.
|
||||
- Modify `core-karpathy` in any way.
|
||||
|
||||
### ASK FIRST
|
||||
|
||||
- Any operation that touches two or more genomes.
|
||||
- Updating submodule pointers in master.
|
||||
- Any key rotation procedure.
|
||||
|
|
@ -77,7 +82,8 @@ Genome-level operations are governed by the genome's `AGENTS.md`, not this file.
|
|||
---
|
||||
|
||||
## Cross-Genome Lint
|
||||
*Manual, monthly — requires operator initiation. Not automated.*
|
||||
|
||||
_Manual, monthly — requires operator initiation. Not automated._
|
||||
|
||||
1. Use `qmd search "<concept>"` to find pages covering the same concept across genomes.
|
||||
2. Identify:
|
||||
|
|
|
|||
|
|
@ -1,25 +1,31 @@
|
|||
## Summary
|
||||
|
||||
<!-- One sentence: goal of this session and source processed. -->
|
||||
|
||||
## Pages Created
|
||||
| Path | Type | Maturity |
|
||||
|------|------|----------|
|
||||
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
||||
|
||||
| Path | Type | Maturity |
|
||||
| ----------------- | --------------------------------- | -------- |
|
||||
| `[[folder/slug]]` | entity / concept / source / query | draft |
|
||||
|
||||
## Pages Modified
|
||||
| Path | Change |
|
||||
|------|--------|
|
||||
|
||||
| Path | Change |
|
||||
| ----------------- | ----------------------------------------- |
|
||||
| `[[folder/slug]]` | Added cross-reference to `[[other/page]]` |
|
||||
|
||||
## Contradictions Found
|
||||
|
||||
- [ ] None
|
||||
- [ ] `n` conflict file(s) created — listed below
|
||||
|
||||
## Private Data Accessed
|
||||
|
||||
- [ ] No — `PRIVATE_CONTEXT: disabled`
|
||||
- [ ] Yes — `PRIVATE_CONTEXT: enabled` · outputs in `wiki/private/` only
|
||||
|
||||
## Scoped Lint (post-ingest)
|
||||
|
||||
- [ ] Frontmatter valid on all touched pages
|
||||
- [ ] No broken wikilinks on touched pages
|
||||
- [ ] No issues found
|
||||
|
|
|
|||
45
templates/readme-master.md
Normal file
45
templates/readme-master.md
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# {{MASTER_REPO}}
|
||||
|
||||
Master (umbrella) repository for the Knowledge Genome network.
|
||||
|
||||
| Field | Value |
|
||||
| ---------- | -------------------------------------------------- |
|
||||
| Owner | `{{FORGEJO_USER}}` |
|
||||
| Remote | `{{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}` |
|
||||
| Scaffolded | `{{DATE}}` |
|
||||
|
||||
## What this repo is
|
||||
|
||||
This repository does **not** hold knowledge itself. It is the orchestrator: each genome
|
||||
is a Git submodule, plus `core-karpathy` as a read-only reference pattern. Cross-genome
|
||||
coordination rules live in `AGENTS.md`.
|
||||
|
||||
```text
|
||||
{{MASTER_REPO}}/
|
||||
├── core-karpathy/ ← reference pattern — read-only, never modify
|
||||
├── genome-*/ ← one submodule per genome (own AGENTS.md, own git-crypt)
|
||||
└── AGENTS.md ← cross-genome coordinator (boundaries only)
|
||||
```
|
||||
|
||||
## Working with submodules
|
||||
|
||||
```bash
|
||||
# Clone with all genomes
|
||||
git clone --recurse-submodules {{FORGEJO_URL}}/{{FORGEJO_USER}}/{{MASTER_REPO}}.git
|
||||
|
||||
# Update every genome to the latest commit on its tracked remote branch
|
||||
git submodule update --remote --merge
|
||||
|
||||
# Operate inside a single genome (one genome at a time — see AGENTS.md)
|
||||
cd genome-<name>
|
||||
```
|
||||
|
||||
## Rules of the road
|
||||
|
||||
- Operate within **one genome at a time**; no commits spanning multiple genomes.
|
||||
- `core-karpathy` is read-only.
|
||||
- Never commit to `main` in a genome — PRs only, no self-merge.
|
||||
- Private data (`**/private/**`) is git-crypt encrypted and never leaves the local network.
|
||||
|
||||
Genome-level operations are governed by each genome's own `AGENTS.md`. This README and the
|
||||
master `AGENTS.md` govern boundaries only.
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
---
|
||||
title: "Index — {{GENOME_NAME}}"
|
||||
type: index
|
||||
domain: {{GENOME_NAME}}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: stable
|
||||
last_updated: {{DATE}}
|
||||
last_updated: {{DATE}}
|
||||
private: false
|
||||
---
|
||||
|
||||
|
|
@ -19,27 +19,28 @@ Entry format: `- [[folder/slug]] — One-line summary. \`maturity: <value>\``
|
|||
---
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*Ingested raw materials. One entry per processed source.*
|
||||
|
||||
_Ingested raw materials. One entry per processed source._
|
||||
|
||||
## Entities (`wiki/entities/`)
|
||||
*People, organisations, tools, projects.*
|
||||
|
||||
_People, organisations, tools, projects._
|
||||
|
||||
## Concepts (`wiki/concepts/`)
|
||||
*Theories, methodologies, patterns, architectural decisions.*
|
||||
|
||||
_Theories, methodologies, patterns, architectural decisions._
|
||||
|
||||
## Queries (`wiki/queries/`)
|
||||
*Synthesised answers worth preserving. Archived explorations and analyses.*
|
||||
|
||||
_Synthesised answers worth preserving. Archived explorations and analyses._
|
||||
|
||||
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
||||
*Created automatically when the agent detects contradictions between sources.*
|
||||
*Do not summarise entries here — list slugs only to avoid surfacing unresolved claims.*
|
||||
*Remove entry once the operator has resolved and closed the corresponding PR.*
|
||||
|
||||
_Created automatically when the agent detects contradictions between sources._
|
||||
_Do not summarise entries here — list slugs only to avoid surfacing unresolved claims._
|
||||
_Remove entry once the operator has resolved and closed the corresponding PR._
|
||||
|
||||
## Private Synthesis (`wiki/private/`)
|
||||
*Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo.*
|
||||
*List slug names ONLY. Do not append summaries — prevents metadata leakage.*
|
||||
|
||||
_Restricted access. Requires `PRIVATE_CONTEXT: enabled` and unlocked repo._
|
||||
_List slug names ONLY. Do not append summaries — prevents metadata leakage._
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
---
|
||||
title: "Operations Log — {{GENOME_NAME}}"
|
||||
type: log
|
||||
domain: {{GENOME_NAME}}
|
||||
domain: {{GENOME_NAME}}
|
||||
maturity: stable
|
||||
last_updated: {{DATE}}
|
||||
last_updated: {{DATE}}
|
||||
private: false
|
||||
---
|
||||
|
||||
|
|
@ -22,11 +22,13 @@ Append new entries at the bottom using the format defined below.
|
|||
## Entry Format
|
||||
|
||||
### Required header (enables shell parsing):
|
||||
|
||||
```text
|
||||
## [YYYY-MM-DD] TYPE | Subject or title
|
||||
```
|
||||
|
||||
### Required metadata block for all agent-generated entries:
|
||||
|
||||
```markdown
|
||||
- run_id: `<short-uuid or session-identifier>`
|
||||
- model: `<model-name-and-version>`
|
||||
|
|
@ -38,6 +40,7 @@ Append new entries at the bottom using the format defined below.
|
|||
**Valid TYPEs:** `INGEST` | `LINT` | `QUERY` | `CONFLICT` | `CONFIG` | `SECURITY`
|
||||
|
||||
**Parse examples:**
|
||||
|
||||
```bash
|
||||
# Last 5 entries
|
||||
grep "^## \[" wiki/log.md | tail -5
|
||||
|
|
@ -55,6 +58,6 @@ grep "^## \[2026-05" wiki/log.md
|
|||
|
||||
- run_id: `system-init`
|
||||
- model: `setup-knowledge-genome.sh`
|
||||
- context_read: *(none — initial scaffold)*
|
||||
- context_read: _(none — initial scaffold)_
|
||||
- output_written: `[[wiki/index.md]]`, `[[wiki/log.md]]`, `[[AGENTS.md]]`
|
||||
- reasoning: Initial directory structure and encryption layer initialized by setup script.
|
||||
|
|
|
|||
56
tests/README.md
Normal file
56
tests/README.md
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# Tests
|
||||
|
||||
Deterministic tests for the mechanical layer of the framework — **no LLM, no GPU, no
|
||||
network**. They simulate pi's output with fixtures and exercise the scripts directly, so
|
||||
they run anywhere (laptop, CI, a git hook). They do **not** belong on vm101 or in n8n.
|
||||
|
||||
## What's covered
|
||||
|
||||
| File | Covers |
|
||||
|------|--------|
|
||||
| `scripts.bats` | `slug.sh`, `log-append.sh`, `index-append.py` (insert, sort, bump, idempotent) |
|
||||
| `lint.bats` | `lib/lint.sh` validators + `scoped-lint.sh` reuse |
|
||||
| `structure.bats` | `lib/structure.sh` report/sync |
|
||||
| `run-ingest.bats` | `run-ingest.sh` end-to-end (DRY_RUN, local bare remote) — needs `jq` |
|
||||
|
||||
`run-ingest.bats` auto-`skip`s if `jq` is missing; everything else needs only bash + git
|
||||
(+ `python3` for the index tests).
|
||||
|
||||
## Install bats
|
||||
|
||||
```bash
|
||||
# Debian/Ubuntu
|
||||
sudo apt install bats
|
||||
# or pinned, as a vendored submodule
|
||||
git submodule add https://github.com/bats-core/bats-core.git tests/bats
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
bats tests/ # whole suite
|
||||
bats tests/lint.bats # one file
|
||||
bats -f "sorted" tests/scripts.bats # filter by name
|
||||
```
|
||||
|
||||
Each test builds its own throwaway genome under `BATS_TEST_TMPDIR` (auto-cleaned) with a
|
||||
local bare git remote, so `open-pr.sh --DRY_RUN` can branch/commit/push without touching
|
||||
Forgejo.
|
||||
|
||||
## Makefile targets
|
||||
|
||||
```make
|
||||
test:
|
||||
@bats tests/
|
||||
|
||||
verify-structure:
|
||||
@bash scripts/verify-genomes.sh
|
||||
|
||||
sync-structure:
|
||||
@bash scripts/verify-genomes.sh --sync
|
||||
```
|
||||
|
||||
## Note on `helpers.bash`
|
||||
|
||||
`FIXTURE_DIRS` in `helpers.bash` must match `GENOME_DIRS` in `lib/structure.sh`. If you
|
||||
change the canonical layout, update both (the structure tests assume a clean baseline).
|
||||
98
tests/helpers.bash
Normal file
98
tests/helpers.bash
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
#!/usr/bin/env bash
|
||||
# tests/helpers.bash — shared helpers for the bats suite.
|
||||
|
||||
REPO_ROOT="$(cd "${BATS_TEST_DIRNAME}/.." && pwd)"
|
||||
LIB_DIR="${REPO_ROOT}/lib"
|
||||
SKILL_SCRIPTS="${REPO_ROOT}/skills/ingest/scripts"
|
||||
|
||||
# Canonical dirs a fresh genome must contain (kept in sync with lib/structure.sh).
|
||||
FIXTURE_DIRS=(
|
||||
raw/articles raw/transcripts raw/code-packs raw/assets raw/private
|
||||
wiki/sources wiki/entities wiki/concepts wiki/queries wiki/private
|
||||
)
|
||||
|
||||
# make_fixture_genome → echoes the path to a throwaway genome checkout with a
|
||||
# local bare remote, the full canonical structure, and rendered index/log.
|
||||
# Created under BATS_TEST_TMPDIR (which bats cleans up automatically) when set; falls back to /tmp otherwise.
|
||||
make_fixture_genome() {
|
||||
local base; base="$(mktemp -d "${BATS_TEST_TMPDIR:-/tmp}/genome.XXXXXX")"
|
||||
git init --bare -q "${base}/origin.git"
|
||||
|
||||
local g="${base}/genome"
|
||||
local d
|
||||
for d in "${FIXTURE_DIRS[@]}"; do mkdir -p "${g}/${d}"; touch "${g}/${d}/.gitkeep"; done
|
||||
|
||||
cat > "${g}/wiki/index.md" <<'EOF'
|
||||
---
|
||||
title: "Index — genome-test"
|
||||
type: index
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
last_updated: 2026-01-01
|
||||
private: false
|
||||
---
|
||||
|
||||
# Master Index: genome-test
|
||||
|
||||
---
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*Ingested raw materials.*
|
||||
|
||||
|
||||
## Entities (`wiki/entities/`)
|
||||
*People, tools.*
|
||||
|
||||
|
||||
## Concepts (`wiki/concepts/`)
|
||||
*Patterns.*
|
||||
|
||||
|
||||
## Queries (`wiki/queries/`)
|
||||
*Answers.*
|
||||
|
||||
|
||||
## Conflicts Pending Review (`wiki/queries/conflict-*.md`)
|
||||
*slugs only.*
|
||||
EOF
|
||||
|
||||
cat > "${g}/wiki/log.md" <<'EOF'
|
||||
---
|
||||
title: "Operations Log — genome-test"
|
||||
type: log
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
last_updated: 2026-01-01
|
||||
private: false
|
||||
---
|
||||
|
||||
# Operations Log
|
||||
|
||||
---
|
||||
|
||||
## [2026-01-01] CONFIG | scaffolded
|
||||
- run_id: `init`
|
||||
EOF
|
||||
|
||||
echo "raw test" > "${g}/raw/articles/test.md"
|
||||
|
||||
mkdir -p "${base}/nohooks"
|
||||
|
||||
(
|
||||
cd "${g}"
|
||||
git init -q
|
||||
# Hermetic: ignore the user's global git config (signing, global hooks);
|
||||
# otherwise commit.gpgsign or a global core.hooksPath makes git commit fail here.
|
||||
git config commit.gpgsign false
|
||||
git config core.hooksPath "${base}/nohooks"
|
||||
git config user.email t@t
|
||||
git config user.name tester
|
||||
git add .
|
||||
git commit -qm init
|
||||
git branch -M main
|
||||
git remote add origin "${base}/origin.git"
|
||||
git push -q -u origin main
|
||||
) >/dev/null
|
||||
|
||||
echo "${g}"
|
||||
}
|
||||
71
tests/lint.bats
Normal file
71
tests/lint.bats
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/lint.bats — lib/lint.sh validators and the scoped-lint wrapper.
|
||||
load helpers
|
||||
|
||||
setup() {
|
||||
source "$LIB_DIR/output.sh"
|
||||
source "$LIB_DIR/lint.sh"
|
||||
}
|
||||
|
||||
write_page() { # write_page <path> <type> <domain>
|
||||
cat > "$1" <<EOF
|
||||
---
|
||||
title: "T"
|
||||
type: $2
|
||||
domain: $3
|
||||
tags: [x]
|
||||
maturity: draft
|
||||
last_updated: $(date +%F)
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
}
|
||||
|
||||
@test "lint_markdown_file: a clean page passes (0 errors)" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/good.md" source genome-test
|
||||
run lint_markdown_file "$G/wiki/sources/good.md" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "lint_markdown_file: invalid type + wrong domain are caught" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||
run lint_markdown_file "$G/wiki/sources/bad.md" genome-test
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "check_privacy_consistency: a private/ file without 'private: true' fails" {
|
||||
G="$(make_fixture_genome)"
|
||||
# page sits in wiki/private/ but is flagged private: false → leak
|
||||
write_page "$G/wiki/private/p.md" private genome-test
|
||||
run check_privacy_consistency "$G/wiki/private/p.md"
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "check_page_size: a >800-line page errors" {
|
||||
G="$(make_fixture_genome)"
|
||||
{ write_page "$G/wiki/sources/big.md" source genome-test; yes "x" | head -n 850 >> "$G/wiki/sources/big.md"; }
|
||||
run check_page_size "$G/wiki/sources/big.md"
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "scoped-lint: aggregates findings and exits non-zero on errors" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/bad.md" banana wrong-genome
|
||||
cd "$G"
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/bad.md
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"error(s)"* ]]
|
||||
}
|
||||
|
||||
@test "scoped-lint: a clean page passes (exit 0)" {
|
||||
G="$(make_fixture_genome)"
|
||||
write_page "$G/wiki/sources/good.md" source genome-test
|
||||
cd "$G"
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
173
tests/run-ingest.bats
Normal file
173
tests/run-ingest.bats
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/run-ingest.bats — end-to-end orchestrator test (no LLM, no network).
|
||||
# Simulates pi's output (a source page + manifest) and runs the mechanical pass.
|
||||
load helpers
|
||||
|
||||
@test "run-ingest: DRY_RUN end-to-end updates index + log and opens a dry PR" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
# --- simulate the semantic pass that pi would have done ---
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-03
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"model": "qwen3.5-9b",
|
||||
"reasoning": "Ingested the test source.",
|
||||
"pr_summary": "Ingest of test: 1 source page.",
|
||||
"contradictions": "None",
|
||||
"pages": [
|
||||
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||
export DRY_RUN=1
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"status":"ok"'* ]]
|
||||
[[ "$output" == *'"lint_clean":true'* ]]
|
||||
[[ "$output" == *'"conflict":false'* ]]
|
||||
|
||||
# side effects on the working tree
|
||||
grep -q 'sources/test-source' wiki/index.md
|
||||
grep -q 'INGEST | test' wiki/log.md
|
||||
git rev-parse --verify feat/ai-ingest-test
|
||||
}
|
||||
|
||||
@test "run-ingest: a conflict page is labelled and lands in the Conflicts section" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
cat > wiki/queries/conflict-pricing-2026-06-03.md <<'EOF'
|
||||
---
|
||||
title: "Conflict: pricing"
|
||||
type: conflict
|
||||
domain: genome-test
|
||||
maturity: draft
|
||||
last_updated: 2026-06-03
|
||||
private: false
|
||||
---
|
||||
conflict body
|
||||
EOF
|
||||
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"model": "m",
|
||||
"reasoning": "Flagged a contradiction.",
|
||||
"pr_summary": "Conflict on pricing.",
|
||||
"contradictions": "1 conflict file created — pricing",
|
||||
"pages": [
|
||||
{"path": "wiki/queries/conflict-pricing-2026-06-03.md", "summary": "ignored", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t"
|
||||
export DRY_RUN=1
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"conflict":true'* ]]
|
||||
# listed by slug under the Conflicts section
|
||||
grep -q 'queries/conflict-pricing-2026-06-03' wiki/index.md
|
||||
}
|
||||
|
||||
@test "run-ingest: records INGEST_MODEL in the log (manifest carries no model field)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-04
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
|
||||
# New contract: NO "model" field — the orchestrator supplies it via INGEST_MODEL.
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{
|
||||
"raw_source": "raw/articles/test.md",
|
||||
"reasoning": "Ingested the test source.",
|
||||
"pr_summary": "Ingest of test: 1 source page.",
|
||||
"contradictions": "None",
|
||||
"pages": [
|
||||
{"path": "wiki/sources/test-source.md", "summary": "A smoke-test source.", "maturity": "draft", "status": "created"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
export KG_LIB_DIR="$LIB_DIR"
|
||||
export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
|
||||
export INGEST_MODEL="qwen-test-tag"
|
||||
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *'"status":"ok"'* ]]
|
||||
grep -q 'qwen-test-tag' wiki/log.md
|
||||
}
|
||||
|
||||
@test "run-ingest: rejects a manifest path that escapes wiki/ (traversal)" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||
"pages":[{"path":"wiki/../etc/passwd","summary":"x","maturity":"draft","status":"created"}] }
|
||||
EOF
|
||||
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *'"status":"error"'* ]]
|
||||
}
|
||||
|
||||
@test "run-ingest: honours INGEST_BASE for the PR base" {
|
||||
command -v jq >/dev/null 2>&1 || skip "jq not installed"
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > wiki/sources/test-source.md <<'EOF'
|
||||
---
|
||||
title: "Test Source"
|
||||
type: source
|
||||
domain: genome-test
|
||||
tags: [t]
|
||||
maturity: draft
|
||||
last_updated: 2026-06-04
|
||||
private: false
|
||||
---
|
||||
body
|
||||
EOF
|
||||
cat > .ingest-manifest.json <<'EOF'
|
||||
{ "raw_source":"raw/articles/test.md","reasoning":"r","pr_summary":"s","contradictions":"None",
|
||||
"pages":[{"path":"wiki/sources/test-source.md","summary":"s","maturity":"draft","status":"created"}] }
|
||||
EOF
|
||||
export KG_LIB_DIR="$LIB_DIR" FORGEJO_URL=http://x FORGEJO_USER=u FORGEJO_TOKEN=t DRY_RUN=1
|
||||
export INGEST_BASE="develop"
|
||||
run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"develop"* ]]
|
||||
}
|
||||
88
tests/scripts.bats
Normal file
88
tests/scripts.bats
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/scripts.bats — unit tests for the deterministic skill scripts.
|
||||
load helpers
|
||||
|
||||
@test "slug: path with extension and spaces" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "raw/articles/My Test Source.md"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" = "my-test-source" ]
|
||||
}
|
||||
|
||||
@test "slug: punctuation and repeats collapse to single hyphens" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "Qualche Concetto!! Strano"
|
||||
[ "$output" = "qualche-concetto-strano" ]
|
||||
}
|
||||
|
||||
@test "log-append: appends a well-formed INGEST entry with a run_id" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --type INGEST --subject foo --model m \
|
||||
--context "[[raw/x]]" --output "[[sources/foo]]" --reasoning "why"
|
||||
[ "$status" -eq 0 ]
|
||||
grep -q "INGEST | foo" wiki/log.md
|
||||
grep -q '^- run_id: `' wiki/log.md
|
||||
grep -q '^- model: `m`' wiki/log.md
|
||||
}
|
||||
|
||||
@test "log-append: rejects an invalid TYPE" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
run bash "$SKILL_SCRIPTS/log-append.sh" --type BOGUS --subject foo
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "index-append: inserts under the right section and keeps it sorted" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/zzz]] — z. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/aaa]] — a. `maturity: draft`'
|
||||
a=$(grep -n 'sources/aaa' wiki/index.md | cut -d: -f1)
|
||||
z=$(grep -n 'sources/zzz' wiki/index.md | cut -d: -f1)
|
||||
[ -n "$a" ] && [ -n "$z" ]
|
||||
[ "$a" -lt "$z" ]
|
||||
}
|
||||
|
||||
@test "index-append: bumps frontmatter last_updated to today" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Concepts --entry '- [[concepts/x]] — x. `maturity: draft`'
|
||||
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||
}
|
||||
|
||||
@test "index-append: is idempotent for the same entry" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/dup]] — d. `maturity: draft`'
|
||||
[ "$(grep -c 'sources/dup' wiki/index.md)" -eq 1 ]
|
||||
}
|
||||
|
||||
@test "index-append: updates an existing entry by wikilink path (no duplicate)" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — old summary. `maturity: draft`'
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — new summary. `maturity: stable`'
|
||||
[ "$(grep -c 'sources/foo' wiki/index.md)" -eq 1 ]
|
||||
grep -q 'new summary' wiki/index.md
|
||||
! grep -q 'old summary' wiki/index.md
|
||||
}
|
||||
|
||||
@test "slug: refuses an all-symbols input (no empty slug)" {
|
||||
run bash "$SKILL_SCRIPTS/slug.sh" "!!!.md"
|
||||
[ "$status" -ne 0 ]
|
||||
[ -z "$output" ] || [[ "$output" != *"feat/ai-ingest-"* ]]
|
||||
}
|
||||
|
||||
@test "index-append: self-heals a frontmatter missing last_updated" {
|
||||
G="$(make_fixture_genome)"; cd "$G"
|
||||
cat > wiki/index.md <<'EOF'
|
||||
---
|
||||
title: "Index"
|
||||
type: index
|
||||
domain: genome-test
|
||||
maturity: stable
|
||||
private: false
|
||||
---
|
||||
|
||||
# Index
|
||||
|
||||
## Sources (`wiki/sources/`)
|
||||
*x*
|
||||
EOF
|
||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
|
||||
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||
}
|
||||
40
tests/structure.bats
Normal file
40
tests/structure.bats
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env bats
|
||||
# tests/structure.bats — canonical-structure verify/sync.
|
||||
load helpers
|
||||
|
||||
setup() {
|
||||
source "$LIB_DIR/output.sh"
|
||||
source "$LIB_DIR/structure.sh"
|
||||
}
|
||||
|
||||
@test "structure_report: a full fixture has no drift" {
|
||||
G="$(make_fixture_genome)"
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@test "structure_report: flags a missing canonical dir" {
|
||||
G="$(make_fixture_genome)"
|
||||
rm -rf "$G/wiki/private"
|
||||
run structure_report "$G"
|
||||
[ "$status" -ne 0 ]
|
||||
[[ "$output" == *"wiki/private"* ]]
|
||||
}
|
||||
|
||||
@test "structure_report: notes an extra dir but does not fail on it" {
|
||||
G="$(make_fixture_genome)"
|
||||
mkdir -p "$G/wiki/experiments"
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" == *"experiments"* ]]
|
||||
}
|
||||
|
||||
@test "structure_sync: creates missing dirs and is idempotent" {
|
||||
G="$(make_fixture_genome)"
|
||||
rm -rf "$G/wiki/private" "$G/raw/transcripts"
|
||||
structure_sync "$G"
|
||||
[ -d "$G/wiki/private" ] && [ -d "$G/raw/transcripts" ]
|
||||
run structure_report "$G"
|
||||
[ "$status" -eq 0 ]
|
||||
structure_sync "$G" # second run: nothing to do
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue