From 2669a76711bb5e5720340a223e0cd9000da4c13e Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 1/7] refactor: Improve git-crypt verification logic --- lib/git-crypt.sh | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/git-crypt.sh b/lib/git-crypt.sh index 7972620..0a2825a 100644 --- a/lib/git-crypt.sh +++ b/lib/git-crypt.sh @@ -21,18 +21,29 @@ gcrypt_export_key() { gcrypt_verify() { local genome_name="$1" - local key_path="${KEYS_DIR}/${genome_name}.key" - info "Verifying git-crypt status for ${genome_name}..." - git-crypt lock + info "Verifying git-crypt configuration for ${genome_name}..." - if file "raw/private/.gitkeep" 2>/dev/null | grep -q "data"; then - success "Encryption verified: private/ directory is protected." + # `git-crypt status` reports the CONFIGURED status (from `.gitattributes`), not the + # lock/unlock status of the working tree. Encrypted lines have their labels right-aligned + # (with leading whitespace), so you CANNOT anchor on `^encrypted`. + # We filter by private/ and distinguish “encrypted” from “not encrypted” without + # relying on exact spacing. + local status_out encrypted_count not_encrypted_count + status_out=$(git-crypt status 2>/dev/null || true) + encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^[[:space:]]*encrypted:' || true) + not_encrypted_count=$(printf '%s\n' "$status_out" | grep 'private/' | grep -cE '^not encrypted:' || true) + + if [[ "$encrypted_count" -gt 0 ]]; then + success "Encryption configured: ${encrypted_count} private file(s) under git-crypt." + if [[ "$not_encrypted_count" -gt 0 ]]; then + warn "${not_encrypted_count} file(s) under private/ are NOT covered by the git-crypt filter — check .gitattributes (leak risk)." + fi + elif [[ "$not_encrypted_count" -gt 0 ]]; then + warn "private/ files exist but none are covered by the git-crypt filter — check the .gitattributes filter (leak risk)." else - warn "Encryption check inconclusive. Run 'git-crypt status' manually." + info "No private/ files present yet — nothing to verify." fi - - [[ -f "$key_path" ]] && git-crypt unlock "$key_path" } # --------------------------------------------------------------------------- From 5ad338c5bf8a9ac37b9dd02f527bb9f1c0e7bb93 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 2/7] docs: Clarify macOS bash requirements for git-crypt rotation --- README.md | 8 ++++++-- lib/git-crypt.sh | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a5c5df6..f698786 100644 --- a/README.md +++ b/README.md @@ -190,8 +190,9 @@ All tools (git-crypt, bw, qmd) have native Linux binaries. All scripts are compatible with macOS. Requirements: - bash 3.2+ (macOS default) — supported for the **setup scripts** (`make` targets, scaffolding). - The `ingest` skill uses bash 4+ constructs (`mapfile`), but it is deployed and run on the - Linux AI node, not on the macOS setup machine — so this is not a constraint in practice. + Two things need bash 4+: the `ingest` skill (`mapfile`), which runs on the Linux AI node (not a + constraint on the macOS setup machine); and `gcrypt_rotate_key` (`compgen -G`), which **does** + run on the laptop. For key rotation on macOS, use Homebrew bash (`brew install bash`). - GNU coreutils not required — BSD variants of `date`, `grep`, `sed` all handled. - `git-crypt`: install via Homebrew — `brew install git-crypt` - `jq`, `curl`: pre-installed or via Homebrew @@ -695,6 +696,9 @@ cd ~/knowledge-genome-orchestrator/genome-dev gcrypt_rotate_key "genome-dev" ``` +> **macOS:** `gcrypt_rotate_key` uses `compgen -G` (bash 4+). The stock macOS bash 3.2 is not +> enough — run rotation under Homebrew bash (`brew install bash`). + `gcrypt_rotate_key` performs: 1. Unlocks repo with existing key diff --git a/lib/git-crypt.sh b/lib/git-crypt.sh index 0a2825a..44d4716 100644 --- a/lib/git-crypt.sh +++ b/lib/git-crypt.sh @@ -118,6 +118,8 @@ gcrypt_rotate_key() { # 5. Re-stage private files so they are committed encrypted with the new key local staged=0 + # compgen -G requires bash 4+ for reliable glob expansion. macOS stock + # bash is 3.2; use Homebrew bash (already recommended in README) for rotation. if compgen -G "raw/private/*" > /dev/null 2>&1; then git add raw/private/ staged=1 From 8fb0890622f3cf451b6f1569c91cc33184acacd3 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 3/7] feat: Introduce pull-based cross-genome reference mechanism --- README.md | 29 ++++++++++++++++++----------- lib/lint.sh | 13 +++++++++++-- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f698786..01aadb1 100644 --- a/README.md +++ b/README.md @@ -955,18 +955,25 @@ Pages have a `last_updated` field in frontmatter. During lint passes: The agent proposes re-validation but does not change `maturity` without new source evidence. -### Cross-genome lint +### Cross-genome references -A manual, monthly operation. Not automated in CI/CD — the token cost and coordination -complexity are not justified at this scale. +Cross-domain knowledge moves by **pull, never push**: the genome you are working in draws +material _in_; nothing is ever written into another genome. There are **no cross-genome +wikilinks** — submodule pointers make relative paths brittle. -1. Operator initiates a master-repo agent session -2. Agent uses `qmd search ""` across the multi-genome index to find: - - Concepts defined in 2+ genomes with potentially conflicting definitions - - Entities referenced cross-genome without canonical cross-genome wikilinks - - Concepts in genome-X that should link to genome-Y -3. Agent reports findings — does not modify files -4. For each finding: create conflict note in the genome where resolution belongs +When the working genome needs a concept that lives elsewhere, the **navigation skill** handles +it in the same two-phase shape as ingest: + +1. A deterministic collector clones the relevant genomes **read-only at HEAD** (fresh — never the + pinned submodule state) and assembles a dossier of excerpts with provenance. +2. A semantic pass reads only that dossier; the skill then deposits **one** abstract, non-private + raw into the working genome at `raw/articles/crossgen--.md`. +3. That raw goes through the working genome's normal ingest → PR → human gate, like any source. + +Which genomes may be read as **sources** is gated by a per-genome `cross_source: yes|no` flag: a +confidential genome (e.g. a client file) is marked `no` and is never read as a source — the wall +is structural, not a matter of the agent's discipline. The master `AGENTS.md` holds the full +boundary contract. --- @@ -1025,7 +1032,7 @@ and keep the wiki atomically navigable. ### Linking conventions - **Intra-genome:** `[[folder/file]]` — Obsidian wikilinks only. -- **Cross-genome:** NOT supported via wikilink. Submodule pointers make relative paths brittle. When a concept belongs to another genome, use the navigation skill to emit a raw stub into that genome's `raw/articles/` directory so its local ingest pipeline can process it. +- **Cross-genome:** NOT supported via wikilink — submodule pointers make relative paths brittle. When the working genome needs a concept that lives elsewhere, the navigation skill **pulls it in** as one abstract raw under _this_ genome's `raw/articles/`, which then goes through normal ingest. See [Cross-genome references](#cross-genome-references). - **External:** `[text](https://...)` — standard Markdown. ### Log format diff --git a/lib/lint.sh b/lib/lint.sh index 189105a..7ecc644 100644 --- a/lib/lint.sh +++ b/lib/lint.sh @@ -190,12 +190,21 @@ check_broken_links() { local links links=$(grep -oE '\[\[[^\]]+' "$file" 2>/dev/null | sed 's/^\[\[//' | cut -d'|' -f1) - for link in $links; do + # Cross-genome links (../other-genome/…) are not resolvable from a single + # genome checkout and are skipped — they would always fall + # through the two-level lookup and produce non-actionable warnings. + while IFS= read -r link; do + [[ -z "$link" ]] && continue + + if [[ "$link" == ../* ]]; then + continue + fi + local target="$link" [[ "$target" != *.md ]] && target="${target}.md" if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then warn "Potential broken link: [[$link]] in $file" fi - done + done <<< "$links" } From 516beca43b78bddd419e8cfe0dd4276994802f73 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 4/7] refactor: Promote lint warnings to errors for critical issues --- lib/lint.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/lint.sh b/lib/lint.sh index 7ecc644..960eba0 100644 --- a/lib/lint.sh +++ b/lib/lint.sh @@ -23,7 +23,7 @@ lint_markdown_file() { # 1. Check frontmatter delimiters if [[ $(head -n 1 "$file") != "---" ]]; then - warn "Missing frontmatter start (---) in: $file" + error "Missing frontmatter start (---) in: $file" errors=$((errors + 1)) fi @@ -31,14 +31,14 @@ lint_markdown_file() { local mandatory_fields=("title:" "type:" "domain:" "maturity:" "last_updated:") for field in "${mandatory_fields[@]}"; do if ! grep -q "^${field}" "$file"; then - warn "Missing mandatory field '${field}' in: $file" + error "Missing mandatory field '${field}' in: $file" errors=$((errors + 1)) fi done # 3. Check domain matches genome name if grep -q "^domain:" "$file" && ! grep -q "^domain: ${genome_name}" "$file"; then - warn "Domain mismatch in $file (expected '${genome_name}')" + error "Domain mismatch in $file (expected '${genome_name}')" errors=$((errors + 1)) fi @@ -70,8 +70,8 @@ check_valid_type() { done if [[ $valid -eq 0 ]]; then - warn "Invalid type value '${type_value}' in: $file" - warn " Valid types: ${VALID_TYPES[*]}" + error "Invalid type value '${type_value}' in: $file" + error " Valid types: ${VALID_TYPES[*]}" return 1 fi @@ -144,8 +144,8 @@ check_knowledge_decay() { esac if [[ $days_old -gt $threshold ]]; then - warn "STALE: $file" - warn " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})" + error "STALE: $file" + error " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})" return 1 fi From a1d14bd2e1eec52853896a5ca477be0134c14b55 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 5/7] docs: Add developer notes on code smells and design tradeoffs --- lib/structure.sh | 9 +++++++++ scripts/add-genome.sh | 9 +++++++++ skills/ingest/scripts/run-ingest.sh | 12 ++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/structure.sh b/lib/structure.sh index f94bba1..e1d983a 100644 --- a/lib/structure.sh +++ b/lib/structure.sh @@ -9,6 +9,14 @@ # structure check can never drift apart. # ============================================================================= +# NOTE — Return-code smell +# Several functions in this file (and in lint.sh) use the return code as a +# numeric counter (e.g. return $missing). This is a known smell: exit codes +# wrap at 256 and conflate "count of problems" with "exit status". At the +# current scale (<10 problems per run) the wrap-around risk is zero, so we +# accept it pragmatically. If counts ever grow, switch to stdout counters +# or dedicated global variables. + # Canonical directories every genome must have. # raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned, # contract-bound layout the lint, the index sections and the ingest skill depend on. @@ -43,6 +51,7 @@ structure_report() { info "extra (not in canon): ${d}" done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null) + # NOTE: return $missing is a smell — see header. Kept for compatibility. return $missing } diff --git a/scripts/add-genome.sh b/scripts/add-genome.sh index 53cab5d..dc4dd6c 100644 --- a/scripts/add-genome.sh +++ b/scripts/add-genome.sh @@ -24,6 +24,15 @@ step "Adding New Genome: ${GENOME_NAME}" # Build a 3-field registry entry (linked_repo may be empty) GENOMES=("${GENOME_NAME}|${GENOME_DESC}|${GENOME_LINKED}") +# NOTE — Maintenance smell +# We source setup-genomes.sh as a library/orchestrator hybrid. This works because: +# - registry.sh is guarded against double-source (idempotent guard) +# - setup-genomes.sh checks WORK_DIR before re-sourcing registry.sh +# - GENOMES is built locally just before the source, so it is not clobbered +# However, sourcing an orchestration script as a library makes the control flow +# harder to trace. If this grows, refactor into a shared function (e.g. setup_one_genome) +# called by both add-genome.sh and setup-genomes.sh. + source "scripts/setup-genomes.sh" success "Genome '${GENOME_NAME}' added and linked successfully!" diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh index bc68bcf..2ffe95d 100644 --- a/skills/ingest/scripts/run-ingest.sh +++ b/skills/ingest/scripts/run-ingest.sh @@ -59,10 +59,14 @@ all_paths=( "${created_paths[@]}" "${modified_paths[@]}" ) conflict_label="" -# NOTE: no rollback. Steps below mutate the working tree in order (index → log → commit). -# All are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway, -# nothing is committed (open-pr is the only committer) — the operator re-runs, or inspects -# wiki/ if log-append already wrote a line. The manifest is removed only on full success. +# NOTE: No rollback. The steps below modify the working tree in order (index → log → commit). +# All steps are idempotent on re-run EXCEPT log-append (append-only). If a step fails midway, +# nothing is committed (open-pr is the only committer) — the operator re-runs, or checks +# wiki/ if log-append has already written a line. The manifest is removed only upon full success. +# log-append is not idempotent: a re-run after a post-log failure produces +# duplicate lines. This is accepted by design (append-only ledger, no rollback). If this +# becomes a nuisance tomorrow, add a dedup check on run_id in log-append.sh +# (grep for run_id before appending). Manual recovery: grep for run_id in wiki/log.md. # --- 1. index entries (created pages only), inserted in order --- while IFS=$'\t' read -r path summary maturity; do From d7611a79f41a6dd911785f67f60038a450c4402a Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:43:47 +0200 Subject: [PATCH 6/7] feat: Prepare for private content handling in ingest and tests --- skills/ingest/scripts/run-ingest.sh | 1 + tests/helpers.bash | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh index 2ffe95d..5a9b738 100644 --- a/skills/ingest/scripts/run-ingest.sh +++ b/skills/ingest/scripts/run-ingest.sh @@ -80,6 +80,7 @@ while IFS=$'\t' read -r path summary maturity; do queries) if [[ "$link" == queries/conflict-* ]]; then section="Conflicts"; conflict_label="CONFLICT" else section="Queries"; fi ;; + # private/ is not routed here — ingest is public-only. Add when private ingest is built. *) section="Sources" ;; esac diff --git a/tests/helpers.bash b/tests/helpers.bash index 1b31397..1a25ac1 100644 --- a/tests/helpers.bash +++ b/tests/helpers.bash @@ -54,6 +54,11 @@ private: false ## Conflicts Pending Review (`wiki/queries/conflict-*.md`) *slugs only.* + + +## Private Synthesis (`wiki/private/`) +*Restricted access. Requires PRIVATE_CONTEXT: enabled and unlocked repo.* +*List slug names ONLY. Do not append summaries — prevents metadata leakage.* EOF cat > "${g}/wiki/log.md" <<'EOF' From 909ee15ffb3278ae46acbe16d2c8f1f6037b6638 Mon Sep 17 00:00:00 2001 From: Matteo Cherubini Date: Tue, 9 Jun 2026 19:45:20 +0200 Subject: [PATCH 7/7] Version update --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d2ea4b..bcde255 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # ============================================================================= -# Knowledge Genome - Makefile v. 1.1.4 +# Knowledge Genome - Makefile v. 1.1.5 # Orchestrates the setup and management of the knowledge base. # =============================================================================