feat: centralize genome directory structure definition

This commit is contained in:
Matteo Cherubini 2026-06-03 12:28:18 +02:00
parent 528e9c6c48
commit ea9283637b
3 changed files with 124 additions and 5 deletions

View file

@ -4,6 +4,9 @@
# Directory structure creation and template rendering engine.
# =============================================================================
# Canonical directory layout lives in one place (lib/structure.sh).
source "$(dirname "${BASH_SOURCE[0]}")/structure.sh"
render_template() {
local template_file="$1"
local output_file="$2"
@ -32,13 +35,9 @@ render_template() {
scaffold_genome() {
local base="$1"
local dirs=(
"raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
"wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
)
info "Building directory structure in ${base}..."
for dir in "${dirs[@]}"; do
for dir in "${GENOME_DIRS[@]}"; do
mkdir -p "${base}/${dir}"
touch "${base}/${dir}/.gitkeep"
done

70
lib/structure.sh Normal file
View file

@ -0,0 +1,70 @@
#!/usr/bin/env bash
# =============================================================================
# lib/structure.sh
# Single source of truth for the canonical genome directory layout, plus the
# verify/sync helpers used by scripts/verify-genomes.sh.
#
# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources
# this file and builds new genomes from GENOME_DIRS, so scaffolding and the
# structure check can never drift apart.
# =============================================================================
# Canonical layout: each directory listed here must exist in every genome.
#   raw/*  — input buckets (collaborator-writable)
#   wiki/* — agent-owned, contract-bound layout that the lint, the index
#            sections and the ingest skill depend on.
GENOME_DIRS=(
  # raw/* input buckets
  "raw/articles"
  "raw/transcripts"
  "raw/code-packs"
  "raw/assets"
  "raw/private"
  # wiki/* agent-owned layout
  "wiki/sources"
  "wiki/entities"
  "wiki/concepts"
  "wiki/queries"
  "wiki/private"
)
# ---------------------------------------------------------------------------
# structure_report <base>
#   Reports drift of <base> against GENOME_DIRS.
#     - missing canonical dir          → warn, counted as drift (non-zero return)
#     - extra dir under raw/ or wiki/  → info only (does not fail)
#   Arguments: $1 - genome root directory to inspect
#   Globals:   GENOME_DIRS (read); calls the warn/info logging helpers, which
#              must already be defined by whoever sources this file.
#   Returns:   the number of MISSING canonical directories (0 = no drift),
#              capped at 255 because a shell exit status cannot exceed that.
# ---------------------------------------------------------------------------
structure_report() {
  local base="$1"
  local missing=0
  # Loop variables are local so callers' variables of the same name survive.
  local d canon_dir is_canon

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      warn "missing: ${d}"
      missing=$((missing + 1))
    fi
  done

  # Extra directories (drift the other way) — informational only.
  # Membership is tested element-by-element instead of against a joined string,
  # so the result does not depend on the caller's IFS. NUL-delimited find output
  # keeps unusual directory names intact. find errors are silenced on purpose:
  # a genome without raw/ or wiki/ is already covered by the "missing" pass.
  while IFS= read -r -d '' d; do
    d="${d#"${base}/"}"
    is_canon=0
    for canon_dir in "${GENOME_DIRS[@]}"; do
      if [[ "$d" == "$canon_dir" ]]; then
        is_canon=1
        break
      fi
    done
    if [[ $is_canon -eq 0 ]]; then
      info "extra (not in canon): ${d}"
    fi
  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d -print0 2>/dev/null)

  # Exit statuses wrap modulo 256; clamp so heavy drift can never read as 0.
  if (( missing > 255 )); then
    missing=255
  fi
  return "$missing"
}
# ---------------------------------------------------------------------------
# structure_sync <base>
#   Creates any MISSING canonical directories (idempotent) and drops a
#   .gitkeep into each directory it creates. Never deletes — retiring a bucket
#   is a deliberate, contract-aware change to GENOME_DIRS + the templates, not
#   an automatic prune.
#   Arguments: $1 - genome root directory to bring in sync
#   Globals:   GENOME_DIRS (read); calls the success/info logging helpers,
#              which must already be defined by whoever sources this file.
#   Returns:   0 once the loop completes.
# ---------------------------------------------------------------------------
structure_sync() {
  local base="$1"
  local added=0
  # Keep the loop variable local so a caller's own "d" is not clobbered.
  local d

  for d in "${GENOME_DIRS[@]}"; do
    if [[ ! -d "${base}/${d}" ]]; then
      mkdir -p "${base}/${d}"
      touch "${base}/${d}/.gitkeep"
      success "created: ${d}"
      added=$((added + 1))
    fi
  done

  if [[ $added -eq 0 ]]; then
    info "already in sync: ${base}"
  fi
  return 0
}

50
scripts/verify-genomes.sh Normal file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# =============================================================================
# scripts/verify-genomes.sh
# Check (default) or --sync the directory structure of every registered genome
# against the canonical layout in lib/structure.sh.
#
# bash scripts/verify-genomes.sh # report drift, non-zero exit on drift
# bash scripts/verify-genomes.sh --sync # create missing dirs everywhere (safe)
#
# No hardware/LLM involved — pure structure check. Run anywhere.
# =============================================================================
set -euo pipefail

# Locate the repository root from this script's own path (the script lives in
# scripts/, one level below the directory that holds lib/, globals.env and
# registry.sh) so the sourced files are found whatever the caller's cwd is.
_verify_genomes_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

source "${_verify_genomes_root}/lib/output.sh"
source "${_verify_genomes_root}/globals.env"
source "${_verify_genomes_root}/registry.sh"
source "${_verify_genomes_root}/lib/structure.sh"

# Default is the read-only check; --sync switches to creating missing dirs.
MODE="verify"
if [[ "${1:-}" == "--sync" ]]; then
  MODE="sync"
fi

step "Genome structure: ${MODE}"

TOTAL_MISSING=0
for entry in "${GENOMES[@]}"; do
  IFS='|' read -r GENOME_NAME _ _ <<< "$entry"   # 3-field registry; ignore desc + linked
  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  if [[ ! -d "$genome_dir" ]]; then
    warn "not found locally, skipping: ${GENOME_NAME}"
    continue
  fi

  info "Genome: ${GENOME_NAME}"
  if [[ "$MODE" == "sync" ]]; then
    structure_sync "$genome_dir"
  else
    # structure_report returns the missing-directory count as its status;
    # capture it explicitly so a non-zero count does not trip `set -e`.
    m=0
    structure_report "$genome_dir" || m=$?
    TOTAL_MISSING=$((TOTAL_MISSING + m))
  fi
done

echo ""
if [[ "$MODE" == "sync" ]]; then
  success "Structure sync complete."
elif [[ $TOTAL_MISSING -eq 0 ]]; then
  success "Structure verified: all genomes match the canonical layout."
else
  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
  exit 1
fi