diff --git a/lib/scaffold.sh b/lib/scaffold.sh index fbefb6c..5add2ed 100644 --- a/lib/scaffold.sh +++ b/lib/scaffold.sh @@ -4,6 +4,9 @@ # Directory structure creation and template rendering engine. # ============================================================================= +# Canonical directory layout lives in one place (lib/structure.sh). +source "$(dirname "${BASH_SOURCE[0]}")/structure.sh" + render_template() { local template_file="$1" local output_file="$2" @@ -32,13 +35,9 @@ render_template() { scaffold_genome() { local base="$1" - local dirs=( - "raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private" - "wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private" - ) info "Building directory structure in ${base}..." - for dir in "${dirs[@]}"; do + for dir in "${GENOME_DIRS[@]}"; do mkdir -p "${base}/${dir}" touch "${base}/${dir}/.gitkeep" done diff --git a/lib/structure.sh b/lib/structure.sh new file mode 100644 index 0000000..f94bba1 --- /dev/null +++ b/lib/structure.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# ============================================================================= +# lib/structure.sh +# Single source of truth for the canonical genome directory layout, plus the +# verify/sync helpers used by scripts/verify-genomes.sh. +# +# IMPORTANT: this is the ONE place the structure is defined. scaffold.sh sources +# this file and builds new genomes from GENOME_DIRS, so scaffolding and the +# structure check can never drift apart. +# ============================================================================= + +# Canonical directories every genome must have. +# raw/* are input buckets (collaborator-writable); wiki/* is the agent-owned, +# contract-bound layout the lint, the index sections and the ingest skill depend on. 
+GENOME_DIRS=(
+  "raw/articles" "raw/transcripts" "raw/code-packs" "raw/assets" "raw/private"
+  "wiki/sources" "wiki/entities" "wiki/concepts" "wiki/queries" "wiki/private"
+)
+
+# ---------------------------------------------------------------------------
+# structure_report <base>
+#   Reports drift of <base> against GENOME_DIRS.
+#     - missing canonical dir         → counted as drift (returns non-zero)
+#     - extra dir under raw/ or wiki/ → info only (does not fail)
+#   Returns the number of MISSING canonical directories, capped at 255.
+# ---------------------------------------------------------------------------
+structure_report() {
+  local base="$1" missing=0 d
+
+  for d in "${GENOME_DIRS[@]}"; do
+    if [[ ! -d "${base}/${d}" ]]; then
+      warn "missing: ${d}"
+      missing=$((missing + 1))
+    fi
+  done
+
+  # Extra directories (drift the other way) — informational only. Paths are
+  # made relative to <base>, then matched as whole words in the joined canon.
+  local canon=" ${GENOME_DIRS[*]} "
+  while IFS= read -r d; do
+    d="${d#"${base}/"}"
+    [[ "$canon" == *" ${d} "* ]] && continue
+    info "extra (not in canon): ${d}"
+  done < <(find "${base}/raw" "${base}/wiki" -mindepth 1 -type d 2>/dev/null)
+
+  return $(( missing > 255 ? 255 : missing ))   # exit statuses wrap mod 256
+}
+
+# ---------------------------------------------------------------------------
+# structure_sync <base>
+#   Creates any MISSING canonical directories (idempotent). Never deletes —
+#   retiring a bucket is a deliberate, contract-aware change to GENOME_DIRS +
+#   the templates, not an automatic prune.
+# ---------------------------------------------------------------------------
+structure_sync() {
+  local base="$1" added=0 d
+
+  for d in "${GENOME_DIRS[@]}"; do
+    if [[ ! -d "${base}/${d}" ]]; then
+      mkdir -p "${base}/${d}"
+      touch "${base}/${d}/.gitkeep"
+      success "created: ${d}"
+      added=$((added + 1))
+    fi
+  done
+
+  [[ $added -eq 0 ]] && info "already in sync: ${base}"
+  # Explicit success: the test above is false (status 1) whenever dirs were added.
+  return 0
+}
diff --git a/scripts/verify-genomes.sh b/scripts/verify-genomes.sh
new file mode 100644
index 0000000..85a4a62
--- /dev/null
+++ b/scripts/verify-genomes.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# =============================================================================
+# scripts/verify-genomes.sh
+#   Check (default) or --sync the directory structure of every registered genome
+#   against the canonical layout in lib/structure.sh.
+#
+#     bash scripts/verify-genomes.sh          # report drift, non-zero exit on drift
+#     bash scripts/verify-genomes.sh --sync   # create missing dirs everywhere (safe)
+#
+#   No hardware/LLM involved — pure structure check. Run anywhere.
+# =============================================================================
+set -euo pipefail
+
+# Sourced paths are relative to the repo root. When started from elsewhere,
+# hop to the root (the parent of scripts/) so the sources below resolve.
+[[ -f "lib/output.sh" ]] || cd "$(dirname "${BASH_SOURCE[0]}")/.."
+source "lib/output.sh"
+source "globals.env"
+source "registry.sh"
+source "lib/structure.sh"
+
+MODE="verify"
+[[ "${1:-}" == "--sync" ]] && MODE="sync"
+
+step "Genome structure: ${MODE}"
+
+TOTAL_MISSING=0
+for entry in "${GENOMES[@]}"; do
+  IFS='|' read -r GENOME_NAME _ _ <<< "$entry"   # 3-field registry; ignore desc + linked
+  genome_dir="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"
+
+  if [[ ! -d "$genome_dir" ]]; then
+    warn "not found locally, skipping: ${GENOME_NAME}"
+    continue
+  fi
+
+  info "Genome: ${GENOME_NAME}"
+  if [[ "$MODE" == "sync" ]]; then
+    structure_sync "$genome_dir"
+  else
+    # structure_report's exit status is its missing-dir count; catch it with
+    # `||` so a non-zero status does not trip `set -e`.
+    m=0
+    structure_report "$genome_dir" || m=$?
+    TOTAL_MISSING=$((TOTAL_MISSING + m))
+  fi
+done
+
+echo ""
+if [[ "$MODE" == "sync" ]]; then
+  success "Structure sync complete."
+elif [[ $TOTAL_MISSING -eq 0 ]]; then
+  success "Structure verified: all genomes match the canonical layout."
+else
+  error "Structure drift: ${TOTAL_MISSING} missing directory(ies). Fix with: make sync-structure"
+  exit 1
+fi