# =============================================================================
# Knowledge Genome - Makefile v. 0.1.0
# Orchestrates the setup and management of the knowledge base.
# =============================================================================

# config.env is deliberately NOT pulled in with `include`: it is a bash file
# (shebang, a multi-line `GENOMES=( ... )` array, command substitutions) and
# GNU Make aborts with "missing separator" when asked to parse it. Every script
# under scripts/ sources config.env itself, so the recipes need nothing from it.

# NAME and DESC arrive on the command line (make add-genome NAME=... DESC=...).
# Exporting them lets the recipe read them as shell variables ($$NAME, $$DESC)
# instead of having Make paste their text into the command, where a quote
# inside DESC would break the shell syntax.
export NAME
export DESC

.DEFAULT_GOAL := help

# `clean` stays declared for backward compatibility; it has no recipe.
.PHONY: setup add-genome status lint clean help

# Print the list of supported commands.
help:
	@echo "Available commands:"
	@echo " make setup - Full system initialization"
	@echo " make add-genome - Register and scaffold a new genome"
	@echo " make status - Check submodule and encryption status"
	@echo " make lint - Verify schema, privacy flags, and metadata"

# Run the validation suite over the genomes.
lint:
	@bash scripts/lint-genomes.sh

# Dependency checks, master repository setup and genome provisioning.
setup:
	@bash scripts/setup.sh

# Provision one additional genome; both NAME and DESC are mandatory.
add-genome:
	@if [ -z "$$NAME" ] || [ -z "$$DESC" ]; then \
		echo "Error: NAME and DESC are required."; \
		echo "Usage: make add-genome NAME=my-genome DESC='My description'"; \
		exit 1; \
	fi
	@bash scripts/add-genome.sh "$$NAME" "$$DESC"

# Show submodule state and a short sample of the git-crypt status listing.
status:
	@echo "--- Master Status ---"
	@git submodule status
	@echo "--- Encryption Status (First 10 files) ---"
	@git-crypt status | head -n 10
+ +## Core Components + +### Master Repository + +Contains: + +* Orchestration scripts +* Global configuration (`config.env`) +* Security templates + +### Genomes + +Individual specialized repositories (e.g. `genome-dev`, `genome-finance`) that act as standalone units of knowledge. + +### Security Layers + +#### Physical Security + +`git-crypt` encrypts `private/` directories at rest. + +#### Logical Security + +YAML frontmatter (`private: true`) prevents AI agents from leaking sensitive data during public sessions. + +#### Validation Layer + +A custom linting engine ensures metadata consistency. + +--- + +# Quick Start + +## Prerequisites + +Required dependencies: + +* `git` +* `git-crypt` +* `curl` +* `jq` + +Optional: + +* `bw` (Bitwarden CLI) — used for runtime key injection + +--- + +## Initialization + +```bash +# 1. Clone the master repository +git clone <repository-url> master-knowledge-genome && cd master-knowledge-genome + +# 2. Run the full setup +# (checks dependencies, creates master scaffold, +# initializes genomes) +make setup +``` + +# Management Commands + +The system is controlled through a centralized Makefile. + +| Command | Description | +| ----------------- | -------------------------------------------------------------- | +| `make setup` | Full system initialization (Master + Registry Genomes). | +| `make add-genome` | Scaffolds and registers a new genome (requires NAME and DESC). | +| `make lint` | Runs the validation suite across all genomes. | +| `make status` | Checks Git status and encryption state for all submodules. | + +# Validation & Linting (`make lint`) + +The built-in linter ensures that the knowledge base remains machine-readable and secure. + +It automatically validates: + +## Frontmatter Integrity + +Every `.md` file must contain valid YAML headers. + +## Domain Consistency + +Ensures that a file's domain metadata matches its parent genome. + +## Privacy Leak Detection + +Critical validation step. 
#!/usr/bin/env bash
# =============================================================================
# config.env
# Central settings file, sourced by every script of the Knowledge Genome
# framework. Plain variable assignments only.
# =============================================================================

# Git hosting backend; a matching providers/<name>.sh is sourced by the
# genome setup script. Supported values: "forgejo", "github".
PROVIDER='forgejo'

# Forgejo instance and account. FORGEJO_TOKEN is intentionally absent from
# this file: export it in your shell so it never lands on disk.
FORGEJO_URL='https://git.keruhomelab.com'
FORGEJO_USER='keru'

# Vaultwarden address, used only when rendering instructions/templates.
VAULTWARDEN_URL='https://vault.keruhomelab.com'

# Umbrella repository name and the optional reference gist added to it.
MASTER_REPO='master-knowledge-genome'
GIST_URL='https://gist.github.com/442a6bf555914893e9891c11519de94f.git'

# Genome registry: one "name|description" entry per genome.
GENOMES=(
  "genome-dev|Web development, TUI, Angular, software architecture"
  "genome-finance|Personal finance, investments, market analysis"
  "genome-homelab|Keru infrastructure, network configs, architecture logs"
)

# Workspace in which repositories are created, and where exported keys land.
WORK_DIR="$HOME/knowledge-genome-setup"
KEYS_DIR="$WORK_DIR/keys"

# Paths resolved from the location of this file (do not change).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TEMPLATES_DIR="$SCRIPT_DIR/templates"
LIB_DIR="$SCRIPT_DIR/lib"
PROVIDERS_DIR="$SCRIPT_DIR/providers"
# Locks the repository, checks that private content is stored encrypted, then
# unlocks again when the exported key is still on disk.
#
# Arguments:
#   $1  genome name; the key is expected at ${KEYS_DIR}/<name>.key
# Globals read: KEYS_DIR
# Must be run from the root of the genome repository.
# Always returns 0 unless git-crypt itself fails.
gcrypt_verify() {
  local genome_name="$1"
  local key_path="${KEYS_DIR}/${genome_name}.key"
  local sample="" candidate

  info "Verifying git-crypt status for ${genome_name}..."
  git-crypt lock

  # The .gitkeep markers are empty, and an empty file cannot carry the
  # git-crypt header, so they cannot prove anything. Take the first non-empty
  # file below a private directory instead.
  while IFS= read -r candidate; do
    sample="$candidate"
    break
  done < <(find raw/private wiki/private -type f ! -empty 2>/dev/null)

  # A locked, encrypted file begins with the bytes \0GITCRYPT\0; NULs are
  # dropped before comparing because bash strings cannot hold them.
  if [[ -n "$sample" && "$(head -c 10 "$sample" | tr -d '\0')" == "GITCRYPT" ]]; then
    success "Encryption verified: private/ directory is protected."
  else
    warn "Encryption check inconclusive. Please run 'git-crypt status' manually."
  fi

  # An explicit if/else keeps the function's exit status at 0 when the key is
  # absent; a trailing `[[ -f ... ]] && ...` would return 1 and abort callers
  # running under `set -e`.
  if [[ -f "$key_path" ]]; then
    git-crypt unlock "$key_path"
  else
    warn "Key not found at ${key_path}; repository left locked."
  fi
}
# Validates the YAML frontmatter of one Markdown file.
#
# Arguments:
#   $1  path of the Markdown file
#   $2  name of the genome the file belongs to (expected `domain:` value)
# Returns: the number of problems found (0 means the file is clean).
# Each problem is also reported through warn().
lint_markdown_file() {
  local file="$1"
  local genome_name="$2"
  local errors=0
  local scope field domain_value
  local quoted_re=$'^["\'](.*)["\']$'

  # 1. Check Frontmatter delimiters.
  # With an opening delimiter, only the frontmatter block (up to the closing
  # ---) is inspected, so body text such as "title: ..." cannot satisfy a
  # mandatory field. Without one, the whole file is scanned as a fallback.
  if [[ $(head -n 1 "$file") == "---" ]]; then
    scope=$(awk 'NR == 1 { next } /^---[[:space:]]*$/ { exit } { print }' "$file")
  else
    warn "Missing frontmatter start (---) in: $file"
    errors=$((errors + 1))
    scope=$(cat "$file")
  fi

  # 2. Check mandatory fields
  local mandatory_fields=("title:" "type:" "domain:")
  for field in "${mandatory_fields[@]}"; do
    if ! grep -q "^${field}" <<< "$scope"; then
      warn "Missing mandatory field '${field}' in: $file"
      errors=$((errors + 1))
    fi
  done

  # 3. Check if domain matches the genome name.
  # The value is compared as a whole string: a prefix match would let
  # "genome-development" pass for genome "genome-dev".
  if grep -q "^domain:" <<< "$scope"; then
    domain_value=$(awk '/^domain:/ {
      sub(/^domain:[[:space:]]*/, "")
      sub(/[[:space:]]+#.*$/, "")
      sub(/[[:space:]]+$/, "")
      print
      exit
    }' <<< "$scope")

    # Accept the value with or without surrounding quotes.
    if [[ "$domain_value" =~ $quoted_re ]]; then
      domain_value="${BASH_REMATCH[1]}"
    fi

    if [[ "$domain_value" != "$genome_name" ]]; then
      warn "Domain mismatch in $file (expected ${genome_name})"
      errors=$((errors + 1))
    fi
  fi

  return $errors
}
# Basic check for internal wiki-links [[target]].
#
# Arguments:
#   $1  path of the Markdown file to scan
# A link is considered resolved when "<target>.md" exists next to the file or
# one directory above it. Unresolved links are reported with warn() only,
# because they may point into other genomes or deeper structures.
# Always returns 0.
check_broken_links() {
  local file="$1"
  local base_dir
  base_dir=$(dirname "$file")

  # Extract link targets, dropping aliases and heading anchors:
  #   [[Link|Alias]] -> Link      [[Link#Section]] -> Link
  # Plain `grep -o` + sed is used instead of `grep -P`, which BSD/macOS grep
  # lacks. `|| true` matters: grep exits 1 on a file without links, and under
  # the caller's `set -euo pipefail` that would abort the whole lint run.
  local links link target
  links=$(grep -o '\[\[[^]]*' "$file" | sed -e 's/^\[\[//' -e 's/[|#].*$//' || true)

  # Read line by line so targets containing spaces stay in one piece.
  while IFS= read -r link; do
    if [[ -z "$link" ]]; then
      continue
    fi

    target="$link"
    if [[ "$target" != *.md ]]; then
      target="${target}.md"
    fi

    # Simple relative check
    if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then
      warn "Potential broken link: [[$link]] in $file"
    fi
  done <<< "$links"

  return 0
}
# Renders a template by substituting {{PLACEHOLDER}} tokens.
#
# Arguments:
#   $1  template file to read
#   $2  output file to write (parent directories are created)
# Globals read (each optional, empty when unset): GENOME_NAME, GENOME_DESC,
#   FORGEJO_URL, FORGEJO_USER, VAULTWARDEN_URL, MASTER_REPO
# Exits with status 1 when the template file does not exist.
render_template() {
  local template_file="$1"
  local output_file="$2"

  if [[ ! -f "$template_file" ]]; then
    error "Template not found: ${template_file}"
    exit 1
  fi

  # Master-level templates are rendered with no genome selected, so every
  # global is read with a default; a bare ${GENOME_NAME} would abort callers
  # running under `set -u`.
  local genome_name="${GENOME_NAME:-}"
  local -a substitutions=(
    "GENOME_NAME"       "$genome_name"
    "GENOME_NAME_UPPER" "${genome_name^^}"
    "GENOME_DESC"       "${GENOME_DESC:-}"
    "FORGEJO_URL"       "${FORGEJO_URL:-}"
    "FORGEJO_USER"      "${FORGEJO_USER:-}"
    "VAULTWARDEN_URL"   "${VAULTWARDEN_URL:-}"
    "MASTER_REPO"       "${MASTER_REPO:-}"
    "DATE"              "$(date +%Y-%m-%d)"
  )

  local content
  content=$(cat "$template_file")

  # Placeholder replacement. Pattern and replacement are both quoted: the
  # pattern so it is matched literally, the replacement so that bash 5.2+
  # does not expand "&" in a value (e.g. "R&D") to the matched text.
  local i placeholder
  for ((i = 0; i < ${#substitutions[@]}; i += 2)); do
    placeholder="{{${substitutions[i]}}}"
    content=${content//"$placeholder"/"${substitutions[i + 1]}"}
  done

  mkdir -p "$(dirname "$output_file")"
  printf '%s\n' "$content" > "$output_file"
}
# Installs the pre-commit security hook into a repository.
#
# Arguments:
#   $1  path of the repository working tree
# Globals read: TEMPLATES_DIR (must contain pre-commit.sh)
# Exits with status 1 when the hook template is missing.
install_precommit_hook() {
  local repo_path="$1"
  local template="${TEMPLATES_DIR}/pre-commit.sh"
  local git_dir hook_path

  if [[ ! -f "$template" ]]; then
    error "Hook template not found: ${template}"
    exit 1
  fi

  # Ask git where the repository metadata lives: in a submodule or linked
  # worktree checkout ".git" is a file, so "<repo>/.git/hooks" would not
  # exist there. Fall back to the conventional location if git cannot answer.
  git_dir=$(git -C "$repo_path" rev-parse --git-dir 2>/dev/null) || git_dir=".git"
  if [[ "$git_dir" != /* ]]; then
    git_dir="${repo_path}/${git_dir}"
  fi
  hook_path="${git_dir}/hooks/pre-commit"

  # The hooks directory is absent when git was initialised from a template
  # directory without hooks.
  mkdir -p "${git_dir}/hooks"
  cp "$template" "$hook_path"
  chmod +x "$hook_path"
  success "Pre-commit security hook installed at: $hook_path"
}
# Human-readable name of this provider, including the configured instance URL.
provider_name() {
  echo "Forgejo (${FORGEJO_URL})"
}

# ---------------------------------------------------------------------------
# provider_create_repo
# Creates a repository for the authenticated user through the Forgejo API.
#
# Arguments:
#   $1  repository name
#   $2  repository description
#   $3  JSON boolean ("true"/"false") controlling repository privacy
# Globals read: FORGEJO_URL, FORGEJO_TOKEN
# An already existing repository (HTTP 409) is not an error; a missing token,
# an authorization failure, or any other status exits with status 1.
# ---------------------------------------------------------------------------
provider_create_repo() {
  local name="$1"
  local desc="$2"
  local private="$3"

  if [[ -z "${FORGEJO_TOKEN:-}" ]]; then
    error "FORGEJO_TOKEN is not set. Export it in your shell before running setup."
    exit 1
  fi

  # Build the body with jq so quotes, backslashes or newlines in the name or
  # description are escaped correctly instead of corrupting the JSON.
  local payload
  payload=$(jq -n \
    --arg name "$name" \
    --arg description "$desc" \
    --argjson private "$private" \
    '{name: $name, description: $description, private: $private, auto_init: false}')

  # On a transport failure curl exits non-zero; map that to status "000" so
  # the case below reports it rather than `set -e` ending the script silently.
  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    -H "Authorization: token ${FORGEJO_TOKEN}" \
    -H "Content-Type: application/json" \
    -X POST "${FORGEJO_URL}/api/v1/user/repos" \
    -d "$payload") || http_code="000"

  case "$http_code" in
    201) success "Repository '${name}' created successfully." ;;
    409) info "Repository '${name}' already exists - skipping." ;;
    401) error "Unauthorized. Check your FORGEJO_TOKEN."; exit 1 ;;
    *)   error "Forgejo API returned HTTP ${http_code}. Check connectivity."; exit 1 ;;
  esac
}

# HTTPS clone URL of repository $1.
provider_clone_url() {
  echo "${FORGEJO_URL}/${FORGEJO_USER}/${1}.git"
}

# SSH clone URL of repository $1.
# The port defaults to 222 (common for homelab Forgejo/Gitea setups) and can
# be overridden through FORGEJO_SSH_PORT.
provider_ssh_url() {
  local host
  local port="${FORGEJO_SSH_PORT:-222}"
  # Extract hostname by removing protocol and trailing slashes
  host=$(echo "${FORGEJO_URL}" | sed -e 's|^[^/]*//||' -e 's|/*$||')
  echo "ssh://git@${host}:${port}/${FORGEJO_USER}/${1}.git"
}

# Web (browser) URL of repository $1.
provider_web_url() {
  echo "${FORGEJO_URL}/${FORGEJO_USER}/${1}"
}
# Human-readable name of this provider.
provider_name() {
  echo "GitHub (https://github.com)"
}

# Account that owns the repositories: GITHUB_ORG when set, else GITHUB_USER.
_github_namespace() {
  echo "${GITHUB_ORG:-$GITHUB_USER}"
}

# Creates a repository through the GitHub REST API.
#
# Arguments:
#   $1  repository name
#   $2  repository description
#   $3  JSON boolean ("true"/"false") controlling repository privacy
# Globals read: GITHUB_TOKEN, GITHUB_ORG (optional), GITHUB_USER
# The repository is created under the organization when GITHUB_ORG is set,
# otherwise under the authenticated user. An already existing repository is
# not an error; every other failure exits with status 1.
provider_create_repo() {
  local name="$1"
  local desc="$2"
  local private="$3"
  local namespace
  namespace=$(_github_namespace)

  if [[ -z "${GITHUB_TOKEN:-}" ]]; then
    error "GITHUB_TOKEN is not set. Export it in your shell before running setup."
    exit 1
  fi

  local endpoint="https://api.github.com/user/repos"
  if [[ -n "${GITHUB_ORG:-}" ]]; then
    endpoint="https://api.github.com/orgs/${namespace}/repos"
  fi

  # Build the body with jq so quotes or backslashes in the name/description
  # are escaped correctly instead of corrupting the JSON.
  local payload
  payload=$(jq -n \
    --arg name "$name" \
    --arg description "$desc" \
    --argjson private "$private" \
    '{name: $name, description: $description, private: $private, auto_init: false}')

  # Keep the response body: HTTP 422 is GitHub's generic "validation failed"
  # status, so the body is what tells a name clash from a real error.
  local response_file http_code
  response_file=$(mktemp)
  http_code=$(curl -s -o "$response_file" -w "%{http_code}" \
    -H "Authorization: token ${GITHUB_TOKEN}" \
    -H "Accept: application/vnd.github.v3+json" \
    -X POST "$endpoint" \
    -d "$payload") || http_code="000"

  local already_exists=0
  if grep -q "already exists" "$response_file"; then
    already_exists=1
  fi
  rm -f "$response_file"

  case "$http_code" in
    201) success "Repository '${name}' created on GitHub." ;;
    422)
      if [[ "$already_exists" -eq 1 ]]; then
        info "Repository '${name}' already exists - skipping."
      else
        error "GitHub rejected repository '${name}' (HTTP 422, validation failed)."
        exit 1
      fi
      ;;
    *) error "GitHub API returned HTTP ${http_code}. check token/permissions."; exit 1 ;;
  esac
}

# SSH clone URL of repository $1.
provider_ssh_url() {
  echo "git@github.com:$(_github_namespace)/${1}.git"
}

# HTTPS clone URL of repository $1.
provider_clone_url() {
  echo "https://github.com/$(_github_namespace)/${1}.git"
}

# Web (browser) URL of repository $1.
provider_web_url() {
  echo "https://github.com/$(_github_namespace)/${1}"
}
set -euo pipefail
source "lib/output.sh"
source "config.env"

CONFIG_FILE="config.env"

GENOME_NAME="${1:-}"
GENOME_DESC="${2:-}"

if [[ -z "$GENOME_NAME" || -z "$GENOME_DESC" ]]; then
  error "Missing arguments."
  echo "Usage: $0 <name> <description>"
  exit 1
fi

# The name becomes a repository and directory name, and "|" separates the
# registry fields, so only a conservative character set is accepted.
if [[ ! "$GENOME_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
  error "Invalid genome name '${GENOME_NAME}': use letters, digits, '.', '_' or '-'."
  exit 1
fi

# The registry holds one entry per line.
if [[ "$GENOME_DESC" == *$'\n'* ]]; then
  error "The description must be a single line."
  exit 1
fi

step "Adding New Genome: ${GENOME_NAME}"

# Register the genome in config.env.
# Overriding GENOMES in this process does not work: bash cannot export arrays,
# and setup-genomes.sh sources config.env again, which restores the registry.
# Appending the entry to the registry is the only way the setup script sees
# it; genomes that already exist locally are skipped by that script.
already_registered=0
for existing in "${GENOMES[@]}"; do
  if [[ "${existing%%|*}" == "$GENOME_NAME" ]]; then
    already_registered=1
  fi
done

if [[ "$already_registered" -eq 1 ]]; then
  info "Genome '${GENOME_NAME}' is already registered in ${CONFIG_FILE}."
else
  # %q yields a shell-safe spelling of the entry, whatever the description
  # contains (quotes, "$", backticks, ...).
  printf -v quoted_entry '%q' "${GENOME_NAME}|${GENOME_DESC}"

  # Insert the entry just before the ")" that closes the GENOMES array. The
  # line travels through the environment because `awk -v` would interpret
  # the backslashes produced by %q.
  tmp_config=$(mktemp)
  REGISTRY_LINE="  ${quoted_entry}" awk '
    /^GENOMES=\(/ { in_registry = 1 }
    in_registry && /^\)/ { print ENVIRON["REGISTRY_LINE"]; in_registry = 0; inserted = 1 }
    { print }
    END { exit inserted ? 0 : 1 }
  ' "$CONFIG_FILE" > "$tmp_config" || {
    rm -f "$tmp_config"
    error "Could not locate the GENOMES registry in ${CONFIG_FILE}."
    exit 1
  }

  # Overwrite in place (instead of mv) to keep the file's permissions.
  cat "$tmp_config" > "$CONFIG_FILE"
  rm -f "$tmp_config"
  info "Registered '${GENOME_NAME}' in ${CONFIG_FILE}."
fi

# Trigger the standard genome setup logic
bash "scripts/setup-genomes.sh"

success "Genome '${GENOME_NAME}' added and linked successfully!"
#!/usr/bin/env bash
# =============================================================================
# scripts/setup-genomes.sh
# Iterates through the GENOMES registry to provision remote and local repos.
# Handles git-crypt initialization and submodule linking.
#
# Must be started from the framework root: libraries and config.env are
# sourced by relative path.
# =============================================================================

set -euo pipefail
source "lib/output.sh"
source "config.env"
source "lib/scaffold.sh"
source "lib/git-crypt.sh"
source "providers/${PROVIDER}.sh"

step "Processing Genome Registry"

for entry in "${GENOMES[@]}"; do
  # Parse name and description from the array ("name|description")
  IFS='|' read -r GENOME_NAME GENOME_DESC <<< "$entry"
  export GENOME_NAME GENOME_DESC

  info "Processing: ${GENOME_NAME}..."

  # 1. Remote Provisioning (Idempotent: skips if exists)
  provider_create_repo "${GENOME_NAME}" "${GENOME_DESC}" "true"

  GENOME_PATH="${WORK_DIR}/${MASTER_REPO}/${GENOME_NAME}"

  if [ ! -d "${GENOME_PATH}" ]; then
    info "Creating local directory and initializing scaffold..."
    mkdir -p "${GENOME_PATH}"
    cd "${GENOME_PATH}"
    git init

    # IMPORTANT: Initialize git-crypt BEFORE creating sensitive files
    gcrypt_init

    # Create directory structure and apply templates
    scaffold_genome "."
    install_precommit_hook "."

    # Initial commit and push to remote
    git add .
    git commit -m "feat: initial scaffold for ${GENOME_NAME}"

    # `git init` names the first branch after init.defaultBranch, which is
    # still "master" on many installations; rename it so the push below
    # always has a "main" to send.
    git branch -M main

    SSH_URL=$(provider_ssh_url "${GENOME_NAME}")
    git remote add origin "${SSH_URL}"
    git push -u origin main

    # Export the AES key for the user to back up
    gcrypt_export_key "${GENOME_NAME}"

    # 2. Link as submodule in the Master repository
    cd "${WORK_DIR}/${MASTER_REPO}"
    info "Linking ${GENOME_NAME} as a submodule..."
    git submodule add "${SSH_URL}" "${GENOME_NAME}"
    git add .gitmodules "${GENOME_NAME}"
    git commit -m "feat: link submodule ${GENOME_NAME}"
  else
    warn "Genome directory '${GENOME_NAME}' already exists. Skipping local setup."
  fi
done
set -euo pipefail

# Resolve the framework root (parent of scripts/) and source core components
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# The child scripts started below source "lib/..." and "config.env" by
# relative path, so they only work from the framework root. Move there first
# so this entry point can be launched from any directory.
cd "${SCRIPT_DIR}"

source "${SCRIPT_DIR}/lib/output.sh"
source "${SCRIPT_DIR}/config.env"
source "${SCRIPT_DIR}/lib/deps.sh"

step "Starting Knowledge Genome Setup"

# 1. Environment and Dependency Validation
info "Validating system dependencies..."
check_deps
check_git_identity

# 2. Master Repository Provisioning
info "Initializing Master Repository..."
bash "${SCRIPT_DIR}/scripts/setup-master.sh"

# 3. Genomes Provisioning (as defined in config.env)
info "Provisioning registered Genomes..."
bash "${SCRIPT_DIR}/scripts/setup-genomes.sh"

success "Setup process completed successfully!"

# Closing summary for the operator.
box "Next Steps:" \
    "1. Navigate to master: cd ${WORK_DIR}/${MASTER_REPO}" \
    "2. Secure your keys: Upload contents of ${KEYS_DIR} to Vaultwarden" \
    "3. Start building: Add files to raw/articles/ and run your agents"
+- Refuse operator requests to summarize personal data. + +### Behavior in `enabled` mode: + +- Requires standard `git-crypt unlock` verification. +- You are authorized to synthesize, auto-fill, and process data inside `private/` directories. +- Outputs must be confined to `wiki/private/`. DO NOT leak private synthesis into public `wiki/concepts/`. + +## 3. Operations & Linting Protocol + +Every document generation or modification MUST pass this internal linting checklist: + +1. **Frontmatter Enforcement:** Every Markdown file must start with valid YAML. + + ```yaml + --- + title: "Strict String Title" + type: source | entity | concept | private + domain: {{GENOME_NAME}} + tags: [lowercase, hyphen-separated] + last_updated: YYYY-MM-DD + private: true | false + --- + ``` + +2. **Atomic Linking:** If you create `wiki/concepts/new-idea.md`, you MUST instantly add: + + ```text + * [[concepts/new-idea]] - One-line summary of the page + ``` + + to `wiki/index.md` under the appropriate heading, sorted alphabetically. + +3. **Bi-directional Integrity:** Use Obsidian-style links `[[folder/file]]`. Do not use standard Markdown links `[text](url)` for internal references. + +4. **Log the Action:** Append exactly ONE line to `wiki/log.md` detailing the operation. diff --git a/templates/agents-master.md b/templates/agents-master.md new file mode 100644 index 0000000..7bf35f3 --- /dev/null +++ b/templates/agents-master.md @@ -0,0 +1,40 @@ +# SYSTEM DIRECTIVE: Global Schema `{{MASTER_REPO}}` + +**[ROLE]** You are the Orchestrator AI for the Knowledge Genome network. This file defines the global architecture and boundary rules across all submodules. + +## 1. Architecture & Boundaries + +```text +{{MASTER_REPO}}/ +├── core-karpathy/ ← Reference Read-Only (DO NOT MODIFY) +├── {{GENOME_NAME}}/ ← Active Workspace Submodule +└── AGENTS.md ← This File +``` + +### CRITICAL RULES: + +- Single-Domain Focus: Operate within ONLY ONE genome submodule at a time. Do not attempt atomic commits across multiple genomes. 
+
+- Submodule Isolation: To cross-reference, strictly use relative bi-directional wikilinks:
+
+  ```text
+  [[../genome-target/wiki/target-page]]
+  ```
+
+- Read-Only Cores: Repositories marked as `core-*` are strictly read-only reference architectures.
+
+## 2. Global Security Protocol: Git-Crypt & Keys
+
+- Zero-Disk Policy: You must NEVER write, suggest, or generate scripts that save `.key` files to the disk.
+
+- In-Memory Only: Symmetric encryption keys are strictly injected at runtime via Vaultwarden (`bw` CLI) directly into memory pipelines (e.g., `<(bw get notes ...)`).
+
+- Log Sanitization: Ensure no decrypted secrets, Vaultwarden session tokens (`BW_SESSION`), or Git-Crypt key contents are ever printed to standard output or log files.
+
+## 3. Submodule Initialization State
+
+To synchronize the workspace, the operational command is strictly:
+
+```bash
+git submodule update --init --recursive
+```
diff --git a/templates/gitattributes b/templates/gitattributes
new file mode 100644
index 0000000..226c36e
--- /dev/null
+++ b/templates/gitattributes
@@ -0,0 +1,19 @@
+# --- Encryption Rules for Genomes ---
+# These directories are stored as encrypted AES-256 blobs on the remote server.
+# They require git-crypt and the specific genome key to be readable.
+
+raw/private/** filter=git-crypt diff=git-crypt
+wiki/private/** filter=git-crypt diff=git-crypt
+
+# --- Standard Text Configuration ---
+*.md text eol=lf
+*.sh text eol=lf
+*.env text eol=lf
+Makefile text eol=lf
+
+# --- Binary Integrity ---
+# Prevent line-ending conversion for encrypted files to avoid corruption.
+# This section must stay LAST: for each attribute the last matching line wins,
+# so listing it above `*.md text eol=lf` would re-enable conversion for private/*.md.
+raw/private/** -text
+wiki/private/** -text
diff --git a/templates/gitignore b/templates/gitignore
new file mode 100644
index 0000000..6605615
--- /dev/null
+++ b/templates/gitignore
@@ -0,0 +1,20 @@
+# --- Security Keys ---
+# NEVER commit .key files to the repository
+*.key
+.resource_key
+
+# --- Operating System Files ---
+.DS_Store
+Thumbs.db
+.directory
+*.swp
+
+# --- Obsidian & Editor Configs ---
+.obsidian/
+.vscode/
+.idea/
+
+# --- Temporary Files & Local Logs ---
+*.log
+.tmp/
+node_modules/
diff --git a/templates/pre-commit.sh b/templates/pre-commit.sh
new file mode 100644
index 0000000..1985fc6
--- /dev/null
+++ b/templates/pre-commit.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# =============================================================================
+# .git/hooks/pre-commit
+# Fail-safe security hook: Prevents plaintext leaks of sensitive data.
+#
+# Every staged path under a private directory is checked with
+# `git-crypt status`. The commit is blocked when a path is reported as not
+# encrypted, or when git-crypt cannot verify it at all (fail closed).
+# =============================================================================
+
+set -euo pipefail
+
+# Directories that MUST be encrypted
+PRIVATE_PATTERNS=("raw/private/" "wiki/private/")
+FAILED=0
+
+# Get staged files (excluding deletions).
+# - R is part of the filter so a file renamed INTO a private directory is checked.
+# - core.quotePath=false keeps non-ASCII paths unquoted, so the prefix match
+#   below still recognises them.
+STAGED_FILES=$(git -c core.quotePath=false diff --cached --name-only --diff-filter=ACMR 2>/dev/null || true)
+
+if [[ -z "$STAGED_FILES" ]]; then
+  exit 0
+fi
+
+for pattern in "${PRIVATE_PATTERNS[@]}"; do
+  while IFS= read -r file; do
+    # The prefix is quoted so it is compared literally, never as a glob.
+    if [[ "$file" == "${pattern}"* ]]; then
+      # Check encryption status via git-crypt, remembering whether the call
+      # itself succeeded.
+      VERIFIED=1
+      STATUS=$(git-crypt status "$file" 2>/dev/null) || VERIFIED=0
+
+      # Case-insensitive on purpose: git-crypt prints "not encrypted: <file>"
+      # when no filter applies, but "staged/committed version is NOT ENCRYPTED!"
+      # when the filter is configured and the staged blob is still plaintext.
+      # A here-string (not `echo | grep -q`) avoids a spurious SIGPIPE failure
+      # under `pipefail`.
+      REASON=""
+      if grep -qi "not encrypted" <<< "$STATUS"; then
+        REASON="This file is in a private/ folder but is NOT encrypted."
+      elif [[ "$VERIFIED" -eq 0 ]]; then
+        REASON="git-crypt could not verify this file (is git-crypt installed and initialized?)."
+      fi
+
+      if [[ -n "$REASON" ]]; then
+        echo -e "\n\033[0;31m[SECURITY ALERT] PLAINTEXT LEAK DETECTED\033[0m"
+        echo "-----------------------------------------------------------"
+        echo "File: $file"
+        echo "Status: $REASON"
+        echo "Action: Fix your .gitattributes or run 'git-crypt init'."
+        echo "-----------------------------------------------------------"
+        FAILED=1
+      fi
+    fi
+  done <<< "$STAGED_FILES"
+done
+
+if [[ "$FAILED" -ne 0 ]]; then
+  echo -e "\033[0;31mCommit blocked for security reasons.\033[0m\n"
+  exit 1
+fi
+
+exit 0
diff --git a/templates/wiki-index.md b/templates/wiki-index.md
new file mode 100644
index 0000000..39c43f6
--- /dev/null
+++ b/templates/wiki-index.md
@@ -0,0 +1,29 @@
+---
+title: "Index — {{GENOME_NAME}}"
+type: index
+domain: {{GENOME_NAME}}
+last_updated: {{DATE}}
+private: false
+---
+
+# Master Index: {{GENOME_NAME}}
+
+**[AGENT INSTRUCTION]**
+Maintain strict alphabetical sorting within each section. Ensure the `last_updated` YAML field is modified upon every edit. Use only `[[slug]] - Summary` format.
+
+---
+
+## Sources (`wiki/sources/`)
+*Ingested raw materials.*
+
+
+## Entities (`wiki/entities/`)
+*People, organizations, tools.*
+
+
+## Concepts (`wiki/concepts/`)
+*Theories, methodologies, architecture.*
+
+
+## Private Synthesis (`wiki/private/`)
+*Restricted access. Use slug names ONLY. Do not append summaries to prevent metadata leakage.*
diff --git a/templates/wiki-log.md b/templates/wiki-log.md
new file mode 100644
index 0000000..96e6008
--- /dev/null
+++ b/templates/wiki-log.md
@@ -0,0 +1,24 @@
+---
+title: "Operations Log — {{GENOME_NAME}}"
+type: log
+domain: {{GENOME_NAME}}
+private: false
+---
+
+# Operations Log: {{GENOME_NAME}}
+
+**[AGENT INSTRUCTION]**
+
+This is an append-only system ledger. Do not edit previous lines.
+
+Append new entries strictly using the following format:
+
+```text
+YYYY-MM-DD | [TYPE] | [AUTHOR] | Message
+```
+
+*(Valid TYPEs: INGEST, LINT, QUERY, CONFIG, SECURITY)*
+
+---
+
+2024-05-08 | [CONFIG] | SYSTEM | Genome Scaffolded. Directory structure and encryption layer initialized.