diff --git a/skills/ingest/scripts/ingest-semantic.py b/skills/ingest/scripts/ingest-semantic.py
index 09991e0..fec6582 100755
--- a/skills/ingest/scripts/ingest-semantic.py
+++ b/skills/ingest/scripts/ingest-semantic.py
@@ -20,7 +20,7 @@
 #
 # Emits a single JSON status line on stdout (for n8n / logs).
 # =============================================================================
-import json, os, re, sys, datetime, urllib.request, urllib.error
+import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error
 
 # --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
@@ -257,7 +257,22 @@ def call_model():
 
 # --- run the semantic pass ---
 sem = call_model()
-source_slug = slugify(os.path.splitext(os.path.basename(raw_rel))[0])
+
+# Source of truth: slug from slug.sh --raw (deterministic, path-aware, collision-proof).
+# A non-zero exit from slug.sh raises CalledProcessError and aborts the run.
+source_slug = subprocess.check_output(
+    ["bash", os.path.join(os.path.dirname(__file__), "slug.sh"),
+     "--raw", raw_rel],
+    text=True,
+).strip()
+if not source_slug:
+    # Defensive: never continue with an empty slug, even if slug.sh exits 0.
+    sys.exit(f"slug.sh --raw returned an empty slug for {raw_rel}")
+
+# SHA-256 of the raw source bytes, recorded in the source page frontmatter.
+with open(raw_rel, "rb") as f:
+    src_sha = hashlib.sha256(f.read()).hexdigest()
+
 pages = []
 
 # 1. source page — canonical summary of THIS source (re)written
@@ -273,7 +288,17 @@ src_tags = ([slugify(e.get("name", "")) for e in sem.get("entities", [])] +
             [slugify(c.get("name", "")) for c in sem.get("concepts", [])])[:8]
 os.makedirs("wiki/sources", exist_ok=True)
+# Build the frontmatter before opening the file: open(..., "w") truncates, so a
+# failure after that point would leave an existing source page empty.
+fm = frontmatter("source", src_title, src_tags)
+# Inject tracking fields just before the closing '---' delimiter.
+# NOTE(review): raw_rel is written as a plain YAML scalar; a path containing
+# ": " or " #" would need quoting - confirm what readers of source_path expect.
+fm_head, fm_close, fm_tail = fm.rpartition("\n---\n")
+if not fm_close:
+    sys.exit("frontmatter() output has no closing '---' line; cannot add source_path/source_sha256")
+fm = f"{fm_head}\nsource_path: {raw_rel}\nsource_sha256: {src_sha}\n---\n{fm_tail}"
 with open(src_path, "w", encoding="utf-8") as f:
-    f.write(frontmatter("source", src_title, src_tags))
+    f.write(fm)
     f.write(f"\n# {src_title}\n\n{src_body}\n")
 pages.append({"path": src_path,
               "summary": twords(src_title),
diff --git a/skills/ingest/scripts/run-ingest.sh b/skills/ingest/scripts/run-ingest.sh
index cb998cd..07197b2 100755
--- a/skills/ingest/scripts/run-ingest.sh
+++ b/skills/ingest/scripts/run-ingest.sh
@@ -53,7 +53,7 @@ contradictions="$(jq -r '.contradictions // "None"' "$manifest")"
 
 [[ -n "$raw_source" && "$raw_source" != "null" ]] || fail "manifest" "raw_source missing"
 
-slug="$(bash "${SCRIPTS}/slug.sh" "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"
+slug="$(bash "${SCRIPTS}/slug.sh" --raw "$raw_source")" || fail "slug" "empty or invalid slug for ${raw_source}"
 
 # --- collect touched paths ---
 mapfile -t created_paths < <(jq -r '.pages[] | select(.status=="created") | .path' "$manifest")
diff --git a/tests/ingest-semantic.bats b/tests/ingest-semantic.bats
new file mode 100644
index 0000000..11e48b6
--- /dev/null
+++ b/tests/ingest-semantic.bats
@@ -0,0 +1,29 @@
+#!/usr/bin/env bats
+
+setup() {
+  load 'helpers'
+  source "$LIB_DIR/output.sh"
+  source "$LIB_DIR/lint.sh"
+}
+
+@test "lint tolerates source_path/source_sha256 in source frontmatter" {
+  G="$(make_fixture_genome)"
+  mkdir -p "$G/wiki/sources"
+  cat > "$G/wiki/sources/test-source.md" <<'EOFMD'
+---
+title: "Test Source"
+type: source
+domain: genome-test
+maturity: draft
+last_updated: 2026-06-25
+private: false
+tags: [test]
+source_path: raw/articles/test.md
+source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+---
+# Test Source
+body
+EOFMD
+  run lint_markdown_file "$G/wiki/sources/test-source.md" genome-test
+  [ "$status" -eq 0 ]
+}
diff --git a/tests/run-ingest.bats b/tests/run-ingest.bats
index 6b7c30b..b478743 100644
--- a/tests/run-ingest.bats
+++ b/tests/run-ingest.bats
@@ -171,3 +171,41 @@ EOF
   [ "$status" -eq 0 ]
   [[ "$output" == *"develop"* ]]
 }
+
+@test "run-ingest: branch name matches slug.sh --raw for nested raw paths" {
+  command -v jq >/dev/null 2>&1 || skip "jq not installed"
+  G="$(make_fixture_genome)"; cd "$G"
+  mkdir -p wiki/sources
+  cat > wiki/sources/cibo-il-pane.md <<'EOFMD'
+---
+title: "Il Pane"
+type: source
+domain: genome-test
+tags: [cibo]
+maturity: draft
+last_updated: 2026-06-25
+private: false
+source_path: raw/articles/cibo/il-pane.md
+source_sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+---
+# Il Pane
+body
+EOFMD
+  cat > .ingest-manifest.json <<'EOFJSON'
+{
+  "raw_source": "raw/articles/cibo/il-pane.md",
+  "model": "qwen3.5-9b",
+  "reasoning": "Ingest.",
+  "pr_summary": "Ingest summary.",
+  "contradictions": "None",
+  "pages": [
+    {"path": "wiki/sources/cibo-il-pane.md", "summary": "Summary.", "maturity": "draft", "status": "created"}
+  ]
+}
+EOFJSON
+  export KG_LIB_DIR="$LIB_DIR"
+  export FORGEJO_URL="http://forgejo.local" FORGEJO_USER="u" FORGEJO_TOKEN="t" DRY_RUN=1
+  run bash "$SKILL_SCRIPTS/run-ingest.sh" genome-test
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"cibo-il-pane"* ]]
+}