feat: Implement shared page generation helpers and frontmatter title support
This commit is contained in:
parent
ed63895fea
commit
cdab1e089e
1 changed files with 84 additions and 51 deletions
|
|
@ -57,7 +57,84 @@ if not source_text.strip():
|
|||
die("preflight", "source is empty: " + raw_rel)
|
||||
|
||||
|
||||
# --- the semantic contract (authoritative copy; SKILL.md documents it) ---
|
||||
# --- load slugs already listed in the wiki index (used to avoid duplicates) ---
existing_entities = set()
existing_concepts = set()
if os.path.isfile("wiki/index.md"):
    try:
        with open("wiki/index.md", "r", encoding="utf-8") as f:
            idx_text = f.read()
        # the index links pages as [[entities/<slug>]] and [[concepts/<slug>]]
        existing_entities.update(
            hit.group(1)
            for hit in re.finditer(r"\[\[entities/([a-z0-9\-]+)\]\]", idx_text))
        existing_concepts.update(
            hit.group(1)
            for hit in re.finditer(r"\[\[concepts/([a-z0-9\-]+)\]\]", idx_text))
    except Exception:
        # best effort: an unreadable index just means we continue with
        # whatever slugs were collected so far
        pass
|
||||
|
||||
|
||||
def slugify(s):
    """Turn arbitrary text into a lowercase, hyphen-separated slug.

    Every run of characters outside ``a-z0-9`` becomes a single hyphen,
    leading and trailing hyphens are dropped, and an empty result (or a
    falsy input) falls back to ``"untitled"``.
    """
    lowered = (s or "").strip().lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    slug = re.sub(r"-+", "-", hyphenated).strip("-")
    if not slug:
        return "untitled"
    return slug
|
||||
|
||||
|
||||
def twords(s, n=20):
    """Collapse whitespace and cap the text at n words; used for index entry summaries.

    When the text is longer than n words the first n are kept and "…" is
    appended; otherwise the whitespace-normalised text is returned as is.
    """
    flat = " ".join((s or "").split())
    words = flat.split(" ")
    if len(words) > n:
        return " ".join(words[:n]) + "…"
    return flat
|
||||
|
||||
|
||||
def yaml_dq(s):
    r"""Render a value as a single-line YAML double-quoted scalar.

    Titles can contain characters that break a bare scalar: most commonly a
    colon-space ('Conflict: X' would parse as a mapping), but also '#', a
    leading '-' or '?', quotes, and so on. Double-quoting makes such a title
    valid YAML.

    Steps, in order:
      1. Runs of whitespace (newlines and tabs included) collapse to one
         space so the scalar stays on one line.
      2. Backslash and double-quote characters are backslash-escaped.
      3. Remaining control characters, which YAML does not allow as literal
         characters, are written as \xNN escapes (\uNNNN for U+FFFE/U+FFFF).

    Args:
        s: The text to quote; None or an empty string yields an empty scalar.

    Returns:
        The quoted scalar, including the surrounding double quotes.
    """
    text = " ".join((s or "").split())
    # Escape backslashes first so the escapes added below are not doubled.
    text = text.replace("\\", "\\\\").replace('"', '\\"')
    pieces = []
    for ch in text:
        code = ord(ch)
        if code < 0x20 or 0x7F <= code <= 0x9F:
            # C0/C1 controls and DEL are non-printable in YAML.
            pieces.append(f"\\x{code:02x}")
        elif code in (0xFFFE, 0xFFFF):
            pieces.append(f"\\u{code:04x}")
        else:
            pieces.append(ch)
    return '"' + "".join(pieces) + '"'
|
||||
|
||||
|
||||
def frontmatter(ptype, title, tags):
    """Build the YAML frontmatter block for a page, title included.

    Falsy tags are dropped; the rest are de-duplicated and sorted, then
    rendered as a flow sequence. The title goes through yaml_dq; `genome`
    and `TODAY` are read from module scope.
    """
    unique_tags = sorted({t for t in tags if t})
    taglist = "[" + ", ".join(unique_tags) + "]"
    parts = [
        "---\n",
        f"title: {yaml_dq(title)}\n",
        f"type: {ptype}\n",
        f"domain: {genome}\n",
        "maturity: draft\n",
        f"last_updated: {TODAY}\n",
        "private: false\n",
        f"tags: {taglist}\n",
        "---\n",
    ]
    return "".join(parts)
|
||||
|
||||
|
||||
def write_new(path, ptype, title, body, tags):
    """Create (or overwrite) a page file: frontmatter, an H1 title, then the body.

    Args:
        path: Destination file path; missing parent directories are created.
        ptype: Page type, passed through to the frontmatter.
        title: Page title, used in the frontmatter and as the H1 heading.
        body: Markdown text written below the heading.
        tags: Iterable of tag strings for the frontmatter.
    """
    parent = os.path.dirname(path)
    # A bare filename has no directory part, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(frontmatter(ptype, title, tags))
        f.write(f"\n# {title}\n\n{body}\n")
|
||||
|
||||
|
||||
def append_section(path, source_slug, body):
    """Add a source-attributed section to an existing page and refresh its date.

    The section is appended rather than merged, so earlier content is never
    overwritten. Afterwards the first `last_updated:` line in the file is
    rewritten to TODAY; any failure in that second step is ignored.
    """
    with open(path, "a", encoding="utf-8") as out:
        out.write(f"\n\n## From [[sources/{source_slug}]]\n\n{body}\n")
    # best effort only: the new section has already been written at this point
    try:
        with open(path, "r", encoding="utf-8") as src:
            current = src.read()
        bumped = re.sub(r"(?m)^last_updated:.*$", "last_updated: " + TODAY,
                        current, count=1)
        with open(path, "w", encoding="utf-8") as dst:
            dst.write(bumped)
    except Exception:
        pass
|
||||
|
||||
|
||||
# --- the semantic contract ---
|
||||
SYSTEM_PROMPT = """You perform the SEMANTIC PASS of a single source into a knowledge wiki.
|
||||
Read the source and return ONLY structured data describing what it contains.
|
||||
You do not write files, you do not produce frontmatter, and you do not invent
|
||||
|
|
@ -152,51 +229,6 @@ def call_model():
|
|||
die("model", "model did not return valid JSON: " + str(e))
|
||||
|
||||
|
||||
# --- conform helpers (the script OWNS all structure) ---
|
||||
def slugify(s):
    """Normalise text into a slug made of lowercase ASCII letters, digits and hyphens.

    Anything that is not ``a-z0-9`` is replaced by a hyphen, hyphen runs are
    squeezed, edge hyphens are trimmed, and ``"untitled"`` is returned when
    nothing is left.
    """
    text = s or ""
    text = text.strip().lower()
    text = re.sub(r"[^a-z0-9]+", "-", text)
    text = re.sub(r"-+", "-", text)
    return text.strip("-") or "untitled"
|
||||
|
||||
|
||||
def twords(s, n=12):
    """Return the whitespace-normalised text, cut to n words with a trailing "…" if longer."""
    normalised = " ".join((s or "").split())
    tokens = normalised.split(" ")
    fits = len(tokens) <= n
    return normalised if fits else " ".join(tokens[:n]) + "…"
|
||||
|
||||
|
||||
def frontmatter(ptype, tags):
    """Assemble the YAML frontmatter block for a page of the given type.

    Falsy tags are discarded and the remainder de-duplicated and sorted
    before being written as a flow sequence. `genome` and `TODAY` come from
    module scope.
    """
    kept = sorted(set(filter(None, tags)))
    taglist = "[" + ", ".join(kept) + "]"
    header = [
        "---\n",
        f"type: {ptype}\n",
        f"domain: {genome}\n",
        "maturity: draft\n",
        f"last_updated: {TODAY}\n",
        "private: false\n",
        f"tags: {taglist}\n",
        "---\n",
    ]
    return "".join(header)
|
||||
|
||||
|
||||
def write_new(path, ptype, title, body, tags):
    """Create (or overwrite) a page file: frontmatter, an H1 title, then the body.

    Args:
        path: Destination file path; missing parent directories are created.
        ptype: Page type, passed through to the frontmatter.
        title: Page title, written as the H1 heading.
        body: Markdown text written below the heading.
        tags: Iterable of tag strings for the frontmatter.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, which is what a bare
    # filename would produce, so skip directory creation in that case.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(frontmatter(ptype, tags))
        f.write(f"\n# {title}\n\n{body}\n")
|
||||
|
||||
|
||||
def append_section(path, source_slug, body):
    """Append a section attributed to the given source, then bump `last_updated`.

    Existing page content is kept; the new material goes under a
    "From [[sources/<slug>]]" heading at the end of the file. Updating the
    first `last_updated:` line to TODAY is attempted afterwards and any
    error while doing so is swallowed.
    """
    with open(path, "a", encoding="utf-8") as page:
        page.write(f"\n\n## From [[sources/{source_slug}]]\n\n{body}\n")
    try:
        # re-read the whole page, patch the date line, write it back
        with open(path, "r", encoding="utf-8") as page:
            before = page.read()
        after = re.sub(r"(?m)^last_updated:.*$", "last_updated: " + TODAY,
                       before, count=1)
        with open(path, "w", encoding="utf-8") as page:
            page.write(after)
    except Exception:
        pass
|
||||
|
||||
|
||||
# --- run the semantic pass ---
|
||||
# Result of the model call; later code reads it with .get(), so it is
# presumably a dict parsed from the model's JSON reply (confirm in call_model).
sem = call_model()
|
||||
# Slug for the source page: the raw file's base name without its extension.
source_slug = slugify(os.path.splitext(os.path.basename(raw_rel))[0])
|
||||
|
|
@ -210,14 +242,15 @@ src_body = (sem.get("source_summary") or "").strip()
|
|||
# Add a "Key points" section only when there are key-point lines to show.
if kp_lines:
|
||||
src_body += "\n\n## Key points\n\n" + kp_lines
|
||||
# Always end the body with a wiki link back to the raw source file.
src_body += f"\n\n## Source\n\n- [[{raw_rel}]]\n"
|
||||
# Tags for the source page: slugs of entity names followed by concept names,
# capped at the first 8.
# NOTE(review): this assignment appears twice in this view; they look like the
# before/after sides of the same diff hunk. Confirm which one is live.
src_tags = ([slugify(e.get("name", "")) for e in sem.get("entities", [])]
|
||||
+ [slugify(c.get("name", "")) for c in sem.get("concepts", [])])[:8]
|
||||
# Resolve the title once (model-supplied title, else the slug) so that the
# frontmatter, the H1 heading and the index summary can all use the same value.
src_title = sem.get('source_title') or source_slug
|
||||
src_tags = ([slugify(e.get("name", "")) for e in sem.get("entities", [])]
|
||||
+ [slugify(c.get("name", "")) for c in sem.get("concepts", [])])[:8]
|
||||
os.makedirs("wiki/sources", exist_ok=True)
|
||||
# Write the source page: frontmatter first, then the H1 title and the body.
with open(src_path, "w", encoding="utf-8") as f:
|
||||
# NOTE(review): two frontmatter/heading write pairs follow. The first pair uses
# the two-argument frontmatter() and recomputes the title inline; the second
# uses the title-aware signature and src_title. Presumably old vs. new side of
# the diff; verify before relying on either.
f.write(frontmatter("source", src_tags))
|
||||
f.write(f"\n# {sem.get('source_title') or source_slug}\n\n{src_body}\n")
|
||||
f.write(frontmatter("source", src_title, src_tags))
|
||||
f.write(f"\n# {src_title}\n\n{src_body}\n")
|
||||
# Record the page for later use; the summary is the title truncated by twords().
pages.append({"path": src_path,
|
||||
"summary": twords(sem.get("source_title") or source_slug),
|
||||
"summary": twords(src_title),
|
||||
"maturity": "draft", "status": src_status})
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue