knowledge-genome-orchestrator/skills/ingest/scripts/index-append.py

124 lines
4.3 KiB
Python
Executable file

#!/usr/bin/env python3
# =============================================================================
# skills/ingest/scripts/index-append.py
# Insert an entry line into the correct section of wiki/index.md and keep that
# section's entries alphabetically ordered. If the section already holds an
# entry for the same wikilink, that entry is replaced instead of duplicated.
# Bumps frontmatter last_updated.
#
# Usage:
#   index-append.py --section Sources \
#       --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
# =============================================================================
import argparse
import datetime
import re
import sys
# A line that opens an index entry: a list item beginning with a wikilink.
ENTRY_RE = re.compile(r"^- \[\[")
# Captures the wikilink body (the text between "[[" and "]]") of an entry line.
LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
# A level-2 markdown heading; these delimit the sections of the index.
HEADER_RE = re.compile(r"^## ")


def _link_target(line: str):
    """Return the wikilink target of an entry line, or None if it has none.

    A display alias (``[[target|alias]]``) is dropped so that two lines
    pointing at the same page compare equal regardless of how they are
    labelled.
    """
    m = LINK_RE.match(line)
    if m is None:
        return None
    return m.group(1).split("|", 1)[0].strip()


def _bump_last_updated(lines: list, today: str) -> str:
    """Set ``last_updated`` in the leading frontmatter of *lines*, in place.

    Frontmatter is only recognised when ``---`` is the first non-blank line
    of the file; a ``---`` further down is a horizontal rule and must not be
    touched.

    Returns a status string:
      "bumped"       -- an existing ``last_updated:`` line was rewritten
      "inserted"     -- the key was missing and was added before the closing ---
      "unterminated" -- an opening --- was found but never closed; no change
      "missing"      -- the file has no leading frontmatter; no change
    """
    first = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    if first is None or lines[first].strip() != "---":
        return "missing"

    close = next(
        (i for i in range(first + 1, len(lines)) if lines[i].strip() == "---"),
        None,
    )
    if close is None:
        return "unterminated"

    bumped = False
    for i in range(first + 1, close):
        if lines[i].startswith("last_updated:"):
            lines[i] = f"last_updated: {today}"
            bumped = True
    if bumped:
        return "bumped"

    # Self-heal: frontmatter present but missing the key.
    lines.insert(close, f"last_updated: {today}")
    return "inserted"


def _find_section(lines: list, section: str):
    """Return ``(start, end)`` for the named section, or None if absent.

    ``start`` is the index of the ``## `` heading line and ``end`` is the
    index of the next ``## `` heading (or ``len(lines)``). A heading whose
    text equals *section* wins; otherwise the first heading whose text merely
    starts with *section* is used, so headings carrying a suffix still match.
    """
    wanted = section.strip()
    prefix_hit = None
    start = None
    for i, ln in enumerate(lines):
        if not HEADER_RE.match(ln):
            continue
        title = ln[3:]
        if title.strip() == wanted:
            start = i
            break
        if prefix_hit is None and title.startswith(section):
            prefix_hit = i
    if start is None:
        start = prefix_hit
    if start is None:
        return None

    end = next(
        (i for i in range(start + 1, len(lines)) if HEADER_RE.match(lines[i])),
        len(lines),
    )
    return start, end


def _merge_entry(body: list, entry: str):
    """Return the sorted entry list of *body* with *entry* merged in.

    Returns None when *entry* is already present verbatim and nothing needs
    to be written.
    """
    entries = [ln for ln in body if ENTRY_RE.match(ln)]
    target = _link_target(entry)

    if target is not None:
        # Deduplicate by wikilink target, not by exact line: a re-ingest with
        # a changed summary/maturity updates the existing entry.
        for idx, ln in enumerate(entries):
            if _link_target(ln) == target:
                if ln == entry:
                    return None
                entries[idx] = entry
                break
        else:
            entries.append(entry)
    else:
        # No parseable wikilink: fall back to exact-line dedup. Check the
        # whole body, because such a line is not classified as an entry when
        # the file is read back and would otherwise be appended on every run.
        if entry in body:
            return None
        entries.append(entry)

    entries.sort(key=str.casefold)
    return entries


def main(argv=None) -> int:
    """Insert or refresh one entry line in a section of the wiki index.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` when None.

    Returns:
        0 when the entry was written or was already present; 1 when the index
        file or the requested section does not exist.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--section", required=True,
                    help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts")
    ap.add_argument("--entry", required=True, help="Full index line to insert")
    ap.add_argument("--file", default="wiki/index.md")
    args = ap.parse_args(argv)

    try:
        with open(args.file, encoding="utf-8") as fh:
            lines = fh.read().splitlines()
    except FileNotFoundError:
        print(f"index-append: not found: {args.file}", file=sys.stderr)
        return 1

    # 1. Bump last_updated inside the leading frontmatter block.
    today = datetime.date.today().isoformat()
    status = _bump_last_updated(lines, today)
    if status == "missing":
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
    elif status == "unterminated":
        print("index-append: warning: frontmatter is not closed, last_updated not bumped",
              file=sys.stderr)
    elif status == "inserted":
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)

    # 2. Locate the target section [start, end).
    span = _find_section(lines, args.section)
    if span is None:
        print(f"index-append: section '{args.section}' not found in {args.file}",
              file=sys.stderr)
        return 1
    start, end = span

    # 3. Merge the entry into the section's entry list.
    body = lines[start + 1:end]
    entries = _merge_entry(body, args.entry)
    if entries is None:
        print("index-append: entry already present, skipping")
        return 0

    # Everything in the section that is not an entry (blank lines, comments)
    # is kept above the entries; trailing blanks are dropped so exactly one
    # blank line separates the intro from the entry list.
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    while intro and not intro[-1].strip():
        intro.pop()

    lines[start + 1:end] = intro + [""] + entries + [""]
    with open(args.file, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines) + "\n")
    print(f"index-append: added to {args.section}")
    return 0
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())