feat(ingest): allow index-append.py to remove entries
This commit is contained in:
parent
95b3866549
commit
990118de71
1 changed files with 114 additions and 86 deletions
|
|
@ -1,11 +1,12 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# skills/ingest/scripts/index-append.py
|
# skills/ingest/scripts/index-append.py
|
||||||
# Insert an entry line into the correct section of wiki/index.md and keep that
|
# Insert OR remove an entry line in wiki/index.md, keeping the target section
|
||||||
# section's entries alphabetically ordered. Bumps frontmatter last_updated.
|
# alphabetically ordered. Bumps frontmatter last_updated.
|
||||||
#
|
#
|
||||||
# index-append.py --section Sources \
|
# index-append.py --section Sources \
|
||||||
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
|
# --entry '- [[sources/foo]] — One-line summary. `maturity: draft`'
|
||||||
|
# index-append.py --remove 'sources/foo' # delete the entry by wikilink
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
import argparse
|
import argparse
|
||||||
import datetime
|
import datetime
|
||||||
|
|
@ -17,14 +18,116 @@ LINK_RE = re.compile(r"^- \[\[([^\]]+)\]\]")
|
||||||
HEADER_RE = re.compile(r"^## ")
|
HEADER_RE = re.compile(r"^## ")
|
||||||
|
|
||||||
|
|
||||||
|
def bump_last_updated(lines, today):
    """Set ``last_updated`` to *today* inside the leading frontmatter block.

    The frontmatter block is the span between a ``---`` fence on the first
    non-blank line and the next ``---`` fence. A ``---`` that appears later
    in the document (e.g. a Markdown thematic break) is NOT treated as
    frontmatter, so body text is never rewritten.

    Args:
        lines: The document as a list of lines without trailing newlines.
            Mutated in place.
        today: The value written after ``last_updated: `` (the caller passes
            an ISO date string).

    Returns:
        None. Diagnostics go to stderr:
        * no leading fence    -> warning, nothing changed;
        * fence never closed  -> warning, nothing changed;
        * key missing         -> the key is inserted just before the closing
          fence (self-heal) and a notice is printed.
    """
    stamp = f"last_updated: {today}"

    # Frontmatter only counts when its opening fence is the first non-blank line.
    first = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    if first is None or lines[first].strip() != "---":
        print("index-append: warning: no frontmatter found, last_updated not bumped",
              file=sys.stderr)
        return

    close = next(
        (i for i in range(first + 1, len(lines)) if lines[i].strip() == "---"),
        None,
    )
    if close is None:
        # Without a closing fence we cannot tell metadata from body text, so
        # touching any "last_updated:" line could corrupt the document.
        print("index-append: warning: frontmatter is not closed, last_updated not bumped",
              file=sys.stderr)
        return

    bumped = False
    for i in range(first + 1, close):
        if lines[i].startswith("last_updated:"):
            lines[i] = stamp
            bumped = True

    if not bumped:
        # Self-heal: frontmatter exists but lacks the key; add it before the close.
        lines.insert(close, stamp)
        print("index-append: last_updated key was missing — inserted", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def do_remove(lines, link, today):
    """Return a copy of *lines* without the entries whose wikilink is *link*.

    An entry is a line that LINK_RE matches (``- [[target]]`` at the start of
    the line); it is dropped when the captured target equals *link* exactly.
    Every matching line is removed, in whichever section it appears.

    The operation is idempotent: when no entry matches, the returned lines
    are identical to the input and ``last_updated`` is NOT bumped, so a
    repeated removal does not change the document.

    Args:
        lines: The document as a list of lines. Not modified.
        link: Wikilink target to remove, e.g. ``sources/foo``.
        today: Value handed to ``bump_last_updated`` when something was removed.

    Returns:
        A new list of lines.
    """
    kept = []
    removed = 0
    for ln in lines:
        m = LINK_RE.match(ln)
        if m and m.group(1) == link:
            removed += 1
        else:
            kept.append(ln)

    if not removed:
        # Idempotent: the goal state (entry absent) already holds.
        print(f"index-append: [[{link}]] not present, nothing to remove")
        return kept

    # Only a real change is worth a new timestamp; bump the result, not the
    # caller's list, so the input is left untouched.
    bump_last_updated(kept, today)
    print(f"index-append: removed [[{link}]] ({removed} line(s))")
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def do_append(lines, section, entry, today):
    """Insert or refresh *entry* in *section* and keep its entries sorted.

    The section starts at the first ``## `` header whose text begins with
    *section* and runs up to the next ``## `` header (or end of file). Its
    body is split into entry lines (those ENTRY_RE matches) and the rest
    ("intro"); the rebuilt section is intro, a blank line, the entries sorted
    case-insensitively, and a blank line.

    Deduplication is by wikilink target when *entry* has one (per LINK_RE):
    an existing entry with the same target is replaced, so a re-ingest with a
    changed summary updates the line instead of duplicating it. Without a
    parseable wikilink, deduplication falls back to exact-line comparison.

    ``last_updated`` is bumped only when the document actually changes; on the
    "already present" path the input is returned unchanged.

    Args:
        lines: The document as a list of lines. Not modified.
        section: Section name to match against ``## `` headers (prefix match).
        entry: The full index line to insert.
        today: Value handed to ``bump_last_updated`` when the document changes.

    Returns:
        The resulting list of lines (*lines* itself when nothing changed), or
        None when the section does not exist.
    """
    # Locate the target section [start, end).
    # NOTE: this is a prefix match, so a short name selects the first header
    # that starts with it.
    start = next(
        (i for i, ln in enumerate(lines)
         if HEADER_RE.match(ln) and ln[3:].startswith(section)),
        None,
    )
    if start is None:
        print(f"index-append: section '{section}' not found", file=sys.stderr)
        return None

    end = next(
        (i for i in range(start + 1, len(lines)) if HEADER_RE.match(lines[i])),
        len(lines),
    )

    # Split the section body into intro (non-entry) lines and entries.
    body = lines[start + 1:end]
    intro = [ln for ln in body if not ENTRY_RE.match(ln)]
    entries = [ln for ln in body if ENTRY_RE.match(ln)]

    new_m = LINK_RE.match(entry)
    new_link = new_m.group(1) if new_m else None

    if new_link is not None:
        for idx, ln in enumerate(entries):
            m = LINK_RE.match(ln)
            if m and m.group(1) == new_link:
                if ln == entry:
                    print("index-append: entry already present, skipping")
                    return lines
                entries[idx] = entry  # same page, refreshed text
                break
        else:
            entries.append(entry)
    else:
        # No parseable wikilink: fall back to exact-line dedup.
        if entry in entries:
            print("index-append: entry already present, skipping")
            return lines
        entries.append(entry)

    entries.sort(key=str.casefold)

    # Normalise intro: drop trailing blanks so exactly one blank line
    # separates it from the entries.
    while intro and intro[-1].strip() == "":
        intro.pop()

    out = lines[:start + 1] + intro + [""] + entries + [""] + lines[end:]

    # Bump the timestamp on the result only, now that a change is certain.
    bump_last_updated(out, today)
    # The same message is used whether the entry was added or refreshed.
    print(f"index-append: added to {section}")
    return out
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
ap = argparse.ArgumentParser()
|
ap = argparse.ArgumentParser()
|
||||||
ap.add_argument("--section", required=True,
|
ap.add_argument("--section", help="Section name (required with --entry)")
|
||||||
help="Section name, e.g. Sources / Entities / Concepts / Queries / Conflicts")
|
ap.add_argument("--entry", help="Full index line to insert")
|
||||||
ap.add_argument("--entry", required=True, help="Full index line to insert")
|
ap.add_argument("--remove", metavar="WIKILINK",
|
||||||
|
help="Remove the entry with this wikilink, e.g. sources/foo")
|
||||||
ap.add_argument("--file", default="wiki/index.md")
|
ap.add_argument("--file", default="wiki/index.md")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
|
if bool(args.remove) == bool(args.entry):
|
||||||
|
print("index-append: provide exactly one of --entry or --remove", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
if args.entry and not args.section:
|
||||||
|
print("index-append: --entry requires --section", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(args.file, encoding="utf-8") as fh:
|
with open(args.file, encoding="utf-8") as fh:
|
||||||
lines = fh.read().splitlines()
|
lines = fh.read().splitlines()
|
||||||
|
|
@ -33,90 +136,15 @@ def main() -> int:
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
today = datetime.date.today().isoformat()
|
today = datetime.date.today().isoformat()
|
||||||
|
if args.remove:
|
||||||
# 1. Bump last_updated inside the first frontmatter block
|
out = do_remove(lines, args.remove, today)
|
||||||
fm_open = False
|
|
||||||
fm_close_idx = None
|
|
||||||
bumped = False
|
|
||||||
for i, ln in enumerate(lines):
|
|
||||||
if ln.strip() == "---":
|
|
||||||
if not fm_open:
|
|
||||||
fm_open = True
|
|
||||||
continue
|
|
||||||
fm_close_idx = i # the closing ---
|
|
||||||
break
|
|
||||||
if fm_open and ln.startswith("last_updated:"):
|
|
||||||
lines[i] = f"last_updated: {today}"
|
|
||||||
bumped = True
|
|
||||||
|
|
||||||
if not fm_open:
|
|
||||||
print("index-append: warning: no frontmatter found, last_updated not bumped",
|
|
||||||
file=sys.stderr)
|
|
||||||
elif not bumped and fm_close_idx is not None:
|
|
||||||
# self-heal: frontmatter present but missing the key — insert it before the close
|
|
||||||
lines.insert(fm_close_idx, f"last_updated: {today}")
|
|
||||||
print("index-append: last_updated key was missing — inserted", file=sys.stderr)
|
|
||||||
|
|
||||||
# 2. Locate the target section [start, end)
|
|
||||||
start = None
|
|
||||||
for i, ln in enumerate(lines):
|
|
||||||
if HEADER_RE.match(ln) and ln[3:].startswith(args.section):
|
|
||||||
start = i
|
|
||||||
break
|
|
||||||
if start is None:
|
|
||||||
print(f"index-append: section '{args.section}' not found in {args.file}",
|
|
||||||
file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
end = len(lines)
|
|
||||||
for i in range(start + 1, len(lines)):
|
|
||||||
if HEADER_RE.match(lines[i]):
|
|
||||||
end = i
|
|
||||||
break
|
|
||||||
|
|
||||||
# 3. Split the section body into intro (non-entry) and entries
|
|
||||||
body = lines[start + 1:end]
|
|
||||||
intro = [ln for ln in body if not ENTRY_RE.match(ln)]
|
|
||||||
entries = [ln for ln in body if ENTRY_RE.match(ln)]
|
|
||||||
|
|
||||||
# Deduplicate by wikilink PATH, not by exact line: a re-ingest with a changed
|
|
||||||
# summary/maturity should UPDATE the existing entry, not add a duplicate line.
|
|
||||||
new_m = LINK_RE.match(args.entry)
|
|
||||||
new_link = new_m.group(1) if new_m else None
|
|
||||||
|
|
||||||
if new_link is not None:
|
|
||||||
replaced = False
|
|
||||||
for idx, ln in enumerate(entries):
|
|
||||||
m = LINK_RE.match(ln)
|
|
||||||
if m and m.group(1) == new_link:
|
|
||||||
if ln == args.entry:
|
|
||||||
print("index-append: entry already present, skipping")
|
|
||||||
return 0
|
|
||||||
entries[idx] = args.entry # same page, refreshed text
|
|
||||||
replaced = True
|
|
||||||
break
|
|
||||||
if not replaced:
|
|
||||||
entries.append(args.entry)
|
|
||||||
else:
|
else:
|
||||||
# No parseable wikilink — fall back to exact-line dedup.
|
out = do_append(lines, args.section, args.entry, today)
|
||||||
if args.entry in entries:
|
if out is None:
|
||||||
print("index-append: entry already present, skipping")
|
return 1
|
||||||
return 0
|
|
||||||
entries.append(args.entry)
|
|
||||||
|
|
||||||
entries.sort(key=str.casefold)
|
|
||||||
|
|
||||||
# Normalise intro: drop trailing blanks, keep header + comment(s)
|
|
||||||
while intro and intro[-1].strip() == "":
|
|
||||||
intro.pop()
|
|
||||||
|
|
||||||
new_section = intro + [""] + entries + [""]
|
|
||||||
lines = lines[:start + 1] + new_section + lines[end:]
|
|
||||||
|
|
||||||
with open(args.file, "w", encoding="utf-8") as fh:
|
with open(args.file, "w", encoding="utf-8") as fh:
|
||||||
fh.write("\n".join(lines) + "\n")
|
fh.write("\n".join(out) + "\n")
|
||||||
|
|
||||||
print(f"index-append: added to {args.section}")
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue