Merge branch 'release/1.10.0' into main
This commit is contained in:
commit
bab4e987c3
13 changed files with 203 additions and 45 deletions
2
Makefile
2
Makefile
|
|
@ -1,5 +1,5 @@
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Knowledge Genome - Makefile v. 1.9.1
|
# Knowledge Genome - Makefile v. 1.10.0
|
||||||
# Orchestrates the setup and management of the knowledge base.
|
# Orchestrates the setup and management of the knowledge base.
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,13 @@ case "$cmd" in
|
||||||
# lines 54/59), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
|
# lines 54/59), so pending-raw.sh resolves its sibling slug.sh via BASH_SOURCE.
|
||||||
exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
|
exec "${HOME}/.pi/agent/skills/ingest/scripts/pending-raw.sh" "$genome"
|
||||||
;;
|
;;
|
||||||
|
"pi orphan-wiki "*)
|
||||||
|
genome="${cmd#pi orphan-wiki }"
|
||||||
|
case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome name"}'; exit 1;; esac
|
||||||
|
logger -t n8n-pi-wrap "ok: pi orphan-wiki ${genome}"
|
||||||
|
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
|
||||||
|
exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
|
||||||
|
;;
|
||||||
"pi run")
|
"pi run")
|
||||||
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
|
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
|
||||||
prompt=$(cat)
|
prompt=$(cat)
|
||||||
|
|
@ -42,20 +49,25 @@ case "$cmd" in
|
||||||
esac
|
esac
|
||||||
|
|
||||||
logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"
|
logger -t n8n-pi-wrap "ok: pi ingest ${genome} ${raw_path}"
|
||||||
|
|
||||||
|
# Per-genome lock: serialize writes; never two concurrent ingests on the same genome.
# Prefer /run/lock and fall back to /tmp when the first open fails.
# The stderr suppression is scoped to a brace group on purpose: redirections
# given directly to a command-less `exec` are permanent, so writing
# `exec 9>... 2>/dev/null` would leave stderr on /dev/null for the rest of the
# script whenever the first open succeeds, hiding every later diagnostic.
{ exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null \
  || exec 9>"/tmp/kg-ingest-${genome}.lock"
# Non-blocking: if another ingest already holds the lock, report "busy" and
# exit 0 instead of queueing behind it.
if ! flock -n 9; then
  echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'
  exit 0
fi
|
||||||
|
|
||||||
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
|
set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
|
||||||
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
|
cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
|
||||||
|
|
||||||
# The raw file must actually exist under the genome's raw/ dir.
|
# The raw file must actually exist under the genome's raw/ dir.
|
||||||
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
|
[ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
|
||||||
|
|
||||||
# Clean start on the configured base (develop), pinned to the remote. Destroys only
|
# Clean start on the configured base (single source of truth in lib/clean-start.sh).
|
||||||
# vm101's scratch checkout (never a shared branch, never a force-push) — this is by design.
|
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
|
||||||
# `clean -fd` also removes leftover UNTRACKED files (e.g. wiki/sources/* or a stale
|
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|
||||||
# .ingest-manifest.json from a half-finished previous run) that `reset --hard` won't touch.
|
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
|
||||||
git fetch -q origin \
|
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
|
||||||
&& git switch -q "${INGEST_BASE:-main}" 2>/dev/null \
|
|
||||||
&& git reset -q --hard "origin/${INGEST_BASE:-main}" \
|
|
||||||
&& git clean -q -fd
|
|
||||||
|
|
||||||
# SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
|
# SEMANTIC step: dedicated script drives pi to WRITE wiki pages + manifest.
|
||||||
# (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
|
# (NOT `pi -p "/skill:ingest ..."`, which makes the model reply in chat and write nothing.)
|
||||||
|
|
|
||||||
18
lib/clean-start.sh
Normal file
18
lib/clean-start.sh
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# =============================================================================
|
||||||
|
# lib/clean-start.sh — single source of truth for the pre-session reset.
|
||||||
|
# Caller must already be INSIDE the genome checkout.
|
||||||
|
# Aligns the working tree to origin/<base>. Never force-pushes a shared branch.
|
||||||
|
# Tolerates a missing remote branch (first-setup scenario).
|
||||||
|
# NOTE: sourced library — no `set -euo pipefail` (would leak into the caller).
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
#######################################
# Reset the current checkout to a pristine copy of the base branch.
# The caller must already be inside the genome checkout.
# Globals:   INGEST_BASE (read) - base branch name; defaults to "main"
# Arguments: none
# Returns:   0 on success, 1 as soon as a required git step fails
#######################################
clean_start() {
  local base="${INGEST_BASE:-main}"

  git fetch -q origin || return 1

  # Destructive by design: --discard-changes lets the switch go through even
  # when a half-finished previous session left tracked edits that conflict
  # with the base branch. A plain switch would refuse, the fallback below
  # would fail because the branch already exists, and the reset would never
  # run. If the branch cannot be switched to, try to create it.
  git switch -q --discard-changes "$base" 2>/dev/null \
    || git checkout -q -b "$base" \
    || return 1

  # Align to the remote only when the remote actually has the branch; a
  # missing remote branch (first-setup scenario) is tolerated.
  if git ls-remote --exit-code --heads origin "$base" >/dev/null 2>&1; then
    git reset -q --hard "origin/${base}" || return 1
  fi

  # `reset --hard` does not touch untracked files; remove those as well.
  git clean -q -fd || return 1
}
|
||||||
|
|
@ -20,7 +20,7 @@
|
||||||
#
|
#
|
||||||
# Emits a single JSON status line on stdout (for n8n / logs).
|
# Emits a single JSON status line on stdout (for n8n / logs).
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error
|
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
|
||||||
|
|
||||||
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
|
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
|
||||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
||||||
|
|
@ -209,15 +209,13 @@ SCHEMA = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def call_model():
|
def call_model(max_retries=2, base_delay=2.0):
|
||||||
# format existing names as a human-readable list
|
"""Call Ollama with retry on transient errors (connection, timeout, malformed JSON).
|
||||||
|
Retries up to max_retries times with exponential backoff. Does NOT retry on
|
||||||
|
content errors (schema violations, empty response) — those are model issues."""
|
||||||
existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
|
existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
|
||||||
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
|
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
|
||||||
|
prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
|
||||||
prompt = SYSTEM_PROMPT.format(
|
|
||||||
existing_entities=existing_ents,
|
|
||||||
existing_concepts=existing_conc,
|
|
||||||
)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": MODEL,
|
"model": MODEL,
|
||||||
|
|
@ -227,33 +225,45 @@ def call_model():
|
||||||
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
|
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
|
||||||
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
|
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
|
||||||
],
|
],
|
||||||
"format": SCHEMA, # schema-constrained generation
|
"format": SCHEMA,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
# deterministic extraction; repetition penalties OFF for structured output
|
|
||||||
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
||||||
}
|
}
|
||||||
if THINK is not None:
|
if THINK is not None:
|
||||||
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
|
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
|
||||||
data = json.dumps(payload).encode("utf-8")
|
data = json.dumps(payload).encode("utf-8")
|
||||||
req = urllib.request.Request(
|
|
||||||
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
|
|
||||||
resp = json.loads(r.read().decode("utf-8"))
|
|
||||||
except urllib.error.URLError as e:
|
|
||||||
die("model", "ollama request failed: " + str(e))
|
|
||||||
content = ((resp.get("message") or {}).get("content") or "").strip()
|
|
||||||
# schema-constrained, but stay defensive if a model wraps it in a fence
|
|
||||||
if content.startswith("```"):
|
|
||||||
content = content.strip("`")
|
|
||||||
brace = content.find("{")
|
|
||||||
if brace >= 0:
|
|
||||||
content = content[brace:]
|
|
||||||
try:
|
|
||||||
return json.loads(content)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
die("model", "model did not return valid JSON: " + str(e))
|
|
||||||
|
|
||||||
|
last_error = None
|
||||||
|
for attempt in range(max_retries + 1):
|
||||||
|
if attempt > 0:
|
||||||
|
delay = base_delay * (2 ** (attempt - 1))
|
||||||
|
print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
|
req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
|
||||||
|
resp = json.loads(r.read().decode("utf-8"))
|
||||||
|
except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError) as e:
|
||||||
|
last_error = f"connection/transport error: {e}"; continue
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
last_error = f"invalid JSON from Ollama API: {e}"; continue
|
||||||
|
|
||||||
|
content = ((resp.get("message") or {}).get("content") or "").strip()
|
||||||
|
if content.startswith("```"):
|
||||||
|
content = content.strip("`")
|
||||||
|
brace = content.find("{")
|
||||||
|
if brace >= 0:
|
||||||
|
content = content[brace:]
|
||||||
|
try:
|
||||||
|
return json.loads(content)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
last_error = f"model did not return valid JSON: {e}"
|
||||||
|
if len(content) < 10:
|
||||||
|
continue # likely truncated -> retry
|
||||||
|
break # long but malformed -> model issue, stop
|
||||||
|
|
||||||
|
die("model", last_error or "model call failed after retries")
|
||||||
|
|
||||||
# --- run the semantic pass ---
|
# --- run the semantic pass ---
|
||||||
sem = call_model()
|
sem = call_model()
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ while [[ $# -gt 0 ]]; do
|
||||||
--context) context="$2"; shift 2 ;;
|
--context) context="$2"; shift 2 ;;
|
||||||
--output) output="$2"; shift 2 ;;
|
--output) output="$2"; shift 2 ;;
|
||||||
--reasoning) reasoning="$2"; shift 2 ;;
|
--reasoning) reasoning="$2"; shift 2 ;;
|
||||||
|
--run-id) run_id_arg="$2"; shift 2 ;;
|
||||||
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
|
*) echo "log-append: unknown arg: $1" >&2; exit 1 ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
@ -35,9 +36,15 @@ esac
|
||||||
|
|
||||||
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
|
[[ -f "$LOG_FILE" ]] || { echo "log-append: not found: $LOG_FILE" >&2; exit 1; }
|
||||||
|
|
||||||
run_id="$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')"
|
run_id="${run_id_arg:-$(uuidgen 2>/dev/null || cat /proc/sys/kernel/random/uuid 2>/dev/null || python3 -c 'import uuid; print(uuid.uuid4())')}"
|
||||||
today="$(date +%Y-%m-%d)"
|
today="$(date +%Y-%m-%d)"
|
||||||
|
|
||||||
|
if grep -qF "run_id: \`${run_id}\`" "$LOG_FILE" 2>/dev/null; then
|
||||||
|
echo "log-append: run_id ${run_id} already present — skipping (idempotent)" >&2
|
||||||
|
echo "run_id=${run_id}"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
{
|
{
|
||||||
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
|
printf '\n## [%s] %s | %s\n\n' "$today" "$type" "$subject"
|
||||||
printf -- '- run_id: `%s`\n' "$run_id"
|
printf -- '- run_id: `%s`\n' "$run_id"
|
||||||
|
|
|
||||||
35
skills/ingest/scripts/orphan-wiki.sh
Executable file
35
skills/ingest/scripts/orphan-wiki.sh
Executable file
|
|
@ -0,0 +1,35 @@
|
||||||
|
#!/usr/bin/env bash
# =============================================================================
# orphan-wiki.sh — find source pages whose raw source no longer exists.
# Reads source_path from each wiki/sources/*.md frontmatter. If the raw is gone,
# the page is orphaned. Emits JSON envelope: {status, genome, count, files[], detail[]}.
# Read-only: no lock needed (same policy as pending-raw).
#
# Usage: orphan-wiki.sh <genome>
# Env:   GENOMES_ROOT  directory holding the genome checkouts (default: ~/genomes)
#        KG_LIB_DIR    directory containing clean-start.sh
#                      (default: ~/knowledge-genome-orchestrator/lib)
# Exit:  0 with an "ok" envelope; 1 with an "error" envelope when the genome
#        directory, the library, or the clean start is unavailable.
# =============================================================================
set -euo pipefail

genome="${1:?usage: orphan-wiki.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }

# Clean start on the configured base (single source of truth in lib/clean-start.sh).
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
  || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }

declare -a ORPH=()
for page in wiki/sources/*.md; do
  # An unmatched glob stays literal; skip it instead of reading a bogus file.
  [[ -e "$page" ]] || continue
  sp="$(sed -n 's/^source_path:[[:space:]]*//p' "$page" | tr -d '\r' | head -n1)"
  # Normalize the scalar before testing it: drop trailing whitespace and one
  # pair of surrounding quotes, so `source_path: "raw/x.md"` is not reported
  # as an orphan just because the quotes are not part of the file name.
  sp="${sp%"${sp##*[![:space:]]}"}"
  case "$sp" in
    \"*\") sp="${sp#\"}"; sp="${sp%\"}" ;;
    \'*\') sp="${sp#\'}"; sp="${sp%\'}" ;;
  esac
  # Pages without source_path are pre-Step-2 legacy: ignore, don't false-positive.
  [[ -n "$sp" ]] || continue
  [[ -f "$sp" ]] || ORPH+=("$page")
done

# Both envelopes are built by jq so the genome name and page paths are always
# JSON-escaped; paths are read one per line (no tab-splitting).
if [[ ${#ORPH[@]} -eq 0 ]]; then
  jq -cn --arg g "$genome" '{status:"ok", genome:$g, count:0, files:[], detail:[]}'
else
  printf '%s\n' "${ORPH[@]}" \
    | jq -Rn --arg g "$genome" \
        '[inputs | {path: ., reason: "orphan"}]
         | {status:"ok", genome:$g, count:length, files:[.[].path], detail:.}'
fi
|
||||||
|
|
@ -12,11 +12,11 @@ genome="${1:?usage: pending-raw.sh <genome>}"
|
||||||
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
|
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
|
||||||
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
|
cd "${base_dir}/${genome}" 2>/dev/null || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
|
||||||
|
|
||||||
# Clean start on the configured base (Step 3 will extract this to lib/clean-start.sh).
|
# Clean start on the configured base (single source of truth in lib/clean-start.sh).
|
||||||
git fetch -q origin \
|
: "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
|
||||||
&& git switch -q "${INGEST_BASE:-main}" 2>/dev/null \
|
source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
|
||||||
&& git reset -q --hard "origin/${INGEST_BASE:-main}" \
|
|| { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
|
||||||
&& git clean -q -fd
|
clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
SLUG="${SCRIPT_DIR}/slug.sh"
|
SLUG="${SCRIPT_DIR}/slug.sh"
|
||||||
|
|
|
||||||
|
|
@ -107,8 +107,12 @@ done < <(jq -r '.pages[] | select(.status=="created")
|
||||||
| [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
|
| [.path, (.summary // ""), (.maturity // "draft")] | @tsv' "$manifest")
|
||||||
|
|
||||||
# --- 2. log entry ---
|
# --- 2. log entry ---
|
||||||
|
# Stable run_id: deterministic from the input (raw path + content hash). Survives wrapper
# re-runs and makes the append-only log idempotent (paired with the guard in log-append.sh).
# The hash is validated by emptiness rather than by exit status: without
# `pipefail` the pipeline reports cut's status, so a failing sha256sum would
# go unnoticed and leave src_sha empty instead of "unknown".
src_sha="$(sha256sum -- "$raw_source" 2>/dev/null | cut -d' ' -f1)" || true
[[ -n "$src_sha" ]] || src_sha="unknown"
run_id="$(printf '%s' "${raw_source}:${src_sha}" | sha256sum | cut -c1-16)"
|
||||||
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
|
out="$(jq -r '[.pages[].path | "[[" + (sub("^wiki/";"") | sub("\\.md$";"")) + "]]"] | join(", ")' "$manifest")"
|
||||||
bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model" \
|
bash "${SCRIPTS}/log-append.sh" --run-id "$run_id" --type INGEST --subject "$slug" --model "$model" \
|
||||||
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|
||||||
|| fail "log" "log-append failed"
|
|| fail "log" "log-append failed"
|
||||||
|
|
||||||
|
|
|
||||||
18
tests/clean-start.bats
Normal file
18
tests/clean-start.bats
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
setup() {
|
||||||
|
load 'helpers'
|
||||||
|
source "${LIB_DIR}/clean-start.sh" 2>/dev/null || source "${REPO_ROOT}/lib/clean-start.sh"
|
||||||
|
}
|
||||||
|
@test "clean_start: aligns to origin/base, reverts tracked edits, removes untracked" {
  # Fixture: origin/main gets one extra commit, then the local checkout is
  # rewound behind it and dirtied with a tracked edit and an untracked file.
  G="$(make_fixture_genome)"; cd "$G"
  echo "from origin" >> wiki/index.md
  git add -A && git commit -q -m "origin ahead" && git push -q
  git reset --hard HEAD~1                 # local BEHIND origin/main
  echo "local junk" >> wiki/log.md        # tracked edit, uncommitted
  echo "scratch" > scratch.txt            # genuinely untracked

  INGEST_BASE="main" clean_start

  git diff --quiet origin/main            # aligned to origin
  grep -q "from origin" wiki/index.md     # forwarded to origin state
  # A bare `! cmd` cannot fail a Bats test unless it is the last command
  # (errexit ignores negated pipelines), so assert on the captured status.
  run grep -q "local junk" wiki/log.md
  [ "$status" -ne 0 ]                     # tracked edit reverted
  [ ! -f scratch.txt ]                    # untracked removed
}
|
||||||
38
tests/orphan-wiki.bats
Normal file
38
tests/orphan-wiki.bats
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
setup() {
|
||||||
|
load 'helpers'
|
||||||
|
export ORPHAN="${SKILL_SCRIPTS}/orphan-wiki.sh"
|
||||||
|
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
||||||
|
export INGEST_BASE="main"
|
||||||
|
export KG_LIB_DIR="${LIB_DIR}" # orphan-wiki.sh sources clean-start.sh via KG_LIB_DIR
|
||||||
|
g_src="$(make_fixture_genome)"
|
||||||
|
export g_name="fixture-genome"
|
||||||
|
mv "$g_src" "${GENOMES_ROOT}/${g_name}"
|
||||||
|
export g="${GENOMES_ROOT}/${g_name}"
|
||||||
|
( cd "$g" && rm -f raw/articles/test.md && git add -A && git commit -q -m "clear" && git push -q )
|
||||||
|
}
|
||||||
|
@test "orphan-wiki: no orphans when raw and source page match" {
|
||||||
|
mkdir -p "${g}/raw/articles"; echo "content" > "${g}/raw/articles/existing.md"
|
||||||
|
hash="$(sha256sum "${g}/raw/articles/existing.md" | cut -d' ' -f1)"
|
||||||
|
mkdir -p "${g}/wiki/sources"
|
||||||
|
printf -- '---\nsource_path: raw/articles/existing.md\nsource_sha256: %s\n---\n' "$hash" > "${g}/wiki/sources/existing.md"
|
||||||
|
( cd "$g" && git add . && git commit -q -m "setup" && git push -q )
|
||||||
|
run bash "$ORPHAN" "$g_name"
|
||||||
|
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
|
||||||
|
}
|
||||||
|
@test "orphan-wiki: detects orphaned source page" {
|
||||||
|
mkdir -p "${g}/wiki/sources"
|
||||||
|
printf -- '---\nsource_path: raw/articles/deleted.md\nsource_sha256: abc123\n---\n' > "${g}/wiki/sources/orphaned.md"
|
||||||
|
( cd "$g" && git add . && git commit -q -m "orphan" && git push -q )
|
||||||
|
run bash "$ORPHAN" "$g_name"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
echo "$output" | jq -e '.count == 1'
|
||||||
|
echo "$output" | jq -e '.detail[0].reason == "orphan"'
|
||||||
|
}
|
||||||
|
@test "orphan-wiki: ignores legacy pages without source_path" {
|
||||||
|
mkdir -p "${g}/wiki/sources"
|
||||||
|
printf -- '---\ntitle: "Legacy"\ntype: source\n---\n' > "${g}/wiki/sources/legacy.md"
|
||||||
|
( cd "$g" && git add . && git commit -q -m "legacy" && git push -q )
|
||||||
|
run bash "$ORPHAN" "$g_name"
|
||||||
|
[ "$status" -eq 0 ]; echo "$output" | jq -e '.count == 0'
|
||||||
|
}
|
||||||
|
|
@ -5,6 +5,7 @@ setup() {
|
||||||
export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
|
export PENDING="${SKILL_SCRIPTS}/pending-raw.sh"
|
||||||
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
export GENOMES_ROOT="${BATS_TEST_TMPDIR}"
|
||||||
export INGEST_BASE="main"
|
export INGEST_BASE="main"
|
||||||
|
export KG_LIB_DIR="${LIB_DIR}"
|
||||||
|
|
||||||
g_src="$(make_fixture_genome)"
|
g_src="$(make_fixture_genome)"
|
||||||
export g_name="fixture-genome"
|
export g_name="fixture-genome"
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ EXECUTABLES=(
|
||||||
skills/ingest/scripts/log-append.sh
|
skills/ingest/scripts/log-append.sh
|
||||||
skills/ingest/scripts/slug.sh
|
skills/ingest/scripts/slug.sh
|
||||||
skills/ingest/scripts/pending-raw.sh
|
skills/ingest/scripts/pending-raw.sh
|
||||||
|
skills/ingest/scripts/orphan-wiki.sh
|
||||||
skills/ingest/scripts/index-append.py
|
skills/ingest/scripts/index-append.py
|
||||||
scripts/add-genome.sh
|
scripts/add-genome.sh
|
||||||
scripts/setup.sh
|
scripts/setup.sh
|
||||||
|
|
@ -29,7 +30,7 @@ EXECUTABLES=(
|
||||||
|
|
||||||
# Librerie sourced: NON devono essere eseguibili.
|
# Librerie sourced: NON devono essere eseguibili.
|
||||||
LIBRARIES=(
|
LIBRARIES=(
|
||||||
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh
|
lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh lib/clean-start.sh
|
||||||
providers/forgejo.sh providers/github.sh
|
providers/forgejo.sh providers/github.sh
|
||||||
registry.sh globals.env
|
registry.sh globals.env
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -86,3 +86,17 @@ EOF
|
||||||
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
|
python3 "$SKILL_SCRIPTS/index-append.py" --section Sources --entry '- [[sources/foo]] — s. `maturity: draft`'
|
||||||
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
grep -q "^last_updated: $(date +%F)$" wiki/index.md
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "log-append: dedup on stable run_id prevents duplicate entries" {
|
||||||
|
G="$(make_fixture_genome)"; cd "$G"
|
||||||
|
stable_id="test-stable-run-id-001"
|
||||||
|
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
|
||||||
|
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
run bash "$SKILL_SCRIPTS/log-append.sh" --run-id "$stable_id" --type INGEST --subject "test" --model "m" \
|
||||||
|
--context "[[raw/x]]" --output "[[sources/x]]" --reasoning "r"
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[[ "$output" == *"already present"* ]]
|
||||||
|
count="$(grep -cF "run_id: \`${stable_id}\`" wiki/log.md || true)"
|
||||||
|
[ "$count" -eq 1 ]
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue