Compare commits

...

8 commits

5 changed files with 68 additions and 9 deletions

View file

@ -1,5 +1,5 @@
# =============================================================================
# Knowledge Genome - Makefile v. 1.10.0
# Knowledge Genome - Makefile v. 1.11.0
# Orchestrates the setup and management of the knowledge base.
# =============================================================================

View file

@ -18,12 +18,12 @@ case "$cmd" in
set -a; . "${HOME}/.config/knowledge-genome.env" 2>/dev/null || true; set +a
exec "${HOME}/.pi/agent/skills/ingest/scripts/orphan-wiki.sh" "$genome"
;;
"pi run")
"pi run")
# One-shot `pi` invocation: the prompt is read in full from this wrapper's
# stdin and handed to pi through -p, together with --no-tools and --mode json.
logger -t n8n-pi-wrap "ok: pi run (prompt via stdin)"
prompt=$(cat)
# exec replaces the wrapper with pi; pi's own stdin is /dev/null because the
# prompt has already been consumed by `cat` above.
exec /usr/local/bin/pi --no-tools --mode json -p "$prompt" </dev/null
;;
"pi ingest "*)
"pi ingest "*)
# Strict positional parse: EXACTLY `pi ingest <genome> <raw_path>` (two tokens).
rest="${cmd#pi ingest }"
genome="${rest%% *}"
@ -79,7 +79,45 @@ case "$cmd" in
# MECHANICAL step: validate manifest -> index/log/scoped-lint/commit/PR -> 1 JSON line
exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
;;
"pi changed-raw "*)
"pi ingest-rework "*)
    # Guided retry of an ingest:
    #   pi ingest-rework <genome> <raw_path> <feedback_base64>   (exactly 3 tokens)
    # The feedback travels base64-encoded in argv because the n8n SSH node does not
    # pass stdin; the encoding also neutralizes the shell metacharacters a review
    # may contain (quotes, newlines, $(...)).
    # Every outcome handled here is reported as a single JSON line on stdout.
    args="${cmd#pi ingest-rework }"
    genome="${args%% *}"; tmp="${args#* }"
    raw_path="${tmp%% *}"; fb_b64="${tmp#* }"
    # When there is no space left to split on, the expansions above return their
    # input unchanged -- that is how a missing 2nd or 3rd token is detected.
    if [ "$genome" = "$args" ] || [ "$raw_path" = "$tmp" ] || [ -z "$fb_b64" ]; then
        echo '{"status":"error","reason":"usage: pi ingest-rework <genome> <raw_path> <feedback_b64>"}'; exit 1
    fi
    # Whitelist validation of every token before any of them reaches a path or a command.
    case "$genome" in ""|*[!a-z0-9-]*) echo '{"status":"error","reason":"invalid genome"}'; exit 1;; esac
    case "$raw_path" in raw/*) : ;; *) echo '{"status":"error","reason":"raw_path must be under raw/"}'; exit 1;; esac
    case "$raw_path" in *..*|*//*) echo '{"status":"error","reason":"raw_path traversal"}'; exit 1;; esac
    case "$raw_path" in *[!A-Za-z0-9._/-]*) echo '{"status":"error","reason":"raw_path illegal chars"}'; exit 1;; esac
    case "$fb_b64" in *[!A-Za-z0-9+/=]*) echo '{"status":"error","reason":"feedback not base64"}'; exit 1;; esac
    # Decode the feedback and fail loudly when it cannot be decoded: a rework that
    # silently runs with empty feedback is indistinguishable from a plain ingest.
    if ! feedback="$(printf '%s' "$fb_b64" | base64 -d 2>/dev/null)"; then
        echo '{"status":"error","reason":"feedback not base64"}'; exit 1
    fi
    [ -n "$feedback" ] || { echo '{"status":"error","reason":"empty feedback"}'; exit 1; }
    logger -t n8n-pi-wrap "ok: pi ingest-rework ${genome} ${raw_path}"
    # Per-genome lock: serializes this rework with the normal ingests.
    # The brace group confines the stderr suppression to the first open attempt;
    # a bare `exec 9>... 2>/dev/null` would redirect the shell's stderr to
    # /dev/null for everything that follows, including the final exec below.
    { exec 9>"/run/lock/kg-ingest-${genome}.lock"; } 2>/dev/null \
        || exec 9>"/tmp/kg-ingest-${genome}.lock"
    if ! flock -n 9; then
        # Not an error: the caller gets a "busy" status and exit code 0.
        echo '{"status":"busy","reason":"another ingest is running for this genome","genome":"'"$genome"'"}'; exit 0
    fi
    # Export everything the env file defines (set -a) so child processes see it.
    set -a; . "${HOME}/.config/knowledge-genome.env"; set +a
    # Without GENOMES_ROOT the cd below would target "/<genome>" and the failure
    # would be misreported as an unknown genome.
    [ -n "${GENOMES_ROOT:-}" ] || { echo '{"status":"error","reason":"GENOMES_ROOT not set"}'; exit 1; }
    cd "${GENOMES_ROOT}/${genome}" || { echo '{"status":"error","reason":"unknown genome"}'; exit 1; }
    # raw_path is relative to the genome checkout we just entered.
    [ -f "$raw_path" ] || { echo '{"status":"error","reason":"raw file not found"}'; exit 1; }
    : "${KG_LIB_DIR:=${HOME}/knowledge-genome-orchestrator/lib}"
    source "${KG_LIB_DIR}/clean-start.sh" 2>/dev/null \
        || { echo '{"status":"error","reason":"clean-start.sh not found"}'; exit 1; }
    clean_start || { echo '{"status":"error","reason":"clean-start failed"}'; exit 1; }
    # Semantic step: stdout and stderr are captured in a temp log whose path is
    # reported to the caller on failure.
    # NOTE(review): on success the log file is left behind in the temp dir -- confirm
    # whether it should be removed before the final exec.
    log="$(mktemp -t pi-rework.XXXXXX.log)"
    INGEST_FEEDBACK="$feedback" \
        "${HOME}/.pi/agent/skills/ingest/scripts/ingest-semantic.py" "${genome}" "${raw_path}" \
        >"$log" 2>&1 \
        || { echo "{\"status\":\"error\",\"stage\":\"semantic\",\"reason\":\"rework failed\",\"log\":\"${log}\"}"; exit 1; }
    # Hand over to the mechanical step; exec keeps fd 9 (the lock) open in the new process.
    exec "${HOME}/.pi/agent/skills/ingest/scripts/run-ingest.sh" "${genome}"
    ;;
"pi changed-raw "*)
# List raw/ files changed between two commits, one per line (the webhook payload
# does NOT include file lists, so vm101's checkout computes the diff itself).
rest="${cmd#pi changed-raw }"
@ -119,11 +157,11 @@ case "$cmd" in
--arg g "$genome" '{status:"ok", genome:$g, count:length, files:.}'
fi
;;
"ollama list")
"ollama list")
# Log the accepted command through logger (tag n8n-pi-wrap), then replace this
# wrapper process with `ollama list`.
logger -t n8n-pi-wrap "ok: ollama list"
exec /usr/local/bin/ollama list
;;
"ollama ps")
"ollama ps")
# Log the accepted command through logger (tag n8n-pi-wrap), then replace this
# wrapper process with `ollama ps`.
logger -t n8n-pi-wrap "ok: ollama ps"
exec /usr/local/bin/ollama ps
;;

View file

@ -32,6 +32,7 @@ TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
THINK = os.environ.get("INGEST_THINK")
TODAY = datetime.date.today().isoformat()
# Optional maintainer feedback for a guided retry, taken from the INGEST_FEEDBACK
# environment variable. Whitespace is stripped; the value is "" when the variable
# is unset, so callers can test it for truthiness.
FEEDBACK = os.environ.get("INGEST_FEEDBACK", "").strip()
def die(stage, reason):
@ -217,13 +218,19 @@ def call_model(max_retries=2, base_delay=2.0):
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
user_content = (
("REVISION REQUESTED BY THE MAINTAINER (address this explicitly):\n"
+ FEEDBACK + "\n\n") if FEEDBACK else ""
) + (
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."
)
payload = {
"model": MODEL,
"messages": [
{"role": "system", "content": prompt},
{"role": "user", "content":
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
{"role": "user", "content": user_content },
],
"format": SCHEMA,
"stream": False,

View file

@ -131,6 +131,7 @@ body="$(mktemp)"
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
{
echo "<!-- kg:raw=${raw_source} -->" # marker for the rejection loop (invisible in the render)
echo "## Summary"
echo "$pr_summary"
echo ""

View file

@ -2,6 +2,19 @@
<!-- One sentence: goal of this session and source processed. -->
<!--
REVIEW GUIDELINES (write the guideline as the FIRST word of your review):
REWORK: <what to fix> -> same branch, guided retry
RESTART: <why restart> -> close PR, start over from scratch
SPLIT: <how to split> -> close PR, reopen as separate branches
REJECT: <why not> -> close PR, no retry
MERGE -> approve and merge
Rules: one concern per directive; be specific to lines/pages; name the principle
that was violated; describe the DESIRED STATE; avoid saying “do better.”
-->
<!-- Review guidelines above translated with DeepL.com (free version). -->
## Pages Created
| Path | Type | Maturity |