NOTE(review): reflowed from a whitespace-collapsed copy of this patch. Context
indentation, hunk offsets and the "index" blob ids are unverified -- regenerate
with `git diff` against the real file before applying.

diff --git a/skills/ingest/scripts/ingest-semantic.py b/skills/ingest/scripts/ingest-semantic.py
index fec6582..f73612e 100755
--- a/skills/ingest/scripts/ingest-semantic.py
+++ b/skills/ingest/scripts/ingest-semantic.py
@@ -20,7 +20,7 @@
 #
 # Emits a single JSON status line on stdout (for n8n / logs).
 # =============================================================================
-import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error
+import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
 
 # --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
@@ -209,7 +209,16 @@ SCHEMA = {
 }
 
 
-def call_model():
+def call_model(max_retries=2, base_delay=2.0):
+    """Call Ollama and return the JSON value parsed from the model's reply.
+
+    Transient failures are retried up to max_retries times, sleeping
+    base_delay * 2 ** (attempt - 1) seconds before each retry: transport
+    errors, HTTP 408/429/5xx, an undecodable API response, and replies
+    shorter than 10 characters (treated as truncated). HTTP 4xx client
+    errors and long-but-malformed replies are not retried. Every terminal
+    failure is reported via die("model", ...).
+    """
     # format existing names as a human-readable list
     existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
     existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
@@ -236,24 +245,50 @@ def call_model():
     if THINK is not None:
         payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
     data = json.dumps(payload).encode("utf-8")
-    req = urllib.request.Request(
-        OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
-    try:
-        with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-            resp = json.loads(r.read().decode("utf-8"))
-    except urllib.error.URLError as e:
-        die("model", "ollama request failed: " + str(e))
-    content = ((resp.get("message") or {}).get("content") or "").strip()
-    # schema-constrained, but stay defensive if a model wraps it in a fence
-    if content.startswith("```"):
-        content = content.strip("`")
-        brace = content.find("{")
-        if brace >= 0:
-            content = content[brace:]
-    try:
-        return json.loads(content)
-    except json.JSONDecodeError as e:
-        die("model", "model did not return valid JSON: " + str(e))
+    last_error = None
+    for attempt in range(max_retries + 1):
+        if attempt > 0:
+            delay = base_delay * (2 ** (attempt - 1))
+            print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
+            time.sleep(delay)
+
+        req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
+        try:
+            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
+                resp = json.loads(r.read().decode("utf-8"))
+        except urllib.error.HTTPError as e:
+            last_error = f"connection/transport error: {e}"
+            # 4xx (bad request, unknown model) will not heal on retry; 408/429 may
+            if e.code < 500 and e.code not in (408, 429):
+                break
+            continue
+        except OSError as e:
+            # URLError, TimeoutError and socket.timeout are all OSError subclasses
+            last_error = f"connection/transport error: {e}"
+            continue
+        except (json.JSONDecodeError, UnicodeDecodeError) as e:
+            last_error = f"invalid JSON from Ollama API: {e}"
+            continue
+        if not isinstance(resp, dict):
+            last_error = "invalid JSON from Ollama API: top-level value is not an object"
+            continue
+
+        content = ((resp.get("message") or {}).get("content") or "").strip()
+        # schema-constrained, but stay defensive if a model wraps it in a fence
+        if content.startswith("```"):
+            content = content.strip("`")
+            brace = content.find("{")
+            if brace >= 0:
+                content = content[brace:]
+        try:
+            return json.loads(content)
+        except json.JSONDecodeError as e:
+            last_error = f"model did not return valid JSON: {e}"
+            if len(content) >= 10:
+                break  # long but malformed -> model issue, stop
+            # short or empty reply: likely truncated -> retry
+
+    die("model", last_error or "model call failed after retries")
 
 # --- run the semantic pass ---
 sem = call_model()