diff --git a/skills/ingest/scripts/ingest-semantic.py b/skills/ingest/scripts/ingest-semantic.py
index fec6582..f73612e 100755
--- a/skills/ingest/scripts/ingest-semantic.py
+++ b/skills/ingest/scripts/ingest-semantic.py
@@ -20,7 +20,7 @@
 #
 # Emits a single JSON status line on stdout (for n8n / logs).
 # =============================================================================
-import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error
+import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
 
 # --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
@@ -209,15 +209,26 @@ SCHEMA = {
 }
 
 
-def call_model():
+def call_model(max_retries=2, base_delay=2.0):
+    """Call Ollama and return the parsed JSON the model produced.
+
+    Retried with exponential backoff (base_delay, 2*base_delay, ...):
+    transport failures (connection, timeout, HTTP error status, truncated
+    body), an API envelope that is not valid JSON, and model content shorter
+    than 10 characters (treated as truncated; this includes an empty reply).
+    Longer content that fails to parse is a model issue and is not retried.
+
+    max_retries: extra attempts after the first; values below 0 count as 0.
+    base_delay: seconds to sleep before the first retry, doubled each retry.
+
+    Returns the decoded JSON value. If no attempt succeeds, reports the last
+    error through die("model", ...).
+    """
+    import http.client  # local import: only needed for HTTPException below
     # format existing names as a human-readable list
     existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
     existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
-
-    prompt = SYSTEM_PROMPT.format(
-        existing_entities=existing_ents,
-        existing_concepts=existing_conc,
-    )
+    prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
 
     payload = {
         "model": MODEL,
@@ -227,33 +238,53 @@ def call_model():
                 "Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
                 + source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
         ],
         "format": SCHEMA,          # schema-constrained generation
         "stream": False,
         # deterministic extraction; repetition penalties OFF for structured output
         "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
     }
     if THINK is not None:
         payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
     data = json.dumps(payload).encode("utf-8")
-    req = urllib.request.Request(
-        OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
-    try:
-        with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
-            resp = json.loads(r.read().decode("utf-8"))
-    except urllib.error.URLError as e:
-        die("model", "ollama request failed: " + str(e))
-    content = ((resp.get("message") or {}).get("content") or "").strip()
-    # schema-constrained, but stay defensive if a model wraps it in a fence
-    if content.startswith("```"):
-        content = content.strip("`")
-        brace = content.find("{")
-        if brace >= 0:
-            content = content[brace:]
-    try:
-        return json.loads(content)
-    except json.JSONDecodeError as e:
-        die("model", "model did not return valid JSON: " + str(e))
 
+    last_error = None
+    # max(0, ...) guarantees at least one attempt even for a negative max_retries
+    for attempt in range(max(0, max_retries) + 1):
+        if attempt > 0:
+            delay = base_delay * (2 ** (attempt - 1))
+            print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
+            time.sleep(delay)
+
+        req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
+        try:
+            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
+                resp = json.loads(r.read().decode("utf-8"))
+        except (OSError, http.client.HTTPException) as e:
+            # OSError covers URLError/HTTPError plus timeouts and resets raised
+            # while reading the body; HTTPException covers a truncated response.
+            last_error = f"connection/transport error: {e}"
+            continue
+        except (json.JSONDecodeError, UnicodeDecodeError) as e:
+            last_error = f"invalid JSON from Ollama API: {e}"
+            continue
+
+        content = ((resp.get("message") or {}).get("content") or "").strip()
+        # schema-constrained, but stay defensive if a model wraps it in a fence
+        if content.startswith("```"):
+            content = content.strip("`")
+            brace = content.find("{")
+            if brace >= 0:
+                content = content[brace:]
+        try:
+            return json.loads(content)
+        except json.JSONDecodeError as e:
+            last_error = f"model did not return valid JSON: {e}"
+            if len(content) < 10:
+                continue           # likely truncated -> retry
+            break                  # long but malformed -> model issue, stop
+
+    die("model", last_error or "model call failed after retries")
+
 
 # --- run the semantic pass ---
 sem = call_model()