fix: Implement retry logic for Ollama model API calls
This commit is contained in:
parent
3c9b24c3b2
commit
f962a7fb13
1 changed file with 38 additions and 28 deletions
|
|
@@ -20,7 +20,7 @@
|
||||||
#
|
#
|
||||||
# Emits a single JSON status line on stdout (for n8n / logs).
|
# Emits a single JSON status line on stdout (for n8n / logs).
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error
|
import json, os, hashlib, subprocess, re, sys, datetime, urllib.request, urllib.error, time
|
||||||
|
|
||||||
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
|
# --- config (override via env; these live in ~/.config/knowledge-genome.env) ---
|
||||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
||||||
|
|
@@ -209,15 +209,13 @@ SCHEMA = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def call_model():
|
def call_model(max_retries=2, base_delay=2.0):
|
||||||
# format existing names as a human-readable list
|
"""Call Ollama with retry on transient errors (connection, timeout, malformed JSON).
|
||||||
|
Retries up to max_retries times with exponential backoff. Does NOT retry on
|
||||||
|
content errors (schema violations, long malformed output) — those are model issues. A near-empty reply (< 10 chars) is treated as truncation and IS retried."""
|
||||||
existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
|
existing_ents = ", ".join(sorted(existing_entities)) or "(none yet)"
|
||||||
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
|
existing_conc = ", ".join(sorted(existing_concepts)) or "(none yet)"
|
||||||
|
prompt = SYSTEM_PROMPT.format(existing_entities=existing_ents, existing_concepts=existing_conc)
|
||||||
prompt = SYSTEM_PROMPT.format(
|
|
||||||
existing_entities=existing_ents,
|
|
||||||
existing_concepts=existing_conc,
|
|
||||||
)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": MODEL,
|
"model": MODEL,
|
||||||
|
|
@@ -227,33 +225,45 @@ def call_model():
|
||||||
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
|
"Source path: " + raw_rel + "\n\n--- SOURCE START ---\n"
|
||||||
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
|
+ source_text + "\n--- SOURCE END ---\n\nReturn the JSON now."},
|
||||||
],
|
],
|
||||||
"format": SCHEMA, # schema-constrained generation
|
"format": SCHEMA,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
# deterministic extraction; repetition penalties OFF for structured output
|
|
||||||
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
||||||
}
|
}
|
||||||
if THINK is not None:
|
if THINK is not None:
|
||||||
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
|
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
|
||||||
data = json.dumps(payload).encode("utf-8")
|
data = json.dumps(payload).encode("utf-8")
|
||||||
req = urllib.request.Request(
|
|
||||||
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
|
|
||||||
resp = json.loads(r.read().decode("utf-8"))
|
|
||||||
except urllib.error.URLError as e:
|
|
||||||
die("model", "ollama request failed: " + str(e))
|
|
||||||
content = ((resp.get("message") or {}).get("content") or "").strip()
|
|
||||||
# schema-constrained, but stay defensive if a model wraps it in a fence
|
|
||||||
if content.startswith("```"):
|
|
||||||
content = content.strip("`")
|
|
||||||
brace = content.find("{")
|
|
||||||
if brace >= 0:
|
|
||||||
content = content[brace:]
|
|
||||||
try:
|
|
||||||
return json.loads(content)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
die("model", "model did not return valid JSON: " + str(e))
|
|
||||||
|
|
||||||
|
last_error = None
|
||||||
|
for attempt in range(max_retries + 1):
|
||||||
|
if attempt > 0:
|
||||||
|
delay = base_delay * (2 ** (attempt - 1))
|
||||||
|
print(f"call_model: retry {attempt}/{max_retries} after {delay}s: {last_error}", file=sys.stderr)
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
|
req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
|
||||||
|
resp = json.loads(r.read().decode("utf-8"))
|
||||||
|
except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError) as e:
|
||||||
|
last_error = f"connection/transport error: {e}"; continue
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
last_error = f"invalid JSON from Ollama API: {e}"; continue
|
||||||
|
|
||||||
|
content = ((resp.get("message") or {}).get("content") or "").strip()
|
||||||
|
if content.startswith("```"):
|
||||||
|
content = content.strip("`")
|
||||||
|
brace = content.find("{")
|
||||||
|
if brace >= 0:
|
||||||
|
content = content[brace:]
|
||||||
|
try:
|
||||||
|
return json.loads(content)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
last_error = f"model did not return valid JSON: {e}"
|
||||||
|
if len(content) < 10:
|
||||||
|
continue # likely truncated -> retry
|
||||||
|
break # long but malformed -> model issue, stop
|
||||||
|
|
||||||
|
die("model", last_error or "model call failed after retries")
|
||||||
|
|
||||||
# --- run the semantic pass ---
|
# --- run the semantic pass ---
|
||||||
sem = call_model()
|
sem = call_model()
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue