fix(ingest): Ollama call leggibile sugli errori + toggle INGEST_THINK; script eseguibili + permissions.bats

This commit is contained in:
Matteo Cherubini 2026-06-19 11:21:39 +02:00
parent a18a04a38c
commit dfa1862ddf
14 changed files with 72 additions and 0 deletions

0
scripts/add-genome.sh Normal file → Executable file
View file

0
scripts/lint-genomes.sh Normal file → Executable file
View file

0
scripts/setup-genomes.sh Normal file → Executable file
View file

0
scripts/setup-master.sh Normal file → Executable file
View file

0
scripts/setup.sh Normal file → Executable file
View file

0
scripts/verify-genomes.sh Normal file → Executable file
View file

0
skills/ingest/scripts/index-append.py Normal file → Executable file
View file

6
skills/ingest/scripts/ingest-semantic.py Normal file → Executable file
View file

@ -27,6 +27,10 @@ OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b") MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384")) NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600")) TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
# INGEST_THINK: "false" disables a reasoning model's thinking trace, so models like
# gemma / qwq / qwen3 emit only the structured JSON (no truncation from long thinking).
# Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).
THINK = os.environ.get("INGEST_THINK")
TODAY = datetime.date.today().isoformat() TODAY = datetime.date.today().isoformat()
@ -228,6 +232,8 @@ def call_model():
# deterministic extraction; repetition penalties OFF for structured output # deterministic extraction; repetition penalties OFF for structured output
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX}, "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
} }
if THINK is not None:
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
data = json.dumps(payload).encode("utf-8") data = json.dumps(payload).encode("utf-8")
req = urllib.request.Request( req = urllib.request.Request(
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"}) OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})

0
skills/ingest/scripts/log-append.sh Normal file → Executable file
View file

0
skills/ingest/scripts/open-pr.sh Normal file → Executable file
View file

0
skills/ingest/scripts/run-ingest.sh Normal file → Executable file
View file

0
skills/ingest/scripts/scoped-lint.sh Normal file → Executable file
View file

0
skills/ingest/scripts/slug.sh Normal file → Executable file
View file

66
tests/permissions.bats Normal file
View file

@ -0,0 +1,66 @@
#!/usr/bin/env bats
# tests/permissions.bats
# Blinda i permessi del repo, cosi' un `cp`/deploy preserva l'eseguibilita' e non
# ricapita il "Permission denied" (es. ingest-semantic.py lanciato diretto).
#
# Principio:
# - script con shebang lanciati direttamente -> eseguibili (git mode 100755)
# - librerie *sourced* (lib/, providers/, registry.sh, globals.env) -> NON eseguibili (100644)
# Repository root: the parent of the directory that contains this test file.
REPO="$BATS_TEST_DIRNAME/.."

# Entry points / executable scripts (all of them have a shebang; some are also
# launched by hand for debugging).
EXECUTABLES=(
  # ingest skill
  skills/ingest/scripts/ingest-semantic.py
  skills/ingest/scripts/run-ingest.sh
  skills/ingest/scripts/scoped-lint.sh
  skills/ingest/scripts/open-pr.sh
  skills/ingest/scripts/log-append.sh
  skills/ingest/scripts/slug.sh
  skills/ingest/scripts/index-append.py
  # repository-level tooling
  scripts/add-genome.sh
  scripts/setup.sh
  scripts/setup-genomes.sh
  scripts/setup-master.sh
  scripts/lint-genomes.sh
  scripts/verify-genomes.sh
)

# Sourced libraries: these must NOT be executable.
LIBRARIES=(
  lib/lint.sh
  lib/output.sh
  lib/deps.sh
  lib/git-crypt.sh
  lib/scaffold.sh
  lib/structure.sh
  providers/forgejo.sh
  providers/github.sh
  registry.sh
  globals.env
)
# Print the file mode git has recorded in its index (e.g. 100755 or 100644)
# for the path given as $1, resolved inside $REPO.
# Prints nothing when the path has no index entry.
git_mode() {
  local tracked_path="$1"
  git -C "$REPO" ls-files -s -- "$tracked_path" | awk '{ print $1 }'
}
@test "executable scripts have the +x bit on disk" {
  # Stop at the first listed script whose on-disk copy is not executable,
  # printing its path so the failing file shows up in the test output.
  local script
  for script in "${EXECUTABLES[@]}"; do
    if [ ! -x "$REPO/$script" ]; then
      echo "NON eseguibile su disco: $script"
      return 1
    fi
  done
}
@test "executable scripts are recorded 100755 in git" {
  # Each listed script must have an index entry, and that entry must carry
  # mode 100755; the first violation fails the test.
  local script recorded
  for script in "${EXECUTABLES[@]}"; do
    recorded="$(git_mode "$script")"
    if [ -z "$recorded" ]; then
      echo "non tracciato in git: $script"
      return 1
    fi
    if [ "$recorded" != "100755" ]; then
      echo "git mode $recorded (atteso 100755): $script"
      return 1
    fi
  done
}
@test "sourced libraries are NOT executable in git (100644)" {
  local lib recorded
  for lib in "${LIBRARIES[@]}"; do
    recorded="$(git_mode "$lib")"
    case "$recorded" in
      '')
        # Not tracked / optional library: skip it.
        ;;
      100644)
        # Recorded as a plain, non-executable file: OK.
        ;;
      *)
        echo "git mode $recorded (atteso 100644, e' sourced): $lib"
        return 1
        ;;
    esac
  done
}
@test "executable shell scripts pass bash -n (syntax)" {
  local script
  for script in "${EXECUTABLES[@]}"; do
    # Only the *.sh entries are parsed by bash; any other entry is skipped.
    [[ "$script" == *.sh ]] || continue
    if ! bash -n "$REPO/$script"; then
      echo "syntax error: $script"
      return 1
    fi
  done
}