fix(ingest): Ollama call leggibile sugli errori + toggle INGEST_THINK; script eseguibili + permissions.bats

2026-06-19 11:21:39 +02:00 · 2026-06-19 11:21:39 +02:00 · dfa1862ddf
commit dfa1862ddf
parent a18a04a38c
14 changed files with 72 additions and 0 deletions
--- a/scripts/add-genome.sh
+++ b/scripts/add-genome.sh
--- a/scripts/lint-genomes.sh
+++ b/scripts/lint-genomes.sh
--- a/scripts/setup-genomes.sh
+++ b/scripts/setup-genomes.sh
--- a/scripts/setup-master.sh
+++ b/scripts/setup-master.sh
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
--- a/scripts/verify-genomes.sh
+++ b/scripts/verify-genomes.sh
--- a/skills/ingest/scripts/index-append.py
+++ b/skills/ingest/scripts/index-append.py
--- a/skills/ingest/scripts/ingest-semantic.py
+++ b/skills/ingest/scripts/ingest-semantic.py
@ -27,6 +27,10 @@ OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
 MODEL      = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
 NUM_CTX    = int(os.environ.get("INGEST_NUM_CTX", "16384"))
 TIMEOUT    = int(os.environ.get("INGEST_TIMEOUT", "600"))
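 # INGEST_THINK: controls the "think" flag sent to the model. "1", "true", "yes" or
 # "on" (case-insensitive) enable the thinking trace; any other value -- e.g. "false",
 # but also an empty string -- disables it, so reasoning models like gemma / qwq /
 # qwen3 emit only the structured JSON (no truncation from long thinking).
 # Unset = omit the flag entirely (correct for plain instruct models such as qwen2.5).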
 THINK      = os.environ.get("INGEST_THINK")
 TODAY      = datetime.date.today().isoformat()
@ -228,6 +232,8 @@ def call_model():
        # deterministic extraction; repetition penalties OFF for structured output
        "options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
    }
    if THINK is not None:
        payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
--- a/skills/ingest/scripts/log-append.sh
+++ b/skills/ingest/scripts/log-append.sh
--- a/skills/ingest/scripts/open-pr.sh
+++ b/skills/ingest/scripts/open-pr.sh
--- a/skills/ingest/scripts/run-ingest.sh
+++ b/skills/ingest/scripts/run-ingest.sh
--- a/skills/ingest/scripts/scoped-lint.sh
+++ b/skills/ingest/scripts/scoped-lint.sh
--- a/skills/ingest/scripts/slug.sh
+++ b/skills/ingest/scripts/slug.sh
--- a/tests/permissions.bats
+++ b/tests/permissions.bats
@ -0,0 +1,66 @@
 #!/usr/bin/env bats
 # tests/permissions.bats
 # Blinda i permessi del repo, cosi' un `cp`/deploy preserva l'eseguibilita' e non
 # ricapita il "Permission denied" (es. ingest-semantic.py lanciato diretto).
 #
 # Principio:
 #   - script con shebang lanciati direttamente  -> eseguibili (git mode 100755)
 #   - librerie *sourced* (lib/, providers/, registry.sh, globals.env) -> NON eseguibili (100644)
 # Repository root, resolved relative to the directory that holds this test file.
 REPO="${BATS_TEST_DIRNAME}/.."
 # Entry points / executable scripts (all have a shebang; some are also run by hand for debugging).
 # Paths are relative to REPO.
 EXECUTABLES=(
  skills/ingest/scripts/ingest-semantic.py
  skills/ingest/scripts/run-ingest.sh
  skills/ingest/scripts/scoped-lint.sh
  skills/ingest/scripts/open-pr.sh
  skills/ingest/scripts/log-append.sh
  skills/ingest/scripts/slug.sh
  skills/ingest/scripts/index-append.py
  scripts/add-genome.sh
  scripts/setup.sh
  scripts/setup-genomes.sh
  scripts/setup-master.sh
  scripts/lint-genomes.sh
  scripts/verify-genomes.sh
 )
 # Sourced libraries: they must NOT be executable.
 # Paths are relative to REPO; entries that are not tracked in git are skipped by the test.
 LIBRARIES=(
  lib/lint.sh lib/output.sh lib/deps.sh lib/git-crypt.sh lib/scaffold.sh lib/structure.sh
  providers/forgejo.sh providers/github.sh
  registry.sh globals.env
 )
 # Print the file mode recorded in the git index for path $1 (e.g. 100755).
 # Prints nothing when the path is not tracked.
 git_mode() {
   git -C "$REPO" ls-files -s -- "$1" | cut -d ' ' -f 1
 }
@test "executable scripts have the +x bit on disk" {
   # Every listed entry point must carry the executable bit in the working tree.
   local script
   for script in "${EXECUTABLES[@]}"; do
     if [ ! -x "${REPO}/${script}" ]; then
       echo "NON eseguibile su disco: $script"
       return 1
     fi
   done
 }
@test "executable scripts are recorded 100755 in git" {
   # The index must track each entry point, and track it with mode 100755.
   local script recorded
   for script in "${EXECUTABLES[@]}"; do
     recorded="$(git_mode "$script")"
     if [ -z "$recorded" ]; then
       echo "non tracciato in git: $script"
       return 1
     fi
     if [ "$recorded" != "100755" ]; then
       echo "git mode $recorded (atteso 100755): $script"
       return 1
     fi
   done
 }
@test "sourced libraries are NOT executable in git (100644)" {
   local lib recorded
   for lib in "${LIBRARIES[@]}"; do
     recorded="$(git_mode "$lib")"
     case "$recorded" in
       "")     ;;   # not tracked / optional -> skip
       100644) ;;   # expected mode for a sourced library
       *)
         echo "git mode $recorded (atteso 100644, e' sourced): $lib"
         return 1
         ;;
     esac
   done
 }
@test "executable shell scripts pass bash -n (syntax)" {
   local script
   for script in "${EXECUTABLES[@]}"; do
     # Only shell scripts are parsed; the other entries (e.g. *.py) are skipped.
     [[ "$script" == *.sh ]] || continue
     if ! bash -n "${REPO}/${script}"; then
       echo "syntax error: $script"
       return 1
     fi
   done
 }