fix(ingest): Ollama call leggibile sugli errori + toggle INGEST_THINK; script eseguibili + permissions.bats
This commit is contained in:
parent
a18a04a38c
commit
dfa1862ddf
14 changed files with 72 additions and 0 deletions
0
scripts/add-genome.sh
Normal file → Executable file
0
scripts/add-genome.sh
Normal file → Executable file
0
scripts/lint-genomes.sh
Normal file → Executable file
0
scripts/lint-genomes.sh
Normal file → Executable file
0
scripts/setup-genomes.sh
Normal file → Executable file
0
scripts/setup-genomes.sh
Normal file → Executable file
0
scripts/setup-master.sh
Normal file → Executable file
0
scripts/setup-master.sh
Normal file → Executable file
0
scripts/setup.sh
Normal file → Executable file
0
scripts/setup.sh
Normal file → Executable file
0
scripts/verify-genomes.sh
Normal file → Executable file
0
scripts/verify-genomes.sh
Normal file → Executable file
0
skills/ingest/scripts/index-append.py
Normal file → Executable file
0
skills/ingest/scripts/index-append.py
Normal file → Executable file
6
skills/ingest/scripts/ingest-semantic.py
Normal file → Executable file
6
skills/ingest/scripts/ingest-semantic.py
Normal file → Executable file
|
|
@ -27,6 +27,10 @@ OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/chat")
|
||||||
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
|
MODEL = os.environ.get("INGEST_MODEL", "qwen2.5:14b")
|
||||||
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
|
NUM_CTX = int(os.environ.get("INGEST_NUM_CTX", "16384"))
|
||||||
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
|
TIMEOUT = int(os.environ.get("INGEST_TIMEOUT", "600"))
|
||||||
|
# INGEST_THINK controls the optional "think" flag added to the Ollama request payload.
#   - A boolean-like word sends the flag explicitly. "false" disables a reasoning
#     model's thinking trace, so models like gemma / qwq / qwen3 emit only the
#     structured JSON (no truncation from long thinking).
#   - Unset, empty or whitespace-only = omit the flag entirely (correct for plain
#     instruct models such as qwen2.5).
# Blank values are normalised to None so that `INGEST_THINK=` (as often written in
# .env files or compose templates) is not silently sent as think=false.
THINK = os.environ.get("INGEST_THINK", "").strip() or None
|
||||||
TODAY = datetime.date.today().isoformat()
|
TODAY = datetime.date.today().isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -228,6 +232,8 @@ def call_model():
|
||||||
# deterministic extraction; repetition penalties OFF for structured output
|
# deterministic extraction; repetition penalties OFF for structured output
|
||||||
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
"options": {"temperature": 0.2, "repeat_penalty": 1.0, "num_ctx": NUM_CTX},
|
||||||
}
|
}
|
||||||
|
if THINK is not None:
|
||||||
|
payload["think"] = THINK.strip().lower() in ("1", "true", "yes", "on")
|
||||||
data = json.dumps(payload).encode("utf-8")
|
data = json.dumps(payload).encode("utf-8")
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
||||||
|
|
|
||||||
0
skills/ingest/scripts/log-append.sh
Normal file → Executable file
0
skills/ingest/scripts/log-append.sh
Normal file → Executable file
0
skills/ingest/scripts/open-pr.sh
Normal file → Executable file
0
skills/ingest/scripts/open-pr.sh
Normal file → Executable file
0
skills/ingest/scripts/run-ingest.sh
Normal file → Executable file
0
skills/ingest/scripts/run-ingest.sh
Normal file → Executable file
0
skills/ingest/scripts/scoped-lint.sh
Normal file → Executable file
0
skills/ingest/scripts/scoped-lint.sh
Normal file → Executable file
0
skills/ingest/scripts/slug.sh
Normal file → Executable file
0
skills/ingest/scripts/slug.sh
Normal file → Executable file
66
tests/permissions.bats
Normal file
66
tests/permissions.bats
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
#!/usr/bin/env bats
|
||||||
|
# tests/permissions.bats
|
||||||
|
# Blinda i permessi del repo, cosi' un `cp`/deploy preserva l'eseguibilita' e non
|
||||||
|
# ricapita il "Permission denied" (es. ingest-semantic.py lanciato diretto).
|
||||||
|
#
|
||||||
|
# Principio:
|
||||||
|
# - script con shebang lanciati direttamente -> eseguibili (git mode 100755)
|
||||||
|
# - librerie *sourced* (lib/, providers/, registry.sh, globals.env) -> NON eseguibili (100644)
|
||||||
|
|
||||||
|
# Repository root: the parent of the directory that holds this test file.
REPO="${BATS_TEST_DIRNAME}/.."

# Entry points / directly executed scripts. All of them carry a shebang; some are
# also launched by hand for debugging. Order is preserved because the tests stop
# at the first offending entry.
EXECUTABLES=()

# Ingest skill scripts.
EXECUTABLES+=(
  skills/ingest/scripts/ingest-semantic.py
  skills/ingest/scripts/run-ingest.sh
  skills/ingest/scripts/scoped-lint.sh
  skills/ingest/scripts/open-pr.sh
  skills/ingest/scripts/log-append.sh
  skills/ingest/scripts/slug.sh
  skills/ingest/scripts/index-append.py
)

# Top-level repository scripts.
EXECUTABLES+=(
  scripts/add-genome.sh
  scripts/setup.sh
  scripts/setup-genomes.sh
  scripts/setup-master.sh
  scripts/lint-genomes.sh
  scripts/verify-genomes.sh
)
|
||||||
|
|
||||||
|
# Sourced libraries: these must NOT be executable.
LIBRARIES=(
  lib/lint.sh
  lib/output.sh
  lib/deps.sh
  lib/git-crypt.sh
  lib/scaffold.sh
  lib/structure.sh
  providers/forgejo.sh
  providers/github.sh
  registry.sh
  globals.env
)
|
||||||
|
|
||||||
|
# Print the file mode recorded in the git index (e.g. 100755) for path $1,
# resolved against $REPO. Prints nothing when the path is not in the index.
git_mode() {
  # `ls-files --stage` emits "<mode> <object> <stage>\t<path>"; keep the first field.
  git -C "$REPO" ls-files --stage -- "$1" | cut -d' ' -f1
}
|
||||||
|
|
||||||
|
@test "executable scripts have the +x bit on disk" {
|
||||||
|
for f in "${EXECUTABLES[@]}"; do
|
||||||
|
[ -x "${REPO}/${f}" ] || { echo "NON eseguibile su disco: $f"; return 1; }
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "executable scripts are recorded 100755 in git" {
|
||||||
|
for f in "${EXECUTABLES[@]}"; do
|
||||||
|
mode="$(git_mode "$f")"
|
||||||
|
[ -n "$mode" ] || { echo "non tracciato in git: $f"; return 1; }
|
||||||
|
[ "$mode" = "100755" ] || { echo "git mode $mode (atteso 100755): $f"; return 1; }
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "sourced libraries are NOT executable in git (100644)" {
|
||||||
|
for f in "${LIBRARIES[@]}"; do
|
||||||
|
mode="$(git_mode "$f")"
|
||||||
|
[ -z "$mode" ] && continue # non tracciato/opzionale -> salta
|
||||||
|
[ "$mode" = "100644" ] || { echo "git mode $mode (atteso 100644, e' sourced): $f"; return 1; }
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "executable shell scripts pass bash -n (syntax)" {
|
||||||
|
for f in "${EXECUTABLES[@]}"; do
|
||||||
|
case "$f" in
|
||||||
|
*.sh) bash -n "${REPO}/${f}" || { echo "syntax error: $f"; return 1; } ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue