refactor(ingest): Refine path validation and improve script clarity

This commit is contained in:
Matteo Cherubini 2026-06-05 10:47:35 +02:00
parent 93bc5bb007
commit 2426b09b50

View file

@@ -30,10 +30,11 @@ command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by in
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
"$manifest" >/dev/null 2>&1 \
|| fail "manifest" "invalid manifest: need object with string raw_source and array pages"
# 2) every page.path must be a string, live under wiki/, and contain no '..' (no traversal)
if jq -e '[.pages[].path
| select((type!="string") or (startswith("wiki/")|not) or test("\\.\\."))]
| length > 0' "$manifest" >/dev/null 2>&1; then
| select((type!="string") or (startswith("wiki/")|not) or contains(".."))]
| length > 0' "$manifest" >/dev/null 2>&1; then
fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
fi
@@ -95,12 +96,13 @@ bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model"
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|| fail "log" "log-append failed"
# --- 3. scoped lint (capture findings for the PR; never aborts the run) ---
# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" && lint_rc=0 || lint_rc=$?
# --- 4. assemble the PR body (manifest tables + lint results) ---
body="$(mktemp)"
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
{
echo "## Summary"
echo "$pr_summary"
@@ -135,8 +137,8 @@ jq -nc \
--arg detail "$pr_out" \
'{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'
# The manifest is a single file overwritten by each pi run (not accumulating), but on full
# success we remove it so a stale manifest can never be re-processed by mistake.
# The manifest is a single file that is overwritten with each run, but if the process is
# completely successful, we remove it to prevent an outdated manifest from being reprocessed by mistake.
if [[ $pr_rc -eq 0 ]]; then
rm -f "$manifest"
else