refactor(ingest): Refine path validation and improve script clarity
This commit is contained in:
parent
93bc5bb007
commit
2426b09b50
1 changed file with 8 additions and 6 deletions
|
|
@@ -30,9 +30,10 @@ command -v python3 >/dev/null 2>&1 || fail "deps" "python3 missing (needed by in
|
||||||
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
|
jq -e 'type=="object" and (.raw_source|type=="string") and (.pages|type=="array")' \
|
||||||
"$manifest" >/dev/null 2>&1 \
|
"$manifest" >/dev/null 2>&1 \
|
||||||
|| fail "manifest" "invalid manifest: need object with string raw_source and array pages"
|
|| fail "manifest" "invalid manifest: need object with string raw_source and array pages"
|
||||||
|
|
||||||
# 2) every page.path must be a string, live under wiki/, and contain no '..' (no traversal)
|
# 2) every page.path must be a string, live under wiki/, and contain no '..' (no traversal)
|
||||||
if jq -e '[.pages[].path
|
if jq -e '[.pages[].path
|
||||||
| select((type!="string") or (startswith("wiki/")|not) or test("\\.\\."))]
|
| select((type!="string") or (startswith("wiki/")|not) or contains(".."))]
|
||||||
| length > 0' "$manifest" >/dev/null 2>&1; then
|
| length > 0' "$manifest" >/dev/null 2>&1; then
|
||||||
fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
|
fail "manifest" "unsafe page path (must be a string under wiki/, no '..')"
|
||||||
fi
|
fi
|
||||||
|
|
@@ -95,12 +96,13 @@ bash "${SCRIPTS}/log-append.sh" --type INGEST --subject "$slug" --model "$model"
|
||||||
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|
--context "[[${raw_source}]]" --output "${out:-*(none)*}" --reasoning "$reasoning" \
|
||||||
|| fail "log" "log-append failed"
|
|| fail "log" "log-append failed"
|
||||||
|
|
||||||
# --- 3. scoped lint (capture findings for the PR; never aborts the run) ---
|
# --- 3. scoped linter (capture findings for the PR; never aborts the run) ---
|
||||||
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" && lint_rc=0 || lint_rc=$?
|
lint_out="$( bash "${SCRIPTS}/scoped-lint.sh" "$genome" "${all_paths[@]}" 2>&1 )" && lint_rc=0 || lint_rc=$?
|
||||||
|
|
||||||
# --- 4. assemble the PR body (manifest tables + lint results) ---
|
# --- 4. assemble the PR body (manifest tables + lint results) ---
|
||||||
body="$(mktemp)"
|
body="$(mktemp)"
|
||||||
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
|
trap 'rm -f "$body"' EXIT # auto-clean on any exit (success, fail(), or crash)
|
||||||
|
|
||||||
{
|
{
|
||||||
echo "## Summary"
|
echo "## Summary"
|
||||||
echo "$pr_summary"
|
echo "$pr_summary"
|
||||||
|
|
@@ -135,8 +137,8 @@ jq -nc \
|
||||||
--arg detail "$pr_out" \
|
--arg detail "$pr_out" \
|
||||||
'{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'
|
'{status:$status, slug:$slug, pr_url:$pr_url, lint_clean:$lint_clean, conflict:$conflict, detail:$detail}'
|
||||||
|
|
||||||
# The manifest is a single file overwritten by each pi run (not accumulating), but on full
|
# The manifest is a single file that is overwritten with each run, but if the process is
|
||||||
# success we remove it so a stale manifest can never be re-processed by mistake.
|
# completely successful, we remove it to prevent an outdated manifest from being reprocessed by mistake.
|
||||||
if [[ $pr_rc -eq 0 ]]; then
|
if [[ $pr_rc -eq 0 ]]; then
|
||||||
rm -f "$manifest"
|
rm -f "$manifest"
|
||||||
else
|
else
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue