knowledge-genome-orchestrator/deploy/nexus/genome-raw-commit.sh

133 lines
6.2 KiB
Bash

#!/bin/bash
# genome-raw-commit <genome>
#
# Commit the raw files that Syncthing has placed in the vault and push them to origin/<base>.
# - Committer = n8n-bot (sole pusher); Author = the person who wrote it (Syncthing modifiedBy -> .authors.json)
# - One commit per author (single-device => one commit). No-op if there is nothing.
# - JSON output built with jq (safe escaping), with a `files` array:
# for each raw -> file, author, local_path, local_url (file://), remote_url (Forgejo web).
# Strict mode: stop on a failing command, an unset variable or a failing pipeline stage.
set -euo pipefail

# Sole positional argument: the genome name, which is also the vault directory name.
genome="${1:?usage: genome-raw-commit <genome>}"

# Whitelist the name so it cannot traverse paths or reshape the URLs built from it.
genome_re='^[a-z0-9][a-z0-9-]{0,63}$'
if ! [[ "$genome" =~ $genome_re ]]; then
  echo '{"status":"error","reason":"invalid genome name"}'
  exit 1
fi

# Source the site configuration; auto-export is enabled only while the file is read.
set -a
. "${HOME}/.config/knowledge-genome.env"
set +a

# Fallbacks for anything the env file (or the caller's environment) left unset or empty.
: "${GENOME_VAULTS_ROOT:=/srv/genome-vaults}"
: "${GENOME_BASE:=develop}"
: "${FORGEJO_USER:=n8n-bot}"
: "${FORGEJO_HOST:=127.0.0.1:3001}"
: "${FORGEJO_OWNER:=Keru}"
# Human-facing URL used for remote links (not the loopback address).
: "${FORGEJO_WEB_BASE:=https://git.keruhomelab.com}"
: "${SYNCTHING_URL:=http://127.0.0.1:8384}"
: "${COMMITTER_NAME:=n8n-bot}"
: "${COMMITTER_EMAIL:=n8n-bot@homelab}"
: "${DEFAULT_AUTHOR_NAME:=Unknown}"
: "${DEFAULT_AUTHOR_EMAIL:=unknown@syncthing}"

# Derived locations: the vault checkout, the Syncthing folder id, the device -> author map.
vault="${GENOME_VAULTS_ROOT}/${genome}"
fid="${genome}-public"
authors_map="${GENOME_VAULTS_ROOT}/.authors.json"

# GENOME_PUSH_URL is a test seam; when it is unset the Forgejo loopback URL is used.
clone_url="${GENOME_PUSH_URL:-http://${FORGEJO_USER}@${FORGEJO_HOST}/${FORGEJO_OWNER}/${genome}.git}"
export GIT_ASKPASS=/usr/local/bin/genome-askpass

# The vault must already be a git checkout; this script never clones it.
if [[ ! -d "${vault}/.git" ]]; then
  printf '{"status":"error","reason":"vault absent","genome":"%s"}\n' "$genome"
  exit 1
fi
cd "$vault"

# Repository-local committer identity, with commit signing switched off.
git config user.name "$COMMITTER_NAME"
git config user.email "$COMMITTER_EMAIL"
git config commit.gpgsign false

# Keep Syncthing's bookkeeping entries out of git: add each one to the local
# exclude file unless that exact line is already present.
exclude_file="${vault}/.git/info/exclude"
for st_marker in 'raw/.stignore' 'raw/.stfolder'; do
  if ! grep -qxF "$st_marker" "$exclude_file" 2>/dev/null; then
    echo "$st_marker" >> "$exclude_file"
  fi
done

# Stage everything under raw/, then unstage the Syncthing markers in case they
# were already tracked (failure to unstage is ignored).
git add -A -- raw/
git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true

# Nothing staged: report a no-op and stop.
if git diff --cached --quiet; then
  printf '{"status":"noop","genome":"%s"}\n' "$genome"
  exit 0
fi
#######################################
# Ask Syncthing which device last modified a raw file.
# Globals:   SYNCTHING_API_KEY, SYNCTHING_URL, fid (read)
# Arguments: $1 - path relative to the vault (raw/...); the raw/ prefix is
#                 stripped before the lookup
# Outputs:   the `.local.modifiedBy` value of /rest/db/file on stdout, or
#            nothing when no API key is configured or the lookup fails
# Returns:   always 0 - the lookup is best effort, failures are swallowed
#######################################
resolve_dev() {
  local rel="${1#raw/}"
  [[ -n "${SYNCTHING_API_KEY:-}" ]] || return 0
  # The API key header is read from stdin (`-H @-`) instead of being passed as
  # an argument, so the secret is not exposed through the process list.
  curl -fsS -H @- --get "${SYNCTHING_URL}/rest/db/file" \
    --data-urlencode "folder=${fid}" --data-urlencode "file=${rel}" \
    2>/dev/null <<< "X-API-Key: ${SYNCTHING_API_KEY}" \
    | jq -r '.local.modifiedBy // empty' 2>/dev/null || true
}
#######################################
# Map a Syncthing device id to a git author.
# Globals:   authors_map (read) - JSON object keyed by device id, each value
#            an object with `name` and `email`
#            DEFAULT_AUTHOR_NAME, DEFAULT_AUTHOR_EMAIL (read)
# Arguments: $1 - device id (may be empty)
# Outputs:   "name<TAB>email" on stdout, without a trailing newline
# Returns:   0
#######################################
author_for_dev() {
  local dev="$1" name="$DEFAULT_AUTHOR_NAME" email="$DEFAULT_AUTHOR_EMAIL"
  local mapped_name="" mapped_email=""
  if [[ -n "$dev" && -f "$authors_map" ]]; then
    # `// empty | strings` yields nothing for a missing, null or non-string
    # field, so a partial map entry never produces the literal text "null".
    mapped_name="$(jq -r --arg d "$dev" '.[$d].name // empty | strings' "$authors_map" 2>/dev/null)" \
      || mapped_name=""
    mapped_email="$(jq -r --arg d "$dev" '.[$d].email // empty | strings' "$authors_map" 2>/dev/null)" \
      || mapped_email=""
  fi
  # The result is tab-separated and read back line-wise by the caller, so tabs
  # and line breaks inside a value are flattened to spaces.
  mapped_name=${mapped_name//[$'\t\r\n']/ }
  mapped_email=${mapped_email//[$'\t\r\n']/ }
  # Each field falls back to its default independently.
  if [[ -n "$mapped_name" ]]; then
    name="$mapped_name"
  fi
  if [[ -n "$mapped_email" ]]; then
    email="$mapped_email"
  fi
  printf '%s\t%s' "$name" "$email"
}
# Collect per-file (relpath, author) rows and group the staged files by author,
# then create one commit per author.
#
# - The staged list is read NUL-delimited (-z): git then emits the paths
#   verbatim instead of C-quoting non-ASCII or special characters, so each
#   path can be handed back to `git commit` unchanged.
# - --no-renames lists both sides of a rename (a deletion plus an addition);
#   with rename detection only the new path is listed and the deletion of the
#   old path would stay staged but uncommitted.
# - STAGED holds every path; G_FILES maps an author key to a space-separated
#   list of indices into STAGED, so grouping does not depend on any separator
#   character that could occur in a file name.
declare -A G_FILES=() G_NAME=() G_EMAIL=()
declare -a ROWS=() STAGED=()
while IFS= read -r -d '' f; do
  [[ -n "$f" ]] || continue
  dev="$(resolve_dev "$f")"
  IFS=$'\t' read -r aname aemail <<< "$(author_for_dev "$dev")"
  # ROWS keeps the "file<TAB>author" shape consumed when the JSON report is built.
  ROWS+=("${f}"$'\t'"${aname}")
  key="${aname} <${aemail}>"
  G_FILES["$key"]+="${#STAGED[@]} "
  STAGED+=("$f")
  G_NAME["$key"]="$aname"; G_EMAIL["$key"]="$aemail"
done < <(git diff --cached --name-only --no-renames -z -- raw/)
ts="$(date +%Y-%m-%dT%H:%M:%S%z)"
commits=0; summary=""
for key in "${!G_FILES[@]}"; do
  files=()
  # Deliberately unquoted: split the index list on spaces (it holds digits only).
  for idx in ${G_FILES[$key]}; do
    files+=("${STAGED[idx]}")
  done
  # Comma-joined list of the paths without their raw/ prefix, for the subject line.
  trimmed=("${files[@]#raw/}")
  short="$(IFS=,; printf '%s' "${trimmed[*]}")"
  msg="$(printf 'raw(%s): sync %s\n\nAdded-by: %s\nSource: syncthing-autocommit\nSynced-at: %s\n' \
    "$genome" "$short" "${G_NAME[$key]}" "$ts")"
  # --literal-pathspecs: a file name containing *, ? or [ must match only itself.
  git --literal-pathspecs commit -q --author="$key" -m "$msg" -- "${files[@]}"
  commits=$((commits+1))
  summary="${summary}${summary:+; }${G_NAME[$key]}:${short}"
done
# Push to origin/<base>. The vault is SCRATCH, so we never do an interactive rebase
# (which can conflict when the same raw file is edited repeatedly). Strategy:
# try a fast-forward push; if that fails, re-apply our raw changes on top of a
# fresh origin/<base> and push once more.
#
# Every failure path prints a JSON error object on stdout and exits 1, like the
# other error exits of this script.
if ! git fetch -q origin; then
  printf '{"status":"error","reason":"fetch-failed","genome":"%s"}\n' "$genome"
  exit 1
fi
if ! git push -q "$clone_url" "HEAD:${GENOME_BASE}" 2>/dev/null; then
  # Snapshot the working raw/ tree before the hard reset. The reset removes
  # files that exist only in our commits, so a failed snapshot must abort
  # instead of being ignored.
  tmp="$(mktemp -d)"
  trap 'rm -rf -- "$tmp"' EXIT
  if [[ -d raw ]] && ! cp -a raw/. "$tmp"/; then
    printf '{"status":"error","reason":"snapshot-failed","genome":"%s"}\n' "$genome"
    exit 1
  fi
  # Record the paths we deleted since diverging from origin/<base>: copying the
  # snapshot back can only add or overwrite files, it cannot express a deletion.
  gone=()
  fork_rev="$(git merge-base HEAD "origin/${GENOME_BASE}" 2>/dev/null || true)"
  if [[ -n "$fork_rev" ]]; then
    while IFS= read -r -d '' p; do
      gone+=("$p")
    done < <(git diff --name-only --no-renames --diff-filter=D -z "$fork_rev" HEAD -- raw/)
  fi
  git reset -q --hard "origin/${GENOME_BASE}"
  git clean -q -fd
  mkdir -p raw
  if ! cp -a "$tmp"/. raw/; then
    # Keep the snapshot: it may be the only remaining copy of the working files.
    trap - EXIT
    printf 'genome-raw-commit: restore failed, snapshot kept in %s\n' "$tmp" >&2
    printf '{"status":"error","reason":"restore-failed","genome":"%s"}\n' "$genome"
    exit 1
  fi
  if (( ${#gone[@]} > 0 )); then
    for p in "${gone[@]}"; do
      snap="${tmp}/${p#raw/}"
      # Re-delete only what is absent from the snapshot as well.
      if [[ ! -e "$snap" && ! -L "$snap" ]]; then
        rm -f -- "$p" || true   # best effort: a directory now at that path is left alone
      fi
    done
  fi
  rm -rf -- "$tmp"
  git add -A -- raw/
  git reset -q -- raw/.stignore raw/.stfolder 2>/dev/null || true
  if git diff --cached --quiet; then
    # our content already matches origin -> nothing to push, report ok-noop-after-realign
    printf '{"status":"ok","genome":"%s","base":"%s","commits":0,"head":"%s","summary":"already in sync after realign","files":[]}\n' \
      "$genome" "$GENOME_BASE" "$(git rev-parse --short HEAD)"
    exit 0
  fi
  # A failed commit must not be ignored: the push below would then succeed
  # without our changes and the run would be reported as ok.
  if ! git commit -q --author="${DEFAULT_AUTHOR_NAME} <${DEFAULT_AUTHOR_EMAIL}>" \
      -m "raw(${genome}): re-apply after realign" -- raw/; then
    printf '{"status":"error","reason":"commit-failed-after-realign","genome":"%s"}\n' "$genome"
    exit 1
  fi
  # The per-author commits were discarded by the reset; exactly one commit is pushed.
  commits=1
  git push -q "$clone_url" "HEAD:${GENOME_BASE}" \
    || { printf '{"status":"error","reason":"push-failed-after-realign","genome":"%s"}\n' "$genome"; exit 1; }
fi
# Final report: a single JSON object on stdout, built entirely with jq so that
# every value is escaped correctly.
head="$(git rev-parse --short HEAD)"
# `files` array: local (file://) and remote (Forgejo web) link for each committed raw.
# The relative path is percent-encoded segment by segment (the "/" separators
# stay literal) so that names with spaces, "#", "?" or non-ASCII characters
# still give valid links; `file` and `local_path` keep the unencoded path.
files_json="$(
  # ${ROWS[@]+...} expands to nothing for an empty array, which older bash
  # versions would otherwise reject under `set -u`.
  for row in ${ROWS[@]+"${ROWS[@]}"}; do
    IFS=$'\t' read -r rel aname <<< "$row"
    jq -n --arg file "$rel" --arg author "$aname" \
      --arg lpath "${vault}/${rel}" \
      --arg lbase "file://${vault}" \
      --arg rbase "${FORGEJO_WEB_BASE}/${FORGEJO_OWNER}/${genome}/src/branch/${GENOME_BASE}" \
      '($file | split("/") | map(@uri) | join("/")) as $enc
       | {file:$file, author:$author, local_path:$lpath,
          local_url:($lbase + "/" + $enc), remote_url:($rbase + "/" + $enc)}'
  done | jq -s '.'
)"
jq -n --arg genome "$genome" --arg base "$GENOME_BASE" --argjson commits "$commits" \
  --arg head "$head" --arg summary "$summary" --argjson files "$files_json" \
  '{status:"ok", genome:$genome, base:$base, commits:$commits, head:$head, summary:$summary, files:$files}'