#!/usr/bin/env bash
# =============================================================================
# pending-raw.sh — deterministic "what needs ingesting" calculator.
# Reads the clean base checkout and classifies each raw/articles/*.md as:
#   new      -> no wiki/sources/<slug>.md
#   modified -> page exists but its source_sha256 != current file hash
# Emits the same JSON envelope as changed-raw (drop-in), plus detail[] for ntfy.
#
# Usage: pending-raw.sh <genome>
# Env:   GENOMES_ROOT  directory holding the genome checkouts (default: ~/genomes)
#        INGEST_BASE   branch the checkout is reset to before scanning (default: main)
# Needs: git, jq, sha256sum, sort -z, and slug.sh located next to this script.
# Exit:  0 with a {"status":"ok",...} envelope on stdout;
#        1 with a {"status":"error",...} envelope when the genome directory is
#        missing or the checkout cannot be brought to a clean state.
# =============================================================================
set -euo pipefail

genome="${1:?usage: pending-raw.sh <genome>}"
base_dir="${GENOMES_ROOT:-${HOME}/genomes}"
base_branch="${INGEST_BASE:-main}"

# Resolve our own directory BEFORE changing directory: BASH_SOURCE[0] may be a
# relative path, which would resolve against the genome checkout after the cd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SLUG="${SCRIPT_DIR}/slug.sh"

# NOTE(review): genome is used as a path component without validation, and the
# steps below run `git reset --hard` / `git clean` there — confirm callers only
# pass trusted names.
cd "${base_dir}/${genome}" 2>/dev/null || {
  echo '{"status":"error","reason":"unknown genome"}'
  exit 1
}

# Clean start on the configured base (Step 3 will extract this to
# lib/clean-start.sh). `set -e` does not fire for a failing non-final command
# of an && list, so the result is checked explicitly: classifying against a
# stale or wrong-branch checkout would silently report bogus results.
if ! {
  git fetch -q origin \
    && git switch -q "$base_branch" 2>/dev/null \
    && git reset -q --hard "origin/${base_branch}" \
    && git clean -q -fd
}; then
  echo '{"status":"error","reason":"clean start failed"}'
  exit 1
fi

declare -a NEW=()
declare -a MOD=()
declare -A SEEN_SLUG=()

if [[ -d raw/articles ]]; then
  while IFS= read -r -d '' f; do
    rel="${f#./}"
    case "$rel" in
      */.stfolder/* | */.stignore | */.gitkeep) continue ;;
    esac

    # Files the slug helper rejects are skipped. An empty slug is skipped too:
    # it is not a valid associative-array subscript and would map to
    # wiki/sources/.md.
    slug="$("$SLUG" --raw "$rel")" || continue
    [[ -n "$slug" ]] || continue

    # Residual collision (two distinct raws -> same slug): warn, do not silence.
    if [[ -n "${SEEN_SLUG[$slug]:-}" && "${SEEN_SLUG[$slug]}" != "$rel" ]]; then
      msg="warn: slug collision ${slug}: ${SEEN_SLUG[$slug]} <-> ${rel}"
      # A missing/failing logger must not abort the scan; fall back to stderr.
      logger -t pending-raw "$msg" 2>/dev/null \
        || printf 'pending-raw: %s\n' "$msg" >&2
    fi
    SEEN_SLUG[$slug]="$rel"

    page="wiki/sources/${slug}.md"
    if [[ ! -f "$page" ]]; then
      NEW+=("$rel")
      continue
    fi

    # Hash via stdin: when given a file name containing a backslash or newline,
    # sha256sum prefixes the output line with '\', which would corrupt the hash.
    cur="$(sha256sum <"$rel" | cut -d' ' -f1)"
    # First source_sha256: line of the page, CRs stripped. sed quits after the
    # first match so no downstream `head` can SIGPIPE it under pipefail.
    rec="$(sed -n '/^source_sha256:/{s/^source_sha256:[[:space:]]*//p;q;}' "$page" \
      | tr -d '\r')"
    if [[ "$cur" != "$rec" ]]; then
      MOD+=("$rel")
    fi
  done < <(find raw/articles -type f -name '*.md' -print0 2>/dev/null \
    | LC_ALL=C sort -z) # byte-wise sort: output order independent of the filesystem
fi

if [[ ${#NEW[@]} -eq 0 && ${#MOD[@]} -eq 0 ]]; then
  # jq does the escaping of $genome; -c keeps the one-line form.
  jq -cn --arg g "$genome" \
    '{status: "ok", genome: $g, count: 0, files: [], detail: []}'
else
  # Records are "<reason>\t<path>". The reason comes first so a tab inside a
  # path cannot truncate it (the remaining fields are re-joined). File names
  # containing a newline are still not supported by this line-based hand-off.
  # ${arr[@]+...} keeps `set -u` happy with an empty array on bash < 4.4.
  {
    for x in ${NEW[@]+"${NEW[@]}"}; do printf 'new\t%s\n' "$x"; done
    for x in ${MOD[@]+"${MOD[@]}"}; do printf 'modified\t%s\n' "$x"; done
  } | jq -R 'split("\t") | {path: (.[1:] | join("\t")), reason: .[0]}' \
    | jq -s --arg g "$genome" \
      '{status: "ok", genome: $g, count: length, files: [.[].path], detail: .}'
fi