feat: Introduce advanced linting for knowledge decay and content validation
This commit is contained in:
parent
4ed7b8edd0
commit
0cb6e1a929
1 changed files with 111 additions and 14 deletions
125
lib/lint.sh
125
lib/lint.sh
|
|
@ -4,20 +4,28 @@
|
||||||
# Validation logic for Knowledge Genome files.
|
# Validation logic for Knowledge Genome files.
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# Validates YAML frontmatter and mandatory fields
|
# Valid values for the 'type' frontmatter field.
|
||||||
|
# Must stay in sync with the type list in templates/agents-genome.md.
|
||||||
|
VALID_TYPES=("source" "entity" "concept" "query" "conflict" "private" "index" "log")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# lint_markdown_file <file> <genome_name>
|
||||||
|
# Validates YAML frontmatter: delimiters, mandatory fields, domain, type value.
|
||||||
|
# Returns the number of errors found.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
lint_markdown_file() {
|
lint_markdown_file() {
|
||||||
local file="$1"
|
local file="$1"
|
||||||
local genome_name="$2"
|
local genome_name="$2"
|
||||||
local errors=0
|
local errors=0
|
||||||
|
|
||||||
# 1. Check Frontmatter delimiters
|
# 1. Check frontmatter delimiters
|
||||||
if [[ $(head -n 1 "$file") != "---" ]]; then
|
if [[ $(head -n 1 "$file") != "---" ]]; then
|
||||||
warn "Missing frontmatter start (---) in: $file"
|
warn "Missing frontmatter start (---) in: $file"
|
||||||
errors=$((errors + 1))
|
errors=$((errors + 1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 2. Check mandatory fields
|
# 2. Check mandatory fields
|
||||||
local mandatory_fields=("title:" "type:" "domain:")
|
local mandatory_fields=("title:" "type:" "domain:" "maturity:" "last_updated:")
|
||||||
for field in "${mandatory_fields[@]}"; do
|
for field in "${mandatory_fields[@]}"; do
|
||||||
if ! grep -q "^${field}" "$file"; then
|
if ! grep -q "^${field}" "$file"; then
|
||||||
warn "Missing mandatory field '${field}' in: $file"
|
warn "Missing mandatory field '${field}' in: $file"
|
||||||
|
|
@ -25,52 +33,141 @@ lint_markdown_file() {
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# 3. Check if domain matches the genome name
|
# 3. Check domain matches genome name
|
||||||
if grep -q "^domain:" "$file" && ! grep -q "^domain: ${genome_name}" "$file"; then
|
if grep -q "^domain:" "$file" && ! grep -q "^domain: ${genome_name}" "$file"; then
|
||||||
warn "Domain mismatch in $file (expected ${genome_name})"
|
warn "Domain mismatch in $file (expected '${genome_name}')"
|
||||||
errors=$((errors + 1))
|
errors=$((errors + 1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# 4. Validate type value against allowed list
|
||||||
|
local type_errors
|
||||||
|
check_valid_type "$file"
|
||||||
|
type_errors=$?
|
||||||
|
errors=$((errors + type_errors))
|
||||||
|
|
||||||
return $errors
|
return $errors
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ensures files in private/ directories have the 'private: true' flag
|
# ---------------------------------------------------------------------------
# check_valid_type <file>
# Verifies that the 'type' field contains a value from VALID_TYPES.
#
# The value is read from the first line that starts with 'type:' and is
# normalised before comparison: a trailing YAML comment, trailing whitespace
# (including the CR of CRLF files) and single/double quotes are removed, so
# "type: 'entity'" and "type: entity  # note" are both accepted.
#
# Returns 0 if the value is valid or no 'type:' line exists (the missing
# field is reported by the mandatory-field check in lint_markdown_file).
# Returns 1, after warning, if the value is empty or not in VALID_TYPES.
# ---------------------------------------------------------------------------
check_valid_type() {
    local file="$1"
    local type_value candidate

    # No 'type:' line at all: nothing to validate here.
    grep -q "^type:" "$file" || return 0

    # Strip the key, an inline comment, trailing whitespace/CR, then quotes.
    type_value=$(grep "^type:" "$file" | head -n 1 \
        | sed -e 's/^type:[[:space:]]*//' \
              -e 's/[[:space:]]#.*$//' \
              -e 's/[[:space:]]*$//' \
        | tr -d "\"'")

    # An empty value never matches an entry, so it falls through to the warning.
    for candidate in "${VALID_TYPES[@]}"; do
        [[ "$type_value" == "$candidate" ]] && return 0
    done

    warn "Invalid type value '${type_value}' in: $file"
    warn " Valid types: ${VALID_TYPES[*]}"
    return 1
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# check_privacy_consistency <file>
|
||||||
|
# Ensures files in private/ directories carry 'private: true'.
|
||||||
|
# Warns if a public file is incorrectly marked private.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
check_privacy_consistency() {
|
check_privacy_consistency() {
|
||||||
local file="$1"
|
local file="$1"
|
||||||
local errors=0
|
local errors=0
|
||||||
|
|
||||||
if [[ "$file" == *"/private/"* ]]; then
|
if [[ "$file" == *"/private/"* ]]; then
|
||||||
if ! grep -q "^private: true" "$file"; then
|
if ! grep -q "^private: true" "$file"; then
|
||||||
error "Privacy Leak: $file is in a private folder but lacks 'private: true' metadata."
|
error "Privacy leak: $file is in a private/ directory but lacks 'private: true'."
|
||||||
errors=$((errors + 1))
|
errors=$((errors + 1))
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
if grep -q "^private: true" "$file"; then
|
if grep -q "^private: true" "$file"; then
|
||||||
warn "Metadata Mismatch: $file is marked private but located in a public directory."
|
warn "Metadata mismatch: $file is marked 'private: true' but is in a public directory."
|
||||||
# We count this as a warning unless you want to force strict isolation
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
return $errors
|
return $errors
|
||||||
}
|
}
|
||||||
|
|
||||||
# Basic check for internal wiki-links [[target]]
|
# ---------------------------------------------------------------------------
# check_knowledge_decay <file>
# Reads 'maturity' and 'last_updated' from frontmatter and compares against
# the staleness thresholds defined in agents-genome.md:
#   maturity: stable -> flag if last_updated > 180 days ago
#   maturity: draft  -> flag if last_updated > 90 days ago
#
# Field values are normalised before use (inline YAML comment, trailing
# whitespace/CR and single or double quotes removed) so that CRLF files and
# quoted values are not silently skipped.
#
# Returns 1 if the file is stale, 0 otherwise.
# Silently skips files with missing or unparseable date fields, and files
# whose maturity is neither 'stable' nor 'draft'.
# ---------------------------------------------------------------------------

# _decay_field <file> <key>
# Prints the normalised value of the first '<key>:' line in <file>.
_decay_field() {
    grep "^${2}:" "$1" | head -n 1 \
        | sed -e "s/^${2}:[[:space:]]*//" \
              -e 's/[[:space:]]#.*$//' \
              -e 's/[[:space:]]*$//' \
        | tr -d "\"'"
}

check_knowledge_decay() {
    local file="$1"

    local last_updated maturity
    last_updated=$(_decay_field "$file" "last_updated")
    maturity=$(_decay_field "$file" "maturity")

    # Skip if either field is absent.
    [[ -z "$last_updated" || -z "$maturity" ]] && return 0

    # Pick the threshold; any other maturity is not decay-trackable.
    local threshold
    case "$maturity" in
        stable) threshold=180 ;;
        draft)  threshold=90 ;;
        *)      return 0 ;;
    esac

    # Parse the date: GNU date (Linux) understands --version and -d,
    # BSD date (macOS) needs -j -f with an explicit input format.
    local updated_ts
    if date --version >/dev/null 2>&1; then
        updated_ts=$(date -d "$last_updated" +%s 2>/dev/null)
    else
        updated_ts=$(date -j -f "%Y-%m-%d" "$last_updated" +%s 2>/dev/null)
    fi

    # Unparseable date (empty or non-numeric output): skip silently rather
    # than feed garbage into the arithmetic below.
    [[ "$updated_ts" =~ ^-?[0-9]+$ ]] || return 0

    local now days_old
    now=$(date +%s)
    days_old=$(( (now - updated_ts) / 86400 ))

    if (( days_old > threshold )); then
        warn "STALE: $file"
        warn " maturity: ${maturity} | last_updated: ${last_updated} | ${days_old} days ago (threshold: ${threshold})"
        return 1
    fi

    return 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# check_broken_links <file>
|
||||||
|
# Basic check for internal [[wikilinks]] that cannot be resolved locally.
|
||||||
|
# Only emits warnings — cross-genome links may legitimately not resolve here.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
check_broken_links() {
|
check_broken_links() {
|
||||||
local file="$1"
|
local file="$1"
|
||||||
local base_dir
|
local base_dir
|
||||||
base_dir=$(dirname "$file")
|
base_dir=$(dirname "$file")
|
||||||
|
|
||||||
# Extract links, stripping aliases: [[Link|Alias]] -> Link
|
# Extract link targets, stripping aliases: [[Link|Alias]] -> Link
|
||||||
local links
|
local links
|
||||||
links=$(grep -oP '\[\[\K[^\]]+' "$file" | cut -d'|' -f1)
|
links=$(grep -oP '\[\[\K[^\]]+' "$file" 2>/dev/null | cut -d'|' -f1)
|
||||||
|
|
||||||
for link in $links; do
|
for link in $links; do
|
||||||
local target="$link"
|
local target="$link"
|
||||||
[[ "$target" != *.md ]] && target="${target}.md"
|
[[ "$target" != *.md ]] && target="${target}.md"
|
||||||
|
|
||||||
# Simple relative check
|
|
||||||
if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then
|
if [[ ! -f "${base_dir}/${target}" && ! -f "${base_dir}/../${target}" ]]; then
|
||||||
# Only a warning as links might point to other genomes or deep structures
|
|
||||||
warn "Potential broken link: [[$link]] in $file"
|
warn "Potential broken link: [[$link]] in $file"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue