test: Add comprehensive tests for duplicate slug advisory and helpers

This commit is contained in:
Matteo Cherubini 2026-06-19 05:44:59 +02:00
parent a843c30a9e
commit 8e4f0386c6

View file

@ -69,3 +69,80 @@ EOF
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
[ "$status" -eq 0 ]
}
# --- duplicate-slug advisory (check_duplicates + its distance helpers) --------
# These guard the dedup feature: correct edit-distance math, the warn-only
# contract, the exact-self-match skip (run-ingest appends new slugs to the
# index before lint runs), and that the advisory fires once per run, not once
# per file.
@test "levenshtein: identical strings have distance 0" {
run levenshtein cat cat
[ "$status" -eq 0 ]
[ "$output" -eq 0 ]
}
@test "levenshtein: kitten→sitting is 3 (textbook case)" {
run levenshtein kitten sitting
[ "$output" -eq 3 ]
}
@test "similarity: identical strings score 100" {
run similarity gpu-pricing gpu-pricing
[ "$output" -eq 100 ]
}
@test "check_duplicates: warns on a near-duplicate of an indexed concept" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routings.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" == *"≈"* ]]
[[ "$output" == *"llm-routings"* ]]
}
@test "check_duplicates: silent when the new slug is unlike anything indexed" {
G="$(make_fixture_genome)"; cd "$G"
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/budget-hardware.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
@test "check_duplicates: an exact self-match is not flagged (index already has the slug)" {
G="$(make_fixture_genome)"; cd "$G"
# run-ingest step 1 inserts this run's slug into the index BEFORE lint runs;
# the slug must not be reported as a duplicate of itself.
printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
cat > .ingest-manifest.json <<'JSON'
{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routing.md","status":"created"}]}
JSON
run check_duplicates .ingest-manifest.json
[ "$status" -eq 0 ]
[[ "$output" != *"≈"* ]]
}
@test "scoped-lint: duplicate advisory fires once across multiple files, not per file" {
G="$(make_fixture_genome)"
write_page "$G/wiki/concepts/data-pipelines.md" concept genome-test
write_page "$G/wiki/concepts/other-topic.md" concept genome-test
printf -- '- [[concepts/data-pipeline]] — x\n' >> "$G/wiki/index.md"
cat > "$G/.ingest-manifest.json" <<'JSON'
{"raw_source":"src","pages":[
{"path":"wiki/concepts/data-pipelines.md","status":"created"},
{"path":"wiki/concepts/other-topic.md","status":"created"}
]}
JSON
cd "$G"
export KG_LIB_DIR="$LIB_DIR"
run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test \
wiki/concepts/data-pipelines.md wiki/concepts/other-topic.md
[ "$status" -eq 0 ]
[ "$(grep -c "≈" <<< "$output")" -eq 1 ]
}