diff --git a/tests/lint.bats b/tests/lint.bats
index f0b0306..ac19dfe 100644
--- a/tests/lint.bats
+++ b/tests/lint.bats
@@ -69,3 +69,90 @@ EOF
   run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test wiki/sources/good.md
   [ "$status" -eq 0 ]
 }
+
+# --- duplicate-slug advisory (check_duplicates + its distance helpers) --------
+# These guard the dedup feature: correct edit-distance math, the warn-only
+# contract, the exact-self-match skip (run-ingest appends new slugs to the
+# index before lint runs), and that the advisory fires once per run, not once
+# per file.
+#
+# Every `[[ ... ]]` assertion ends in `|| false`: on Bash older than 4.1 a
+# failing `[[ ]]` that is not the last command of a test does not abort it, so
+# without the suffix a broken check could pass silently.
+
+@test "levenshtein: identical strings have distance 0" {
+  run levenshtein cat cat
+  [ "$status" -eq 0 ]
+  [ "$output" -eq 0 ]
+}
+
+@test "levenshtein: kitten→sitting is 3 (textbook case)" {
+  run levenshtein kitten sitting
+  [ "$status" -eq 0 ]
+  [ "$output" -eq 3 ]
+}
+
+@test "similarity: identical strings score 100" {
+  run similarity gpu-pricing gpu-pricing
+  # NOTE(review): the exit status of `similarity` is not asserted here;
+  # confirm its contract and add a `$status` check if it should be 0.
+  [ "$output" -eq 100 ]
+}
+
+@test "check_duplicates: warns on a near-duplicate of an indexed concept" {
+  G="$(make_fixture_genome)"; cd "$G"
+  # The index holds `llm-routing`; the manifest introduces `llm-routings`.
+  printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
+  cat > .ingest-manifest.json <<'JSON'
+{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routings.md","status":"created"}]}
+JSON
+  run check_duplicates .ingest-manifest.json
+  # Warn-only contract: the advisory is printed but the exit status stays 0.
+  [ "$status" -eq 0 ]
+  [[ "$output" == *"≈"* ]] || false
+  [[ "$output" == *"llm-routings"* ]] || false
+}
+
+@test "check_duplicates: silent when the new slug is unlike anything indexed" {
+  G="$(make_fixture_genome)"; cd "$G"
+  printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
+  cat > .ingest-manifest.json <<'JSON'
+{"raw_source":"src","pages":[{"path":"wiki/concepts/budget-hardware.md","status":"created"}]}
+JSON
+  run check_duplicates .ingest-manifest.json
+  [ "$status" -eq 0 ]
+  [[ "$output" != *"≈"* ]] || false
+}
+
+@test "check_duplicates: an exact self-match is not flagged (index already has the slug)" {
+  G="$(make_fixture_genome)"; cd "$G"
+  # run-ingest step 1 inserts this run's slug into the index BEFORE lint runs;
+  # the slug must not be reported as a duplicate of itself.
+  printf -- '- [[concepts/llm-routing]] — x\n' >> wiki/index.md
+  cat > .ingest-manifest.json <<'JSON'
+{"raw_source":"src","pages":[{"path":"wiki/concepts/llm-routing.md","status":"created"}]}
+JSON
+  run check_duplicates .ingest-manifest.json
+  [ "$status" -eq 0 ]
+  [[ "$output" != *"≈"* ]] || false
+}
+
+@test "scoped-lint: duplicate advisory fires once across multiple files, not per file" {
+  G="$(make_fixture_genome)"
+  write_page "$G/wiki/concepts/data-pipelines.md" concept genome-test
+  write_page "$G/wiki/concepts/other-topic.md" concept genome-test
+  printf -- '- [[concepts/data-pipeline]] — x\n' >> "$G/wiki/index.md"
+  cat > "$G/.ingest-manifest.json" <<'JSON'
+{"raw_source":"src","pages":[
+  {"path":"wiki/concepts/data-pipelines.md","status":"created"},
+  {"path":"wiki/concepts/other-topic.md","status":"created"}
+]}
+JSON
+  cd "$G"
+  export KG_LIB_DIR="$LIB_DIR"
+  run bash "$SKILL_SCRIPTS/scoped-lint.sh" genome-test \
+    wiki/concepts/data-pipelines.md wiki/concepts/other-topic.md
+  [ "$status" -eq 0 ]
+  # Two files are linted, but exactly one output line may carry the `≈` marker.
+  [ "$(grep -c "≈" <<< "$output")" -eq 1 ]
+}