# Per-sentence cap on the number of disagreement rows written to the report.
_MAX_DETAIL_ROWS = 20


def _md_cell(value: object) -> str:
    """Return *value* as text that is safe inside a Markdown table cell."""
    # A line break would terminate the table row, so flatten it to a space.
    text = str(value).replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
    # An unescaped pipe would be read as a column separator.
    return text.replace("|", "\\|")


def _md_row(*cells: object) -> str:
    """Format *cells* as a single Markdown table row."""
    return "| " + " | ".join(_md_cell(cell) for cell in cells) + " |"


def _format_pairs(mapping: dict[str, Any]) -> str:
    """Render a mapping as ``key=value`` items separated by ``"; "``."""
    return "; ".join(f"{key}={value}" for key, value in mapping.items())


def report_to_markdown(report: dict[str, Any]) -> str:
    """Render a compare_backends report as Markdown.

    The output has a metadata list, an "Agreement Rates" table, a
    "Top Disagreements" table and, when the report contains sentences, a
    "Per-Sentence Details" section listing at most ``_MAX_DETAIL_ROWS``
    disagreeing rows per sentence.

    Args:
        report: Mapping read through the keys ``"meta"``, ``"summary"`` and
            ``"sentences"``. Missing or ``None`` sections are treated as
            empty.

    Returns:
        The Markdown document as one string, lines joined with ``"\\n"`` and
        ending with a newline.
    """
    meta = report.get("meta") or {}
    summary = report.get("summary") or {}
    sentences = report.get("sentences") or []

    # str() each entry so a non-string backend name cannot break the join.
    backend_names = ", ".join(str(name) for name in meta.get("backends") or [])

    lines: list[str] = [
        "# Compare Backends Report",
        "",
        "## Metadata",
        "",
        f"- Language: {meta.get('language')}",
        f"- Backends: {backend_names}",
        f"- Base backend: {meta.get('base_backend')}",
        f"- Timestamp: {meta.get('timestamp')}",
        f"- Text hash: {meta.get('text_hash')}",
    ]
    cltk_version = meta.get("cltk_version")
    if cltk_version:
        lines.append(f"- CLTK version: {cltk_version}")

    # --- Agreement rates: one row per (field, backend pair). ---
    agreement_rates = summary.get("agreement_rates") or {}
    lines += [
        "",
        "## Agreement Rates",
        "",
        "| Field | Backend Pair | Agree | Total | Rate |",
        "| --- | --- | --- | --- | --- |",
    ]
    for field in COMPARE_FIELDS:
        for pair, stats in (agreement_rates.get(field) or {}).items():
            rate = stats.get("rate")
            # Accept integer rates (e.g. 0 or 1) as numbers too; bool is an
            # int subclass but is not a meaningful rate, so exclude it.
            is_number = isinstance(rate, (int, float)) and not isinstance(
                rate, bool
            )
            rate_str = f"{rate:.3f}" if is_number else "-"
            lines.append(
                _md_row(
                    field,
                    pair,
                    stats.get("agree", 0),
                    stats.get("total", 0),
                    rate_str,
                )
            )

    # --- Top disagreements, in the order given by the summary. ---
    lines += [
        "",
        "## Top Disagreements",
        "",
        "| Sentence | Row | Fields | Tokenization |",
        "| --- | --- | --- | --- |",
    ]
    for item in summary.get("most_disagreed_tokens") or []:
        field_names = ", ".join(str(name) for name in item.get("fields") or [])
        lines.append(
            _md_row(
                item.get("sentence_index"),
                item.get("row"),
                field_names,
                _format_pairs(item.get("tokenization") or {}),
            )
        )

    # --- Per-sentence details; the section is omitted without sentences. ---
    if sentences:
        lines.append("")
        lines.append("## Per-Sentence Details")
        for sent in sentences:
            lines.append("")
            lines.append(f"### Sentence {sent.get('index')}")
            sent_text = sent.get("text") or ""
            if sent_text:
                lines.append("")
                lines.append(sent_text)

            # A token row counts as a disagreement when any compared field's
            # diff entry has a falsy "agree"; a missing entry counts as
            # agreement.
            disagreement_rows = [
                tok
                for tok in sent.get("tokens") or []
                if any(
                    not ((tok.get("diff") or {}).get(field) or {}).get(
                        "agree", True
                    )
                    for field in COMPARE_FIELDS
                )
            ]
            if not disagreement_rows:
                lines.append("")
                lines.append("No disagreements found.")
                continue

            lines.append("")
            lines.append("| Row | Tokenization |")
            lines.append("| --- | --- |")
            for tok in disagreement_rows[:_MAX_DETAIL_ROWS]:
                tokenization = (tok.get("diff") or {}).get("tokenization") or {}
                lines.append(
                    _md_row(
                        tok.get("row"),
                        _format_pairs(tokenization.get("values") or {}),
                    )
                )
            hidden = len(disagreement_rows) - _MAX_DETAIL_ROWS
            if hidden > 0:
                lines.append("")
                lines.append(f"Truncated {hidden} additional rows.")

    # Trailing empty entry makes the joined document end with a newline.
    lines.append("")
    return "\n".join(lines)