Skip to content

compare

CLI handler for the cltk compare subcommand.

configure_parser

configure_parser(subparsers: _SubParsersAction) -> None

Register the compare subcommand parser.

Source code in cltk/cli/compare.py
def configure_parser(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``compare`` subcommand and its options to *subparsers*.

    The created parser stores ``run`` as its ``func`` default so the caller
    can dispatch on ``args.func`` after parsing.

    Args:
        subparsers: The sub-parser action of the parent CLI parser.
    """
    compare_cmd = subparsers.add_parser(
        "compare",
        help="Compare multiple CLTK backends on the same text.",
        formatter_class=HelpFormatter,
    )
    add_option = compare_cmd.add_argument

    # Options are declared in the order they should appear in ``--help``.

    # --- What to analyze ---------------------------------------------------
    add_option(
        "--lang",
        "--language",
        dest="language",
        required=True,
        help="Glottolog language id.",
    )
    add_option("--text", help="Raw text to analyze.")
    add_option("--text-file", help="Path to a text file to analyze.")

    # --- Which backends, and how they are configured -------------------------
    add_option(
        "--backends",
        required=True,
        help="Comma-separated backend list (e.g., stanza,openai,ollama).",
    )
    add_option(
        "--configs",
        help="JSON string or path to JSON file with per-backend overrides.",
    )

    # --- Report destination and shape ----------------------------------------
    add_option("--out-dir", help="Directory for report outputs.")
    add_option(
        "--basename",
        default="compare_backends",
        help="Base filename for outputs (default: compare_backends).",
    )
    add_option(
        "--format",
        choices=["md", "json", "both"],
        default="md",
        help="Output format (md, json, both).",
    )
    add_option(
        "--top-n",
        type=int,
        default=10,
        help="Number of top disagreements to include.",
    )

    # --- Optional caps on how much input is compared -------------------------
    add_option(
        "--max-sentences",
        type=int,
        help="Cap the number of sentences compared.",
    )
    add_option(
        "--max-tokens",
        type=int,
        help="Cap the number of tokens per sentence.",
    )

    # --- Logging verbosity ---------------------------------------------------
    add_option("--quiet", action="store_true", help="Suppress non-error logs.")
    add_option("--verbose", action="store_true", help="Enable info-level logs.")

    compare_cmd.set_defaults(func=run)

run

run(args: Namespace) -> int

Run the compare command.

Source code in cltk/cli/compare.py
def run(args: argparse.Namespace) -> int:
    """Run the compare command.

    Validates the parsed options, runs the backend comparison, and then
    either writes the report under ``args.out_dir`` or prints it to stdout
    (JSON when ``--format json``, Markdown otherwise).

    Args:
        args: Namespace produced by the ``compare`` subparser.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If ``--top-n`` is not positive, if ``--format both`` is
            requested without ``--out-dir``, or if ``compare_backends``
            raises (its message becomes the exit message). The input
            helpers called here may also raise; their behavior is not
            visible from this function.
    """
    set_log_level(quiet=args.quiet, verbose=args.verbose)
    _validate_limits(args)
    if args.top_n <= 0:
        raise SystemExit("--top-n must be a positive integer.")
    # Two report formats cannot share stdout. Reject this combination up
    # front so an invalid invocation fails before any backend is run,
    # instead of after the comparison has finished and its result is thrown away.
    if args.format == "both" and not args.out_dir:
        raise SystemExit("--format both requires --out-dir.")

    text = load_text(args.text, args.text_file)
    backends = parse_backends(args.backends)
    configs = _parse_configs(args.configs)

    try:
        report = compare_backends(
            args.language,
            text,
            backends,
            # An empty/falsy configs value is passed on as None.
            configs=configs or None,
            max_sentences=args.max_sentences,
            max_tokens=args.max_tokens,
            top_n=args.top_n,
        )
    except Exception as exc:
        # CLI boundary: surface the failure as an exit message while
        # keeping the original exception chained for debugging.
        raise SystemExit(str(exc)) from exc

    if args.out_dir:
        _write_reports(report, args.out_dir, args.basename, args.format)
        return 0

    if args.format == "json":
        print(json.dumps(report, indent=2, sort_keys=True))
    else:
        print(report_to_markdown(report))
    return 0