def configure_parser(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``analyze`` subcommand and its options to ``subparsers``.

    The options are declared as an ordered table of ``(flags, settings)``
    pairs and registered one by one in that order. After registration the
    subcommand's ``func`` default is set to ``run``.
    """
    analyze = subparsers.add_parser(
        "analyze",
        help="Run a CLTK pipeline on text and emit a chosen output format.",
        formatter_class=HelpFormatter,
    )

    # Each entry: (option strings, keyword arguments for ``add_argument``).
    # Entries are registered top to bottom, so keep the order intentional.
    option_table = (
        # Language and processing backend.
        (
            ("--lang", "--language"),
            {
                "dest": "language",
                "required": True,
                "help": "Glottolog id or CLTK language key.",
            },
        ),
        (
            ("--backend",),
            {
                "default": "stanza",
                "help": "Backend to use (stanza, openai, ollama, mistral, spacy).",
            },
        ),
        (("--pipeline",), {"help": "Optional pipeline class name to use."}),
        # Input sources.
        (("--text",), {"help": "Raw text to analyze."}),
        (("--text-file",), {"help": "Path to a text file to analyze."}),
        (("--input-dir",), {"help": "Batch mode: directory of input files."}),
        (
            ("--glob",),
            {
                "default": "*.txt",
                "help": "Glob pattern for --input-dir (default: *.txt).",
            },
        ),
        # Output selection and destinations.
        (
            ("--out",),
            {
                "required": True,
                "help": "Output type (raw, conllu, readers-guide, feature-table, json).",
            },
        ),
        (
            ("--format",),
            {
                "help": "Format for feature-table (csv, tsv, parquet) or json (pretty, min).",
            },
        ),
        (
            ("--out-file",),
            {"help": "Write output to this path; defaults to stdout."},
        ),
        (
            ("--out-dir",),
            {"help": "Output directory for batch mode (--input-dir)."},
        ),
        # Backend/pipeline configuration.
        (
            ("--config",),
            {
                "help": "JSON string or path to JSON file for backend/pipeline settings.",
            },
        ),
        # Output size limits.
        (
            ("--max-sentences",),
            {"type": int, "help": "Cap the number of sentences in output."},
        ),
        (
            ("--max-tokens",),
            {
                "type": int,
                "help": "Cap the number of tokens per sentence in output.",
            },
        ),
        # Boolean switches.
        (
            ("--continue-on-error",),
            {
                "action": "store_true",
                "help": "Continue processing batch inputs after errors.",
            },
        ),
        (
            ("--quiet",),
            {"action": "store_true", "help": "Suppress non-error logs."},
        ),
        (
            ("--verbose",),
            {"action": "store_true", "help": "Enable info-level logs."},
        ),
    )

    for option_strings, settings in option_table:
        analyze.add_argument(*option_strings, **settings)

    analyze.set_defaults(func=run)