Skip to content

processes

Processes for part-of-speech (POS) and morphological feature tagging.

PromptBuilder module-attribute

PromptBuilder = (
    Callable[[str, str], PromptInfo] | PromptInfo | str
)

MorphosyntaxProcess

Bases: Process

Base class for morphosyntactic processes.

process_id class-attribute

process_id: str = ''

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

run abstractmethod

run(input_doc: Doc) -> Doc

Process input_doc and return an enriched/modified copy.

Source code in cltk/core/data_types.py
@abstractmethod
def run(self, input_doc: Doc) -> Doc:
    """Transform ``input_doc`` and hand back an enriched/modified copy.

    Abstract hook: this body does nothing and returns ``None``.
    """
    ...

GenAIMorphosyntaxProcess

Bases: MorphosyntaxProcess

Base morphosyntax process using a generative GPT model; language-specific subclasses set glottolog_id.

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

CuneiformLuwianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'cune1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Cuneiform Luwian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

HieroglyphicLuwianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hier1240'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Hieroglyphic Luwian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldPrussianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'prus1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Prussian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

LithuanianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lith1251'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Lithuanian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

LatvianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'latv1249'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Latvian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AlbanianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'gheg1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Albanian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AkkadianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'akka1240'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Akkadian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AncientGreekGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'anci1242'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Ancient Greek language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

BiblicalHebrewGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'anci1244'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Biblical Hebrew language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalArabicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1259'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Arabic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AvestanGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'aves1237'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Avestan language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` made with ``copy``. When no
    ``prompt_builder`` is configured but ``prompt_profile`` is, a builder is
    derived from the template returned by ``PromptProfileRegistry.get_prompt``
    and that template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to tag; ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # The message names this process (morphosyntax tagging); it used to
        # mention sentence splitting, which pointed at the wrong process.
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # the two-argument ``(lang, text)`` call shape.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, so keyword names are not
    # checked statically: the doc goes positionally, the options by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

BactrianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'bact1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Bactrian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

SogdianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sogd1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Sogdian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

BengaliGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'beng1280'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Bengali language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

CarianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'cari1274'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Carian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ChurchSlavicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'chur1257'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Church Slavic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalArmenianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1256'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Armenian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalMandaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1253'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Mandaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalMongolianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mong1331'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Mongolian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalSyriacGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1252'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Syriac language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalTibetanGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1254'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Tibetan language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

CopticGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'copt1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Coptic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

DemoticGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'demo1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Demotic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

EasternPanjabiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'panj1256'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Eastern Panjabi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Operates on a copy of ``input_doc`` (made with ``copy``), resolves the
    prompt builder, and delegates the tagging to ``self.algorithm``. An
    explicitly configured ``prompt_builder`` takes precedence; otherwise,
    when ``prompt_profile`` is set, a builder is derived from the template
    returned by ``PromptProfileRegistry.get_prompt``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be non-empty.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        # Log before raising, mirroring the ``normalized_text`` check above.
        msg = "glottolog_id must be set for morphosyntax tagging"
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    prompt_builder = self.prompt_builder
    # The digest is only known when the prompt comes from a profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``Callable`` typing does not retain parameter names, so the document is
    # passed positionally; the remaining options are passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

EdomiteGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'edom1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Edomite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

GeezGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'geez1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Geez language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

GothicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'goth1244'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Gothic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

GujaratiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'guja1252'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Gujarati language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

HindiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hind1269'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Hindi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

KhariBoliGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'khad1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Khari Boli dialect of Hindi."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

BrajGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'braj1242'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Braj Bhasha language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AwadhiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'awad1243'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Awadhi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

HittiteGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hitt1242'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Hittite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

KhotaneseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'khot1251'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Khotanese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

TumshuqeseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tums1237'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Tumshuqese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

LateEgyptianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'late1256'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Late Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

LatinGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lati1261'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Latin language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    ``input_doc`` is copied with ``copy`` and the copy is handed to
    ``self.algorithm`` together with the resolved prompt configuration.

    Prompt resolution:
        * ``self.prompt_builder``, when not ``None``, is forwarded unchanged.
        * Otherwise, when ``self.prompt_profile`` is truthy, a template is
          fetched with ``PromptProfileRegistry.get_prompt`` and wrapped in a
          ``(lang, text)`` builder; the template's ``digest`` is forwarded
          as ``prompt_digest``.
        * Otherwise ``prompt_builder`` and ``prompt_digest`` are both ``None``.

    Args:
        input_doc: Document to tag. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document's ``normalized_text`` is falsy, or if
            ``self.glottolog_id`` is ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # The template is bound as a default argument so the builder keeps
        # referring to this exact template object.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; all configuration goes by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

LiteraryChineseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lite1248'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Literary Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LycianAGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lyci1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Lycian A language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LydianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lydi1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Lydian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MaharastriPrakritGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'maha1305'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Maharastri Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleArmenianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1364'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Armenian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleBretonGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldb1244'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Breton language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleChineseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1344'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleCornishGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'corn1251'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Cornish language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleEgyptianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1369'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleEnglishGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1317'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle English language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleFrenchGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1316'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle French language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleHighGermanGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1343'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle High German language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleMongolGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mong1329'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Mongol language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and handed to
    ``self.algorithm``. When no explicit ``prompt_builder`` is configured
    but ``prompt_profile`` is set, a builder is derived from the registered
    profile template and that template's digest is forwarded as
    ``prompt_digest``; otherwise ``prompt_digest`` stays ``None``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty/missing or
            ``glottolog_id`` is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder takes precedence over a prompt profile.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # ``algorithm`` is typed ``Callable[..., Doc]``, which cannot describe
    # parameter names; only the document is positional, the options are
    # passed explicitly by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MoabiteGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'moab1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Moabite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OdiaGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oriy1255'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Odia language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OfficialAramaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'impe1235'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Official Aramaic (700-300 BCE) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldBurmeseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldb1235'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Burmese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldChineseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldc1244'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

BaihuaChineseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1255'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for Early Vernacular Chinese (Baihua)."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

ClassicalBurmeseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'nucl1310'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Burmese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TangutGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tang1334'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Tangut (Xixia) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

NewarGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'newa1246'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Newar (Classical Nepal Bhasa) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MeiteiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mani1292'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Meitei (Classical Manipuri) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SgawKarenGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sgaw1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Sgaw Karen language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MogholiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mogh1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Mogholi (Moghol) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

NumidianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'numi1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Numidian (Ancient Berber) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    A copy of ``input_doc`` (made with ``copy``) is validated and then
    handed to ``self.algorithm`` together with the prompt configuration.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be truthy.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is ``None``.

    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured ``prompt_builder`` takes precedence; the
    # profile template is only looked up when no builder was provided.
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document goes positionally; every option is passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TaitaGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tait1247'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Cushitic Taita language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

HausaGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'haus1257'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Hausa language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldJurchenGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'jurc1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Jurchen language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldJapaneseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'japo1237'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Japanese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldHungarianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldh1242'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Hungarian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ChagataiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'chag1247'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Chagatai language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldTurkicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldu1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Turkic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldTamilGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldt1248'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Tamil language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AmmoniteGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'ammo1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Ammonite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldAramaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olda1246'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Aramaic (up to 700 BCE) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

OldAramaicSamalianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olda1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Aramaic–Samʾalian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

MiddleAramaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1366'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Aramaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

HatranGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hatr1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Hatran language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    The work is done on a copy of ``input_doc`` (made with ``copy``). The
    prompt builder comes from ``self.prompt_builder``; when that is ``None``
    and ``self.prompt_profile`` is set, a builder is derived from the
    template returned by ``PromptProfileRegistry.get_prompt`` for this
    profile, ``process_id`` and ``prompt_version``.

    Args:
        input_doc: Document to tag; must have a non-empty
            ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id``
            is not set.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    # Only populated when the prompt comes from a registered profile template.
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # ``template`` is bound as a default argument so the builder keeps
        # its own reference to the resolved template.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is passed positionally; every other option is passed
    # by keyword to the algorithm callable.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

JewishBabylonianAramaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'jewi1240'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Jewish Babylonian Aramaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SamalianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sama1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Samʾalian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldEgyptianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olde1242'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldEnglishGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olde1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old English (ca. 450-1100) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldFrenchGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldf1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old French (842-ca. 1400) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldHighGermanGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldh1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old High German (ca. 750-1050) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

EarlyIrishGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldi1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Irish language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MarathiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mara1378'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Marathi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldNorseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldn1244'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Norse language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldPersianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldp1254'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old Persian (ca. 600-400 B.C.) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldMiddleWelshGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldw1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Old-Middle Welsh language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

ParthianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'part1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Parthian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddlePersianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pahl1241'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Middle Persian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``. An explicit ``self.prompt_builder``
    takes precedence; when it is ``None`` and ``self.prompt_profile`` is
    set, a builder is derived from the template that
    ``PromptProfileRegistry`` returns for this profile, process and version,
    and that template's digest is forwarded alongside it.

    Args:
        input_doc: Document to tag; must have a non-empty ``normalized_text``.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is
            ``None``.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        # Bind the template as a default argument so the builder carries
        # its own reference to it.
        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # The document is positional; the remaining options are passed by keyword.
    return self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PalaicGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pala1331'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Palaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

PaliGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pali1273'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Pali language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

PhoenicianGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'phoe1239'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Phoenician language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

PunjabiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'panj1256'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Punjabi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

AssameseGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'assa1263'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Assamese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

SinhalaGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sinh1246'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Sinhala language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

SindhiGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sind1272'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Sindhi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

KashmiriGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'kash1277'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Kashmiri language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

BagriGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'bagr1243'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Bagri (Rajasthani) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

ClassicalSanskritGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1258'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Classical Sanskrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

VedicSanskritGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'vedi1234'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Vedic Sanskrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

TokharianAGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'toch1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Tokharian A language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

TokharianBGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'toch1237'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Tokharian B language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT morphosyntax tagging workflow.

    Works on a copy of ``input_doc``, resolves which prompt builder to
    use, and delegates the tagging itself to ``self.algorithm``.

    Args:
        input_doc: Document to tag; its ``normalized_text`` must be
            non-empty.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on this process.
    """
    output_doc = copy(input_doc)
    if not output_doc.normalized_text:
        msg: str = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(msg)
        raise ValueError(msg)
    # Ensure required attributes are present
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for morphosyntax tagging")
    # An explicitly configured builder always wins; a profile template is
    # only consulted when no builder was supplied.
    prompt_builder = self.prompt_builder
    prompt_digest = None
    if prompt_builder is None and self.prompt_profile:
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = template
        ) -> PromptInfo:
            """Build a morphosyntax prompt from a profile template."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        prompt_builder = _builder
    # Only the document is positional; every option is passed by keyword.
    output_doc = self.algorithm(
        output_doc,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return output_doc

UgariticGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'ugar1238'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Ugaritic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Tag a copy of ``input_doc`` using the generative morphosyntax algorithm.

    A prompt builder set on the process is used as-is. When none is set but
    ``prompt_profile`` is, the template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder; its digest is handed
    to the algorithm alongside the profile name.

    Args:
        input_doc: Document to tag. It is copied with ``copy`` first and
            must have a truthy ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report the failure through the object returned by bind_from_doc
        # before raising, so the message is recorded as well as raised.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)

    # glottolog_id must be configured before tagging can start.
    if self.glottolog_id is None:
        # NOTE(review): the wording mentions sentence splitting although this
        # is the morphosyntax process -- confirm whether that is intended.
        raise ValueError("glottolog_id must be set for sentence splitting")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the named prompt profile.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = profile_template
        ) -> PromptInfo:
            """Render the profile template for one language/text pair."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        builder = _builder

    # The algorithm is typed as ``Callable[..., Doc]``: only the document is
    # positional here, every option is passed by keyword.
    tag = self.algorithm
    return tag(
        doc_copy,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

UrduGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'urdu1245'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Urdu language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Tag a copy of ``input_doc`` using the generative morphosyntax algorithm.

    A prompt builder set on the process is used as-is. When none is set but
    ``prompt_profile`` is, the template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder; its digest is handed
    to the algorithm alongside the profile name.

    Args:
        input_doc: Document to tag. It is copied with ``copy`` first and
            must have a truthy ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report the failure through the object returned by bind_from_doc
        # before raising, so the message is recorded as well as raised.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)

    # glottolog_id must be configured before tagging can start.
    if self.glottolog_id is None:
        # NOTE(review): the wording mentions sentence splitting although this
        # is the morphosyntax process -- confirm whether that is intended.
        raise ValueError("glottolog_id must be set for sentence splitting")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the named prompt profile.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = profile_template
        ) -> PromptInfo:
            """Render the profile template for one language/text pair."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        builder = _builder

    # The algorithm is typed as ``Callable[..., Doc]``: only the document is
    # positional here, every option is passed by keyword.
    tag = self.algorithm
    return tag(
        doc_copy,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SauraseniPrakritGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'saur1252'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Sauraseni Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Tag a copy of ``input_doc`` using the generative morphosyntax algorithm.

    A prompt builder set on the process is used as-is. When none is set but
    ``prompt_profile`` is, the template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder; its digest is handed
    to the algorithm alongside the profile name.

    Args:
        input_doc: Document to tag. It is copied with ``copy`` first and
            must have a truthy ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report the failure through the object returned by bind_from_doc
        # before raising, so the message is recorded as well as raised.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)

    # glottolog_id must be configured before tagging can start.
    if self.glottolog_id is None:
        # NOTE(review): the wording mentions sentence splitting although this
        # is the morphosyntax process -- confirm whether that is intended.
        raise ValueError("glottolog_id must be set for sentence splitting")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the named prompt profile.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = profile_template
        ) -> PromptInfo:
            """Render the profile template for one language/text pair."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        builder = _builder

    # The algorithm is typed as ``Callable[..., Doc]``: only the document is
    # positional here, every option is passed by keyword.
    tag = self.algorithm
    return tag(
        doc_copy,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MagadhiPrakritGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'maga1260'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Magadhi Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Tag a copy of ``input_doc`` using the generative morphosyntax algorithm.

    A prompt builder set on the process is used as-is. When none is set but
    ``prompt_profile`` is, the template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder; its digest is handed
    to the algorithm alongside the profile name.

    Args:
        input_doc: Document to tag. It is copied with ``copy`` first and
            must have a truthy ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report the failure through the object returned by bind_from_doc
        # before raising, so the message is recorded as well as raised.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)

    # glottolog_id must be configured before tagging can start.
    if self.glottolog_id is None:
        # NOTE(review): the wording mentions sentence splitting although this
        # is the morphosyntax process -- confirm whether that is intended.
        raise ValueError("glottolog_id must be set for sentence splitting")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the named prompt profile.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = profile_template
        ) -> PromptInfo:
            """Render the profile template for one language/text pair."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        builder = _builder

    # The algorithm is typed as ``Callable[..., Doc]``: only the document is
    # positional here, every option is passed by keyword.
    tag = self.algorithm
    return tag(
        doc_copy,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

GandhariGenAIMorphosyntaxProcess

Bases: GenAIMorphosyntaxProcess

Language-specific morphosyntax process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'gand1259'

description class-attribute instance-attribute

description: str = (
    "Default morphology tagging process using a generative GPT model for the Gandhari language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'morphosyntax.genai'

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[PromptBuilder] = None

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the morphosyntax generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT morphosyntax tagging workflow.

Source code in cltk/morphosyntax/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Tag a copy of ``input_doc`` using the generative morphosyntax algorithm.

    A prompt builder set on the process is used as-is. When none is set but
    ``prompt_profile`` is, the template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder; its digest is handed
    to the algorithm alongside the profile name.

    Args:
        input_doc: Document to tag. It is copied with ``copy`` first and
            must have a truthy ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report the failure through the object returned by bind_from_doc
        # before raising, so the message is recorded as well as raised.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)

    # glottolog_id must be configured before tagging can start.
    if self.glottolog_id is None:
        # NOTE(review): the wording mentions sentence splitting although this
        # is the morphosyntax process -- confirm whether that is intended.
        raise ValueError("glottolog_id must be set for sentence splitting")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the named prompt profile.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, self.process_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str, text: str, _template: PromptTemplate = profile_template
        ) -> PromptInfo:
            """Render the profile template for one language/text pair."""
            return build_prompt_info(
                _template, lang_or_dialect_name=lang, text=text
            )

        builder = _builder

    # The algorithm is typed as ``Callable[..., Doc]``: only the document is
    # positional here, every option is passed by keyword.
    tag = self.algorithm
    return tag(
        doc_copy,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )