Skip to content

processes

Process for GenAI-driven enrichment (glosses, IPA, idioms, pedagogy).

EnrichmentPromptBuilder module-attribute

# Accepted forms for a prompt builder: a callable that takes two strings and an
# IPA_PRONUNCIATION_MODE and returns a PromptInfo, a ready-made PromptInfo,
# or a plain str.
EnrichmentPromptBuilder = (
    Callable[[str, str, IPA_PRONUNCIATION_MODE], PromptInfo]
    | PromptInfo
    | str
)

GenAIEnrichmentProcess

Bases: Process

Language-agnostic enrichment process using a generative GPT model (legacy).

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LexiconEnrichmentProcess

Bases: GenAIEnrichmentProcess

Lexicon-focused enrichment step (glosses, lemma translations).

process_id class-attribute

process_id: str = 'enrichment.lexicon'

enrichment_fields class-attribute

enrichment_fields: set[str] = {'lexicon'}

prompt_template_id class-attribute

prompt_template_id: str = 'enrichment.genai'

source class-attribute instance-attribute

source: Optional[str] = None

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PhonologyEnrichmentProcess

Bases: GenAIEnrichmentProcess

Phonology-focused enrichment step (IPA and orthography).

process_id class-attribute

process_id: str = 'enrichment.phonology'

enrichment_fields class-attribute

enrichment_fields: set[str] = {'phonology'}

prompt_template_id class-attribute

prompt_template_id: str = 'enrichment.genai'

mode class-attribute instance-attribute

mode: Optional[str] = None

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

IdiomsEnrichmentProcess

Bases: GenAIEnrichmentProcess

Idiom/MWE-focused enrichment step.

process_id class-attribute

process_id: str = 'enrichment.idioms'

enrichment_fields class-attribute

enrichment_fields: set[str] = {'idioms'}

prompt_template_id class-attribute

prompt_template_id: str = 'enrichment.genai'

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PedagogyEnrichmentProcess

Bases: GenAIEnrichmentProcess

Pedagogy-focused enrichment step (learner-facing notes).

process_id class-attribute

process_id: str = 'enrichment.pedagogy'

enrichment_fields class-attribute

enrichment_fields: set[str] = {'pedagogy'}

prompt_template_id class-attribute

prompt_template_id: str = 'enrichment.genai'

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

CuneiformLuwianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'cune1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Cuneiform Luwian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

HieroglyphicLuwianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hier1240'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Hieroglyphic Luwian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldPrussianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'prus1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Prussian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LithuanianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lith1251'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Lithuanian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LatvianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'latv1249'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Latvian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

AlbanianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'gheg1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Albanian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` through the configured GPT workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is set but a
    ``prompt_profile`` is, a prompt template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder function; the
    template's digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only consult the profile registry when no explicit builder was supplied.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info for one language/token-table pair."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

AkkadianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'akka1240'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Akkadian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

AncientGreekGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'anci1242'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Ancient Greek language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

BiblicalHebrewGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'anci1244'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Biblical Hebrew language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalArabicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1259'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Arabic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

AvestanGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'aves1237'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Avestan language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

BactrianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'bact1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Bactrian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

SogdianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sogd1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Sogdian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

BengaliGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'beng1280'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Bengali language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

CarianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'cari1274'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Carian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ChurchSlavicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'chur1257'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Church Slavic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalArmenianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1256'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Armenian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative model.

    The input is copied first; the copy is what gets validated and handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder callable, and the
    template's digest is forwarded together with it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        The value produced by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a target language.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    active_builder = self.prompt_builder
    digest = None
    if active_builder is None and self.prompt_profile:
        # No explicit builder: derive one from the profile's registered template.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound template and call arguments."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        active_builder = _builder

    # Resolve the algorithm before reading the remaining options so attribute
    # access happens in the same order as a direct call expression would.
    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": active_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalMandaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1253'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Mandaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalMongolianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mong1331'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Mongolian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalSyriacGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1252'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Syriac language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

ClassicalTibetanGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1254'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Tibetan language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

CopticGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'copt1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Coptic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

DemoticGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'demo1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Demotic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

EasternPanjabiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'panj1256'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Eastern Panjabi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

EdomiteGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'edom1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Edomite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

GeezGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'geez1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Geez language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

GothicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'goth1244'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Gothic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

GujaratiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'guja1252'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Gujarati language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI algorithm.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    # An explicitly configured builder wins; the profile registry is only
    # consulted when no builder is set and a profile name is available.
    use_profile_template = prompt_builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no template id is configured.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": prompt_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": prompt_digest,
        "fields": self.enrichment_fields,
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(working_doc, **call_options)

HindiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hind1269'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Hindi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

KhariBoliGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'khad1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Khari Boli dialect of Hindi."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

BrajGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'braj1242'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Braj Bhasha language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

AwadhiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'awad1243'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Awadhi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

HittiteGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hitt1242'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Hittite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

KhotaneseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'khot1251'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Khotanese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TumshuqeseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tums1237'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Tumshuqese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LateEgyptianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'late1256'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Late Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LatinGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lati1261'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Latin language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LiteraryChineseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lite1248'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Literary Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LycianAGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lyci1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Lycian A language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` and return the result.

    If no ``prompt_builder`` is configured but ``prompt_profile`` is set, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder callable; the template's digest is passed on as ``prompt_digest``.

    Args:
        input_doc: Document to enrich. It is copied with ``copy`` first.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copy has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # Only fall back to the profile registry when no builder was supplied.
    if builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        resolved = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = resolved.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = resolved,
        ) -> PromptInfo:
            """Build prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

LydianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'lydi1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Lydian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MaharastriPrakritGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'maha1305'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Maharastri Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleArmenianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1364'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Armenian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleBretonGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldb1244'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Breton language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleChineseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1344'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleCornishGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'corn1251'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Cornish language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleEgyptianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1369'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleEnglishGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1317'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle English language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleFrenchGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1316'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle French language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleHighGermanGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1343'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle High German language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleMongolGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mong1329'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Mongol language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's generative algorithm.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``. When no ``prompt_builder`` is configured but a
    ``prompt_profile`` is, a builder is derived from the template that
    ``PromptProfileRegistry.get_prompt`` returns, and that template's
    digest is forwarded alongside it.

    Args:
        input_doc: Document to enrich; its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id``
            is ``None``.
    """
    doc_copy = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        # Report through the object returned by ``bind_from_doc`` before raising.
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder always takes precedence over a profile.
    derive_from_profile = builder is None and bool(self.prompt_profile)
    if derive_from_profile:
        template_key = self.prompt_template_id
        if not template_key:
            # No dedicated template id configured: fall back to the process id.
            template_key = self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            template_key,
            self.prompt_version,
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render the resolved template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MoabiteGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'moab1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Moabite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OdiaGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oriy1255'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Odia language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OfficialAramaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'impe1235'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Official Aramaic (700-300 BCE) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldBurmeseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldb1235'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Burmese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldChineseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldc1244'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Chinese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

BaihuaChineseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1255'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for Early Vernacular Chinese (Baihua)."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

ClassicalBurmeseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'nucl1310'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Burmese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TangutGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tang1334'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Tangut (Xixia) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

NewarGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'newa1246'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Newar (Classical Nepal Bhasa) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MeiteiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mani1292'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Meitei (Classical Manipuri) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SgawKarenGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sgaw1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Sgaw Karen language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The prompt comes from ``self.prompt_builder`` when one is set. Otherwise,
    if ``self.prompt_profile`` is truthy, a template is fetched from
    ``PromptProfileRegistry`` and wrapped in a builder closure, and the
    template's digest is forwarded to the algorithm.

    Args:
        input_doc: Document to enrich; it must have a truthy
            ``normalized_text``.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    use_profile_template = builder is None and bool(self.prompt_profile)
    if use_profile_template:
        # Fall back to the process id when no explicit template id is set.
        template_key = self.prompt_template_id
        if not template_key:
            template_key = self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render prompt info from the bound profile template."""
            prompt_kwargs = {
                "lang_or_dialect_name": lang,
                "token_table": table,
                "ipa_mode": ipa_mode,
            }
            return build_prompt_info(_template, **prompt_kwargs)

        builder = _builder

    return self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MogholiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mogh1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Mogholi (Moghol) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI workflow.

    The input is duplicated with ``copy``, validated, and then handed to
    ``self.algorithm`` together with the prompt settings held on this process.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on the process.

    """
    doc_copy: Doc = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info from the template bound at definition."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

NumidianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'numi1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Numidian (Ancient Berber) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Validate a copy of the document and pass it to the enrichment algorithm.

    Args:
        input_doc: Source document; the copy made from it is what gets
            validated and enriched.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: When ``normalized_text`` is empty or missing, or when
            the process has no ``glottolog_id``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    chosen_builder = self.prompt_builder
    template_digest = None
    if chosen_builder is None:
        if self.prompt_profile:
            # No explicit builder: derive one from the registered profile.
            lookup_id = self.prompt_template_id or self.process_id
            template = PromptProfileRegistry.get_prompt(
                self.prompt_profile, lookup_id, self.prompt_version
            )
            template_digest = template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = template,
            ) -> PromptInfo:
                """Build prompt info for one request from the template."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            chosen_builder = _builder

    enriched = self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=chosen_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=template_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enriched

TaitaGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'tait1247'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Cushitic Taita language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run GPT-based enrichment on a copy of ``input_doc``.

    Steps: copy the document, check its preconditions, resolve a prompt
    builder (explicit one first, otherwise from ``prompt_profile``), and
    call ``self.algorithm``.

    Args:
        input_doc: Document whose ``normalized_text`` will be enriched.

    Returns:
        The document produced by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    output_doc: Doc = copy(input_doc)

    # Precondition 1: there must be text to enrich (the failure is logged).
    if not output_doc.normalized_text:
        problem = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(problem)
        raise ValueError(problem)

    # Precondition 2: the process must be tied to a language.
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    use_profile = prompt_builder is None and self.prompt_profile
    if use_profile:
        template_key = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Fill the profile template with the call's parameters."""
            info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return info

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = dict(
        ipa_mode=self.ipa_mode,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enrich(output_doc, **call_options)

HausaGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'haus1257'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Hausa language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Apply the configured enrichment algorithm to a copy of the document.

    Args:
        input_doc: Document to process. The caller's object is copied first
            and the copy is what gets passed on.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: On a document without ``normalized_text`` or a process
            without ``glottolog_id``.

    """
    target: Doc = copy(input_doc)
    has_text = bool(target.normalized_text)
    if not has_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(target).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # Fall back to the registry template named by the template id, or
        # by the process id when no template id is configured.
        template_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Produce prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    result = self.algorithm(
        target,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return result

OldJurchenGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'jurc1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Jurchen language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI workflow.

    The input is duplicated with ``copy``, validated, and then handed to
    ``self.algorithm`` together with the prompt settings held on this process.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on the process.

    """
    doc_copy: Doc = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info from the template bound at definition."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldJapaneseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'japo1237'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Japanese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Validate a copy of the document and pass it to the enrichment algorithm.

    Args:
        input_doc: Source document; the copy made from it is what gets
            validated and enriched.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: When ``normalized_text`` is empty or missing, or when
            the process has no ``glottolog_id``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    chosen_builder = self.prompt_builder
    template_digest = None
    if chosen_builder is None:
        if self.prompt_profile:
            # No explicit builder: derive one from the registered profile.
            lookup_id = self.prompt_template_id or self.process_id
            template = PromptProfileRegistry.get_prompt(
                self.prompt_profile, lookup_id, self.prompt_version
            )
            template_digest = template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = template,
            ) -> PromptInfo:
                """Build prompt info for one request from the template."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            chosen_builder = _builder

    enriched = self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=chosen_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=template_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enriched

OldHungarianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldh1242'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Hungarian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run GPT-based enrichment on a copy of ``input_doc``.

    Steps: copy the document, check its preconditions, resolve a prompt
    builder (explicit one first, otherwise from ``prompt_profile``), and
    call ``self.algorithm``.

    Args:
        input_doc: Document whose ``normalized_text`` will be enriched.

    Returns:
        The document produced by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    output_doc: Doc = copy(input_doc)

    # Precondition 1: there must be text to enrich (the failure is logged).
    if not output_doc.normalized_text:
        problem = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(problem)
        raise ValueError(problem)

    # Precondition 2: the process must be tied to a language.
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    use_profile = prompt_builder is None and self.prompt_profile
    if use_profile:
        template_key = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Fill the profile template with the call's parameters."""
            info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return info

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = dict(
        ipa_mode=self.ipa_mode,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enrich(output_doc, **call_options)

ChagataiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'chag1247'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Chagatai language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Apply the configured enrichment algorithm to a copy of the document.

    Args:
        input_doc: Document to process. The caller's object is copied first
            and the copy is what gets passed on.

    Returns:
        The value returned by ``self.algorithm`` for the copied document.

    Raises:
        ValueError: On a document without ``normalized_text`` or a process
            without ``glottolog_id``.

    """
    target: Doc = copy(input_doc)
    has_text = bool(target.normalized_text)
    if not has_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(target).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # Fall back to the registry template named by the template id, or
        # by the process id when no template id is configured.
        template_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Produce prompt info from the captured template."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    result = self.algorithm(
        target,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return result

OldTurkicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldu1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Turkic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured GenAI workflow.

    The input is duplicated with ``copy``, validated, and then handed to
    ``self.algorithm`` together with the prompt settings held on this process.

    Args:
        input_doc: Document to enrich; it must carry ``normalized_text``.

    Returns:
        The document returned by ``self.algorithm``.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``glottolog_id`` is not set on the process.

    """
    doc_copy: Doc = copy(input_doc)
    if not doc_copy.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(doc_copy).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    # An explicitly configured builder wins; the profile is only a fallback.
    if builder is None and self.prompt_profile:
        resolved_id = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_id, self.prompt_version
        )
        digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Render prompt info from the template bound at definition."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    return self.algorithm(
        doc_copy,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldTamilGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldt1248'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Tamil language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Validate a copy of the document and pass it to the enrichment algorithm.

    Args:
        input_doc: Source document; the copy made from it is what gets
            validated and enriched.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: When ``normalized_text`` is empty or missing, or when
            the process has no ``glottolog_id``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    chosen_builder = self.prompt_builder
    template_digest = None
    if chosen_builder is None:
        if self.prompt_profile:
            # No explicit builder: derive one from the registered profile.
            lookup_id = self.prompt_template_id or self.process_id
            template = PromptProfileRegistry.get_prompt(
                self.prompt_profile, lookup_id, self.prompt_version
            )
            template_digest = template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = template,
            ) -> PromptInfo:
                """Build prompt info for one request from the template."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            chosen_builder = _builder

    enriched = self.algorithm(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=chosen_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=template_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enriched

AmmoniteGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'ammo1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Ammonite language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run GPT-based enrichment on a copy of ``input_doc``.

    Steps: copy the document, check its preconditions, resolve a prompt
    builder (explicit one first, otherwise from ``prompt_profile``), and
    call ``self.algorithm``.

    Args:
        input_doc: Document whose ``normalized_text`` will be enriched.

    Returns:
        The document produced by ``self.algorithm``.

    Raises:
        ValueError: If ``normalized_text`` is falsy or ``glottolog_id`` is
            ``None``.

    """
    output_doc: Doc = copy(input_doc)

    # Precondition 1: there must be text to enrich (the failure is logged).
    if not output_doc.normalized_text:
        problem = "Doc must have `normalized_text`."
        bind_from_doc(output_doc).error(problem)
        raise ValueError(problem)

    # Precondition 2: the process must be tied to a language.
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    prompt_builder = self.prompt_builder
    prompt_digest = None
    use_profile = prompt_builder is None and self.prompt_profile
    if use_profile:
        template_key = self.prompt_template_id or self.process_id
        template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        prompt_digest = template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = template,
        ) -> PromptInfo:
            """Fill the profile template with the call's parameters."""
            info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return info

        prompt_builder = _builder

    enrich = self.algorithm
    call_options = dict(
        ipa_mode=self.ipa_mode,
        prompt_builder=prompt_builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=prompt_digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )
    return enrich(output_doc, **call_options)

OldAramaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olda1246'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Aramaic (up to 700 BCE) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldAramaicSamalianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olda1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Aramaic–Samʾalian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddleAramaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'midd1366'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Aramaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

HatranGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'hatr1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Hatran language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

JewishBabylonianAramaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'jewi1240'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Jewish Babylonian Aramaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SamalianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sama1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Samʾalian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldEgyptianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olde1242'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Egyptian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldEnglishGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'olde1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old English (ca. 450-1100) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldFrenchGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldf1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old French (842-ca. 1400) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldHighGermanGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldh1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old High German (ca. 750-1050) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

EarlyIrishGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldi1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Irish language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Run the configured GPT enrichment workflow.

    Works on a copy of ``input_doc`` (made with ``copy``) and returns
    whatever ``self.algorithm`` produces for that copy.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, the prompt
    template is fetched from ``PromptProfileRegistry`` (keyed by
    ``prompt_template_id``, falling back to ``process_id``) and wrapped in a
    builder; the template's ``digest`` is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the copied doc has no ``normalized_text`` (the message
            is also sent to ``bind_from_doc(...).error``), or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Validate the document first, then the process configuration.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured prompt profile.
        lookup_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template into prompt info for one request."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MarathiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'mara1378'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Marathi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldNorseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldn1244'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Norse language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldPersianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldp1254'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old Persian (ca. 600-400 B.C.) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

OldMiddleWelshGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'oldw1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Old-Middle Welsh language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

ParthianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'part1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Parthian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MiddlePersianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pahl1241'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Middle Persian language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PalaicGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pala1331'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Palaic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PaliGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'pali1273'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Pali language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PhoenicianGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'phoe1239'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Phoenician language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

PunjabiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'panj1256'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Punjabi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

AssameseGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'assa1263'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Assamese language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative algorithm.

    The input is duplicated with ``copy`` and that duplicate is what gets
    validated and handed to ``self.algorithm``. When no explicit
    ``prompt_builder`` is set but a ``prompt_profile`` is, a template is
    fetched from ``PromptProfileRegistry`` and wrapped in a builder callable;
    the template's digest is then forwarded as ``prompt_digest``.

    Args:
        input_doc: Document to enrich; must carry a truthy ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If ``normalized_text`` is empty or ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Guard clauses: refuse to run without text or a language identifier.
    if not working_doc.normalized_text:
        missing_text_msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(missing_text_msg)
        raise ValueError(missing_text_msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None:
        if self.prompt_profile:
            # No explicit builder: fall back to the profile's registered template.
            resolved_template = PromptProfileRegistry.get_prompt(
                self.prompt_profile,
                self.prompt_template_id or self.process_id,
                self.prompt_version,
            )
            digest = resolved_template.digest

            def _builder(
                lang: str,
                table: str,
                ipa_mode: IPA_PRONUNCIATION_MODE,
                _template: PromptTemplate = resolved_template,
            ) -> PromptInfo:
                """Render the resolved template for one language and token table."""
                return build_prompt_info(
                    _template,
                    lang_or_dialect_name=lang,
                    token_table=table,
                    ipa_mode=ipa_mode,
                )

            builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SinhalaGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sinh1246'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Sinhala language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SindhiGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'sind1272'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Sindhi language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

KashmiriGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'kash1277'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Kashmiri language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

BagriGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'bagr1243'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Bagri (Rajasthani) language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

ClassicalSanskritGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'clas1258'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Classical Sanskrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

VedicSanskritGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'vedi1234'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Vedic Sanskrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TokharianAGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'toch1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Tokharian A language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

TokharianBGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'toch1237'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Tokharian B language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

UgariticGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'ugar1238'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Ugaritic language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

UrduGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'urdu1245'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Urdu language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

SauraseniPrakritGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'saur1252'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Sauraseni Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the configured generative workflow.

    The input is duplicated with ``copy`` and the duplicate is handed to
    ``self.algorithm``; whatever that callable returns is the result.

    When no ``prompt_builder`` is set but ``prompt_profile`` is, a prompt
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded as ``prompt_digest``.

    Raises:
        ValueError: If the doc has no ``normalized_text`` or if
            ``glottolog_id`` is ``None``.
    """
    working_doc: Doc = copy(input_doc)

    # Preconditions: text to enrich and a language identifier are required.
    if not working_doc.normalized_text:
        error_text = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(error_text)
        raise ValueError(error_text)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # The process id is the lookup key when prompt_template_id is falsy.
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile,
            self.prompt_template_id or self.process_id,
            self.prompt_version,
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the profile template for one language and token table."""
            prompt_info = build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )
            return prompt_info

        builder = _builder

    enrich = self.algorithm
    return enrich(
        working_doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

MagadhiPrakritGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'maga1260'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Magadhi Prakrit language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` with the process's GenAI algorithm.

    The document is duplicated with ``copy`` first; validation and the
    enrichment call both operate on that duplicate.

    Args:
        input_doc: Document to enrich. Its ``normalized_text`` must be truthy.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the copied document has no ``normalized_text`` or if
            ``self.glottolog_id`` is ``None``.

    """
    working_doc: Doc = copy(input_doc)
    if not working_doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(working_doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    # An explicitly configured builder always wins; the profile registry is
    # consulted only when no builder was supplied and a profile is set.
    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        template_key = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, template_key, self.prompt_version
        )
        digest = profile_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Render the registry template for one language and token table."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _builder

    generate = self.algorithm
    call_options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": digest,
        "fields": self.enrichment_fields,
        # Provenance tag combines the process id with the concrete class name.
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return generate(working_doc, **call_options)

GandhariGenAIEnrichmentProcess

Bases: GenAIEnrichmentProcess

Language-specific enrichment process using a generative GPT model.

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = 'gand1259'

description class-attribute instance-attribute

description: str = (
    "Default textual enrichment process using a generative GPT model for the Gandhari language."
)

authorship_info class-attribute instance-attribute

authorship_info: str = 'CLTK'

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Validate a copy of ``input_doc`` and pass it to the enrichment algorithm.

    The incoming document is duplicated with ``copy``; the duplicate is
    what gets validated and handed to ``self.algorithm``.

    Args:
        input_doc: Document to enrich. Its ``normalized_text`` must be truthy.

    Returns:
        The result of calling ``self.algorithm`` on the copied document.

    Raises:
        ValueError: If the copied document has no ``normalized_text`` or if
            ``self.glottolog_id`` is ``None``.

    """
    doc_copy: Doc = copy(input_doc)
    if not doc_copy.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(doc_copy).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    # Fall back to a registry-backed builder only when the process has no
    # builder of its own and a prompt profile has been configured.
    chosen_builder = self.prompt_builder
    template_digest = None
    if chosen_builder is None and self.prompt_profile:
        lookup_id = self.prompt_template_id or self.process_id
        registry_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, lookup_id, self.prompt_version
        )
        template_digest = registry_template.digest

        def _builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = registry_template,
        ) -> PromptInfo:
            """Fill the registry template with the language, table, and IPA mode."""
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        chosen_builder = _builder

    enrich = self.algorithm
    options = {
        "ipa_mode": self.ipa_mode,
        "prompt_builder": chosen_builder,
        "prompt_profile": self.prompt_profile,
        "prompt_digest": template_digest,
        "fields": self.enrichment_fields,
        # Provenance tag combines the process id with the concrete class name.
        "provenance_process": f"{self.process_id}:{self.__class__.__name__}",
    }
    return enrich(doc_copy, **options)