Skip to content

cltk.enrichment

Enrichment layer for GenAI-driven annotations (glosses, IPA, idioms).

GenAIEnrichmentProcess

Bases: Process

Language-agnostic enrichment process using a generative GPT model (legacy).

glottolog_id class-attribute instance-attribute

glottolog_id: Optional[str] = None

process_id class-attribute

process_id: str = 'enrichment.genai'

enrichment_fields class-attribute

enrichment_fields: Optional[set[str]] = None

prompt_template_id class-attribute

prompt_template_id: Optional[str] = None

prompt_builder class-attribute instance-attribute

prompt_builder: Optional[EnrichmentPromptBuilder] = None

ipa_mode class-attribute instance-attribute

ipa_mode: IPA_PRONUNCIATION_MODE = 'attic_5c_bce'

prompt_profile class-attribute instance-attribute

prompt_profile: Optional[str] = None

prompt_version class-attribute instance-attribute

prompt_version: Optional[str] = None

algorithm cached property

algorithm: Callable[..., Doc]

Return the enrichment generation function for this process.

run

run(input_doc: Doc) -> Doc

Run the configured GPT enrichment workflow.

Source code in cltk/enrichment/processes.py
def run(self, input_doc: Doc) -> Doc:
    """Enrich a copy of ``input_doc`` using this process's algorithm.

    The input document is copied with ``copy`` before anything else, and
    the copy is what gets validated and handed to ``self.algorithm``.

    A prompt builder is chosen as follows: ``self.prompt_builder`` is used
    when it is set; otherwise, if ``self.prompt_profile`` is truthy, a
    template is fetched from ``PromptProfileRegistry`` and wrapped in a
    builder, and the template's digest is forwarded to the algorithm.
    If neither is configured, ``None`` is passed for both.

    Args:
        input_doc: Document to enrich; must have a truthy
            ``normalized_text``.

    Returns:
        Whatever ``self.algorithm`` returns for the copied document.

    Raises:
        ValueError: If the document has no ``normalized_text`` or if
            ``self.glottolog_id`` is ``None``.

    """
    doc: Doc = copy(input_doc)
    if not doc.normalized_text:
        msg = "Doc must have `normalized_text`."
        bind_from_doc(doc).error(msg)
        raise ValueError(msg)
    if self.glottolog_id is None:
        raise ValueError("glottolog_id must be set for enrichment.")

    builder = self.prompt_builder
    digest = None
    if builder is None and self.prompt_profile:
        # No explicit builder: derive one from the configured profile. The
        # template id falls back to the process id when not set explicitly.
        resolved_template_id = self.prompt_template_id or self.process_id
        profile_template = PromptProfileRegistry.get_prompt(
            self.prompt_profile, resolved_template_id, self.prompt_version
        )
        digest = profile_template.digest

        def _template_prompt_builder(
            lang: str,
            table: str,
            ipa_mode: IPA_PRONUNCIATION_MODE,
            _template: PromptTemplate = profile_template,
        ) -> PromptInfo:
            """Build prompt info from the profile template and parameters."""
            # The template is bound as a default argument so the builder
            # carries it along with its own signature.
            return build_prompt_info(
                _template,
                lang_or_dialect_name=lang,
                token_table=table,
                ipa_mode=ipa_mode,
            )

        builder = _template_prompt_builder

    return self.algorithm(
        doc,
        ipa_mode=self.ipa_mode,
        prompt_builder=builder,
        prompt_profile=self.prompt_profile,
        prompt_digest=digest,
        fields=self.enrichment_fields,
        provenance_process=f"{self.process_id}:{self.__class__.__name__}",
    )

Submodules