# Source code for cltk.morphology.utils

"""Misc helper functions for extracting morphological
info from CLTK data structures.
"""

from typing import Optional, Union

from cltk.core.data_types import Word
from cltk.core.exceptions import CLTKException
from cltk.morphology.universal_dependencies_features import (
    NOMINAL_FEATURES,
    VERBAL_FEATURES,
    MorphosyntacticFeature,
)

# Every feature that get_features() probes on a word: the nominal features
# followed by the verbal ones, in that order.
ALL_POSSIBLE_FEATURES = NOMINAL_FEATURES + VERBAL_FEATURES


def get_pos(word: Optional[Word]) -> Optional[str]:
    """Return the part-of-speech label of ``word``.

    Args:
        word: Word to inspect; may be ``None``.

    Returns:
        ``None`` when ``word`` is falsy. Otherwise ``word.pos.name`` when
        ``word.pos`` has a ``name`` attribute, else ``word.pos`` itself.
    """
    if not word:
        return None
    # NOTE: depending on the backend that produced the word, the tag is
    # either an object exposing ``.name`` (noted upstream for stanza) or
    # the bare value (noted upstream for spaCy).
    # TODO: Write this properly upstream!
    pos = word.pos
    return getattr(pos, "name", pos)
def get_features(
    word: Optional[Word],
    prepend_to_label: Optional[str] = None,
) -> tuple[list[str], list[Union[str, int, float, None]]]:
    """Return feature labels and the word's value for each of them.

    Each entry of ``ALL_POSSIBLE_FEATURES`` contributes one label (the
    lower-cased ``str()`` of the entry) and one value, so the two returned
    lists have the same length and are index-aligned.

    Args:
        word: Word to inspect. A falsy ``word`` (e.g. ``None``) yields
            ``None`` as the value of every feature.
        prepend_to_label: Optional prefix placed in front of every label.
            A falsy value (``None`` or ``""``) leaves the labels unchanged.

    Returns:
        A ``(labels, values)`` tuple. ``values[i]`` is the ``name`` of the
        first value found on ``word`` for feature ``i``, converted to
        ``str``, or ``None`` when the lookup raises ``CLTKException``.
    """
    features_present: list[Union[str, int, float, None]] = []
    feature_variables: list[str] = []
    for possible_feature in ALL_POSSIBLE_FEATURES:
        feature_variables.append(str(possible_feature).lower())
        if not word:
            features_present.append(None)
            continue
        try:
            # __getattr__ is invoked directly with the feature object itself;
            # presumably these are not plain attribute-name strings, which
            # the getattr() builtin would require -- TODO confirm.
            feat: MorphosyntacticFeature = word.__getattr__(possible_feature)[0]
            features_present.append(str(feat.name))
        except CLTKException:
            # The lookup failed for this feature: record it as absent.
            features_present.append(None)
    if prepend_to_label:
        feature_variables = [prepend_to_label + name for name in feature_variables]
    return feature_variables, features_present