Source code for cltk.languages.utils

"""Utility functions for keeping track of languages."""

from cltk.core.data_types import Language
from cltk.core.exceptions import UnknownLanguageError
from cltk.languages.glottolog import LANGUAGES


[docs]def get_lang(iso_code: str) -> Language: """Take ISO 639-3 code and return ``Language`` object for language. TODO: Split this into another fn, ``check_language()``, which is how is usually used now. >>> from cltk.languages.utils import get_lang >>> get_lang("akk") Language(name='Akkadian', glottolog_id='akka1240', latitude=33.1, longitude=44.1, family_id='afro1255', parent_id='east2678', level='language', iso_639_3_code='akk', type='a', dates=[]) >>> from cltk.core.exceptions import UnknownLanguageError >>> get_lang("xxx") Traceback (most recent call last): ... cltk.core.exceptions.UnknownLanguageError: Unknown ISO language code 'xxx'. """ try: return LANGUAGES[iso_code] except KeyError: raise UnknownLanguageError(f"Unknown ISO language code '{iso_code}'.")
[docs]def find_iso_name(common_name: str) -> list[str]: """Find the ISO 639-3 language code (e.g., ``lat``) by inputting the common name (``Latin``). This function just does simple substring matching, with some normalization of case, on the ``name`` field of the ``Language`` object. >>> find_iso_name(common_name="Latin") ['lat'] >>> find_iso_name(common_name="lat") ['xga', 'lat'] >>> find_iso_name(common_name="slav") ['chu'] >>> find_iso_name(common_name="xxx") [] """ iso_return_list: list[str] = list() for iso_key, language_obj in LANGUAGES.items(): if common_name.lower() in language_obj.name.lower(): iso_return_list.append(iso_key) return iso_return_list