Source code for cltk.lexicon.non

"""Code for querying Old Norse language dictionaries/lexicons."""

import regex
import yaml

from cltk.core.exceptions import CLTKException
from cltk.data.fetch import FetchCorpus
from cltk.utils.file_operations import make_cltk_path
from cltk.utils.utils import query_yes_no

__author__ = ["Clément Besnier <clem@clementbesnier.fr>"]


class OldNorseZoegaLexicon:
    """Access a digital form of Zoëga's dictionary.

    The entries are read from the ``dictionary.yaml`` file of the
    ``cltk_non_zoega_dictionary`` corpus. If that file is missing when the
    object is created, the corpus is fetched first (see ``__init__``).
    """

    # A lemma made up solely of these characters (digits and punctuation),
    # or an empty lemma, is never looked up.
    _NON_LEXICAL_CHARS = frozenset("0123456789.?,:;!<>-")

    # A headword may carry one trailing ASCII digit after the lemma
    # (presumably numbering separate entries that share a spelling).
    _HEADWORD_SUFFIX_DIGITS = "0123456789"

    def __init__(self, interactive: bool = True):
        """Load the dictionary, fetching the corpus first if it is missing.

        Args:
            interactive: When ``True``, the user is asked before a missing
                dictionary is downloaded. When ``False``, it is downloaded
                without asking.

        Raises:
            CLTKException: If the dictionary file is missing and the user
                declines the download.
        """
        self.interactive: bool = interactive
        self.zoega_yaml_fp: str = make_cltk_path(
            "non", "dictionary", "cltk_non_zoega_dictionary", "dictionary.yaml"
        )
        try:
            self.entries: dict[str, str] = self._load_entries()
        except FileNotFoundError as err:
            if self.interactive:
                dl_msg: str = (
                    "This part of the CLTK depends upon Zoëga's "
                    "*A Concise Old Norse Dictionary* (1890)."
                )
                print(dl_msg)
                dl_question = "Do you want to download this?"
                do_download = query_yes_no(question=dl_question)
            else:
                do_download = True
            if not do_download:
                raise CLTKException(
                    f"File '{self.zoega_yaml_fp}' is not found. It is required for this class."
                ) from err
            fetch_corpus = FetchCorpus(language="non")
            fetch_corpus.import_corpus(corpus_name="cltk_non_zoega_dictionary")
            # Second attempt, now that the corpus has been imported.
            self.entries = self._load_entries()

    def lookup(self, lemma: str) -> str:
        """Perform match of a lemma against headwords.

        This is case sensitive. A headword matches when it equals the lemma,
        optionally followed by a single ASCII digit. The lemma is compared
        literally: characters such as ``.`` or ``(`` have no special meaning.
        If more than one match, then return the concatenated entries
        (joined with newlines, in dictionary order). For example:

        >>> from cltk.lexicon.non import OldNorseZoegaLexicon
        >>> onzl = OldNorseZoegaLexicon(interactive=False)
        >>> onzl.lookup("sonr")
        '(gen. sonar, dat. syni and søni; pl. synir, sønir; ace. sonu and syni), m. son.'

        Args:
            lemma: The headword to search for.

        Returns:
            The matching entry or entries, or ``""`` when nothing matches or
            when the lemma is empty or consists only of digits/punctuation.

        Raises:
            CLTKException: If no dictionary entries are loaded.
        """
        if not self.entries:
            raise CLTKException(
                "No dictionary entries found in the .yaml file. This should never happen."
            )

        # Numbers and bare punctuation are not words; ``all`` over an empty
        # lemma is True, so the empty string is skipped here as well.
        if all(char in self._NON_LEXICAL_CHARS for char in lemma):
            return ""

        # Exact string comparison instead of interpolating the lemma into a
        # regular expression, so regex metacharacters cannot widen the match
        # or raise a pattern error.
        candidates = {lemma}
        candidates.update(lemma + digit for digit in self._HEADWORD_SUFFIX_DIGITS)
        # Iterate the dictionary itself so matches keep its ordering.
        matches = [key for key in self.entries if key in candidates]

        if not matches:
            return ""
        if len(matches) == 1:
            return self.entries[matches[0]]
        return "\n".join(self.entries[key] for key in matches)

    def _load_entries(self) -> dict[str, str]:
        """Read the yaml file of the lexicon.

        TODO: Make sure this actually returns dict[str, str]

        Returns:
            Whatever the YAML document at ``self.zoega_yaml_fp`` parses to.

        Raises:
            FileNotFoundError: If the dictionary file does not exist.
        """
        # Explicit UTF-8: the platform default encoding is not reliable for
        # the non-ASCII letters in the dictionary text.
        with open(self.zoega_yaml_fp, encoding="utf-8") as file_open:
            # NOTE(review): ``yaml.Loader`` can construct arbitrary Python
            # objects and this file is downloaded; consider
            # ``yaml.SafeLoader`` once confirmed the data only needs plain
            # YAML types.
            entries: dict[str, str] = yaml.load(file_open, Loader=yaml.Loader)
        return entries