Source code for cltk.lexicon.lat
"""Code for querying Latin language dictionaries/lexicons."""
from typing import Optional
import regex
import yaml
from cltk.core.exceptions import CLTKException
from cltk.data.fetch import FetchCorpus
from cltk.utils.file_operations import make_cltk_path
from cltk.utils.utils import query_yes_no
__author__ = ["Clément Besnier <clem@clementbesnier.fr>"]
class LatinLewisLexicon:
    """Access a digital form of Charlton T. Lewis's *An Elementary Latin Dictionary* (1890).

    The lexicon is read from a YAML file located under the CLTK data
    directory (see ``lewis_yaml_fp``). If that file is missing when the
    object is created, the corpus ``cltk_lat_lewis_elementary_lexicon`` is
    fetched, optionally after asking the user for confirmation.
    """

    def __init__(self, interactive: bool = True):
        """Load the lexicon entries, downloading the corpus if it is missing.

        :param interactive: When ``True`` and the lexicon file is not found,
            print an explanation and ask (via ``query_yes_no``) whether the
            corpus should be downloaded. When ``False``, download without
            asking.
        :raises CLTKException: If the lexicon file is missing and the user
            declines the download.
        """
        self.interactive: bool = interactive
        self.lewis_yaml_fp: str = make_cltk_path(
            "lat", "lexicon", "cltk_lat_lewis_elementary_lexicon", "lewis.yaml"
        )
        try:
            self.entries: dict[str, str] = self._load_entries()
        except FileNotFoundError:
            do_download: Optional[bool] = None
            if self.interactive:
                dl_msg: str = "This part of the CLTK depends upon Lewis's *An Elementary Latin Dictionary* (1890)."
                print(dl_msg)
                dl_question: str = "Do you want to download this?"
                do_download = query_yes_no(question=dl_question)
            else:
                # Non-interactive use: fetch the corpus without prompting.
                do_download = True
            if do_download:
                fetch_corpus: FetchCorpus = FetchCorpus(language="lat")
                fetch_corpus.import_corpus(
                    corpus_name="cltk_lat_lewis_elementary_lexicon"
                )
            else:
                raise CLTKException(
                    f"File '{self.lewis_yaml_fp}' is not found. It is required for this class."
                )
            # Second attempt after the download; a FileNotFoundError raised
            # here is not caught and propagates to the caller.
            self.entries = self._load_entries()

    def lookup(self, lemma: str) -> str:
        """Perform match of a lemma against headwords. If more than one match,
        then return the concatenated entries. For example:

        >>> from cltk.lexicon.lat import LatinLewisLexicon
        >>> lll = LatinLewisLexicon(interactive=False)
        >>> lll.lookup("clemens")[:50]
        'clēmēns entis (abl. -tī; rarely -te, L.), adj. wit'

        >>> all(word in lll.lookup("levis") for word in ["levis","lēvis"]) # Test for concatenated entries
        True

        >>> lll.lookup("omnia")
        ''

        >>> lll.lookup(".")
        ''

        >>> lll.lookup("123")
        ''

        >>> lll.lookup("175.")
        ''

        >>> lll.lookup("(") # Test for regex special character
        ''

        :param lemma: Word to look up. It is lower-cased before matching, and
            a headword matches when it equals the lemma, optionally followed
            by a single digit.
        :return: The matching entry, several entries joined by newlines (in
            the order the headwords appear in ``self.entries``), or ``""``
            when nothing matches or the lemma consists only of digits and
            punctuation.
        :raises CLTKException: If ``self.entries`` is empty.
        """
        if not self.entries:
            raise CLTKException(
                "No lexicon entries found in the .yaml file. This should never happen."
            )

        # Digits/punctuation only (this includes the empty string): no lookup.
        if regex.match(r"^[0-9\.\?,\:;\!\<\>\-]*$", lemma) is not None:
            return ""

        # Escape so that characters such as "(" are matched literally, and
        # compile once instead of rebuilding the pattern for every headword.
        escaped_lemma: str = regex.escape(lemma.lower())
        headword_pattern = regex.compile(rf"^{escaped_lemma}[0-9]?$")

        matching_entries: list[str] = [
            entry
            for headword, entry in self.entries.items()
            if headword_pattern.match(headword)
        ]
        if len(matching_entries) == 1:
            return matching_entries[0]
        # Zero matches join to "", several matches are newline-separated.
        return "\n".join(matching_entries)

    def _load_entries(self) -> dict[str, str]:
        """Read the yaml file of the lexicon.

        :return: Whatever the YAML document at ``self.lewis_yaml_fp`` parses
            to; the rest of the class treats it as a mapping from headword to
            entry text.
        :raises FileNotFoundError: If the lexicon file does not exist.
        """
        # The entries contain macron-bearing characters; decode explicitly as
        # UTF-8 rather than relying on the platform's default encoding.
        with open(self.lewis_yaml_fp, encoding="utf-8") as file_open:
            # NOTE(review): yaml.Loader is PyYAML's unrestricted loader and can
            # construct arbitrary Python objects. The file is downloaded
            # corpus data; consider yaml.safe_load if the entries are plain
            # strings -- confirm against the corpus before changing.
            entries: dict[str, str] = yaml.load(file_open, Loader=yaml.Loader)
        return entries