# Source code for cltk.stem.fro

"""Stemmer for Old French."""
import re

# Module metadata: list of author contact strings and the license notice.
__author__ = ["Natasha Voake <natashavoake@gmail.com>"]
__license__ = "MIT License. See LICENSE."

# Words that must never be stemmed. ``_matchremove_noun_endings`` returns any
# word found here unchanged and reports it as already handled, so no verb
# ending is stripped from it either.
# Each word is listed once; membership is the only thing that is tested.
EXCEPTIONS: list[str] = [
    "mer",
    "certes",
    "mais",
    "vos",
    "onques",
    "nos",
    "talent",
    "omnipotent",
    "jadis",
    "et",
    "je",
    "que",
    "ne",
    "me",
    "de",
    "ce",
    "qui",
    "la",
    "li",
    "le",
    "ma",
    "mon",
    "si",
    "sa",
    "son",
    "chambre",
    "quant",
    "ki",
    "tut",
]


def _matchremove_noun_endings(word: str) -> tuple[str, bool]:
    """Remove the noun and adverb word endings.

    Args:
        word: The word to stem.

    Returns:
        A ``(word, was_stemmed)`` pair. ``was_stemmed`` is ``True`` when the
        word was handled here: an ending was stripped, the word is listed in
        ``EXCEPTIONS``, or the word is itself one of the endings. In the last
        two cases the word is returned unchanged. ``was_stemmed`` is ``False``
        only when no noun ending matched at all.
    """
    # Exceptions are never stemmed, but they count as handled so that the
    # caller does not go on to strip a verb ending from them.
    if word in EXCEPTIONS:
        return word, True

    # Common and proper noun and adjective word endings, longest first.
    # Only the first ending that matches is applied.
    noun_endings: tuple[str, ...] = (
        "arons",
        "ains",
        "aron",
        "ment",
        "ain",
        "age",
        "on",
        "es",
        "ée",
        "ee",
        "ie",
        "s",
    )

    for ending in noun_endings:
        if not word.endswith(ending):
            continue
        # A word that consists solely of an ending is left intact rather
        # than being reduced to an empty stem.
        if word != ending:
            word = word[: -len(ending)]
        return word, True

    return word, False
def _matchremove_verb_endings(word: str) -> str:
    """Remove the verb endings.

    Args:
        word: The word to stem.

    Returns:
        The word with the first matching verb ending stripped. The word is
        returned unchanged when no ending matches or when the word is itself
        exactly that ending.
    """
    # Verb endings, tried in this order; only the first match is applied, so
    # the relative order of the entries is significant. Each ending is listed
    # once, since a repeated entry could never be reached.
    verb_endings: tuple[str, ...] = (
        "issiiens", "isseient",
        "issiiez",
        "issons", "issent", "issant", "isseie", "isseit", "isseiz", "assent",
        "iiens", "eient", "issez", "oient", "istes", "ïstes", "astes",
        "erent", "irent", "ustes", "urent",
        "âmes", "âtes",
        "èrent", "asses", "isses", "ssons", "sseiz", "ssent",
        "eies", "iiez", "oies", "iens", "ions", "oint", "eret", "imes",
        "rent", "ümes", "ütes", "ïmes", "asse", "isse", "usse", "ames",
        "umes", "sses", "ssez",
        "ons", "ent", "ant", "eie", "eit", "int", "ist", "eiz", "oie", "oit",
        "iez", "ois", "res", "ert", "ast", "sse", "mes",
        "er", "es", "et", "ez", "is", "re", "oi", "ïs", "üs", "ai", "as",
        "at", "it", "ui", "us", "ut", "st",
        "s", "t", "e", "é", "z", "u", "a", "i",
    )

    for ending in verb_endings:
        if not word.endswith(ending):
            continue
        # A word that consists solely of an ending is left intact rather
        # than being reduced to an empty stem.
        if word == ending:
            return word
        return word[: -len(ending)]

    return word
def stem(word: str) -> str:
    """Return the stem of an Old French word.

    The word is lower-cased, then noun endings are tried first; verb endings
    are only tried when the noun pass reports that it did not handle the word.

    >>> stem('departissent')
    'depart'
    >>> stem('talent')
    'talent'
    """
    lowered = word.lower()

    # First pass: the simple (noun/adverb) endings and the exception list.
    candidate, handled = _matchremove_noun_endings(lowered)
    if handled:
        return candidate

    # Nothing matched in the first pass, so fall back to the verb endings.
    return _matchremove_verb_endings(lowered)