# Source code for cltk.phonology.akk

"""
Functions and classes for Akkadian phonology.
"""

from typing import Union

# Module metadata. The author list uses the conventional ``__author__`` dunder
# (the name was previously misspelled with a single leading underscore).
__author__ = ["M. Willis Monroe <willismonroe@gmail.com>"]
__license__ = "MIT License. See LICENSE."


# Akkadian sound inventory, grouped by category.
# The three vowel lists are index-aligned (a, e, i, u): a long vowel at
# position n corresponds to the short vowel at position n.
AKKADIAN = dict(
    short_vowels=["a", "e", "i", "u"],
    macron_vowels=["ā", "ē", "ī", "ū"],
    circumflex_vowels=["â", "ê", "î", "û"],
    consonants=[
        "b", "d", "g", "h", "ḫ", "k", "l",
        "m", "n", "p", "q", "r", "s", "ṣ",
        "š", "t", "ṭ", "w", "y", "z", "ʾ",
    ],
)


def get_cv_pattern(
    word: str, pprint: bool = False
) -> Union[list[tuple[str, int, str]], str]:
    """
    Return a pattern representing the consonants and vowels of the input word.

    Every character listed in ``AKKADIAN["consonants"]`` is classed as a
    consonant ("C"); every other character is classed as a vowel ("V").
    Long vowels (macron or circumflex) are reduced to their short
    counterpart, so ``ā``/``â`` share an index with ``a``. Consonants and
    vowels are numbered independently, starting at 1, in order of first
    appearance; a repeated character reuses the number it was first given.

    :param word: the word to analyse
    :param pprint: when True, return a compact string with subscript
        indices instead of the list of tuples
    :return: a list of ``(kind, index, character)`` tuples, or the
        pretty-printed string when ``pprint`` is True

    >>> word = 'iparras'
    >>> get_cv_pattern(word)
    [('V', 1, 'i'), ('C', 1, 'p'), ('V', 2, 'a'), ('C', 2, 'r'), ('C', 2, 'r'), ('V', 2, 'a'), ('C', 3, 's')]
    >>> get_cv_pattern(word, True)
    'V₁C₁V₂C₂C₂V₂C₃'
    """

    # Maps ASCII digits to subscript digits; translating the decimal string
    # digit by digit also covers indices of 10 and above.
    to_subscript = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")

    # Long vowel -> short vowel, relying on the index-aligned vowel lists.
    long_to_short = {}
    for key in ("macron_vowels", "circumflex_vowels"):
        long_to_short.update(zip(AKKADIAN[key], AKKADIAN["short_vowels"]))

    pattern = []
    first_index = {}  # character -> index assigned at its first appearance
    next_index = {"C": 1, "V": 1}

    for char in word:
        if char in AKKADIAN["consonants"]:
            cv = "C"
        else:
            cv = "V"
            # remove length:
            char = long_to_short.get(char, char)

        if char not in first_index:
            first_index[char] = next_index[cv]
            next_index[cv] += 1
        pattern.append((cv, first_index[char], char))

    if pprint:
        return "".join(
            cv + str(index).translate(to_subscript) for cv, index, _ in pattern
        )
    return pattern


def _is_consonant(char: str) -> bool:
    """Tell whether ``char`` is listed among the Akkadian consonants."""
    consonants = AKKADIAN["consonants"]
    return char in consonants


def _is_vowel(char: str) -> bool:
    """Tell whether ``char`` is an Akkadian vowel of any kind (short, macron or circumflex)."""
    vowel_groups = ("short_vowels", "macron_vowels", "circumflex_vowels")
    return any(char in AKKADIAN[group] for group in vowel_groups)


def _is_short_vowel(char: str) -> bool:
    """Tell whether ``char`` is one of the short (unmarked) vowels."""
    short_vowels = AKKADIAN["short_vowels"]
    return char in short_vowels


def _is_macron_vowel(char: str) -> bool:
    """Tell whether ``char`` is a long vowel written with a macron."""
    macron_vowels = AKKADIAN["macron_vowels"]
    return char in macron_vowels


def _is_circumflex_vowel(char: str) -> bool:
    """Tell whether ``char`` is a long vowel written with a circumflex."""
    circumflex_vowels = AKKADIAN["circumflex_vowels"]
    return char in circumflex_vowels


def syllabify(word) -> list[str]:
    """
    Split Akkadian words into list of syllables

    The word is scanned from its end towards its beginning, matching CVC,
    CV, VC and V shapes. An empty word yields an empty list. Input that
    does not fit those shapes (a consonant with no vowel before it, or a
    character that is neither an Akkadian vowel nor consonant) is emitted
    as a one-character syllable so that no character is lost and the scan
    always terminates.

    :param word: the word to split
    :return: the syllables in word order

    >>> syllabify("napištašunu")
    ['na', 'piš', 'ta', 'šu', 'nu']

    >>> syllabify("epištašu")
    ['e', 'piš', 'ta', 'šu']
    """

    if not word:
        return []

    # catch single character words
    if len(word) == 1:
        return [word]

    syllables = []

    # If there's an initial vowel, the word is longer than 2 letters and the
    # third character is not a consonant (easy way to rule out a VCC start),
    # the initial vowel is the first syllable.
    # Rule (b.ii)
    if _is_vowel(word[0]) and len(word) > 2 and not _is_consonant(word[2]):
        syllables.append(word[0])
        word = word[1:]

    # flip the word and count from the back:
    word = word[::-1]
    length = len(word)

    # Iterate over the characters backwards, matching consonant and vowel
    # patterns hierarchically. Each match is stored (still in reverse order)
    # and the index is moved ahead by the length of the syllable. Every
    # branch advances the index, so the loop cannot stall.
    syllables_reverse = []
    i = 0
    while i < length:
        char = word[i]
        has_next = i + 1 < length

        if _is_vowel(char):
            if not has_next or _is_vowel(word[i + 1]):
                # Nothing left, or the next char is a vowel: the vowel
                # stands alone as a syllable.
                syllables_reverse.append(char)
                i += 1
            else:
                # CV
                syllables_reverse.append(word[i + 1] + char)
                i += 2

        elif _is_consonant(char) and has_next and _is_vowel(word[i + 1]):
            if i + 2 < length and _is_consonant(word[i + 2]):
                # CVC
                syllables_reverse.append(word[i + 2] + word[i + 1] + char)
                i += 3
            else:
                # VC (remember it's backwards here); also covers the case
                # where only these two characters are left.
                syllables_reverse.append(word[i + 1] + char)
                i += 2

        else:
            # No syllable shape matches: keep the character on its own.
            syllables_reverse.append(char)
            i += 1

    return syllables + syllables_reverse[::-1]


def _syllable_weight(syllable: str) -> str:
    """Classify one syllable as "Light", "Heavy" or "Ultraheavy"."""
    size = len(syllable)

    # Locate the vowel (nucleus) and whether a consonant closes the syllable.
    if size == 1:
        nucleus, closed = syllable, False
    elif size == 2 and _is_consonant(syllable[0]):
        nucleus, closed = syllable[1], False
    elif size == 2 and _is_consonant(syllable[1]):
        nucleus, closed = syllable[0], True
    elif size == 3:
        nucleus, closed = syllable[1], True
    else:
        # Shape not covered by the weight rules: count it as light so the
        # syllable is kept in the output.
        return "Light"

    # -â, -bâ, -âk, -bâk
    if _is_circumflex_vowel(nucleus):
        return "Ultraheavy"
    # -āk, -bāk are ultraheavy; -ā, -bā are heavy
    if _is_macron_vowel(nucleus):
        return "Ultraheavy" if closed else "Heavy"
    # -ak, -bak are heavy; -a, -ba are light
    if _is_short_vowel(nucleus):
        return "Heavy" if closed else "Light"
    return "Light"


def find_stress(word: str) -> list[str]:
    """
    Find the stressed syllable in a word.
    The general logic follows Huehnergard 3rd edition (pgs. 3-4):
    (a) Light: ending in a short vowel: e.g., -a, -ba
    (b) Heavy: ending in a long vowel marked with a macron, or in a
    short vowel plus a consonant: e.g., -ā, -bā, -ak, -bak
    (c) Ultraheavy: ending in a long vowel marked with a circumflex,
    in any long vowel plus a consonant: e.g., -â, -bâ, -āk, -bāk, -âk, -bâk.
    (a) If the last syllable is ultraheavy, it bears the stress.
    (b) Otherwise, stress falls on the last non-final heavy or ultraheavy syllable.
    (c) Words that contain no non-final heavy or ultraheavy syllables have the
    stress fall on the first syllable.

    Syllables whose shape matches none of the weight classes are treated as
    light, so every syllable of the word appears in the result. An empty
    word yields an empty list.

    :param word: the word to analyse
    :return: the syllables of the word, with the stressed one wrapped in
        square brackets

    >>> find_stress("napištašunu")
    ['na', '[piš]', 'ta', 'šu', 'nu']

    """

    if not word:
        return []

    syllables = syllabify(word)
    if not syllables:
        return []

    weights = [_syllable_weight(syllable) for syllable in syllables]
    last = len(syllables) - 1

    if weights[last] == "Ultraheavy":
        # Rule (a)
        stressed = last
    else:
        # Rule (c) is the default; rule (b) overrides it when a non-final
        # heavy or ultraheavy syllable exists (searching from the end).
        stressed = 0
        for index in range(last - 1, -1, -1):
            if weights[index] != "Light":
                stressed = index
                break

    return [
        "[{}]".format(syllable) if index == stressed else syllable
        for index, syllable in enumerate(syllables)
    ]


class AkkadianSyllabifier:
    """
    Callable object wrapping the module-level ``syllabify`` function.

    Instances hold no state; calling an instance is the same as calling
    its ``syllabify`` method.
    """

    def syllabify(self, word):
        """
        Split ``word`` into syllables.

        :param word: either the word itself or a 2-tuple whose first item
            is the word; in the tuple case only the first item is used
        :return: the result of the module-level ``syllabify`` for the word
        """
        # isinstance (rather than an exact type comparison) also accepts
        # tuple subclasses such as named tuples.
        if isinstance(word, tuple) and len(word) == 2:
            word = word[0]
        return syllabify(word)

    def __repr__(self):
        return "<AkkadianSyllabifier>"

    def __call__(self, word):
        return self.syllabify(word)
# Source code for cltk.phonology.akk
#
# The Classical Language Toolkit
#
# Navigation
#
# Related Topics