Source code for cltk.alphabet.egy

"""Convert MdC transliterated text to Unicode."""

import re

__author__ = ["Doğu Kaan Eraslan <kaaneraslan@gmail.com>"]


# MdC -> Unicode substitutions that are always applied, keyed by the single
# MdC input character.  One simultaneous ``str.translate`` pass is equivalent
# to applying these substitutions one after another in their original order,
# because no replacement produces a character that a later substitution in
# that order would have consumed (and ``translate`` never re-scans its output).
_MDC_TO_UNICODE = str.maketrans(
    {
        # Lower-case transliteration signs.
        "\u0041": "\ua723",  # A -> ꜣ (alef)
        "\u0061": "\ua725",  # a -> ꜥ (ayin)
        "\u0048": "\u1e25",  # H -> ḥ (h with dot below)
        "\u0078": "\u1e2b",  # x -> ḫ (h with breve below)
        "\u0058": "\u1e96",  # X -> ẖ (h with line below)
        "\u0056": "\u0068" + "\u032d",  # V -> h + combining circumflex below
        "\u0053": "\u0161",  # S -> š (shin)
        "\u0063": "\u015b",  # c -> ś (s with acute)
        "\u0054": "\u1e6f",  # T -> ṯ (tche)
        "\u0076": "\u1e71",  # v -> ṱ (t with circumflex below)
        "\u0044": "\u1e0f",  # D -> ḏ (djed)
        "\u0069": "\u0069" + "\u0486",  # i -> i + combining U+0486 (yod)
        # Punctuation.
        "\u003d": "\u2e17",  # = -> ⸗
        "\u003c": "\u2329",  # < -> 〈
        "\u003e": "\u232a",  # > -> 〉
        # Upper-case transliteration signs (each has two MdC spellings,
        # except C).
        "\u00a1": "\u1e24",  # ¡ -> Ḥ
        "\u0040": "\u1e24",  # @ -> Ḥ
        "\u0023": "\u1e2a",  # # -> Ḫ
        "\u00a2": "\u1e2a",  # ¢ -> Ḫ
        "\u0024": "\u0048" + "\u0331",  # $ -> H + combining macron below
        "\u00a3": "\u0048" + "\u0331",  # £ -> H + combining macron below
        "\u00a5": "\u0160",  # ¥ -> Š
        "\u005e": "\u0160",  # ^ -> Š
        "\u002a": "\u1e6e",  # * -> Ṯ
        "\u00a7": "\u1e6e",  # § -> Ṯ
        "\u00a9": "\u1e0e",  # © -> Ḏ
        "\u002b": "\u1e0e",  # + -> Ḏ
        "\u0043": "\u015a",  # C -> Ś
    }
)

# Optional substitutions, applied only when the caller passes q_kopf=False.
_MDC_KOPF_TO_UNICODE = str.maketrans(
    {
        "\u0071": "\u1e33",  # q -> ḳ
        "\u0051": "\u1e32",  # Q -> Ḳ
    }
)


def mdc_unicode(string: str, q_kopf: bool = True) -> str:
    """Convert MdC transliterated text to its Unicode transliteration.

    Every character of ``string`` that has an MdC meaning is replaced by the
    corresponding Unicode character (or base character plus combining mark);
    all other characters are copied through unchanged.

    Args:
        string: The MdC transliterated text.
        q_kopf: When ``True`` (the default) ``q`` and ``Q`` are left as they
            are.  When it is the literal ``False``, ``q`` becomes ``ḳ`` and
            ``Q`` becomes ``Ḳ``.  The check is an identity test, so other
            falsy values such as ``0`` or ``None`` do not enable it.

    Returns:
        The converted text.
    """
    unicode_text = string.translate(_MDC_TO_UNICODE)
    # Identity check kept on purpose: only an explicit ``False`` opts in.
    if q_kopf is False:
        unicode_text = unicode_text.translate(_MDC_KOPF_TO_UNICODE)
    return unicode_text