Source code for cltk.tokenizers.arb

""" Code for word tokenization: Arabic
"""

__author__ = ["TK", "Patrick J. Burns <patrick@diyclassics.org>"]
__license__ = "MIT License."

from cltk.phonology.arb.utils.pyarabic import araby
from cltk.tokenizers.word import WordTokenizer


[docs]class ArabicWordTokenizer(WordTokenizer): """ Class for word tokenizer using the pyarabic package: https://pypi.org/project/PyArabic/ """
[docs] def tokenize(self, text: str): """ :rtype: list :param text: text to be tokenized into sentences :type text: str """ return araby.tokenize(text)