Source code for cltk.utils.file_operations

"""Miscellaneous file operations used by various parts of the CLTK."""

import hashlib
import os.path
import pickle
from typing import Any

from cltk.core.cltk_logger import logger
from cltk.utils import CLTK_DATA_DIR

__author__ = [
    "Andreas Grivas <>",
    "Kyle P. Johnson <>",
    "Todd Cook <>",
__license__ = "MIT License. See LICENSE."

CLTK_DATA_DIR_PRIVATE = os.path.expanduser("~/cltk_data/private/")

[docs]def make_cltk_path(*fp_list): """Take arbitrary number of str arguments (not list) and return expanded, absolute path to a user's (or user-defined) cltk_data dir. Example: In [8]: make_cltk_path('greek', 'model', 'greek_models_cltk') Out[8]: '/Users/kyle/cltk_data/greek/model/greek_models_cltk' :type fp_list: str positional arguments :param: : fp_list tokens to join together beginning from cltk_root folder :rtype: str """ return os.path.join(CLTK_DATA_DIR, *fp_list)
[docs]def open_pickle(path: str) -> Any: """Open a pickle and return loaded pickle object. :type path: str :param : path: File path to pickle file to be opened. :rtype : object """ try: with open(path, "rb") as opened_pickle: try: return pickle.load(opened_pickle) except Exception as pickle_error: logger.error(pickle_error) raise except FileNotFoundError as fnf_error: logger.error(fnf_error) raise except IOError as io_err: logger.error(io_err) raise except EOFError as eof_error: logger.error(eof_error) raise except pickle.UnpicklingError as unp_error: logger.error(unp_error) raise
[docs]def md5(filename: str) -> str: """ Given a filename produce an md5 hash of the contents. >>> import tempfile, os >>> f = tempfile.NamedTemporaryFile(delete=False) >>> f.write(b'Hello Wirld!') 12 >>> f.close() >>> md5( '997c62b6afe9712cad3baffb49cb8c8a' >>> os.unlink( """ hash_md5 = hashlib.md5() with open(filename, "rb") as f: for chunk in iter(lambda:, b""): hash_md5.update(chunk) return hash_md5.hexdigest()