"""Import CLTK corpora.
TODO: ? Fix so ``import_corpora()`` can take relative path.
TODO: ? Add https://github.com/cltk/pos_latin
TODO: Consider renaming all "import" to "clone"
"""
import errno
import os
import shutil
import sys
from urllib.parse import urljoin
import yaml
from git import RemoteProgress, Repo
from cltk.core.cltk_logger import logger
from cltk.core.exceptions import CorpusImportError
from cltk.languages.utils import get_lang
from cltk.utils.utils import CLTK_DATA_DIR
# Module authors and their contact addresses.
__author__ = [
    "Kyle P. Johnson <kyle@kyle-p-johnson.com>",
    "Stephen Margheim <stephen.margheim@gmail.com>",
]
# TODO: Decide whether to drop repos w/o models
# langs_with_model_repo = ["grc", "lat", "ang", "gmh", "gml", "san", "non", "pli"]

# Corpora known to the library, keyed by language code (plus the special
# "multilingual" key). Each record is a dict with:
#   "name"     -- identifier passed to ``FetchCorpus.import_corpus()``
#   "origin"   -- Git URL to clone from, or ``None`` for local-only corpora
#   "type"     -- sub-directory (under the language dir) the corpus is stored in
#   "branch"   -- Git branch recorded for the corpus (absent on some records)
#   "location" -- set to "local" for corpora the user must supply from disk
# Names must be unique within one language: ``import_corpus()`` refuses to
# proceed when a name matches more than one record.
LANGUAGE_CORPORA = {
    "akk": [
        {
            "name": "cdli_corpus",
            "origin": "https://github.com/cdli-gh/data.git",
            "type": "atf",
            "branch": "master",
        }
    ],
    "arb": [
        {
            "name": "arabic_text_perseus",
            "origin": "https://github.com/cltk/arabic_text_perseus",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "quranic-corpus",
            "origin": "https://github.com/cltk/arabic_text_quranic_corpus",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "quranic-corpus-morphology",
            "origin": "https://github.com/cltk/arabic_morphology_quranic-corpus",
            "type": "text",
            "branch": "master",
        },
    ],
    "lzh": [
        {
            "type": "text",
            "origin": "https://github.com/cltk/chinese_text_cbeta_01.git",
            "name": "chinese_text_cbeta_01",
            "branch": "master",
        },
        {
            "type": "text",
            "origin": "https://github.com/cltk/chinese_text_cbeta_02.git",
            "name": "chinese_text_cbeta_02",
            "branch": "master",
        },
        {
            "type": "text",
            "origin": "https://github.com/cltk/chinese_text_cbeta_indices.git",
            "name": "chinese_text_cbeta_indices",
            "branch": "master",
        },
        {
            "type": "text",
            "origin": "https://github.com/cltk/chinese_text_cbeta_txt.git",
            "name": "chinese_text_cbeta_txt",
            "branch": "master",
        },
        {
            "type": "text",
            "origin": "https://github.com/cltk/chinese_text_cbeta_taf_xml.git",
            "name": "chinese_text_cbeta_taf_xml",
        },
    ],
    "cop": [
        {
            "type": "text",
            "origin": "https://github.com/cltk/coptic_text_scriptorium.git",
            "name": "coptic_text_scriptorium",
            "branch": "master",
        }
    ],
    "grc": [
        {
            "name": "grc_software_tlgu",
            "origin": "https://github.com/cltk/grc_software_tlgu.git",
            "type": "software",
            "branch": "master",
        },
        {
            "name": "grc_text_perseus",
            "origin": "https://github.com/cltk/grc_text_perseus.git",
            "type": "text",
            "branch": "master",
        },
        {"origin": None, "name": "phi7", "location": "local", "type": "text"},
        {"name": "tlg", "origin": None, "location": "local", "type": "text"},
        {
            "name": "greek_proper_names_cltk",
            "origin": "https://github.com/cltk/greek_proper_names_cltk.git",
            "type": "lexicon",
            "branch": "master",
        },
        {
            "name": "grc_models_cltk",
            "origin": "https://github.com/cltk/grc_models_cltk.git",
            "type": "model",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/greek_treebank_perseus.git",
            "name": "greek_treebank_perseus",
            "type": "treebank",
            "branch": "master",
        },
        {
            "origin": "https://github.com/vgorman1/Greek-Dependency-Trees.git",
            "name": "greek_treebank_gorman",
            "type": "treebank",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/greek_lexica_perseus.git",
            "name": "greek_lexica_perseus",
            "type": "lexicon",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/greek_training_set_sentence_cltk.git",
            "name": "greek_training_set_sentence_cltk",
            "type": "training_set",
            "branch": "master",
        },
        {
            "name": "greek_word2vec_cltk",
            "origin": "https://github.com/cltk/greek_word2vec_cltk.git",
            "type": "model",
            "branch": "master",
        },
        {
            "name": "greek_text_lacus_curtius",
            "origin": "https://github.com/cltk/greek_text_lacus_curtius.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "grc_text_first1kgreek",
            "origin": "https://github.com/cltk/First1KGreek",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "grc_text_tesserae",
            # modified plaintext with Tesserae-style citations
            "origin": "https://github.com/cltk/grc_text_tesserae.git",
            "type": "text",
            "branch": "master",
        },
    ],
    "hbo": [
        {
            "name": "hebrew_text_sefaria",
            "origin": "https://github.com/cltk/hebrew_text_sefaria.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "lat": [
        {
            "type": "text",
            "name": "lat_text_perseus",
            "origin": "https://github.com/cltk/lat_text_perseus.git",
            "branch": "master",
        },
        {
            "name": "lat_treebank_perseus",
            "origin": "https://github.com/cltk/lat_treebank_perseus.git",
            "type": "treebank",
            "branch": "master",
        },
        {
            "name": "lat_text_latin_library",
            "origin": "https://github.com/cltk/lat_text_latin_library.git",
            "type": "text",
            "branch": "master",
        },
        {"location": "local", "name": "phi5", "origin": None, "type": "text"},
        {"origin": None, "name": "phi7", "location": "local", "type": "text"},
        {
            "name": "latin_proper_names_cltk",
            "origin": "https://github.com/cltk/latin_proper_names_cltk.git",
            "type": "lexicon",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/lat_models_cltk.git",
            "name": "lat_models_cltk",
            "type": "model",
            "branch": "master",
        },
        {
            "name": "latin_pos_lemmata_cltk",
            "origin": "https://github.com/cltk/latin_pos_lemmata_cltk.git",
            "type": "lemma",
            "branch": "master",
        },
        {
            "name": "latin_treebank_index_thomisticus",
            "origin": "https://github.com/cltk/latin_treebank_index_thomisticus.git",
            "type": "treebank",
            "branch": "master",
        },
        {
            "name": "latin_lexica_perseus",
            "origin": "https://github.com/cltk/latin_lexica_perseus.git",
            "type": "lexicon",
            "branch": "master",
        },
        {
            "name": "latin_training_set_sentence_cltk",
            "origin": "https://github.com/cltk/latin_training_set_sentence_cltk.git",
            "type": "training_set",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/latin_word2vec_cltk.git",
            "name": "latin_word2vec_cltk",
            "type": "model",
            "branch": "master",
        },
        {
            "type": "text",
            "name": "latin_text_antique_digiliblt",
            "origin": "https://github.com/cltk/latin_text_antique_digiliblt.git",
            "branch": "master",
        },
        {
            "type": "text",
            "name": "latin_text_corpus_grammaticorum_latinorum",
            "origin": "https://github.com/cltk/latin_text_corpus_grammaticorum_latinorum.git",
            "branch": "master",
        },
        {
            "type": "text",
            "name": "latin_text_poeti_ditalia",
            "origin": "https://github.com/cltk/latin_text_poeti_ditalia.git",
            "branch": "master",
        },
        {
            "name": "lat_text_tesserae",
            # modified plaintext with Tesserae-style citations
            "origin": "https://github.com/cltk/lat_text_tesserae.git",
            "type": "text",
            "branch": "master",
        },
        {
            "type": "lexicon",
            "name": "cltk_lat_lewis_elementary_lexicon",
            "origin": "https://github.com/cltk/cltk_lat_lewis_elementary_lexicon.git",
            "branch": "master",
        },
    ],
    "multilingual": [
        {
            "type": "treebank",
            "origin": "https://github.com/cltk/multilingual_treebank_proiel.git",
            "name": "multilingual_treebank_proiel",
            "branch": "master",
        },
        {
            "type": "treebank",
            "origin": "https://github.com/cltk/iswoc-treebank.git",
            "name": "multilingual_treebank_iswoc",
            "branch": "master",
        },
        {
            "type": "treebank",
            "origin": "https://github.com/cltk/treebank-releases.git",
            "name": "multilingual_treebank_torot",
            "branch": "master",
        },
    ],
    "pli": [
        {
            "type": "text",
            "origin": "https://github.com/cltk/pali_text_ptr_tipitaka.git",
            "name": "pali_text_ptr_tipitaka",
            "branch": "master",
        },
        {
            "name": "pali_texts_gretil",
            "type": "text",
            "origin": "https://github.com/cltk/pali_texts_gretil",
            "branch": "master",
        },
    ],
    "pan": [
        {
            "name": "punjabi_text_gurban",
            "origin": "https://github.com/cltk/punjabi_text_gurban.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "xct": [
        {
            "type": "pos",
            "origin": "https://github.com/cltk/tibetan_pos_tdc.git",
            "name": "tibetan_pos_tdc",
            "branch": "master",
        },
        {
            "type": "lexicon",
            "origin": "https://github.com/cltk/tibetan_lexica_tdc.git",
            "name": "tibetan_lexica_tdc",
            "branch": "master",
        },
    ],
    "san": [
        {
            "name": "sanskrit_text_jnu",
            "origin": "https://github.com/cltk/sanskrit_text_jnu.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "sanskrit_text_dcs",
            "origin": "https://github.com/cltk/sanskrit_text_dcs.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "sanskrit_parallel_sacred_texts",
            "origin": "https://github.com/cltk/sanskrit_parallel_sacred_texts.git",
            "type": "parallel",
            "branch": "master",
        },
        {
            "name": "sanskrit_text_sacred_texts",
            "origin": "https://github.com/cltk/sanskrit_text_sacred_texts.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "sanskrit_parallel_gitasupersite",
            "origin": "https://github.com/cltk/sanskrit_parallel_gitasupersite.git",
            "type": "parallel",
            "branch": "master",
        },
        {
            "name": "sanskrit_text_gitasupersite",
            "origin": "https://github.com/cltk/sanskrit_text_gitasupersite.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "sanskrit_text_wikipedia",
            "origin": "https://github.com/cltk/sanskrit_text_wikipedia.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "sanskrit_text_sanskrit_documents",
            "origin": "https://github.com/cltk/sanskrit_text_sanskrit_documents.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "san_models_cltk",
            "origin": "https://github.com/cltk/san_models_cltk.git",
            "type": "model",
            "branch": "master",
        },
    ],
    "ang": [
        {
            "name": "old_english_text_sacred_texts",
            "origin": "https://github.com/cltk/old_english_text_sacred_texts.git",
            "type": "html",
            "branch": "master",
        },
        {
            "origin": "https://github.com/cltk/ang_models_cltk.git",
            "name": "ang_models_cltk",
            "type": "model",
            "branch": "master",
        },
    ],
    "ben": [
        {
            "name": "bengali_text_wikisource",
            "origin": "https://github.com/cltk/bengali_text_wikisource.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "chu": [
        {
            "name": "old_church_slavonic_ccmh",
            "origin": "https://github.com/cltk/old_church_slavonic_ccmh.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "pmh": [
        {
            "name": "prakrit_texts_gretil",
            "type": "text",
            "origin": "https://github.com/cltk/prakrit_texts_gretil.git",
            "branch": "master",
        }
    ],
    "mal": [
        {
            "name": "malayalam_text_gretil",
            "origin": "https://github.com/cltk/malayalam_text_gretil.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "omr": [
        {
            "name": "marathi_text_wikisource",
            "origin": "https://github.com/cltk/marathi_text_wikisource.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "kaw": [
        {
            "name": "javanese_text_gretil",
            "origin": "https://github.com/cltk/javanese_text_gretil.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "non": [
        {
            "name": "old_norse_text_perseus",
            "origin": "https://github.com/cltk/old_norse_text_perseus.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "non_models_cltk",
            "origin": "https://github.com/cltk/non_models_cltk.git",
            "type": "model",
            "branch": "master",
        },
        {
            "name": "old_norse_texts_heimskringla",
            "origin": "https://github.com/cltk/old_norse_texts_heimskringla.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "old_norse_runic_transcriptions",
            "origin": "https://github.com/cltk/old_norse_runes_corpus.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "cltk_non_zoega_dictionary",
            "origin": "https://github.com/cltk/cltk_non_zoega_dictionary.git",
            "type": "dictionary",
            "branch": "master",
        },
    ],
    "tel": [
        {
            "name": "telugu_text_wikisource",
            "origin": "https://github.com/cltk/telugu_text_wikisource.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "hin": [
        {
            "type": "text",
            "origin": "https://github.com/cltk/hindi_text_ltrc.git",
            "name": "hindi_text_ltrc",
            "branch": "master",
        }
    ],
    "fro": [
        {
            "name": "french_text_wikisource",
            "origin": "https://github.com/cltk/french_text_wikisource.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "french_lexicon_cltk",
            "origin": "https://github.com/cltk/french_lexicon_cltk.git",
            "type": "text",
            "branch": "master",
        },
        {
            "name": "fro_models_cltk",
            "origin": "https://github.com/cltk/fro_models_cltk.git",
            "type": "model",
            "branch": "master",
        },
    ],
    "guj": [
        {
            "name": "gujarati_text_wikisource",
            "origin": "https://github.com/cltk/gujarati_text_wikisource.git",
            "type": "text",
            "branch": "master",
        }
    ],
    "gml": [
        {
            "name": "gml_models_cltk",
            "origin": "https://github.com/cltk/gml_models_cltk.git",
            "type": "model",
            "branch": "master",
        }
    ],
    "gmh": [
        {
            "name": "gmh_models_cltk",
            "origin": "https://github.com/cltk/gmh_models_cltk.git",
            "type": "model",
            "branch": "master",
        }
    ],
    "enm": [
        {
            "name": "enm_models_cltk",
            "origin": "https://github.com/cltk/enm_models_cltk.git",
            "type": "model",
            "branch": "master",
        }
    ],
}
class ProgressPrinter(RemoteProgress):
    """Print Git transfer progress to standard output."""

    def update(self, op_code, cur_count, max_count=None, message=""):
        """Write a one-line progress report; overrides ``RemoteProgress.update``.

        Nothing is written when ``message`` is empty.

        :param op_code: Operation code supplied by the caller (unused here).
        :param cur_count: Amount of work completed so far.
        :param max_count: Total amount of work; when ``None`` or ``0`` a
            denominator of ``100.0`` is used instead.
        :param message: Status text appended to the percentage.
        """
        if not message:
            return
        total = max_count or 100.0
        percentage = "%.0f" % (100 * cur_count / total)
        # Trailing carriage return makes the next report overwrite this line.
        sys.stdout.write("Downloaded %s%% %s \r" % (percentage, message))
class FetchCorpus:
    """Import CLTK corpora for one language.

    Available corpora are the union of the user-defined records read from a
    YAML file in ``CLTK_DATA_DIR`` and the library-defined records in
    ``LANGUAGE_CORPORA``. Git-hosted corpora are cloned (or pulled, when
    already present); local corpora are copied from a user-supplied path.
    """

    # Directory name expected for each supported local (non-Git) corpus.
    _LOCAL_CORPUS_DIR_NAMES = {"phi5": "PHI5", "phi7": "PHI7", "tlg": "TLG_E"}

    def __init__(self, language: str, testing: bool = False):
        """Setup corpus importing.

        :param language: Language code; lower-cased before use. The value
            ``"multilingual"`` skips the language lookup.
        :param testing: When true, user-defined corpora are read from
            ``test_distributed_corpora.yaml`` instead of
            ``distributed_corpora.yaml``, so a real user file is left alone.
        """
        self.language = language.lower()
        if self.language != "multilingual":
            # Called for validation only; the return value is not used.
            # NOTE(review): presumably raises for an unknown code -- confirm.
            get_lang(iso_code=language)
        assert isinstance(testing, bool), "``testing`` parameter must be boolean type"
        self.testing = testing
        self.user_defined_corpora = self._get_user_defined_corpora()
        self.library_defined_corpora = self._get_library_defined_corpora()
        self.all_corpora_for_lang = (
            self.user_defined_corpora + self.library_defined_corpora
        )

    def __repr__(self):
        """Representation string for ipython

        :rtype : str
        """
        return "FetchCorpus for: {}".format(self.language)

    def _get_user_defined_corpora(self):
        """Read the user's distributed-corpora YAML file for this language.

        The file is ``distributed_corpora.yaml`` (or
        ``test_distributed_corpora.yaml`` when ``self.testing`` is set) inside
        ``CLTK_DATA_DIR``. A missing, unparsable, empty or non-mapping file
        yields an empty list.

        :return: One dict per matching corpus with the keys ``origin``,
            ``type``, ``name`` and ``user_defined`` (always ``True``).
        :rtype : list
        """
        file_name = (
            "test_distributed_corpora.yaml"
            if self.testing
            else "distributed_corpora.yaml"
        )
        # ``expanduser`` keeps this consistent with the other methods, which
        # all expand ``CLTK_DATA_DIR`` before touching the file system.
        distributed_corpora_fp = os.path.normpath(
            os.path.expanduser(os.path.join(CLTK_DATA_DIR, file_name))
        )
        try:
            with open(distributed_corpora_fp) as file_open:
                corpora_dict = yaml.safe_load(file_open)
        except FileNotFoundError:
            logger.debug("``%s`` file not found.", distributed_corpora_fp)
            return []
        except yaml.YAMLError as parse_err:
            # ``YAMLError`` is the common base of parser and scanner errors.
            logger.debug("Yaml parsing error: %s", parse_err)
            return []
        # ``safe_load`` returns ``None`` for an empty file.
        if not isinstance(corpora_dict, dict):
            return []
        return [
            {
                "origin": about["origin"],
                "type": about["type"],
                "name": corpus_name,
                "user_defined": True,
            }
            for corpus_name, about in corpora_dict.items()
            if about["language"].lower() == self.language
        ]

    def _get_library_defined_corpora(self):
        """Pull from ``LANGUAGE_CORPORA`` and return
        corpora for given language (empty list when there are none).
        """
        return LANGUAGE_CORPORA.get(self.language, [])

    @property
    def list_corpora(self):
        """Show corpora available for the CLTK to download."""
        return [corpus_info["name"] for corpus_info in self.all_corpora_for_lang]

    @staticmethod
    def _copy_dir_recursive(src_rel, dst_rel):
        """Copy contents of one directory to another. `dst_rel` dir cannot
        exist. Source: http://stackoverflow.com/a/1994840

        TODO: Move this to file_operations.py module.

        :type src_rel: str
        :param src_rel: Directory to be copied.
        :type dst_rel: str
        :param dst_rel: Directory to be created with contents of ``src_rel``.
        :raises OSError: Any copy failure other than ``ENOTDIR``.
        """
        src = os.path.expanduser(src_rel)
        dst = os.path.expanduser(dst_rel)
        try:
            shutil.copytree(src, dst)
        except OSError as exc:
            if exc.errno != errno.ENOTDIR:
                raise
            # ``src`` is a plain file rather than a directory.
            shutil.copy(src, dst)
        logger.info("Files copied from %s to %s", src, dst)

    def _get_corpus_properties(self, corpus_name: str):
        """Return the record describing ``corpus_name`` for this language.

        :type corpus_name: str
        :param corpus_name: Name of available corpus.
        :raises CorpusImportError: If no corpus of that name is available.
        :rtype : dict
        """
        for corpus_properties in self.all_corpora_for_lang:
            if corpus_properties["name"] == corpus_name:
                return corpus_properties
        msg = 'Corpus "%s" not available for the ' '"%s" language.' % (
            corpus_name,
            self.language,
        )
        logger.error(msg)
        raise CorpusImportError(msg)

    @staticmethod
    def _clone_or_pull(corpus_name, uri, type_dir, target_dir, branch):
        """Clone ``uri`` into ``target_dir``, or pull if it is already there.

        A corpus counts as present when ``target_dir`` contains ``README.md``.
        Failures are logged and then re-raised unchanged.

        :param corpus_name: Corpus name, used in log messages only.
        :param uri: Git URL to clone from.
        :param type_dir: Parent directory; created if missing.
        :param target_dir: Directory of the working copy.
        :param branch: Branch to clone (not used when pulling).
        """
        if not os.path.isfile(os.path.join(target_dir, "README.md")):
            os.makedirs(type_dir, exist_ok=True)
            logger.info("Cloning '{}' from '{}'".format(corpus_name, uri))
            try:
                Repo.clone_from(
                    uri, target_dir, branch=branch, depth=1, progress=ProgressPrinter()
                )
            except Exception as clone_err:
                # Log-and-re-raise: the exception type seen by callers is kept.
                logger.error(
                    "Git clone of '{}' failed: '{}'".format(uri, clone_err)
                )
                raise
            return
        try:
            repo = Repo(target_dir)
            assert not repo.bare  # or: assert repo.exists()
            git_origin = repo.remotes.origin
            logger.info("Pulling latest '{}' from '{}'.".format(corpus_name, uri))
            git_origin.pull()
        except Exception as pull_err:
            logger.error("Git pull of '{}' failed: '{}'".format(uri, pull_err))
            raise

    def _git_user_defined_corpus(
        self, corpus_name, corpus_type, uri: str, branch="master"
    ):
        """Clone or update a git repo defined by user.

        The working copy is named after the last path segment of ``uri``
        (without a trailing ``.git``), not after ``corpus_name``.

        :param corpus_name: Corpus name, used in log messages.
        :param corpus_type: Sub-directory under the language directory.
        :param uri: Git URL of the corpus.
        :param branch: Branch to clone.
        """
        type_dir = os.path.expanduser(
            os.path.join(CLTK_DATA_DIR, self.language, corpus_type)
        )
        repo_name = uri.rstrip("/").split("/")[-1]  # eg, 'latin_corpus_newton_example.git'
        # Remove the suffix explicitly: ``str.rstrip(".git")`` would strip any
        # trailing run of the characters ".", "g", "i", "t".
        if repo_name.endswith(".git"):
            repo_name = repo_name[: -len(".git")]
        target_dir = os.path.join(type_dir, repo_name)
        self._clone_or_pull(corpus_name, uri, type_dir, target_dir, branch)

    def _import_local_corpus(self, corpus_name, local_path):
        """Copy a local corpus into ``<CLTK_DATA_DIR>/originals/<corpus_name>``.

        Any existing copy at the destination is removed first.

        :param corpus_name: One of ``phi5``, ``phi7`` or ``tlg``.
        :param local_path: Directory holding the corpus files.
        :raises CorpusImportError: For an unsupported corpus name or a missing
            ``local_path``.
        """
        logger.info("Importing from local path: '{}'".format(local_path))
        expected_dir_name = self._LOCAL_CORPUS_DIR_NAMES.get(corpus_name)
        if expected_dir_name is None:
            raise CorpusImportError(f"Unsupported local corpus ``{corpus_name}``.")
        if not local_path:
            raise CorpusImportError(
                f"``local_path`` is required to import local corpus ``{corpus_name}``."
            )
        # normalize path for checking dir
        if local_path.endswith("/"):
            local_path = local_path[:-1]
        # A wrongly named directory is only reported; the import still proceeds.
        if os.path.split(local_path)[1] != expected_dir_name:
            logger.info("Directory must be named '%s'.", expected_dir_name)
        originals_dir = os.path.join(os.path.expanduser(CLTK_DATA_DIR), "originals")
        if not os.path.isdir(originals_dir):
            os.makedirs(originals_dir)
            logger.info("Wrote directory at '{}'.".format(originals_dir))
        corpus_originals_dir = os.path.join(originals_dir, corpus_name)
        # The copy requires that the destination does not exist yet.
        if os.path.isdir(corpus_originals_dir):
            shutil.rmtree(corpus_originals_dir)
            logger.info("Removed directory at '{}'.".format(corpus_originals_dir))
        self._copy_dir_recursive(local_path, corpus_originals_dir)

    def import_corpus(
        self, corpus_name: str, local_path: str = None, branch: str = "master"
    ) -> None:
        """Download a remote or load local corpus into dir ``~/cltk_data``.

        :param corpus_name: The name of an available corpus.
        :param local_path: A filepath, required when importing local corpora.
        :param branch: What Git branch to clone.
        :raises CorpusImportError: If the name matches no corpus or more than
            one, or a local corpus cannot be imported.
        :raises ValueError: If a Git corpus record lacks ``type``, ``name`` or
            ``origin``.
        """
        matching_corpus_list = [
            _dict for _dict in self.all_corpora_for_lang if _dict["name"] == corpus_name
        ]
        if not matching_corpus_list:
            raise CorpusImportError(
                f"No corpus ``{corpus_name}`` for language ``{self.language}``."
            )
        if len(matching_corpus_list) > 1:
            raise CorpusImportError(
                f"Found more than one corpus with the name ``{corpus_name}``."
            )
        matching_corpus = matching_corpus_list[0]

        if matching_corpus.get("user_defined"):
            # {'origin': 'https://github.com/kylepjohnson/latin_corpus_newton_example.git',
            #  'type': 'text',
            #  'name': 'example_distributed_latin_corpus',
            #  'user_defined': True}
            self._git_user_defined_corpus(
                matching_corpus["name"],
                matching_corpus["type"],
                matching_corpus["origin"],
                branch=branch,
            )
            return

        if matching_corpus.get("location") == "local":
            # {'location': 'local', 'name': 'phi5', 'origin': None, 'type': 'text'}
            self._import_local_corpus(corpus_name, local_path)
            return

        # {'type': 'text',
        #  'name': 'lat_text_perseus',
        #  'origin': 'https://github.com/cltk/lat_text_perseus.git'}
        # Every one of the three keys is needed below, so any missing key
        # makes the record unusable.
        if (
            not matching_corpus.get("type")
            or not matching_corpus.get("name")
            or not matching_corpus.get("origin")
        ):
            raise ValueError(f"Malformed record for ``{corpus_name}``.")
        type_dir = os.path.expanduser(
            os.path.join(CLTK_DATA_DIR, self.language, matching_corpus["type"])
        )
        target_dir = os.path.join(type_dir, corpus_name)
        self._clone_or_pull(
            corpus_name, matching_corpus["origin"], type_dir, target_dir, branch
        )