Ollama integration for CLTK.
This module provides a thin client around the local Ollama server for
generating text responses used by CLTK's generative pipelines. It mirrors the
shape of the OpenAI integration so higher layers can switch based on
doc.backend.
Usage requires the optional dependency group cltk[ollama] alongside either
a running local Ollama server (default host http://127.0.0.1:11434) or an
Ollama Cloud API key.
OLLAMA_HOST_ENV
module-attribute
# Name of the environment variable that supplies the Ollama host URL; the
# constructor docs describe the default host as "$OLLAMA_HOST".
# NOTE(review): presumably read by _default_host(), which is not shown here.
OLLAMA_HOST_ENV = 'OLLAMA_HOST'
OLLAMA_INSTALL_HINT
module-attribute
# Message of the ImportError raised by the connection constructors when the
# `ollama` package cannot be imported or its client cannot be constructed.
OLLAMA_INSTALL_HINT = "Ollama client not installed. Install with: pip install 'cltk[ollama]'"
HTTPX_INCOMPAT_HINT
module-attribute
# Message prefix of the ImportError raised when constructing the Ollama client
# fails with a TypeError mentioning "base_url"; the constructors append the
# detected httpx version to it.
HTTPX_INCOMPAT_HINT = "Ollama client is incompatible with httpx>=0.29. Install a supported version via: pip install 'httpx<0.29'."
SUGGESTED_OLLAMA_MODELS
module-attribute
# Suggested Ollama model tags. This list is advisory only: the connection
# classes document that any model string is accepted.
SUGGESTED_OLLAMA_MODELS: list[str] = [
    "qwen2.5:7b",
    "qwen2.5:14b",
    "qwen2.5:72b",
    "llama3.1:8b",
    "llama3.1:70b",
    "llama3.1:405b",
    "gemma2:9b",
    "gemma2:27b",
    "gpt-oss:20b",
    "gpt-oss:120b",
    # Was "deepseek-r13:8b", which is not a published Ollama tag.
    "deepseek-r1:8b",
]
OllamaConnection
OllamaConnection(
model: str,
host: Optional[str] = None,
*,
use_cloud: bool = False,
api_key: Optional[str] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
num_ctx: Optional[int] = None,
num_predict: Optional[int] = None,
options: Optional[dict[str, Any]] = None
)
Thin wrapper around the Ollama client for CLTK use cases (sync).
Parameters:
-
model
(str)
–
Ollama model name (e.g., "llama3.1:8b"). Any string accepted.
-
host
(Optional[str], default:
None
)
–
Optional Ollama host URL. Defaults to $OLLAMA_HOST or
http://127.0.0.1:11434.
-
use_cloud
(bool, default:
False
)
–
When true, use the hosted Ollama Cloud endpoint (https://ollama.com unless host is given); an API key is then required.
-
api_key
(Optional[str], default:
None
)
–
Optional explicit API key for the hosted endpoint. When omitted in cloud mode, the key is read from the OLLAMA_CLOUD_API_KEY environment variable.
Source code in cltk/genai/ollama.py
def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store generation settings and build the synchronous Ollama client.

    Args:
        model: Ollama model name; stored as given.
        host: Server URL. When falsy, ``https://ollama.com`` is used in
            cloud mode and ``_default_host()`` otherwise.
        use_cloud: Target the hosted endpoint and send a bearer token.
        api_key: Explicit key for cloud mode; falls back to the
            ``OLLAMA_CLOUD_API_KEY`` environment variable.
        temperature: Stored on the instance; ``None`` means unset.
        top_p: Stored on the instance; ``None`` means unset.
        num_ctx: Stored on the instance; ``None`` means unset.
        num_predict: Stored on the instance; ``None`` means unset.
        options: Extra generation options; an empty dict when falsy.

    Raises:
        ImportError: In cloud mode when no API key is available, when the
            ``ollama`` package cannot be imported, or when the client
            cannot be constructed (with an httpx-specific hint if the
            ``TypeError`` mentions ``base_url``).
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()
    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options or {}

    # Keyword arguments for the client; headers are only added in cloud mode.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        client_kwargs["headers"] = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import Client as _Client
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    try:
        self._client = _Client(**client_kwargs)
    except TypeError as type_err:
        # Only a "base_url" TypeError is treated as the httpx incompatibility.
        if "base_url" not in str(type_err):
            raise ImportError(OLLAMA_INSTALL_HINT) from type_err
        httpx_version = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            httpx_version = getattr(httpx, "__version__", httpx_version)
        except Exception:
            # Best effort: the version is only used to enrich the message.
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {httpx_version}."
        ) from type_err
    except Exception as client_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from client_err
use_cloud
instance-attribute
log
instance-attribute
log = bind_context(model=model)
host
instance-attribute
host = host or (
"https://ollama.com" if use_cloud else _default_host()
)
api_key
instance-attribute
temperature
instance-attribute
temperature = temperature
num_ctx
instance-attribute
num_predict
instance-attribute
num_predict = num_predict
options
instance-attribute
options: dict[str, Any] = options or {}
generate
generate(
prompt: str, *, max_retries: int = 2
) -> CLTKGenAIResponse
Call the Ollama API synchronously with retries and option merging.
Source code in cltk/genai/ollama.py
def generate(self, prompt: str, *, max_retries: int = 2) -> CLTKGenAIResponse:
    """Call the Ollama API synchronously with retries and option merging.

    Args:
        prompt: Text sent to the model.
        max_retries: Total number of attempts. Values below 1 are treated
            as 1, so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` with the non-blank response text and the
        usage derived by ``_usage_from_result``.

    Raises:
        CLTKException: If every attempt raised or returned blank text. The
            last underlying error is attached as ``__cause__``.
    """
    # Avoid logging prompt contents unless explicitly enabled
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[ollama] Prompt being sent to Ollama:\n%s", prompt)
    # Ensure model is present (best-effort)
    self._pull_if_needed()

    # Copy so the instance-level options are never mutated. Keys already in
    # ``self.options`` win over the dedicated attributes (setdefault).
    gen_options: dict[str, Any] = dict(self.options or {})
    for key, value in (
        ("temperature", self.temperature),
        ("top_p", self.top_p),
        ("num_ctx", self.num_ctx),
        ("num_predict", self.num_predict),
    ):
        if value is not None:
            gen_options.setdefault(key, value)

    # Always try at least once. Previously ``max_retries < 1`` skipped the
    # loop and tripped an ``assert`` (which is stripped under ``-O``).
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            if not text.strip():
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[ollama] Error on attempt %s: %s", attempt, e)
    # Reaching here means every attempt failed, so ``last_err`` is set.
    raise CLTKException(
        f"Ollama generation failed after retries: {last_err}"
    ) from last_err
AsyncOllamaConnection
AsyncOllamaConnection(
model: str,
host: Optional[str] = None,
*,
use_cloud: bool = False,
api_key: Optional[str] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
num_ctx: Optional[int] = None,
num_predict: Optional[int] = None,
options: Optional[dict[str, Any]] = None
)
Async wrapper around the Ollama client for CLTK use cases.
Source code in cltk/genai/ollama.py
def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store generation settings and build the asynchronous Ollama client.

    Args:
        model: Ollama model name; stored as given.
        host: Server URL. When falsy, ``https://ollama.com`` is used in
            cloud mode and ``_default_host()`` otherwise.
        use_cloud: Target the hosted endpoint and send a bearer token.
        api_key: Explicit key for cloud mode; falls back to the
            ``OLLAMA_CLOUD_API_KEY`` environment variable.
        temperature: Stored on the instance; ``None`` means unset.
        top_p: Stored on the instance; ``None`` means unset.
        num_ctx: Stored on the instance; ``None`` means unset.
        num_predict: Stored on the instance; ``None`` means unset.
        options: Extra generation options; an empty dict when falsy.

    Raises:
        ImportError: In cloud mode when no API key is available, when the
            ``ollama`` package cannot be imported, or when the client
            cannot be constructed (with an httpx-specific hint if the
            ``TypeError`` mentions ``base_url``).
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()
    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options or {}

    # Keyword arguments for the client; headers are only added in cloud mode.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        client_kwargs["headers"] = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import AsyncClient as _AsyncClient
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    try:
        self._client = _AsyncClient(**client_kwargs)
    except TypeError as type_err:
        # Only a "base_url" TypeError is treated as the httpx incompatibility.
        if "base_url" not in str(type_err):
            raise ImportError(OLLAMA_INSTALL_HINT) from type_err
        httpx_version = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            httpx_version = getattr(httpx, "__version__", httpx_version)
        except Exception:
            # Best effort: the version is only used to enrich the message.
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {httpx_version}."
        ) from type_err
    except Exception as client_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from client_err
use_cloud
instance-attribute
log
instance-attribute
log = bind_context(model=model)
host
instance-attribute
host = host or (
"https://ollama.com" if use_cloud else _default_host()
)
api_key
instance-attribute
temperature
instance-attribute
temperature = temperature
num_ctx
instance-attribute
num_predict
instance-attribute
num_predict = num_predict
options
instance-attribute
options: dict[str, Any] = options or {}
generate_async
async
generate_async(
*, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse
Call the Ollama API asynchronously with retries and option merging.
Source code in cltk/genai/ollama.py
async def generate_async(
    self, *, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse:
    """Call the Ollama API asynchronously with retries and option merging.

    Args:
        prompt: Text sent to the model (keyword-only).
        max_retries: Total number of attempts. Values below 1 are treated
            as 1, so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` with the non-blank response text and the
        usage derived by ``_usage_from_result``.

    Raises:
        CLTKException: If every attempt raised or returned blank text. The
            last underlying error is attached as ``__cause__``.
    """
    # Prompt contents are only logged when CLTK_LOG_CONTENT opts in.
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[async-ollama] Prompt being sent to Ollama:\n%s", prompt)
    await self._pull_if_needed()

    # Copy so the instance-level options are never mutated. Keys already in
    # ``self.options`` win over the dedicated attributes (setdefault).
    gen_options: dict[str, Any] = dict(self.options or {})
    for key, value in (
        ("temperature", self.temperature),
        ("top_p", self.top_p),
        ("num_ctx", self.num_ctx),
        ("num_predict", self.num_predict),
    ):
        if value is not None:
            gen_options.setdefault(key, value)

    # Always try at least once. Previously ``max_retries < 1`` skipped the
    # loop and tripped an ``assert`` (which is stripped under ``-O``).
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[async-ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = await self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            if not text.strip():
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[async-ollama] Error on attempt %s: %s", attempt, e)
    # Reaching here means every attempt failed, so ``last_err`` is set.
    raise CLTKException(
        f"[async-ollama] Ollama generation failed after retries: {last_err}"
    ) from last_err