ollama

Ollama integration for CLTK.

This module provides a thin client around the local Ollama server for generating text responses used by CLTK's generative pipelines. It mirrors the shape of the OpenAI integration so higher layers can switch based on doc.backend.

Usage requires the optional dependency group cltk[ollama] alongside either a running local Ollama server (default host http://127.0.0.1:11434) or an Ollama Cloud API key.

OLLAMA_HOST_ENV `module-attribute`

OLLAMA_HOST_ENV = 'OLLAMA_HOST'

OLLAMA_INSTALL_HINT `module-attribute`

OLLAMA_INSTALL_HINT = "Ollama client not installed. Install with: pip install 'cltk[ollama]'"

HTTPX_INCOMPAT_HINT `module-attribute`

HTTPX_INCOMPAT_HINT = "Ollama client is incompatible with httpx>=0.29. Install a supported version via: pip install 'httpx<0.29'."

SUGGESTED_OLLAMA_MODELS `module-attribute`

# Model tags suggested for use with the Ollama integration.
# NOTE: the DeepSeek entry uses the "deepseek-r1" family name; the previous
# value "deepseek-r13:8b" does not correspond to a published Ollama tag.
SUGGESTED_OLLAMA_MODELS: list[str] = [
    "qwen2.5:7b",
    "qwen2.5:14b",
    "qwen2.5:72b",
    "llama3.1:8b",
    "llama3.1:70b",
    "llama3.1:405b",
    "gemma2:9b",
    "gemma2:27b",
    "gpt-oss:20b",
    "gpt-oss:120b",
    "deepseek-r1:8b",
]

OllamaConnection

OllamaConnection(
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None
)

Thin wrapper around the Ollama client for CLTK use cases (sync).

Parameters:

model (str) –

Ollama model name (e.g., "llama3.1:8b"). Any string accepted.
host (Optional[str], default: None ) –

Optional Ollama host URL. Defaults to $OLLAMA_HOST or http://127.0.0.1:11434.
use_cloud (bool, default: False ) –

When true, use the hosted Ollama Cloud endpoint.
api_key (Optional[str], default: None ) –

Optional explicit API key for the hosted endpoint.

Source code in cltk/genai/ollama.py

def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store the connection settings and build the synchronous Ollama client.

    Args:
        model: Ollama model name; stored as-is.
        host: Explicit host URL. When falsy, ``https://ollama.com`` is used
            if ``use_cloud`` is set, otherwise the value of ``_default_host()``.
        use_cloud: When true, an API key is required and is sent as an
            ``Authorization`` header.
        api_key: Explicit API key; when falsy and ``use_cloud`` is set, the
            ``OLLAMA_CLOUD_API_KEY`` environment variable is consulted.
        temperature: Stored on the instance unchanged.
        top_p: Stored on the instance unchanged.
        num_ctx: Stored on the instance unchanged.
        num_predict: Stored on the instance unchanged.
        options: Extra options mapping; an empty dict is stored when falsy.

    Raises:
        ImportError: If the cloud API key is missing, the ``ollama`` package
            cannot be imported, or the client cannot be constructed.
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)

    # Resolve the endpoint: explicit host first, then cloud, then local default.
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()

    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options if options else {}

    # Keyword arguments for the client; headers are only added for cloud use.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        client_kwargs["headers"] = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import Client as _Client
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    try:
        self._client = _Client(**client_kwargs)
    except Exception as build_err:
        # Only a TypeError mentioning "base_url" is reported as the httpx
        # incompatibility; every other failure gets the install hint.
        if not (isinstance(build_err, TypeError) and "base_url" in str(build_err)):
            raise ImportError(OLLAMA_INSTALL_HINT) from build_err
        httpx_version = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            httpx_version = getattr(httpx, "__version__", httpx_version)
        except Exception:
            # Version lookup is purely informational for the error message.
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {httpx_version}."
        ) from build_err

model `instance-attribute`

model = model

use_cloud `instance-attribute`

use_cloud = use_cloud

log `instance-attribute`

log = bind_context(model=model)

host `instance-attribute`

host = host or (
    "https://ollama.com" if use_cloud else _default_host()
)

api_key `instance-attribute`

api_key = api_key

temperature `instance-attribute`

temperature = temperature

top_p `instance-attribute`

top_p = top_p

num_ctx `instance-attribute`

num_ctx = num_ctx

num_predict `instance-attribute`

num_predict = num_predict

options `instance-attribute`

options: dict[str, Any] = options or {}

generate

generate(
    prompt: str, *, max_retries: int = 2
) -> CLTKGenAIResponse

Call the Ollama API synchronously with retries and option merging.

Source code in cltk/genai/ollama.py

def generate(self, prompt: str, *, max_retries: int = 2) -> CLTKGenAIResponse:
    """Call the Ollama API synchronously with retries and option merging.

    Args:
        prompt: Text passed to the client as the generation prompt.
        max_retries: Total number of attempts to make. Values below 1 are
            treated as 1 so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` built from the ``"response"`` field of the
        client result and the usage computed by ``_usage_from_result``.

    Raises:
        CLTKException: If every attempt raises or yields an empty response.
            The last underlying error is chained as the cause.
    """
    # Avoid logging prompt contents unless explicitly enabled
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[ollama] Prompt being sent to Ollama:\n%s", prompt)
    # Ensure model is present (best-effort)
    self._pull_if_needed()

    # Work on a copy so the instance-level options dict is never mutated.
    # setdefault means keys already present in ``options`` take precedence
    # over the dedicated attributes.
    gen_options: dict[str, Any] = dict(self.options) if self.options else {}
    for name in ("temperature", "top_p", "num_ctx", "num_predict"):
        value = getattr(self, name)
        if value is not None:
            gen_options.setdefault(name, value)

    # Guarantee at least one attempt; previously a non-positive max_retries
    # skipped the loop and tripped an assert instead of calling the API.
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            if not text.strip():
                # Raised inside the try so an empty reply is retried too.
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[ollama] Error on attempt %s: %s", attempt, e)
    # Chain the final failure so the original traceback is preserved.
    raise CLTKException(
        f"Ollama generation failed after retries: {last_err}"
    ) from last_err

AsyncOllamaConnection

AsyncOllamaConnection(
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None
)

Async wrapper around the Ollama client for CLTK use cases.

Source code in cltk/genai/ollama.py

def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store the connection settings and build the asynchronous Ollama client.

    Args:
        model: Ollama model name; stored as-is.
        host: Explicit host URL. When falsy, ``https://ollama.com`` is used
            if ``use_cloud`` is set, otherwise the value of ``_default_host()``.
        use_cloud: When true, an API key is required and is sent as an
            ``Authorization`` header.
        api_key: Explicit API key; when falsy and ``use_cloud`` is set, the
            ``OLLAMA_CLOUD_API_KEY`` environment variable is consulted.
        temperature: Stored on the instance unchanged.
        top_p: Stored on the instance unchanged.
        num_ctx: Stored on the instance unchanged.
        num_predict: Stored on the instance unchanged.
        options: Extra options mapping; an empty dict is stored when falsy.

    Raises:
        ImportError: If the cloud API key is missing, the ``ollama`` package
            cannot be imported, or the client cannot be constructed.
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)

    # Endpoint precedence: explicit host, then cloud URL, then local default.
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()

    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options if options else {}

    # Collect constructor arguments once; headers only exist in cloud mode.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        client_kwargs["headers"] = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import AsyncClient as _AsyncClient
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    try:
        self._client = _AsyncClient(**client_kwargs)
    except Exception as build_err:
        # A TypeError mentioning "base_url" is reported as the httpx
        # incompatibility; anything else falls back to the install hint.
        if not (isinstance(build_err, TypeError) and "base_url" in str(build_err)):
            raise ImportError(OLLAMA_INSTALL_HINT) from build_err
        httpx_version = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            httpx_version = getattr(httpx, "__version__", httpx_version)
        except Exception:
            # Version lookup only enriches the error message.
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {httpx_version}."
        ) from build_err

model `instance-attribute`

model = model

use_cloud `instance-attribute`

use_cloud = use_cloud

log `instance-attribute`

log = bind_context(model=model)

host `instance-attribute`

host = host or (
    "https://ollama.com" if use_cloud else _default_host()
)

api_key `instance-attribute`

api_key = api_key

temperature `instance-attribute`

temperature = temperature

top_p `instance-attribute`

top_p = top_p

num_ctx `instance-attribute`

num_ctx = num_ctx

num_predict `instance-attribute`

num_predict = num_predict

options `instance-attribute`

options: dict[str, Any] = options or {}

generate_async `async`

generate_async(
    *, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse

Call the Ollama API asynchronously with retries and option merging.

Source code in cltk/genai/ollama.py

async def generate_async(
    self, *, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse:
    """Call the Ollama API asynchronously with retries and option merging.

    Args:
        prompt: Text passed to the client as the generation prompt.
        max_retries: Total number of attempts to make. Values below 1 are
            treated as 1 so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` built from the ``"response"`` field of the
        client result and the usage computed by ``_usage_from_result``.

    Raises:
        CLTKException: If every attempt raises or yields an empty response.
            The last underlying error is chained as the cause.
    """
    # Prompt contents are only logged when CLTK_LOG_CONTENT opts in.
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[async-ollama] Prompt being sent to Ollama:\n%s", prompt)
    await self._pull_if_needed()

    # Work on a copy so the instance-level options dict is never mutated.
    # setdefault means keys already present in ``options`` take precedence
    # over the dedicated attributes.
    gen_options: dict[str, Any] = dict(self.options) if self.options else {}
    for name in ("temperature", "top_p", "num_ctx", "num_predict"):
        value = getattr(self, name)
        if value is not None:
            gen_options.setdefault(name, value)

    # Guarantee at least one attempt; previously a non-positive max_retries
    # skipped the loop and tripped an assert instead of calling the API.
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[async-ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = await self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            if not text.strip():
                # Raised inside the try so an empty reply is retried too.
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[async-ollama] Error on attempt %s: %s", attempt, e)
    # Chain the final failure so the original traceback is preserved.
    raise CLTKException(
        f"[async-ollama] Ollama generation failed after retries: {last_err}"
    ) from last_err

ollama

OLLAMA_HOST_ENV module-attribute

OLLAMA_INSTALL_HINT module-attribute

HTTPX_INCOMPAT_HINT module-attribute

SUGGESTED_OLLAMA_MODELS module-attribute

OllamaConnection

model instance-attribute

use_cloud instance-attribute

log instance-attribute

host instance-attribute

api_key instance-attribute

temperature instance-attribute

top_p instance-attribute

num_ctx instance-attribute

num_predict instance-attribute

options instance-attribute

generate

AsyncOllamaConnection

model instance-attribute

use_cloud instance-attribute

log instance-attribute

host instance-attribute

api_key instance-attribute

temperature instance-attribute

top_p instance-attribute

num_ctx instance-attribute

num_predict instance-attribute

options instance-attribute

generate_async async

OLLAMA_HOST_ENV `module-attribute`

OLLAMA_INSTALL_HINT `module-attribute`

HTTPX_INCOMPAT_HINT `module-attribute`

SUGGESTED_OLLAMA_MODELS `module-attribute`

model `instance-attribute`

use_cloud `instance-attribute`

log `instance-attribute`

host `instance-attribute`

api_key `instance-attribute`

temperature `instance-attribute`

top_p `instance-attribute`

num_ctx `instance-attribute`

num_predict `instance-attribute`

options `instance-attribute`

model `instance-attribute`

use_cloud `instance-attribute`

log `instance-attribute`

host `instance-attribute`

api_key `instance-attribute`

temperature `instance-attribute`

top_p `instance-attribute`

num_ctx `instance-attribute`

num_predict `instance-attribute`

options `instance-attribute`

generate_async `async`