Skip to content

ollama

Ollama integration for CLTK.

This module provides a thin client around the local Ollama server for generating text responses used by CLTK's generative pipelines. It mirrors the shape of the OpenAI integration so higher layers can switch based on doc.backend.

Usage requires the optional dependency group cltk[ollama] alongside either a running local Ollama server (default host http://127.0.0.1:11434) or an Ollama Cloud API key.

OLLAMA_HOST_ENV module-attribute

# Name of the environment variable that carries the Ollama host URL.
# NOTE(review): presumably consulted by _default_host() when no explicit
# ``host`` is passed to the connection classes — confirm against that helper.
OLLAMA_HOST_ENV = 'OLLAMA_HOST'

OLLAMA_INSTALL_HINT module-attribute

# Message used for the ImportError raised when the optional ``ollama`` client
# package cannot be imported or a client instance cannot be constructed.
OLLAMA_INSTALL_HINT = "Ollama client not installed. Install with: pip install 'cltk[ollama]'"

HTTPX_INCOMPAT_HINT module-attribute

# Message prefix for the ImportError raised when constructing the client fails
# with a TypeError mentioning "base_url"; the detected httpx version is
# appended to it by the connection constructors.
HTTPX_INCOMPAT_HINT = "Ollama client is incompatible with httpx>=0.29. Install a supported version via: pip install 'httpx<0.29'."

SUGGESTED_OLLAMA_MODELS module-attribute

# Model tags offered as suggestions only; the connection classes accept any
# model string. Each entry must be a tag that exists in the Ollama model
# library so that it can actually be pulled.
SUGGESTED_OLLAMA_MODELS: list[str] = [
    "qwen2.5:7b",
    "qwen2.5:14b",
    "qwen2.5:72b",
    "llama3.1:8b",
    "llama3.1:70b",
    "llama3.1:405b",
    "gemma2:9b",
    "gemma2:27b",
    "gpt-oss:20b",
    "gpt-oss:120b",
    # The model family is "deepseek-r1"; "deepseek-r13" does not exist.
    "deepseek-r1:8b",
]

OllamaConnection

OllamaConnection(
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None
)

Thin wrapper around the Ollama client for CLTK use cases (sync).

Parameters:

  • model (str) –

    Ollama model name (e.g., "llama3.1:8b"). Any string is accepted.

  • host (Optional[str], default: None ) –

    Optional Ollama host URL. Defaults to $OLLAMA_HOST or http://127.0.0.1:11434.

  • use_cloud (bool, default: False ) –

    When true, use the hosted Ollama Cloud endpoint.

  • api_key (Optional[str], default: None ) –

    Optional explicit API key for the hosted endpoint.

Source code in cltk/genai/ollama.py
def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store generation settings and build the synchronous Ollama client.

    Args:
        model: Ollama model name.
        host: Explicit host URL. When falsy, ``https://ollama.com`` is used
            for cloud mode and ``_default_host()`` otherwise.
        use_cloud: When true, an API key is required and sent as an
            ``Authorization`` header.
        api_key: Explicit API key; in cloud mode falls back to the
            ``OLLAMA_CLOUD_API_KEY`` environment variable.
        temperature: Stored for later use as a generation option.
        top_p: Stored for later use as a generation option.
        num_ctx: Stored for later use as a generation option.
        num_predict: Stored for later use as a generation option.
        options: Extra generation options; an empty dict when falsy.

    Raises:
        ImportError: If cloud mode has no API key, if the ``ollama``
            package cannot be imported, or if the client cannot be built.
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)

    # Host resolution: explicit argument, then cloud endpoint, then local default.
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()

    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options if options else {}

    auth_headers: Optional[dict[str, str]] = None
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        auth_headers = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import Client as _Client
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    # Only pass ``headers`` when there is something to send.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if auth_headers:
        client_kwargs["headers"] = auth_headers

    try:
        self._client = _Client(**client_kwargs)
    except TypeError as type_err:
        if "base_url" not in str(type_err):
            raise ImportError(OLLAMA_INSTALL_HINT) from type_err
        # A "base_url" TypeError is reported with the httpx hint; include
        # the installed httpx version when it can be determined.
        detected = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            detected = getattr(httpx, "__version__", detected)
        except Exception:
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {detected}."
        ) from type_err
    except Exception as build_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from build_err

model instance-attribute

model = model

use_cloud instance-attribute

use_cloud = use_cloud

log instance-attribute

log = bind_context(model=model)

host instance-attribute

host = host or (
    "https://ollama.com" if use_cloud else _default_host()
)

api_key instance-attribute

api_key = api_key

temperature instance-attribute

temperature = temperature

top_p instance-attribute

top_p = top_p

num_ctx instance-attribute

num_ctx = num_ctx

num_predict instance-attribute

num_predict = num_predict

options instance-attribute

options: dict[str, Any] = options or {}

generate

generate(
    prompt: str, *, max_retries: int = 2
) -> CLTKGenAIResponse

Call the Ollama API synchronously with retries and option merging.

Source code in cltk/genai/ollama.py
def generate(self, prompt: str, *, max_retries: int = 2) -> CLTKGenAIResponse:
    """Call the Ollama API synchronously with retries and option merging.

    Args:
        prompt: Text sent to the model.
        max_retries: Total number of attempts. Values below 1 are treated
            as 1 so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` carrying the response text and usage data.

    Raises:
        CLTKException: If every attempt raises or returns blank text. The
            last underlying error is attached as ``__cause__``.
    """
    # Avoid logging prompt contents unless explicitly enabled
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[ollama] Prompt being sent to Ollama:\n%s", prompt)
    # Ensure model is present (best-effort)
    self._pull_if_needed()

    # Values already present in ``self.options`` win over the dedicated
    # attributes, hence ``setdefault``. Work on a copy so the instance's
    # options dict is never mutated.
    gen_options: dict[str, Any] = dict(self.options or {})
    for name in ("temperature", "top_p", "num_ctx", "num_predict"):
        value = getattr(self, name)
        if value is not None:
            gen_options.setdefault(name, value)

    # Always make at least one attempt; previously ``max_retries <= 0``
    # skipped the loop and tripped an ``assert`` instead of calling the API.
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            # Raised inside the ``try`` on purpose so a blank reply is retried.
            if not text.strip():
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[ollama] Error on attempt %s: %s", attempt, e)
    raise CLTKException(
        f"Ollama generation failed after retries: {last_err}"
    ) from last_err

AsyncOllamaConnection

AsyncOllamaConnection(
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None
)

Async wrapper around the Ollama client for CLTK use cases.

Source code in cltk/genai/ollama.py
def __init__(
    self,
    model: str,
    host: Optional[str] = None,
    *,
    use_cloud: bool = False,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    num_ctx: Optional[int] = None,
    num_predict: Optional[int] = None,
    options: Optional[dict[str, Any]] = None,
) -> None:
    """Store generation settings and build the asynchronous Ollama client.

    Args:
        model: Ollama model name.
        host: Explicit host URL. When falsy, ``https://ollama.com`` is used
            for cloud mode and ``_default_host()`` otherwise.
        use_cloud: When true, an API key is required and sent as an
            ``Authorization`` header.
        api_key: Explicit API key; in cloud mode falls back to the
            ``OLLAMA_CLOUD_API_KEY`` environment variable.
        temperature: Stored for later use as a generation option.
        top_p: Stored for later use as a generation option.
        num_ctx: Stored for later use as a generation option.
        num_predict: Stored for later use as a generation option.
        options: Extra generation options; an empty dict when falsy.

    Raises:
        ImportError: If cloud mode has no API key, if the ``ollama``
            package cannot be imported, or if the client cannot be built.
    """
    self.model = model
    self.use_cloud = use_cloud
    self.log = bind_context(model=model)

    # Host resolution: explicit argument, then cloud endpoint, then local default.
    if host:
        self.host = host
    elif use_cloud:
        self.host = "https://ollama.com"
    else:
        self.host = _default_host()

    self.api_key = api_key
    self.temperature = temperature
    self.top_p = top_p
    self.num_ctx = num_ctx
    self.num_predict = num_predict
    self.options: dict[str, Any] = options if options else {}

    auth_headers: Optional[dict[str, str]] = None
    if self.use_cloud:
        load_env_file()
        if not self.api_key:
            self.api_key = os.environ.get("OLLAMA_CLOUD_API_KEY")
        if not self.api_key:
            raise ImportError(
                "Ollama Cloud API key not found. Set OLLAMA_CLOUD_API_KEY in your environment."
            )
        auth_headers = {"Authorization": _bearer(self.api_key)}

    self._client: Any
    try:
        from ollama import AsyncClient as _AsyncClient
    except Exception as import_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from import_err

    # Only pass ``headers`` when there is something to send.
    client_kwargs: dict[str, Any] = {"host": self.host}
    if auth_headers:
        client_kwargs["headers"] = auth_headers

    try:
        self._client = _AsyncClient(**client_kwargs)
    except TypeError as type_err:
        if "base_url" not in str(type_err):
            raise ImportError(OLLAMA_INSTALL_HINT) from type_err
        # A "base_url" TypeError is reported with the httpx hint; include
        # the installed httpx version when it can be determined.
        detected = "unknown"
        try:  # pragma: no cover - optional dep
            import httpx

            detected = getattr(httpx, "__version__", detected)
        except Exception:
            pass
        raise ImportError(
            f"{HTTPX_INCOMPAT_HINT} Detected httpx {detected}."
        ) from type_err
    except Exception as build_err:  # pragma: no cover - optional dep
        raise ImportError(OLLAMA_INSTALL_HINT) from build_err

model instance-attribute

model = model

use_cloud instance-attribute

use_cloud = use_cloud

log instance-attribute

log = bind_context(model=model)

host instance-attribute

host = host or (
    "https://ollama.com" if use_cloud else _default_host()
)

api_key instance-attribute

api_key = api_key

temperature instance-attribute

temperature = temperature

top_p instance-attribute

top_p = top_p

num_ctx instance-attribute

num_ctx = num_ctx

num_predict instance-attribute

num_predict = num_predict

options instance-attribute

options: dict[str, Any] = options or {}

generate_async async

generate_async(
    *, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse

Call the Ollama API asynchronously with retries and option merging.

Source code in cltk/genai/ollama.py
async def generate_async(
    self, *, prompt: str, max_retries: int = 2
) -> CLTKGenAIResponse:
    """Call the Ollama API asynchronously with retries and option merging.

    Args:
        prompt: Text sent to the model.
        max_retries: Total number of attempts. Values below 1 are treated
            as 1 so the request is always tried at least once.

    Returns:
        A ``CLTKGenAIResponse`` carrying the response text and usage data.

    Raises:
        CLTKException: If every attempt raises or returns blank text. The
            last underlying error is attached as ``__cause__``.
    """
    # Avoid logging prompt contents unless explicitly enabled
    log_content = os.getenv("CLTK_LOG_CONTENT", "").strip().lower()
    if log_content in {"1", "true", "yes", "on"}:
        self.log.debug("[async-ollama] Prompt being sent to Ollama:\n%s", prompt)
    await self._pull_if_needed()

    # Values already present in ``self.options`` win over the dedicated
    # attributes, hence ``setdefault``. Work on a copy so the instance's
    # options dict is never mutated.
    gen_options: dict[str, Any] = dict(self.options or {})
    for name in ("temperature", "top_p", "num_ctx", "num_predict"):
        value = getattr(self, name)
        if value is not None:
            gen_options.setdefault(name, value)

    # Always make at least one attempt; previously ``max_retries <= 0``
    # skipped the loop and tripped an ``assert`` instead of calling the API.
    attempts = max(1, max_retries)
    last_err: Optional[Exception] = None
    for attempt in range(1, attempts + 1):
        self.log.debug("[async-ollama] Attempt %s of %s", attempt, attempts)
        try:
            res: dict[str, Any] = await self._client.generate(
                model=self.model,
                prompt=prompt,
                options=gen_options or None,
            )
            text: str = str(res.get("response", ""))
            usage = _usage_from_result(res)
            # Raised inside the ``try`` on purpose so a blank reply is retried.
            if not text.strip():
                raise CLTKException("Empty response from Ollama.")
            return CLTKGenAIResponse(response=text, usage=usage)
        except Exception as e:
            last_err = e
            self.log.error("[async-ollama] Error on attempt %s: %s", attempt, e)
    raise CLTKException(
        f"[async-ollama] Ollama generation failed after retries: {last_err}"
    ) from last_err