diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx
index 995eb72c1..670f8a7f9 100644
--- a/docs/docs/providers/inference/remote_databricks.mdx
+++ b/docs/docs/providers/inference/remote_databricks.mdx
@@ -15,7 +15,7 @@ Databricks inference provider for running models on Databricks' unified analytic
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `url` | `` | No | | The URL for the Databricks model serving endpoint |
+| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
 | `api_token` | `` | No | | The Databricks API token |
 
 ## Sample Configuration
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 89d7f55e8..f51b65cc2 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -52,9 +52,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter_type="cerebras",
         provider_type="remote::cerebras",
-        pip_packages=[
-            "cerebras_cloud_sdk",
-        ],
+        pip_packages=[],
         module="llama_stack.providers.remote.inference.cerebras",
         config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
         description="Cerebras inference provider for running models on Cerebras Cloud platform.",
@@ -179,7 +177,7 @@ def available_providers() -> list[ProviderSpec]:
         api=Api.inference,
         adapter_type="anthropic",
         provider_type="remote::anthropic",
-        pip_packages=["litellm"],
+        pip_packages=["litellm", "anthropic"],
         module="llama_stack.providers.remote.inference.anthropic",
         config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
         provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
diff --git a/llama_stack/providers/remote/inference/anthropic/__init__.py b/llama_stack/providers/remote/inference/anthropic/__init__.py
index 30d986808..1cac133f5 100644
--- a/llama_stack/providers/remote/inference/anthropic/__init__.py
+++ b/llama_stack/providers/remote/inference/anthropic/__init__.py
@@ -10,6 +10,6 @@ from .config import AnthropicConfig
 async def get_adapter_impl(config: AnthropicConfig, _deps):
     from .anthropic import AnthropicInferenceAdapter
 
-    impl = AnthropicInferenceAdapter(config)
+    impl = AnthropicInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/anthropic/anthropic.py b/llama_stack/providers/remote/inference/anthropic/anthropic.py
index cdde4a411..29dd3d3b1 100644
--- a/llama_stack/providers/remote/inference/anthropic/anthropic.py
+++ b/llama_stack/providers/remote/inference/anthropic/anthropic.py
@@ -4,13 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
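Reviewer note: the switch from positional `AnthropicInferenceAdapter(config)` to keyword `AnthropicInferenceAdapter(config=config)` across this PR, together with the newly declared `config:` class fields, suggests the adapters now behave like pydantic models, whose constructors are keyword-only. A minimal sketch of that pattern (hypothetical names, not code from this PR):

```python
# Hypothetical sketch of the construction pattern this PR converges on:
# an adapter that is a pydantic model must be built with keyword arguments.
from pydantic import BaseModel


class ExampleConfig(BaseModel):
    api_key: str | None = None


class ExampleAdapter(BaseModel):
    # Declared as a field instead of being assigned inside __init__
    config: ExampleConfig


# Keyword-only: ExampleAdapter(ExampleConfig()) would raise a TypeError.
adapter = ExampleAdapter(config=ExampleConfig())
```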
 
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from collections.abc import Iterable
+
+from anthropic import AsyncAnthropic
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import AnthropicConfig
 
 
-class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+class AnthropicInferenceAdapter(OpenAIMixin):
+    config: AnthropicConfig
+
+    provider_data_api_key_field: str = "anthropic_api_key"
     # source: https://docs.claude.com/en/docs/build-with-claude/embeddings
     # TODO: add support for voyageai, which is where these models are hosted
     # embedding_model_metadata = {
@@ -23,22 +29,11 @@ class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
     #     "voyage-multimodal-3": {"embedding_dimension": 1024, "context_length": 32000},
     # }
 
-    def __init__(self, config: AnthropicConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="anthropic",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="anthropic_api_key",
-        )
-        self.config = config
-
-    async def initialize(self) -> None:
-        await super().initialize()
-
-    async def shutdown(self) -> None:
-        await super().shutdown()
-
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self):
         return "https://api.anthropic.com/v1"
+
+    async def get_models(self) -> Iterable[str] | None:
+        return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
diff --git a/llama_stack/providers/remote/inference/azure/__init__.py b/llama_stack/providers/remote/inference/azure/__init__.py
index 87bcaf309..4eca2c610 100644
--- a/llama_stack/providers/remote/inference/azure/__init__.py
+++ b/llama_stack/providers/remote/inference/azure/__init__.py
@@ -10,6 +10,6 @@ from .config import AzureConfig
 async def get_adapter_impl(config: AzureConfig, _deps):
     from .azure import AzureInferenceAdapter
 
-    impl = AzureInferenceAdapter(config)
+    impl = AzureInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/azure/azure.py b/llama_stack/providers/remote/inference/azure/azure.py
index a2c69b69c..0c8f6e7ad 100644
--- a/llama_stack/providers/remote/inference/azure/azure.py
+++ b/llama_stack/providers/remote/inference/azure/azure.py
@@ -4,31 +4,20 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
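The new `get_models` above relies on the `anthropic` SDK's `models.list()` returning an async-iterable paginator, which is what the `async for` comprehension consumes. A self-contained sketch of the same call, under that assumption:

```python
# Standalone version of the dynamic model listing used in the adapter above.
# Assumes AsyncAnthropic.models.list() yields page items with an `.id` field
# when driven with `async for`.
from anthropic import AsyncAnthropic


async def list_anthropic_model_ids(api_key: str) -> list[str]:
    client = AsyncAnthropic(api_key=api_key)
    # `async for` drives pagination transparently; each item is a model record
    return [m.id async for m in client.models.list()]
```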
 
-from typing import Any
 from urllib.parse import urljoin
 
-from llama_stack.apis.inference import ChatCompletionRequest
-from llama_stack.providers.utils.inference.litellm_openai_mixin import (
-    LiteLLMOpenAIMixin,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import AzureConfig
 
 
-class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
-    def __init__(self, config: AzureConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="azure",
-            api_key_from_config=config.api_key.get_secret_value(),
-            provider_data_api_key_field="azure_api_key",
-            openai_compat_api_base=str(config.api_base),
-        )
-        self.config = config
+class AzureInferenceAdapter(OpenAIMixin):
+    config: AzureConfig
 
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    provider_data_api_key_field: str = "azure_api_key"
+
+    def get_api_key(self) -> str:
+        return self.config.api_key.get_secret_value()
 
     def get_base_url(self) -> str:
         """
@@ -37,26 +26,3 @@ class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
         Returns the Azure API base URL from the configuration.
         """
         return urljoin(str(self.config.api_base), "/openai/v1")
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
-        # Get base parameters from parent
-        params = await super()._get_params(request)
-
-        # Add Azure specific parameters
-        provider_data = self.get_request_provider_data()
-        if provider_data:
-            if getattr(provider_data, "azure_api_key", None):
-                params["api_key"] = provider_data.azure_api_key
-            if getattr(provider_data, "azure_api_base", None):
-                params["api_base"] = provider_data.azure_api_base
-            if getattr(provider_data, "azure_api_version", None):
-                params["api_version"] = provider_data.azure_api_version
-            if getattr(provider_data, "azure_api_type", None):
-                params["api_type"] = provider_data.azure_api_type
-        else:
-            params["api_key"] = self.config.api_key.get_secret_value()
-            params["api_base"] = str(self.config.api_base)
-            params["api_version"] = self.config.api_version
-            params["api_type"] = self.config.api_type
-
-        return params
diff --git a/llama_stack/providers/remote/inference/cerebras/__init__.py b/llama_stack/providers/remote/inference/cerebras/__init__.py
index 51f446110..e9e989798 100644
--- a/llama_stack/providers/remote/inference/cerebras/__init__.py
+++ b/llama_stack/providers/remote/inference/cerebras/__init__.py
@@ -12,7 +12,7 @@ async def get_adapter_impl(config: CerebrasImplConfig, _deps):
 
     assert isinstance(config, CerebrasImplConfig), f"Unexpected config type: {type(config)}"
 
-    impl = CerebrasInferenceAdapter(config)
+    impl = CerebrasInferenceAdapter(config=config)
 
     await impl.initialize()
 
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index e3ce9bfab..11ef218a1 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,39 +6,14 @@
 
 from urllib.parse import urljoin
 
-from cerebras.cloud.sdk import AsyncCerebras
-
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    CompletionRequest,
-    Inference,
-    OpenAIEmbeddingsResponse,
-    TopKSamplingStrategy,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    get_sampling_options,
-)
+from llama_stack.apis.inference import OpenAIEmbeddingsResponse
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_prompt,
-    completion_request_to_prompt,
-)
 
 from .config import CerebrasImplConfig
 
 
-class CerebrasInferenceAdapter(
-    OpenAIMixin,
-    Inference,
-):
-    def __init__(self, config: CerebrasImplConfig) -> None:
-        self.config = config
-
-        # TODO: make this use provider data, etc. like other providers
-        self._cerebras_client = AsyncCerebras(
-            base_url=self.config.base_url,
-            api_key=self.config.api_key.get_secret_value(),
-        )
+class CerebrasInferenceAdapter(OpenAIMixin):
+    config: CerebrasImplConfig
 
     def get_api_key(self) -> str:
         return self.config.api_key.get_secret_value()
@@ -46,31 +21,6 @@ class CerebrasInferenceAdapter(
     def get_base_url(self) -> str:
         return urljoin(self.config.base_url, "v1")
 
-    async def initialize(self) -> None:
-        return
-
-    async def shutdown(self) -> None:
-        pass
-
-    async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
-        if request.sampling_params and isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
-            raise ValueError("`top_k` not supported by Cerebras")
-
-        prompt = ""
-        if isinstance(request, ChatCompletionRequest):
-            prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
-        elif isinstance(request, CompletionRequest):
-            prompt = await completion_request_to_prompt(request)
-        else:
-            raise ValueError(f"Unknown request type {type(request)}")
-
-        return {
-            "model": request.model,
-            "prompt": prompt,
-            "stream": request.stream,
-            **get_sampling_options(request.sampling_params),
-        }
-
     async def openai_embeddings(
         self,
         model: str,
diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py
index 9e7aeb411..40db38935 100644
--- a/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/llama_stack/providers/remote/inference/cerebras/config.py
@@ -22,7 +22,7 @@ class CerebrasImplConfig(RemoteInferenceProviderConfig):
         description="Base URL for the Cerebras API",
     )
     api_key: SecretStr = Field(
-        default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
+        default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),  # type: ignore[arg-type]
         description="Cerebras API Key",
     )
 
diff --git a/llama_stack/providers/remote/inference/databricks/__init__.py b/llama_stack/providers/remote/inference/databricks/__init__.py
index 24f658a2b..9ee595de8 100644
--- a/llama_stack/providers/remote/inference/databricks/__init__.py
+++ b/llama_stack/providers/remote/inference/databricks/__init__.py
@@ -11,6 +11,6 @@ async def get_adapter_impl(config: DatabricksImplConfig, _deps):
     from .databricks import DatabricksInferenceAdapter
 
     assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
-    impl = DatabricksInferenceAdapter(config)
+    impl = DatabricksInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/llama_stack/providers/remote/inference/databricks/config.py
index b5406a1c5..68e94151e 100644
--- a/llama_stack/providers/remote/inference/databricks/config.py
+++ b/llama_stack/providers/remote/inference/databricks/config.py
@@ -14,12 +14,12 @@ from llama_stack.schema_utils import json_schema_type
 
 @json_schema_type
 class DatabricksImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    url: str | None = Field(
         default=None,
         description="The URL for the Databricks model serving endpoint",
     )
     api_token: SecretStr = Field(
-        default=SecretStr(None),
+        default=SecretStr(None),  # type: ignore[arg-type]
         description="The Databricks API token",
     )
 
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index a2621b81e..70d6bb278 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -9,11 +9,8 @@ from typing import Any
 
 from databricks.sdk import WorkspaceClient
 
 from llama_stack.apis.inference import (
-    Inference,
-    Model,
     OpenAICompletion,
 )
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
@@ -22,30 +19,31 @@ from .config import DatabricksImplConfig
 logger = get_logger(name=__name__, category="inference::databricks")
 
 
-class DatabricksInferenceAdapter(
-    OpenAIMixin,
-    Inference,
-):
+class DatabricksInferenceAdapter(OpenAIMixin):
+    config: DatabricksImplConfig
+
     # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
-    embedding_model_metadata = {
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
         "databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
     }
 
-    def __init__(self, config: DatabricksImplConfig) -> None:
-        self.config = config
-
     def get_api_key(self) -> str:
         return self.config.api_token.get_secret_value()
 
     def get_base_url(self) -> str:
         return f"{self.config.url}/serving-endpoints"
 
-    async def initialize(self) -> None:
-        return
+    async def get_models(self) -> list[str] | None:
+        return [
+            endpoint.name
+            for endpoint in WorkspaceClient(
+                host=self.config.url, token=self.get_api_key()
+            ).serving_endpoints.list()  # TODO: this is not async
+        ]
 
-    async def shutdown(self) -> None:
-        pass
+    async def should_refresh_models(self) -> bool:
+        return False
 
     async def openai_completion(
         self,
@@ -71,32 +69,3 @@ class DatabricksInferenceAdapter(
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
-
-    async def list_models(self) -> list[Model] | None:
-        self._model_cache = {}  # from OpenAIMixin
-        ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key())  # TODO: this is not async
-        endpoints = ws_client.serving_endpoints.list()
-        for endpoint in endpoints:
-            model = Model(
-                provider_id=self.__provider_id__,
-                provider_resource_id=endpoint.name,
-                identifier=endpoint.name,
-            )
-            if endpoint.task == "llm/v1/chat":
-                model.model_type = ModelType.llm  # this is redundant, but informative
-            elif endpoint.task == "llm/v1/embeddings":
-                if endpoint.name not in self.embedding_model_metadata:
-                    logger.warning(f"No metadata information available for embedding model {endpoint.name}, skipping.")
-                    continue
-                model.model_type = ModelType.embedding
-                model.metadata = self.embedding_model_metadata[endpoint.name]
-            else:
-                logger.warning(f"Unknown model type, skipping: {endpoint}")
-                continue
-
-            self._model_cache[endpoint.name] = model
-
-        return list(self._model_cache.values())
-
-    async def should_refresh_models(self) -> bool:
-        return False
diff --git a/llama_stack/providers/remote/inference/fireworks/__init__.py b/llama_stack/providers/remote/inference/fireworks/__init__.py
index f53242334..9285342d0 100644
--- a/llama_stack/providers/remote/inference/fireworks/__init__.py
+++ b/llama_stack/providers/remote/inference/fireworks/__init__.py
@@ -17,6 +17,6 @@ async def get_adapter_impl(config: FireworksImplConfig, _deps):
     from .fireworks import FireworksInferenceAdapter
 
     assert isinstance(config, FireworksImplConfig), f"Unexpected config type: {type(config)}"
-    impl = FireworksInferenceAdapter(config)
+    impl = FireworksInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index 56c12fd49..81dbff0a3 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -5,124 +5,26 @@
 # the root directory of this source tree.
 
 
-from fireworks.client import Fireworks
-
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    Inference,
-    LogProbConfig,
-    ResponseFormat,
-    ResponseFormatType,
-    SamplingParams,
-)
-from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import (
-    ModelRegistryHelper,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    convert_message_to_openai_dict,
-    get_sampling_options,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_prompt,
-    request_has_media,
-)
 
 from .config import FireworksImplConfig
 
 logger = get_logger(name=__name__, category="inference::fireworks")
 
 
-class FireworksInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
-    embedding_model_metadata = {
+class FireworksInferenceAdapter(OpenAIMixin):
+    config: FireworksImplConfig
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
         "accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
     }
 
-    def __init__(self, config: FireworksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
-        self.config = config
-        self.allowed_models = config.allowed_models
-
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        pass
+    provider_data_api_key_field: str = "fireworks_api_key"
 
     def get_api_key(self) -> str:
-        config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
-        if config_api_key:
-            return config_api_key
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.fireworks_api_key:
-                raise ValueError(
-                    'Pass Fireworks API Key in the header X-LlamaStack-Provider-Data as { "fireworks_api_key": <your api key>}'
-                )
-            return provider_data.fireworks_api_key
+        return self.config.api_key.get_secret_value() if self.config.api_key else None  # type: ignore[return-value]
 
     def get_base_url(self) -> str:
         return "https://api.fireworks.ai/inference/v1"
-
-    def _get_client(self) -> Fireworks:
-        fireworks_api_key = self.get_api_key()
-        return Fireworks(api_key=fireworks_api_key)
-
-    def _build_options(
-        self,
-        sampling_params: SamplingParams | None,
-        fmt: ResponseFormat | None,
-        logprobs: LogProbConfig | None,
-    ) -> dict:
-        options = get_sampling_options(sampling_params)
-        options.setdefault("max_tokens", 512)
-
-        if fmt:
-            if fmt.type == ResponseFormatType.json_schema.value:
-                options["response_format"] = {
-                    "type": "json_object",
-                    "schema": fmt.json_schema,
-                }
-            elif fmt.type == ResponseFormatType.grammar.value:
-                options["response_format"] = {
-                    "type": "grammar",
-                    "grammar": fmt.bnf,
-                }
-            else:
-                raise ValueError(f"Unknown response format {fmt.type}")
-
-        if logprobs and logprobs.top_k:
-            options["logprobs"] = logprobs.top_k
-            if options["logprobs"] <= 0 or options["logprobs"] >= 5:
-                raise ValueError("Required range: 0 < top_k < 5")
-
-        return options
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
-        media_present = request_has_media(request)
-
-        llama_model = self.get_llama_model(request.model)
-        # TODO: tools are never added to the request, so we need to add them here
-        if media_present or not llama_model:
-            input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
-        else:
-            input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
-
-        # Fireworks always prepends with BOS
-        if "prompt" in input_dict:
-            if input_dict["prompt"].startswith("<|begin_of_text|>"):
-                input_dict["prompt"] = input_dict["prompt"][len("<|begin_of_text|>") :]
-
-        params = {
-            "model": request.model,
-            **input_dict,
-            "stream": bool(request.stream),
-            **self._build_options(request.sampling_params, request.response_format, request.logprobs),
-        }
-        logger.debug(f"params to fireworks: {params}")
-
-        return params
diff --git a/llama_stack/providers/remote/inference/gemini/__init__.py b/llama_stack/providers/remote/inference/gemini/__init__.py
index bda2f52d4..5e2ed2d1a 100644
--- a/llama_stack/providers/remote/inference/gemini/__init__.py
+++ b/llama_stack/providers/remote/inference/gemini/__init__.py
@@ -10,6 +10,6 @@ from .config import GeminiConfig
 async def get_adapter_impl(config: GeminiConfig, _deps):
     from .gemini import GeminiInferenceAdapter
 
-    impl = GeminiInferenceAdapter(config)
+    impl = GeminiInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/gemini/gemini.py b/llama_stack/providers/remote/inference/gemini/gemini.py
index 30ceedff0..ea7219a59 100644
--- a/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -4,33 +4,21 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import GeminiConfig
 
 
-class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
-    embedding_model_metadata = {
+class GeminiInferenceAdapter(OpenAIMixin):
+    config: GeminiConfig
+
+    provider_data_api_key_field: str = "gemini_api_key"
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "text-embedding-004": {"embedding_dimension": 768, "context_length": 2048},
     }
 
-    def __init__(self, config: GeminiConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="gemini",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="gemini_api_key",
-        )
-        self.config = config
-
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self):
         return "https://generativelanguage.googleapis.com/v1beta/openai/"
-
-    async def initialize(self) -> None:
-        await super().initialize()
-
-    async def shutdown(self) -> None:
-        await super().shutdown()
diff --git a/llama_stack/providers/remote/inference/groq/__init__.py b/llama_stack/providers/remote/inference/groq/__init__.py
index cca333ccf..b22bd6385 100644
--- a/llama_stack/providers/remote/inference/groq/__init__.py
+++ b/llama_stack/providers/remote/inference/groq/__init__.py
@@ -11,5 +11,5 @@ async def get_adapter_impl(config: GroqConfig, _deps):
     # import dynamically so the import is used only when it is needed
     from .groq import GroqInferenceAdapter
 
-    adapter = GroqInferenceAdapter(config)
+    adapter = GroqInferenceAdapter(config=config)
     return adapter
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index e449f2005..21b37de36 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -6,30 +6,16 @@
 
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 
-class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
-    _config: GroqConfig
+class GroqInferenceAdapter(OpenAIMixin):
+    config: GroqConfig
 
-    def __init__(self, config: GroqConfig):
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="groq",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="groq_api_key",
-        )
-        self.config = config
+    provider_data_api_key_field: str = "groq_api_key"
 
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self) -> str:
         return f"{self.config.url}/openai/v1"
-
-    async def initialize(self):
-        await super().initialize()
-
-    async def shutdown(self):
-        await super().shutdown()
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py b/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
index be48d1067..8859903e3 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
@@ -4,14 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
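The adapters in this diff build their base URLs in different ways: Azure calls `urljoin(base, "/openai/v1")`, Cerebras calls `urljoin(base, "v1")`, while Gemini and Groq above use plain f-strings. `urljoin` has subtle semantics worth keeping in mind when reviewing these choices:

```python
# Demonstration of urljoin behavior relevant to the get_base_url() implementations.
from urllib.parse import urljoin

assert urljoin("https://api.cerebras.ai/", "v1") == "https://api.cerebras.ai/v1"
assert urljoin("https://host/base", "v1") == "https://host/v1"        # last segment replaced
assert urljoin("https://host/base/", "v1") == "https://host/base/v1"  # trailing slash preserves it
assert urljoin("https://host/base/", "/openai/v1") == "https://host/openai/v1"  # absolute path discards base path
```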
 
-from llama_stack.apis.inference import InferenceProvider
-
 from .config import LlamaCompatConfig
 
 
-async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
+async def get_adapter_impl(config: LlamaCompatConfig, _deps):
     # import dynamically so the import is used only when it is needed
     from .llama import LlamaCompatInferenceAdapter
 
-    adapter = LlamaCompatInferenceAdapter(config)
+    adapter = LlamaCompatInferenceAdapter(config=config)
     return adapter
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 74507cb7a..403680668 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -8,38 +8,21 @@ from typing import Any
 
 from llama_stack.apis.inference.inference import OpenAICompletion
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
 
 
-class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+class LlamaCompatInferenceAdapter(OpenAIMixin):
+    config: LlamaCompatConfig
+
+    provider_data_api_key_field: str = "llama_api_key"
     """
     Llama API Inference Adapter for Llama Stack.
-
-    Note: The inheritance order is important here. OpenAIMixin must come before
-    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
-    is used instead of ModelRegistryHelper.check_model_availability().
-
-    - OpenAIMixin.check_model_availability() queries the Llama API to check if a model exists
-    - ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
     """
 
-    _config: LlamaCompatConfig
-
-    def __init__(self, config: LlamaCompatConfig):
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="meta_llama",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="llama_api_key",
-            openai_compat_api_base=config.openai_compat_api_base,
-        )
-        self.config = config
-
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self) -> str:
         """
@@ -49,12 +32,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
         """
         return self.config.openai_compat_api_base
 
-    async def initialize(self):
-        await super().initialize()
-
-    async def shutdown(self):
-        await super().shutdown()
-
     async def openai_completion(
         self,
         model: str,
diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/llama_stack/providers/remote/inference/nvidia/__init__.py
index 9c537d448..1869cb748 100644
--- a/llama_stack/providers/remote/inference/nvidia/__init__.py
+++ b/llama_stack/providers/remote/inference/nvidia/__init__.py
@@ -15,7 +15,8 @@ async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:
     if not isinstance(config, NVIDIAConfig):
         raise RuntimeError(f"Unexpected config type: {type(config)}")
 
-    adapter = NVIDIAInferenceAdapter(config)
+    adapter = NVIDIAInferenceAdapter(config=config)
+
     await adapter.initialize()
 
     return adapter
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 2e6c3d769..7a2697327 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,7 +8,6 @@
 from openai import NOT_GIVEN
 
 from llama_stack.apis.inference import (
-    Inference,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
@@ -22,7 +21,9 @@ from .utils import _is_nvidia_hosted
 logger = get_logger(name=__name__, category="inference::nvidia")
 
 
-class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
+class NVIDIAInferenceAdapter(OpenAIMixin):
+    config: NVIDIAConfig
+
     """
     NVIDIA Inference Adapter for Llama Stack.
 
@@ -37,32 +38,21 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
     """
 
     # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
-    embedding_model_metadata = {
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
         "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
         "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
         "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
     }
 
-    def __init__(self, config: NVIDIAConfig) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
+    async def initialize(self) -> None:
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
 
-        if _is_nvidia_hosted(config):
-            if not config.api_key:
+        if _is_nvidia_hosted(self.config):
+            if not self.config.api_key:
                 raise RuntimeError(
                     "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
                 )
-        # elif self._config.api_key:
-        #
-        # we don't raise this warning because a user may have deployed their
-        # self-hosted NIM with an API key requirement.
-        #
-        # warnings.warn(
-        #     "API key is not required for self-hosted NVIDIA NIM. "
-        #     "Consider removing the api_key from the configuration."
-        # )
-
-        self._config = config
 
     def get_api_key(self) -> str:
         """
@@ -70,7 +60,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
 
         :return: The NVIDIA API key
         """
-        return self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"
+        return self.config.api_key.get_secret_value() if self.config.api_key else "NO KEY"
 
     def get_base_url(self) -> str:
         """
@@ -78,7 +68,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
 
         :return: The NVIDIA API base URL
         """
-        return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url
+        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
 
     async def openai_embeddings(
         self,
diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/llama_stack/providers/remote/inference/ollama/__init__.py
index 491339451..3de84a2c7 100644
--- a/llama_stack/providers/remote/inference/ollama/__init__.py
+++ b/llama_stack/providers/remote/inference/ollama/__init__.py
@@ -10,6 +10,6 @@ from .config import OllamaImplConfig
 async def get_adapter_impl(config: OllamaImplConfig, _deps):
     from .ollama import OllamaInferenceAdapter
 
-    impl = OllamaInferenceAdapter(config)
+    impl = OllamaInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index de55c1b58..e5b08997c 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -6,58 +6,29 @@
 
 import asyncio
-from typing import Any
 
 from ollama import AsyncClient as AsyncOllamaClient
 
-from llama_stack.apis.common.content_types import (
-    ImageContentItem,
-    TextContentItem,
-)
 from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    GrammarResponseFormat,
-    InferenceProvider,
-    JsonSchemaResponseFormat,
-    Message,
-)
 from llama_stack.apis.models import Model
 from llama_stack.log import get_logger
-from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.datatypes import (
     HealthResponse,
     HealthStatus,
-    ModelsProtocolPrivate,
 )
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.utils.inference.model_registry import (
-    ModelRegistryHelper,
-    build_hf_repo_model_entry,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    get_sampling_options,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_prompt,
-    convert_image_content_to_url,
-    request_has_media,
-)
 
 logger = get_logger(name=__name__, category="inference::ollama")
 
 
-class OllamaInferenceAdapter(
-    OpenAIMixin,
-    ModelRegistryHelper,
-    InferenceProvider,
-    ModelsProtocolPrivate,
-):
+class OllamaInferenceAdapter(OpenAIMixin):
+    config: OllamaImplConfig
+
     # automatically set by the resolver when instantiating the provider
     __provider_id__: str
 
-    embedding_model_metadata = {
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "all-minilm:l6-v2": {
             "embedding_dimension": 384,
             "context_length": 512,
@@ -76,29 +47,8 @@ class OllamaInferenceAdapter(
         },
     }
 
-    def __init__(self, config: OllamaImplConfig) -> None:
-        # TODO: remove ModelRegistryHelper.__init__ when completion and
-        # chat_completion are. this exists to satisfy the input /
-        # output processing for llama models. specifically,
-        # tool_calling is handled by raw template processing,
-        # instead of using the /api/chat endpoint w/ tools=...
-        ModelRegistryHelper.__init__(
-            self,
-            model_entries=[
-                build_hf_repo_model_entry(
-                    "llama3.2:3b-instruct-fp16",
-                    CoreModelId.llama3_2_3b_instruct.value,
-                ),
-                build_hf_repo_model_entry(
-                    "llama-guard3:1b",
-                    CoreModelId.llama_guard_3_1b.value,
-                ),
-            ],
-        )
-        self.config = config
-        # Ollama does not support image urls, so we need to download the image and convert it to base64
-        self.download_images = True
-        self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
+    download_images: bool = True
+    _clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
 
     @property
     def ollama_client(self) -> AsyncOllamaClient:
@@ -142,50 +92,6 @@ class OllamaInferenceAdapter(
     async def shutdown(self) -> None:
         self._clients.clear()
 
-    async def _get_model(self, model_id: str) -> Model:
-        if not self.model_store:
-            raise ValueError("Model store not set")
-        return await self.model_store.get_model(model_id)
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        sampling_options = get_sampling_options(request.sampling_params)
-        # This is needed since the Ollama API expects num_predict to be set
-        # for early truncation instead of max_tokens.
-        if sampling_options.get("max_tokens") is not None:
-            sampling_options["num_predict"] = sampling_options["max_tokens"]
-
-        input_dict: dict[str, Any] = {}
-        media_present = request_has_media(request)
-        llama_model = self.get_llama_model(request.model)
-        if media_present or not llama_model:
-            contents = [await convert_message_to_openai_dict_for_ollama(m) for m in request.messages]
-            # flatten the list of lists
-            input_dict["messages"] = [item for sublist in contents for item in sublist]
-        else:
-            input_dict["raw"] = True
-            input_dict["prompt"] = await chat_completion_request_to_prompt(
-                request,
-                llama_model,
-            )
-
-        if fmt := request.response_format:
-            if isinstance(fmt, JsonSchemaResponseFormat):
-                input_dict["format"] = fmt.json_schema
-            elif isinstance(fmt, GrammarResponseFormat):
-                raise NotImplementedError("Grammar response format is not supported")
-            else:
-                raise ValueError(f"Unknown response format type: {fmt.type}")
-
-        params = {
-            "model": request.model,
-            **input_dict,
-            "options": sampling_options,
-            "stream": request.stream,
-        }
-        logger.debug(f"params to ollama: {params}")
-
-        return params
-
     async def register_model(self, model: Model) -> Model:
         if await self.check_model_availability(model.provider_model_id):
             return model
@@ -197,24 +103,3 @@ class OllamaInferenceAdapter(
             return model
 
         raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
-
-
-async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
-    async def _convert_content(content) -> dict:
-        if isinstance(content, ImageContentItem):
-            return {
-                "role": message.role,
-                "images": [await convert_image_content_to_url(content, download=True, include_format=False)],
-            }
-        else:
-            text = content.text if isinstance(content, TextContentItem) else content
-            assert isinstance(text, str)
-            return {
-                "role": message.role,
-                "content": text,
-            }
-
-    if isinstance(message.content, list):
-        return [await _convert_content(c) for c in message.content]
-    else:
-        return [await _convert_content(message.content)]
diff --git a/llama_stack/providers/remote/inference/openai/__init__.py b/llama_stack/providers/remote/inference/openai/__init__.py
index bd3daeb9a..52cd1f8c3 100644
--- a/llama_stack/providers/remote/inference/openai/__init__.py
+++ b/llama_stack/providers/remote/inference/openai/__init__.py
@@ -10,6 +10,6 @@ from .config import OpenAIConfig
 async def get_adapter_impl(config: OpenAIConfig, _deps):
     from .openai import OpenAIInferenceAdapter
 
-    impl = OpenAIInferenceAdapter(config)
+    impl = OpenAIInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py
index 9b341ede2..f68e8f9d6 100644
--- a/llama_stack/providers/remote/inference/openai/openai.py
+++ b/llama_stack/providers/remote/inference/openai/openai.py
@@ -5,7 +5,6 @@
 # the root directory of this source tree.
 
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import OpenAIConfig
@@ -14,52 +13,24 @@ logger = get_logger(name=__name__, category="inference::openai")
 
 #
-# This OpenAI adapter implements Inference methods using two mixins -
+# This OpenAI adapter implements Inference methods using OpenAIMixin
 #
-# | Inference Method           | Implementation Source    |
-# |----------------------------|--------------------------|
-# | completion                 | LiteLLMOpenAIMixin       |
-# | chat_completion            | LiteLLMOpenAIMixin       |
-# | embedding                  | LiteLLMOpenAIMixin       |
-# | openai_completion          | OpenAIMixin              |
-# | openai_chat_completion     | OpenAIMixin              |
-# | openai_embeddings          | OpenAIMixin              |
-#
-class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+class OpenAIInferenceAdapter(OpenAIMixin):
     """
     OpenAI Inference Adapter for Llama Stack.
-
-    Note: The inheritance order is important here. OpenAIMixin must come before
-    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
-    is used instead of ModelRegistryHelper.check_model_availability().
-
-    - OpenAIMixin.check_model_availability() queries the OpenAI API to check if a model exists
-    - ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
     """
 
-    embedding_model_metadata = {
+    config: OpenAIConfig
+
+    provider_data_api_key_field: str = "openai_api_key"
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
         "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
     }
 
-    def __init__(self, config: OpenAIConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="openai",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="openai_api_key",
-        )
-        self.config = config
-        # we set is_openai_compat so users can use the canonical
-        # openai model names like "gpt-4" or "gpt-3.5-turbo"
-        # and the model name will be translated to litellm's
-        # "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently.
-        # if we do not set this, users will be exposed to the
-        # litellm specific model names, an abstraction leak.
-        self.is_openai_compat = True
-
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self) -> str:
         """
@@ -68,9 +39,3 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
         Returns the OpenAI API base URL from the configuration.
""" return self.config.base_url - - async def initialize(self) -> None: - await super().initialize() - - async def shutdown(self) -> None: - await super().shutdown() diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e0ddb237e..01078760a 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -31,12 +31,6 @@ class PassthroughInferenceAdapter(Inference): ModelRegistryHelper.__init__(self) self.config = config - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - pass - async def unregister_model(self, model_id: str) -> None: pass diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 1c99182ea..08652f8c0 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -53,12 +53,6 @@ class RunpodInferenceAdapter( ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS) self.config = config - async def initialize(self) -> None: - return - - async def shutdown(self) -> None: - pass - def _get_params(self, request: ChatCompletionRequest) -> dict: return { "model": self.map_to_provider_model(request.model), diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/llama_stack/providers/remote/inference/sambanova/__init__.py index 2a5448041..12508f7cb 100644 --- a/llama_stack/providers/remote/inference/sambanova/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova/__init__.py @@ -11,6 +11,6 @@ async def get_adapter_impl(config: SambaNovaImplConfig, _deps): from .sambanova import SambaNovaInferenceAdapter assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}" - impl = SambaNovaInferenceAdapter(config) + impl = SambaNovaInferenceAdapter(config=config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index 4d8fd11cd..f30bab780 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -5,39 +5,22 @@ # the root directory of this source tree. -from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import SambaNovaImplConfig -class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin): +class SambaNovaInferenceAdapter(OpenAIMixin): + config: SambaNovaImplConfig + + provider_data_api_key_field: str = "sambanova_api_key" + download_images: bool = True # SambaNova does not support image downloads server-size, perform them on the client """ SambaNova Inference Adapter for Llama Stack. - - Note: The inheritance order is important here. OpenAIMixin must come before - LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability() - is used instead of LiteLLMOpenAIMixin.check_model_availability(). 
-
-    - OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
-    - LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
     """
 
-    def __init__(self, config: SambaNovaImplConfig):
-        self.config = config
-        self.environment_available_models: list[str] = []
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="sambanova",
-            api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None,
-            provider_data_api_key_field="sambanova_api_key",
-            openai_compat_api_base=self.config.url,
-            download_images=True,  # SambaNova requires base64 image encoding
-            json_schema_strict=False,  # SambaNova doesn't support strict=True yet
-        )
-
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key.get_secret_value() if self.config.api_key else ""
 
     def get_base_url(self) -> str:
         """
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 0bb56da2b..53c872c02 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -5,53 +5,21 @@
 # the root directory of this source tree.
 
 
+from collections.abc import Iterable
+
 from huggingface_hub import AsyncInferenceClient, HfApi
 from pydantic import SecretStr
 
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    Inference,
-    OpenAIEmbeddingsResponse,
-    ResponseFormat,
-    ResponseFormatType,
-    SamplingParams,
-)
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.inference import OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
-from llama_stack.models.llama.sku_list import all_registered_models
-from llama_stack.providers.utils.inference.model_registry import (
-    ModelRegistryHelper,
-    build_hf_repo_model_entry,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    get_sampling_options,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_model_input_info,
-)
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
 
 log = get_logger(name=__name__, category="inference::tgi")
 
 
-def build_hf_repo_model_entries():
-    return [
-        build_hf_repo_model_entry(
-            model.huggingface_repo,
-            model.descriptor(),
-        )
-        for model in all_registered_models()
-        if model.huggingface_repo
-    ]
-
-
-class _HfAdapter(
-    OpenAIMixin,
-    Inference,
-):
+class _HfAdapter(OpenAIMixin):
     url: str
     api_key: SecretStr
 
@@ -61,90 +29,14 @@ class _HfAdapter(
 
     overwrite_completion_id = True  # TGI always returns id=""
 
-    def __init__(self) -> None:
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
-        self.huggingface_repo_to_llama_model_id = {
-            model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
-        }
-
     def get_api_key(self):
         return self.api_key.get_secret_value()
 
     def get_base_url(self):
         return self.url
 
-    async def shutdown(self) -> None:
-        pass
-
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for model in self.client.models.list():
-            models.append(
-                Model(
-                    identifier=model.id,
-                    provider_resource_id=model.id,
-                    provider_id=self.__provider_id__,
-                    metadata={},
-                    model_type=ModelType.llm,
-                )
-            )
-        return models
-
-    async def register_model(self, model: Model) -> Model:
-        if model.provider_resource_id != self.model_id:
-            raise ValueError(
-                f"Model {model.provider_resource_id} does not match the model {self.model_id} served by TGI."
-            )
-        return model
-
-    async def unregister_model(self, model_id: str) -> None:
-        pass
-
-    def _get_max_new_tokens(self, sampling_params, input_tokens):
-        return min(
-            sampling_params.max_tokens or (self.max_tokens - input_tokens),
-            self.max_tokens - input_tokens - 1,
-        )
-
-    def _build_options(
-        self,
-        sampling_params: SamplingParams | None = None,
-        fmt: ResponseFormat = None,
-    ):
-        options = get_sampling_options(sampling_params)
-        # TGI does not support temperature=0 when using greedy sampling
-        # We set it to 1e-3 instead, anything lower outputs garbage from TGI
-        # We can use top_p sampling strategy to specify lower temperature
-        if abs(options["temperature"]) < 1e-10:
-            options["temperature"] = 1e-3
-
-        # delete key "max_tokens" from options since its not supported by the API
-        options.pop("max_tokens", None)
-        if fmt:
-            if fmt.type == ResponseFormatType.json_schema.value:
-                options["grammar"] = {
-                    "type": "json",
-                    "value": fmt.json_schema,
-                }
-            elif fmt.type == ResponseFormatType.grammar.value:
-                raise ValueError("Grammar response format not supported yet")
-            else:
-                raise ValueError(f"Unexpected response format: {fmt.type}")
-
-        return options
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        prompt, input_tokens = await chat_completion_request_to_model_input_info(
-            request, self.register_helper.get_llama_model(request.model)
-        )
-        return dict(
-            prompt=prompt,
-            stream=request.stream,
-            details=True,
-            max_new_tokens=self._get_max_new_tokens(request.sampling_params, input_tokens),
-            stop_sequences=["<|eom_id|>", "<|eot_id|>"],
-            **self._build_options(request.sampling_params, request.response_format),
-        )
+    async def get_models(self) -> Iterable[str] | None:
+        return [self.model_id]
 
     async def openai_embeddings(
         self,
diff --git a/llama_stack/providers/remote/inference/together/__init__.py b/llama_stack/providers/remote/inference/together/__init__.py
index 8ba84bbd1..fca6859de 100644
--- a/llama_stack/providers/remote/inference/together/__init__.py
+++ b/llama_stack/providers/remote/inference/together/__init__.py
@@ -17,6 +17,6 @@ async def get_adapter_impl(config: TogetherImplConfig, _deps):
     from .together import TogetherInferenceAdapter
 
     assert isinstance(config, TogetherImplConfig), f"Unexpected config type: {type(config)}"
-    impl = TogetherInferenceAdapter(config)
+    impl = TogetherInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 6f7a19743..d19e85f09 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -5,41 +5,29 @@
 # the root directory of this source tree.
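`_HfAdapter.get_models` now reports only the single model the TGI endpoint serves, and `overwrite_completion_id = True` compensates for TGI always returning `id=""` on completions. A hypothetical sketch of what such a flag might do downstream (an assumption about the mixin, not its actual code):

```python
# Hypothetical helper illustrating the assumed effect of overwrite_completion_id:
# substitute a client-side unique id when the server returns an empty one.
import uuid


def ensure_completion_id(completion_id: str, overwrite: bool) -> str:
    if overwrite or not completion_id:
        return f"chatcmpl-{uuid.uuid4()}"
    return completion_id


assert ensure_completion_id("", overwrite=True).startswith("chatcmpl-")
```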
 
-from openai import AsyncOpenAI
+from collections.abc import Iterable
+
 from together import AsyncTogether
 from together.constants import BASE_URL
 
 from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    Inference,
-    LogProbConfig,
     OpenAIEmbeddingsResponse,
-    ResponseFormat,
-    ResponseFormatType,
-    SamplingParams,
 )
 from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model, ModelType
+from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-from llama_stack.providers.utils.inference.openai_compat import (
-    convert_message_to_openai_dict,
-    get_sampling_options,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    chat_completion_request_to_prompt,
-    request_has_media,
-)
 
 from .config import TogetherImplConfig
 
 logger = get_logger(name=__name__, category="inference::together")
 
 
-class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
-    embedding_model_metadata = {
+class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
+    config: TogetherImplConfig
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
         "BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
         "BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
@@ -47,24 +35,16 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
         "intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
     }
 
-    def __init__(self, config: TogetherImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
-        self.config = config
-        self.allowed_models = config.allowed_models
-        self._model_cache: dict[str, Model] = {}
+    _model_cache: dict[str, Model] = {}
+
+    provider_data_api_key_field: str = "together_api_key"
 
     def get_api_key(self):
-        return self.config.api_key.get_secret_value()
+        return self.config.api_key.get_secret_value() if self.config.api_key else None
 
     def get_base_url(self):
         return BASE_URL
 
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        pass
-
     def _get_client(self) -> AsyncTogether:
         together_api_key = None
         config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
@@ -79,83 +59,9 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
             together_api_key = provider_data.together_api_key
         return AsyncTogether(api_key=together_api_key)
 
-    def _get_openai_client(self) -> AsyncOpenAI:
-        together_client = self._get_client().client
-        return AsyncOpenAI(
-            base_url=together_client.base_url,
-            api_key=together_client.api_key,
-        )
-
-    def _build_options(
-        self,
-        sampling_params: SamplingParams | None,
-        logprobs: LogProbConfig | None,
-        fmt: ResponseFormat,
-    ) -> dict:
-        options = get_sampling_options(sampling_params)
-        if fmt:
-            if fmt.type == ResponseFormatType.json_schema.value:
-                options["response_format"] = {
-                    "type": "json_object",
-                    "schema": fmt.json_schema,
-                }
-            elif fmt.type == ResponseFormatType.grammar.value:
-                raise NotImplementedError("Grammar response format not supported yet")
-            else:
-                raise ValueError(f"Unknown response format {fmt.type}")
-
-        if logprobs and logprobs.top_k:
-            if logprobs.top_k != 1:
-                raise ValueError(
-                    f"Unsupported value: Together only supports logprobs top_k=1. {logprobs.top_k} was provided",
-                )
-            options["logprobs"] = 1
-
-        return options
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
-        media_present = request_has_media(request)
-        llama_model = self.get_llama_model(request.model)
-        if media_present or not llama_model:
-            input_dict["messages"] = [await convert_message_to_openai_dict(m) for m in request.messages]
-        else:
-            input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
-
-        params = {
-            "model": request.model,
-            **input_dict,
-            "stream": request.stream,
-            **self._build_options(request.sampling_params, request.logprobs, request.response_format),
-        }
-        logger.debug(f"params to together: {params}")
-        return params
-
-    async def list_models(self) -> list[Model] | None:
-        self._model_cache = {}
+    async def get_models(self) -> Iterable[str] | None:
         # Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
-        for m in await self._get_client().models.list():
-            if m.type == "embedding":
-                if m.id not in self.embedding_model_metadata:
-                    logger.warning(f"Unknown embedding dimension for model {m.id}, skipping.")
-                    continue
-                metadata = self.embedding_model_metadata[m.id]
-                self._model_cache[m.id] = Model(
-                    provider_id=self.__provider_id__,
-                    provider_resource_id=m.id,
-                    identifier=m.id,
-                    model_type=ModelType.embedding,
-                    metadata=metadata,
-                )
-            else:
-                self._model_cache[m.id] = Model(
-                    provider_id=self.__provider_id__,
-                    provider_resource_id=m.id,
-                    identifier=m.id,
-                    model_type=ModelType.llm,
-                )
-
-        return self._model_cache.values()
+        return [m.id for m in await self._get_client().models.list()]
 
     async def should_refresh_models(self) -> bool:
         return True
@@ -203,4 +109,4 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
         )
         response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
 
-        return response
+        return response  # type: ignore[no-any-return]
diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/llama_stack/providers/remote/inference/vertexai/__init__.py
index d9e9419be..05ce6776e 100644
--- a/llama_stack/providers/remote/inference/vertexai/__init__.py
+++ b/llama_stack/providers/remote/inference/vertexai/__init__.py
@@ -10,6 +10,6 @@ from .config import VertexAIConfig
 async def get_adapter_impl(config: VertexAIConfig, _deps):
     from .vertexai import VertexAIInferenceAdapter
 
-    impl = VertexAIInferenceAdapter(config)
+    impl = VertexAIInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/llama_stack/providers/remote/inference/vertexai/vertexai.py
index 770d21a2a..647c8c752 100644
--- a/llama_stack/providers/remote/inference/vertexai/vertexai.py
+++ b/llama_stack/providers/remote/inference/vertexai/vertexai.py
@@ -4,29 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
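The removed `list_models` classified Together models inline and skipped embedding models that lacked metadata; the new `get_models` returns bare ids, presumably leaving classification to the mixin via `embedding_model_metadata`. A sketch of that assumed split (not OpenAIMixin's actual code):

```python
# Hypothetical classification step a mixin could apply to the bare ids
# returned by get_models(), mirroring the removed inline logic.
def classify_models(ids: list[str], embedding_model_metadata: dict[str, dict[str, int]]) -> dict[str, str]:
    kinds: dict[str, str] = {}
    for model_id in ids:
        # ids with known embedding metadata become embedding models,
        # everything else defaults to an LLM
        kinds[model_id] = "embedding" if model_id in embedding_model_metadata else "llm"
    return kinds


metadata = {"BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512}}
assert classify_models(["BAAI/bge-base-en-v1.5", "meta-llama/Llama-3-8b"], metadata) == {
    "BAAI/bge-base-en-v1.5": "embedding",
    "meta-llama/Llama-3-8b": "llm",
}
```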
-from typing import Any

 import google.auth.transport.requests
 from google.auth import default

-from llama_stack.apis.inference import ChatCompletionRequest
-from llama_stack.providers.utils.inference.litellm_openai_mixin import (
-    LiteLLMOpenAIMixin,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import VertexAIConfig


-class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
-    def __init__(self, config: VertexAIConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="vertex_ai",
-            api_key_from_config=None,  # Vertex AI uses ADC, not API keys
-            provider_data_api_key_field="vertex_project",  # Use project for validation
-        )
-        self.config = config
+class VertexAIInferenceAdapter(OpenAIMixin):
+    config: VertexAIConfig
+
+    provider_data_api_key_field: str = "vertex_project"

     def get_api_key(self) -> str:
         """
@@ -41,8 +31,7 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
             credentials.refresh(google.auth.transport.requests.Request())
             return str(credentials.token)
         except Exception:
-            # If we can't get credentials, return empty string to let LiteLLM handle it
-            # This allows the LiteLLM mixin to work with ADC directly
+            # If we can't get credentials, return an empty string and fall back to ADC directly
             return ""

     def get_base_url(self) -> str:
@@ -53,23 +42,3 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
         Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
         """
         return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
-        # Get base parameters from parent
-        params = await super()._get_params(request)
-
-        # Add Vertex AI specific parameters
-        provider_data = self.get_request_provider_data()
-        if provider_data:
-            if getattr(provider_data, "vertex_project", None):
-                params["vertex_project"] = provider_data.vertex_project
-            if getattr(provider_data, "vertex_location", None):
-                params["vertex_location"] = provider_data.vertex_location
-        else:
-            params["vertex_project"] = self.config.project
-            params["vertex_location"] = self.config.location
-
-        # Remove api_key since Vertex AI uses ADC
-        params.pop("api_key", None)
-
-        return params
diff --git a/llama_stack/providers/remote/inference/vllm/__init__.py b/llama_stack/providers/remote/inference/vllm/__init__.py
index 1f196e507..3f5c17026 100644
--- a/llama_stack/providers/remote/inference/vllm/__init__.py
+++ b/llama_stack/providers/remote/inference/vllm/__init__.py
@@ -17,6 +17,6 @@ async def get_adapter_impl(config: VLLMInferenceAdapterConfig, _deps):
     from .vllm import VLLMInferenceAdapter

     assert isinstance(config, VLLMInferenceAdapterConfig), f"Unexpected config type: {type(config)}"
-    impl = VLLMInferenceAdapter(config)
+    impl = VLLMInferenceAdapter(config=config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 54ac8e1dc..31241213a 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -3,56 +3,27 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
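For context on the Vertex AI hunks above: `get_api_key()` does not return a static key but mints a short-lived OAuth token via Application Default Credentials, which the mixin then passes to the OpenAI-compatible Vertex endpoint built by `get_base_url()`. A standalone sketch of that flow (the scope and the project/location values are assumptions for illustration, not taken from the codebase):

```python
import google.auth.transport.requests
from google.auth import default
from openai import AsyncOpenAI

# Standard ADC flow: obtain credentials and refresh to get a bearer token.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

project, location = "my-gcp-project", "us-central1"  # hypothetical values
client = AsyncOpenAI(
    api_key=credentials.token,  # short-lived OAuth token, not a static API key
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi",
)
```

If token minting fails, the adapter returns an empty string and relies on ADC in the environment, rather than raising at construction time.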
-import json -from collections.abc import AsyncGenerator, AsyncIterator +from collections.abc import AsyncIterator from typing import Any from urllib.parse import urljoin import httpx -from openai import APIConnectionError from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) +from pydantic import ConfigDict -from llama_stack.apis.common.content_types import ( - TextDelta, - ToolCallDelta, - ToolCallParseStatus, -) from llama_stack.apis.inference import ( - ChatCompletionRequest, - ChatCompletionResponseEvent, - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, - GrammarResponseFormat, - Inference, - JsonSchemaResponseFormat, - ModelStore, OpenAIChatCompletion, OpenAIMessageParam, OpenAIResponseFormatParam, ToolChoice, - ToolDefinition, ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import ( HealthResponse, HealthStatus, - ModelsProtocolPrivate, -) -from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -from llama_stack.providers.utils.inference.model_registry import ( - ModelRegistryHelper, - build_hf_repo_model_entry, -) -from llama_stack.providers.utils.inference.openai_compat import ( - UnparseableToolCall, - convert_message_to_openai_dict, - convert_tool_call, - get_sampling_options, ) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -61,210 +32,15 @@ from .config import VLLMInferenceAdapterConfig log = get_logger(name=__name__, category="inference::vllm") -def build_hf_repo_model_entries(): - return [ - build_hf_repo_model_entry( - model.huggingface_repo, - model.descriptor(), - ) - for model in all_registered_models() - if model.huggingface_repo - ] +class VLLMInferenceAdapter(OpenAIMixin): + config: VLLMInferenceAdapterConfig + model_config = ConfigDict(arbitrary_types_allowed=True) -def _convert_to_vllm_tool_calls_in_response( - tool_calls, -) -> list[ToolCall]: - if not tool_calls: - return [] + provider_data_api_key_field: str = "vllm_api_token" - return [ - ToolCall( - call_id=call.id, - tool_name=call.function.name, - arguments=call.function.arguments, - ) - for call in tool_calls - ] - - -def _convert_to_vllm_tools_in_request(tools: list[ToolDefinition]) -> list[dict]: - compat_tools = [] - - for tool in tools: - # The tool.tool_name can be a str or a BuiltinTool enum. If - # it's the latter, convert to a string. 
- tool_name = tool.tool_name - if isinstance(tool_name, BuiltinTool): - tool_name = tool_name.value - - compat_tool = { - "type": "function", - "function": { - "name": tool_name, - "description": tool.description, - "parameters": tool.input_schema - or { - "type": "object", - "properties": {}, - "required": [], - }, - }, - } - - compat_tools.append(compat_tool) - - return compat_tools - - -def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason: - return { - "stop": StopReason.end_of_turn, - "length": StopReason.out_of_tokens, - "tool_calls": StopReason.end_of_message, - }.get(finish_reason, StopReason.end_of_turn) - - -def _process_vllm_chat_completion_end_of_stream( - finish_reason: str | None, - last_chunk_content: str | None, - current_event_type: ChatCompletionResponseEventType, - tool_call_bufs: dict[str, UnparseableToolCall] | None = None, -) -> list[OpenAIChatCompletionChunk]: - chunks = [] - - if finish_reason is not None: - stop_reason = _convert_to_vllm_finish_reason(finish_reason) - else: - stop_reason = StopReason.end_of_message - - tool_call_bufs = tool_call_bufs or {} - for _index, tool_call_buf in sorted(tool_call_bufs.items()): - args_str = tool_call_buf.arguments or "{}" - try: - chunks.append( - ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=current_event_type, - delta=ToolCallDelta( - tool_call=ToolCall( - call_id=tool_call_buf.call_id, - tool_name=tool_call_buf.tool_name, - arguments=args_str, - ), - parse_status=ToolCallParseStatus.succeeded, - ), - ) - ) - ) - except Exception as e: - log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}") - - chunks.append( - ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - tool_call=str(tool_call_buf), - parse_status=ToolCallParseStatus.failed, - ), - ) - ) - ) - - chunks.append( - ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=TextDelta(text=last_chunk_content or ""), - logprobs=None, - stop_reason=stop_reason, - ) - ) - ) - - return chunks - - -async def _process_vllm_chat_completion_stream_response( - stream: AsyncGenerator[OpenAIChatCompletionChunk, None], -) -> AsyncGenerator: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta=TextDelta(text=""), - ) - ) - event_type = ChatCompletionResponseEventType.progress - tool_call_bufs: dict[str, UnparseableToolCall] = {} - end_of_stream_processed = False - - async for chunk in stream: - if not chunk.choices: - log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.") - return - choice = chunk.choices[0] - if choice.delta.tool_calls: - for delta_tool_call in choice.delta.tool_calls: - tool_call = convert_tool_call(delta_tool_call) - if delta_tool_call.index not in tool_call_bufs: - tool_call_bufs[delta_tool_call.index] = UnparseableToolCall() - tool_call_buf = tool_call_bufs[delta_tool_call.index] - tool_call_buf.tool_name += str(tool_call.tool_name) - tool_call_buf.call_id += tool_call.call_id - tool_call_buf.arguments += ( - tool_call.arguments if isinstance(tool_call.arguments, str) else json.dumps(tool_call.arguments) - ) - if choice.finish_reason: - chunks = _process_vllm_chat_completion_end_of_stream( - finish_reason=choice.finish_reason, - last_chunk_content=choice.delta.content, - 
current_event_type=event_type, - tool_call_bufs=tool_call_bufs, - ) - for c in chunks: - yield c - end_of_stream_processed = True - elif not choice.delta.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=TextDelta(text=choice.delta.content or ""), - logprobs=None, - ) - ) - event_type = ChatCompletionResponseEventType.progress - - if end_of_stream_processed: - return - - # the stream ended without a chunk containing finish_reason - we have to generate the - # respective completion chunks manually - chunks = _process_vllm_chat_completion_end_of_stream( - finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_bufs=tool_call_bufs - ) - for c in chunks: - yield c - - -class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsProtocolPrivate): - # automatically set by the resolver when instantiating the provider - __provider_id__: str - model_store: ModelStore | None = None - - def __init__(self, config: VLLMInferenceAdapterConfig) -> None: - LiteLLMOpenAIMixin.__init__( - self, - model_entries=build_hf_repo_model_entries(), - litellm_provider_name="vllm", - api_key_from_config=config.api_token, - provider_data_api_key_field="vllm_api_token", - openai_compat_api_base=config.url, - ) - self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries()) - self.config = config - - get_api_key = LiteLLMOpenAIMixin.get_api_key + def get_api_key(self) -> str: + return self.config.api_token or "" def get_base_url(self) -> str: """Get the base URL from config.""" @@ -290,19 +66,13 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro Model( identifier=m.id, provider_resource_id=m.id, - provider_id=self.__provider_id__, + provider_id=self.__provider_id__, # type: ignore[attr-defined] metadata={}, model_type=model_type, ) ) return models - async def shutdown(self) -> None: - pass - - async def unregister_model(self, model_id: str) -> None: - pass - async def health(self) -> HealthResponse: """ Performs a health check by verifying connectivity to the remote vLLM server. @@ -324,63 +94,9 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def _get_model(self, model_id: str) -> Model: - if not self.model_store: - raise ValueError("Model store not set") - return await self.model_store.get_model(model_id) - def get_extra_client_params(self): return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)} - async def register_model(self, model: Model) -> Model: - try: - model = await self.register_helper.register_model(model) - except ValueError: - pass # Ignore statically unknown model, will check live listing - try: - res = self.client.models.list() - except APIConnectionError as e: - raise ValueError( - f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL." - ) from e - available_models = [m.id async for m in res] - if model.provider_resource_id not in available_models: - raise ValueError( - f"Model {model.provider_resource_id} is not being served by vLLM. 
" - f"Available models: {', '.join(available_models)}" - ) - return model - - async def _get_params(self, request: ChatCompletionRequest) -> dict: - options = get_sampling_options(request.sampling_params) - if "max_tokens" not in options: - options["max_tokens"] = self.config.max_tokens - - input_dict: dict[str, Any] = {} - # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM. - if isinstance(request, ChatCompletionRequest) and request.tools: - input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)} - - input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages] - - if fmt := request.response_format: - if isinstance(fmt, JsonSchemaResponseFormat): - input_dict["extra_body"] = {"guided_json": fmt.json_schema} - elif isinstance(fmt, GrammarResponseFormat): - raise NotImplementedError("Grammar response format not supported yet") - else: - raise ValueError(f"Unknown response format {fmt.type}") - - if request.logprobs and request.logprobs.top_k: - input_dict["logprobs"] = request.logprobs.top_k - - return { - "model": request.model, - **input_dict, - "stream": request.stream, - **options, - } - async def openai_chat_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 0557aff5f..fc58691e2 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -65,12 +65,6 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): self._project_id = self._config.project_id - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - pass - def _get_client(self, model_id) -> Model: config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None config_url = self._config.url diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 4354b067e..06eba09f4 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -7,10 +7,11 @@ import base64 import uuid from abc import ABC, abstractmethod -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Iterable from typing import Any from openai import NOT_GIVEN, AsyncOpenAI +from pydantic import BaseModel, ConfigDict from llama_stack.apis.inference import ( Model, @@ -26,14 +27,14 @@ from llama_stack.apis.inference import ( from llama_stack.apis.models import ModelType from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ModelsProtocolPrivate +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content logger = get_logger(name=__name__, category="providers::utils") -class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): +class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): """ Mixin class that provides OpenAI-specific functionality for inference providers. This class handles direct OpenAI API calls using the AsyncOpenAI client. 
@@ -42,12 +43,25 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): - get_api_key(): Method to retrieve the API key - get_base_url(): Method to retrieve the OpenAI-compatible API base URL + The behavior of this class can be customized by child classes in the following ways: + - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses + - download_images: If True, downloads images and converts to base64 for providers that require it + - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata + - provider_data_api_key_field: Optional field name in provider data to look for API key + - get_models: Method to list available models from the provider + - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client + Expected Dependencies: - self.model_store: Injected by the Llama Stack distribution system at runtime. This provides model registry functionality for looking up registered models. The model_store is set in routing_tables/common.py during provider initialization. """ + # Allow extra fields so the routing infra can inject model_store, __provider_id__, etc. + model_config = ConfigDict(extra="allow") + + config: RemoteInferenceProviderConfig + # Allow subclasses to control whether to overwrite the 'id' field in OpenAI responses # is overwritten with a client-side generated id. # @@ -73,9 +87,6 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): # Optional field name in provider data to look for API key, which takes precedence provider_data_api_key_field: str | None = None - # automatically set by the resolver when instantiating the provider - __provider_id__: str - @abstractmethod def get_api_key(self) -> str: """ @@ -111,6 +122,38 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): """ return {} + async def get_models(self) -> Iterable[str] | None: + """ + List available models from the provider. + + Child classes can override this method to provide a custom implementation + for listing models. The default implementation uses the AsyncOpenAI client + to list models from the OpenAI-compatible endpoint. + + :return: An iterable of model IDs or None if not implemented + """ + return None + + async def initialize(self) -> None: + """ + Initialize the OpenAI mixin. + + This method provides a default implementation that does nothing. + Subclasses can override this method to perform initialization tasks + such as setting up clients, validating configurations, etc. + """ + pass + + async def shutdown(self) -> None: + """ + Shutdown the OpenAI mixin. + + This method provides a default implementation that does nothing. + Subclasses can override this method to perform cleanup tasks + such as closing connections, releasing resources, etc. 
+ """ + pass + @property def client(self) -> AsyncOpenAI: """ @@ -371,7 +414,7 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): async def register_model(self, model: Model) -> Model: if not await self.check_model_availability(model.provider_model_id): - raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}") + raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}") # type: ignore[attr-defined] return model async def unregister_model(self, model_id: str) -> None: @@ -387,16 +430,34 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): """ self._model_cache = {} - async for m in self.client.models.list(): - if self.allowed_models and m.id not in self.allowed_models: - logger.info(f"Skipping model {m.id} as it is not in the allowed models list") + # give subclasses a chance to provide custom model listing + models_ids = [] + try: + if (iterable := await self.get_models()) is not None: # TODO: handle exceptions from get_models + models_ids = list(iterable) + logger.info( + f"Using {self.__class__.__name__}.get_models() implementation, received {len(models_ids)} models" + ) + for id_ in models_ids: + if not isinstance(id_, str): + raise ValueError(f"Model ID {id_} from get_models() is not a string") + except Exception as e: + logger.error(f"{self.__class__.__name__}.get_models() failed with: {e}") + raise + + if not models_ids: + models_ids = [m.id async for m in self.client.models.list()] + + for m_id in models_ids: + if self.allowed_models and m_id not in self.allowed_models: + logger.info(f"Skipping model {m_id} as it is not in the allowed models list") continue - if metadata := self.embedding_model_metadata.get(m.id): + if metadata := self.embedding_model_metadata.get(m_id): # This is an embedding model - augment with metadata model = Model( provider_id=self.__provider_id__, # type: ignore[attr-defined] - provider_resource_id=m.id, - identifier=m.id, + provider_resource_id=m_id, + identifier=m_id, model_type=ModelType.embedding, metadata=metadata, ) @@ -404,11 +465,11 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): # This is an LLM model = Model( provider_id=self.__provider_id__, # type: ignore[attr-defined] - provider_resource_id=m.id, - identifier=m.id, + provider_resource_id=m_id, + identifier=m_id, model_type=ModelType.llm, ) - self._model_cache[m.id] = model + self._model_cache[m_id] = model return list(self._model_cache.values()) @@ -425,3 +486,29 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): async def should_refresh_models(self) -> bool: return False + + # + # The model_dump implementations are to avoid serializing the extra fields, + # e.g. model_store, which are not pydantic. 
+ # + + def _filter_fields(self, **kwargs): + """Helper to exclude extra fields from serialization.""" + # Exclude any extra fields stored in __pydantic_extra__ + if hasattr(self, "__pydantic_extra__") and self.__pydantic_extra__: + exclude = kwargs.get("exclude", set()) + if not isinstance(exclude, set): + exclude = set(exclude) if exclude else set() + exclude.update(self.__pydantic_extra__.keys()) + kwargs["exclude"] = exclude + return kwargs + + def model_dump(self, **kwargs): + """Override to exclude extra fields from serialization.""" + kwargs = self._filter_fields(**kwargs) + return super().model_dump(**kwargs) + + def model_dump_json(self, **kwargs): + """Override to exclude extra fields from JSON serialization.""" + kwargs = self._filter_fields(**kwargs) + return super().model_dump_json(**kwargs) diff --git a/pyproject.toml b/pyproject.toml index fef765d66..5f086bd9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -278,14 +278,10 @@ exclude = [ "^llama_stack/providers/remote/datasetio/huggingface/", "^llama_stack/providers/remote/datasetio/nvidia/", "^llama_stack/providers/remote/inference/bedrock/", - "^llama_stack/providers/remote/inference/cerebras/", - "^llama_stack/providers/remote/inference/databricks/", - "^llama_stack/providers/remote/inference/fireworks/", "^llama_stack/providers/remote/inference/nvidia/", "^llama_stack/providers/remote/inference/passthrough/", "^llama_stack/providers/remote/inference/runpod/", "^llama_stack/providers/remote/inference/tgi/", - "^llama_stack/providers/remote/inference/together/", "^llama_stack/providers/remote/inference/watsonx/", "^llama_stack/providers/remote/safety/bedrock/", "^llama_stack/providers/remote/safety/nvidia/", diff --git a/tests/integration/recordings/responses/08f97e548c4b.json b/tests/integration/recordings/responses/08f97e548c4b.json new file mode 100644 index 000000000..1e4b27a18 --- /dev/null +++ b/tests/integration/recordings/responses/08f97e548c4b.json @@ -0,0 +1,710 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_6ah4hyex", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_6ah4hyex", + "content": "Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " was", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " unable", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " 
my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " search", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " Can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " help", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " something", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": " else", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-622", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759514972, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/0d3602bdeb33.json b/tests/integration/recordings/responses/0d3602bdeb33.json new file mode 100644 index 000000000..9e861bd3d --- /dev/null +++ b/tests/integration/recordings/responses/0d3602bdeb33.json @@ -0,0 +1,710 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_4gduxvhb", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_4gduxvhb", + "content": "Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "required", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " was", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " unable", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + 
"choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " search", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " Can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " help", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " something", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": " else", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-759", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759514982, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/11675efe359b.json b/tests/integration/recordings/responses/11675efe359b.json new file mode 100644 index 000000000..f2330afb5 --- /dev/null +++ b/tests/integration/recordings/responses/11675efe359b.json @@ -0,0 +1,57 @@ +{ + "request": { + "method": 
"POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'\n\nAssistant: I was unable to find the boiling point of polyjuice in my search. Can I help you with something else?\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." + } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-774", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759514987, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 447, + "total_tokens": 449, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/125f1f43f167.json b/tests/integration/recordings/responses/125f1f43f167.json new file mode 100644 index 000000000..d47a7d422 --- /dev/null +++ b/tests/integration/recordings/responses/125f1f43f167.json @@ -0,0 +1,3154 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant Always respond with tool calls no matter what. " + }, + { + "role": "user", + "content": "Get the boiling point of polyjuice with a tool call." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_laifztfo", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_laifztfo", + "content": "Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " apologize", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " error", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " It", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " seems", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "get", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "_bo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": 
"iling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "`", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " requires", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " argument", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "To", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " know", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": 
null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"chatcmpl-835", + "choices": [ + { + "delta": { + "content": " real", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " substance", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " its", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " cannot", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": 
null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " found", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " database", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " However", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + 
"object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " if", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " meant", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " ask", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 
0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " Poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " Potion", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " from", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " Harry", + "function_call": null, 
+ "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " Potter", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " series", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " tell", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { 
+ "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "'s", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " fictional", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " potion", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + 
} + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "If", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " could", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " more", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " context", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + 
"object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " clarify", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514974, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " referring", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " do", + "function_call": 
null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " best", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " assist", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " your", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": " question", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-835", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759514975, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/1b08c8e14202.json b/tests/integration/recordings/responses/1b08c8e14202.json new file mode 100644 index 000000000..8f2f3c53e --- /dev/null +++ b/tests/integration/recordings/responses/1b08c8e14202.json @@ -0,0 +1,103 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant Always respond with tool calls no matter what. " + }, + { + "role": "user", + "content": "Get the boiling point of polyjuice with a tool call." + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-707", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_laifztfo", + "function": { + "arguments": "{}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514973, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-707", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759514973, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/26b3819920f0.json b/tests/integration/recordings/responses/26b3819920f0.json new file mode 100644 index 000000000..7bb7a385d --- /dev/null +++ b/tests/integration/recordings/responses/26b3819920f0.json @@ -0,0 +1,1724 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_swism1x1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_swism1x1", + "content": "Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " was", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " unable", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " 
The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " get", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "_bo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " requires", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " argument", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " but", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " does", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " appear", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " have", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + 
"created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " If", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " meant", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " ask", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + 
"choices": [ + { + "delta": { + "content": " different", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " substance", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " let", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " know", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + 
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "'ll", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " do", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " best", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": " help", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-904", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759514988, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/31a87d74ea98.json b/tests/integration/recordings/responses/31a87d74ea98.json new file mode 100644 index 000000000..f5f5c9d51 --- /dev/null +++ b/tests/integration/recordings/responses/31a87d74ea98.json @@ -0,0 +1,108 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": { + "type": "function", + "function": { + "name": "get_boiling_point" + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-269", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_m61820zt", + "function": { + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514985, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-269", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759514985, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/3e8248d253fd.json b/tests/integration/recordings/responses/3e8248d253fd.json new file mode 100644 index 000000000..caf3f026e --- /dev/null +++ b/tests/integration/recordings/responses/3e8248d253fd.json @@ -0,0 +1,170 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Say hi to the world. Use tools to do so." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_bhtxlmzm", + "type": "function", + "function": { + "name": "greet_everyone", + "arguments": "{\"url\":\"world\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_bhtxlmzm", + "content": [ + { + "type": "text", + "text": "Hello, world!" + } + ] + }, + { + "role": "assistant", + "content": "I'm able to \"speak\" to you through this chat platform, hello! Would you like me to repeat anything or provide assistance with something else?" + }, + { + "role": "user", + "content": "What is the boiling point of polyjuice? Use tools to answer." 
+ } + ], + "max_tokens": 0, + "stream": true, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-515", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_li57r4tl", + "function": { + "arguments": "{\"celsius\":\"true\",\"liquid_name\":\"polyjuice\"}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-515", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/41393ddb2491.json b/tests/integration/recordings/responses/41393ddb2491.json new file mode 100644 index 000000000..12283f7be --- /dev/null +++ b/tests/integration/recordings/responses/41393ddb2491.json @@ -0,0 +1,1828 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_9vy3xwac", + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_9vy3xwac", + "content": "Error when running tool: get_boiling_point_with_metadata() missing 1 required positional argument: 'liquid_name'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit" + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " was", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " unable", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", 
+ "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " `", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "get", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "_bo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "iling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "_point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "_with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "_metadata", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "`", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " tool", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " requires", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } 
+ ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " argument", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " but", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " couldn", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "'t", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " any", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"chatcmpl-276", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + 
{ + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " If", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " have", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " more", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " context", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " details", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " may", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " able", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " help", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": " further", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-276", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759515076, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/864a270ca97b.json b/tests/integration/recordings/responses/864a270ca97b.json new file mode 100644 index 000000000..8893bec34 --- /dev/null +++ b/tests/integration/recordings/responses/864a270ca97b.json @@ -0,0 +1,57 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: Get the boiling point of polyjuice with a tool call.\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'\n\n\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-394", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759514973, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 422, + "total_tokens": 424, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/b63c1324a814.json b/tests/integration/recordings/responses/b63c1324a814.json new file mode 100644 index 000000000..8072e97c6 --- /dev/null +++ b/tests/integration/recordings/responses/b63c1324a814.json @@ -0,0 +1,57 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: Get the boiling point of polyjuice with a tool call.\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'\n\nAssistant: I apologize for the error. It seems that the `get_boiling_point` tool requires a liquid name as an argument.\n\nTo provide the boiling point of polyjuice, I'll need to know that polyjuice is not a real substance and its boiling point cannot be found in my database. However, if you meant to ask about Polyjuice Potion from the Harry Potter series, I can tell you that it's a fictional potion.\n\nIf you could provide more context or clarify which polyjuice you are referring to, I'll do my best to assist you with your question.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-515", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759514975, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 542, + "total_tokens": 544, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/c23c11b48503.json b/tests/integration/recordings/responses/c23c11b48503.json new file mode 100644 index 000000000..a264183c7 --- /dev/null +++ b/tests/integration/recordings/responses/c23c11b48503.json @@ -0,0 +1,57 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'\n\n\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "chatcmpl-576", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1759514986, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/c315ffba4f17.json b/tests/integration/recordings/responses/c315ffba4f17.json new file mode 100644 index 000000000..15c260097 --- /dev/null +++ b/tests/integration/recordings/responses/c315ffba4f17.json @@ -0,0 +1,715 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_m61820zt", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_m61820zt", + "content": "Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": { + "type": "function", + "function": { + "name": "get_boiling_point" + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " was", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " unable", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " 
my", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " search", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " Can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " help", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " something", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": " else", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-884", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759514986, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/c8632717f6b8.json b/tests/integration/recordings/responses/c8632717f6b8.json new file mode 100644 index 000000000..545bbf293 --- /dev/null +++ b/tests/integration/recordings/responses/c8632717f6b8.json @@ -0,0 +1,103 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-382", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_6ah4hyex", + "function": { + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-382", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759514971, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/ccdce45aab2c.json b/tests/integration/recordings/responses/ccdce45aab2c.json new file mode 100644 index 000000000..a4fb4025d --- /dev/null +++ b/tests/integration/recordings/responses/ccdce45aab2c.json @@ -0,0 +1,103 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "required", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-421", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_4gduxvhb", + "function": { + "arguments": "{\"liquid\":\"polyjuice\",\"unit\":\"celsius\"}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514981, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-421", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759514981, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/d9c3bf231670.json b/tests/integration/recordings/responses/d9c3bf231670.json new file mode 100644 index 000000000..939b3d976 --- /dev/null +++ b/tests/integration/recordings/responses/d9c3bf231670.json @@ -0,0 +1,932 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Say hi to the world. Use tools to do so." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_bhtxlmzm", + "type": "function", + "function": { + "name": "greet_everyone", + "arguments": "{\"url\":\"world\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_bhtxlmzm", + "content": [ + { + "type": "text", + "text": "Hello, world!" 
+ } + ] + } + ], + "max_tokens": 0, + "stream": true, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "greet_everyone", + "parameters": { + "properties": { + "url": { + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "greet_everyoneArguments", + "type": "object" + } + } + }, + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "\n Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n :param liquid_name: The name of the liquid\n :param celsius: Whether to return the boiling point in Celsius\n :return: The boiling point of the liquid in Celcius or Fahrenheit\n ", + "parameters": { + "properties": { + "liquid_name": { + "title": "Liquid Name", + "type": "string" + }, + "celsius": { + "default": true, + "title": "Celsius", + "type": "boolean" + } + }, + "required": [ + "liquid_name" + ], + "title": "get_boiling_pointArguments", + "type": "object" + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "'m", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " able", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " \"", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 
1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "s", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "peak", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "\"", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " through", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " this", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " chat", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " platform", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515073, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " hello", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " Would", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { 
+ "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " repeat", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " anything", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " assistance", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " something", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": " else", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-770", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1759515074, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/db9689e2cf53.json b/tests/integration/recordings/responses/db9689e2cf53.json new file mode 100644 index 000000000..7fccf8196 --- /dev/null +++ b/tests/integration/recordings/responses/db9689e2cf53.json @@ -0,0 +1,103 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit" + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-178", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_9vy3xwac", + "function": { + "arguments": "{}", + "name": "get_boiling_point_with_metadata" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759515075, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-178", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759515075, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/eac12959a803.json b/tests/integration/recordings/responses/eac12959a803.json new file mode 100644 index 000000000..4d9c48d84 --- /dev/null +++ b/tests/integration/recordings/responses/eac12959a803.json @@ -0,0 +1,103 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit." 
+ } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-367", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": [ + { + "index": 0, + "id": "call_swism1x1", + "function": { + "arguments": "{}", + "name": "get_boiling_point" + }, + "type": "function" + } + ] + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "chatcmpl-367", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "tool_calls", + "index": 0, + "logprobs": null + } + ], + "created": 1759514987, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-cf0b7036.json b/tests/integration/recordings/responses/models-bd032f995f2a-cf0b7036.json new file mode 100644 index 000000000..8eb7ab105 --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-cf0b7036.json @@ -0,0 +1,1500 @@ +{ + "request": { + "method": "POST", + "url": "https://integrate.api.nvidia.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "01-ai/yi-large", + "created": 735790403, + "object": "model", + "owned_by": "01-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "abacusai/dracarys-llama-3.1-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "abacusai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "adept/fuyu-8b", + "created": 735790403, + "object": "model", + "owned_by": "adept" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ai21labs/jamba-1.5-large-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ai21labs" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ai21labs/jamba-1.5-mini-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ai21labs" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "aisingapore/sea-lion-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "aisingapore" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "baai/bge-m3", + "created": 735790403, + "object": "model", + "owned_by": "baai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "baichuan-inc/baichuan2-13b-chat", + "created": 735790403, + "object": "model", + "owned_by": "baichuan-inc" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "bigcode/starcoder2-15b", + "created": 735790403, + "object": "model", + "owned_by": "bigcode" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + 
"id": "bigcode/starcoder2-7b", + "created": 735790403, + "object": "model", + "owned_by": "bigcode" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "bytedance/seed-oss-36b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "bytedance" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "databricks/dbrx-instruct", + "created": 735790403, + "object": "model", + "owned_by": "databricks" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-coder-6.7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1-0528", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1-distill-llama-8b", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1-distill-qwen-14b", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1-distill-qwen-32b", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-r1-distill-qwen-7b", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "deepseek-ai/deepseek-v3.1", + "created": 735790403, + "object": "model", + "owned_by": "deepseek-ai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/codegemma-1.1-7b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/codegemma-7b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/deplot", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-2-27b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-2-2b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-2-9b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-2b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-3-12b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-3-1b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "google/gemma-3-27b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-3-4b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-3n-e2b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-3n-e4b-it", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/gemma-7b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/paligemma", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/recurrentgemma-2b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "google/shieldgemma-9b", + "created": 735790403, + "object": "model", + "owned_by": "google" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gotocompany/gemma-2-9b-cpt-sahabatai-instruct", + "created": 735790403, + "object": "model", + "owned_by": "gotocompany" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-3.0-3b-a800m-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-3.0-8b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-3.3-8b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-34b-code-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-8b-code-instruct", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "ibm/granite-guardian-3.0-8b", + "created": 735790403, + "object": "model", + "owned_by": "ibm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "igenius/colosseum_355b_instruct_16k", + "created": 735790403, + "object": "model", + "owned_by": "igenius" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "igenius/italia_10b_instruct_16k", + "created": 735790403, + "object": "model", + "owned_by": "igenius" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "institute-of-science-tokyo/llama-3.1-swallow-70b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "institute-of-science-tokyo" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "institute-of-science-tokyo/llama-3.1-swallow-8b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "institute-of-science-tokyo" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "marin/marin-8b-instruct", + 
"created": 735790403, + "object": "model", + "owned_by": "marin" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mediatek/breeze-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "mediatek" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/codellama-70b", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.1-405b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.1-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.1-8b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.2-11b-vision-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.2-1b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.2-3b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.2-90b-vision-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-3.3-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-4-maverick-17b-128e-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-4-scout-17b-16e-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama-guard-4-12b", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama2-70b", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama3-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "meta/llama3-8b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "meta" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/kosmos-2", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-medium-128k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-medium-4k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-mini-128k-instruct", + 
"created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-mini-4k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-small-128k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-small-8k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3-vision-128k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3.5-mini-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3.5-moe-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-3.5-vision-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-4-mini-flash-reasoning", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-4-mini-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "microsoft/phi-4-multimodal-instruct", + "created": 735790403, + "object": "model", + "owned_by": "microsoft" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/codestral-22b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/magistral-small-2506", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mamba-codestral-7b-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mathstral-7b-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-7b-instruct-v0.2", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-7b-instruct-v0.3", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-large", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-large-2-instruct", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-medium-3-instruct", + "created": 
735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-nemotron", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-small-24b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mistral-small-3.1-24b-instruct-2503", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mixtral-8x22b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mixtral-8x22b-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "mistralai/mixtral-8x7b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "moonshotai/kimi-k2-instruct", + "created": 735790403, + "object": "model", + "owned_by": "moonshotai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "moonshotai/kimi-k2-instruct-0905", + "created": 735790403, + "object": "model", + "owned_by": "moonshotai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nv-mistralai/mistral-nemo-12b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nv-mistralai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/embed-qa-4", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemoguard-8b-content-safety", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemoguard-8b-topic-control", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-51b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-70b-reward", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-nano-4b-v1.1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-nano-8b-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"nvidia/llama-3.1-nemotron-ultra-253b-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.2-nemoretriever-300m-embed-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.2-nemoretriever-300m-embed-v2", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.2-nv-embedqa-1b-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.2-nv-embedqa-1b-v2", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.3-nemotron-super-49b-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama3-chatqa-1.5-70b", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/llama3-chatqa-1.5-8b", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/mistral-nemo-minitron-8b-8k-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/mistral-nemo-minitron-8b-base", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nemoretriever-parse", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nemotron-4-340b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nemotron-4-340b-reward", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nemotron-4-mini-hindi-4b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nemotron-mini-4b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/neva-22b", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nv-embed-v1", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nv-embedcode-7b-v1", + "created": 
735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nv-embedqa-e5-v5", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nv-embedqa-mistral-7b-v2", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nvclip", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/nvidia-nemotron-nano-9b-v2", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/riva-translate-4b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/usdcode-llama-3.1-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nvidia/vila", + "created": 735790403, + "object": "model", + "owned_by": "nvidia" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "openai/gpt-oss-120b", + "created": 735790403, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "openai/gpt-oss-120b", + "created": 735790403, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "openai/gpt-oss-20b", + "created": 735790403, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "openai/gpt-oss-20b", + "created": 735790403, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "opengpt-x/teuken-7b-instruct-commercial-v0.4", + "created": 735790403, + "object": "model", + "owned_by": "opengpt-x" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen2-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen2.5-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen2.5-coder-32b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen2.5-coder-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen3-235b-a22b", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen3-coder-480b-a35b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen3-next-80b-a3b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwen3-next-80b-a3b-thinking", + "created": 735790403, + 
"object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "qwen/qwq-32b", + "created": 735790403, + "object": "model", + "owned_by": "qwen" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "rakuten/rakutenai-7b-chat", + "created": 735790403, + "object": "model", + "owned_by": "rakuten" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "rakuten/rakutenai-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "rakuten" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sarvamai/sarvam-m", + "created": 735790403, + "object": "model", + "owned_by": "sarvamai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "snowflake/arctic-embed-l", + "created": 735790403, + "object": "model", + "owned_by": "snowflake" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "speakleash/bielik-11b-v2.3-instruct", + "created": 735790403, + "object": "model", + "owned_by": "speakleash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "speakleash/bielik-11b-v2.6-instruct", + "created": 735790403, + "object": "model", + "owned_by": "speakleash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "stockmark/stockmark-2-100b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "stockmark" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "thudm/chatglm3-6b", + "created": 735790403, + "object": "model", + "owned_by": "thudm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tiiuae/falcon3-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "tiiuae" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tokyotech-llm/llama-3-swallow-70b-instruct-v0.1", + "created": 735790403, + "object": "model", + "owned_by": "tokyotech-llm" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "upstage/solar-10.7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "upstage" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "utter-project/eurollm-9b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "utter-project" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "writer/palmyra-creative-122b", + "created": 735790403, + "object": "model", + "owned_by": "writer" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "writer/palmyra-fin-70b-32k", + "created": 735790403, + "object": "model", + "owned_by": "writer" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "writer/palmyra-med-70b", + "created": 735790403, + "object": "model", + "owned_by": "writer" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "writer/palmyra-med-70b-32k", + "created": 735790403, + "object": "model", + "owned_by": "writer" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "yentinglin/llama-3-taiwan-70b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "yentinglin" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "zyphra/zamba2-7b-instruct", + "created": 735790403, + "object": "model", + "owned_by": "zyphra" + } + } + ], + "is_streaming": false + } +} diff --git 
a/tests/integration/suites.py b/tests/integration/suites.py
index d8c283a0a..e82e766e3 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -131,6 +131,27 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
         },
     ),
+    "anthropic": Setup(
+        name="anthropic",
+        description="Anthropic Claude models",
+        defaults={
+            "text_model": "anthropic/claude-3-5-haiku-20241022",
+        },
+    ),
+    "llama-api": Setup(
+        name="llama-openai-compat",
+        description="Llama models from https://api.llama.com",
+        defaults={
+            "text_model": "llama_openai_compat/Llama-3.3-8B-Instruct",
+        },
+    ),
+    "groq": Setup(
+        name="groq",
+        description="Groq models",
+        defaults={
+            "text_model": "groq/llama-3.3-70b-versatile",
+        },
+    ),
 }
diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py
index f4b3201e9..d30b5b12a 100644
--- a/tests/unit/providers/inference/test_inference_client_caching.py
+++ b/tests/unit/providers/inference/test_inference_client_caching.py
@@ -7,6 +7,8 @@
 import json
 from unittest.mock import MagicMock
 
+import pytest
+
 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter
@@ -18,72 +20,41 @@ from llama_stack.providers.remote.inference.together.config import TogetherImplC
 from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
 
 
-def test_groq_provider_openai_client_caching():
-    """Ensure the Groq provider does not cache api keys across client requests"""
-
-    config = GroqConfig()
-    inference_adapter = GroqInferenceAdapter(config)
-
-    inference_adapter.__provider_spec__ = MagicMock()
-    inference_adapter.__provider_spec__.provider_data_validator = (
-        "llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator"
-    )
-
-    for api_key in ["test1", "test2"]:
-        with request_provider_data_context(
-            {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
-        ):
-            assert inference_adapter.client.api_key == api_key
-
-
-def test_openai_provider_openai_client_caching():
+@pytest.mark.parametrize(
+    "config_cls,adapter_cls,provider_data_validator",
+    [
+        (
+            GroqConfig,
+            GroqInferenceAdapter,
+            "llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
+        ),
+        (
+            OpenAIConfig,
+            OpenAIInferenceAdapter,
+            "llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
+        ),
+        (
+            TogetherImplConfig,
+            TogetherInferenceAdapter,
+            "llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
+        ),
+        (
+            LlamaCompatConfig,
+            LlamaCompatInferenceAdapter,
+            "llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
+        ),
+    ],
+)
+def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_validator: str):
     """Ensure the OpenAI provider does not cache api keys across client requests"""
 
-    config = OpenAIConfig()
-    inference_adapter = OpenAIInferenceAdapter(config)
+    inference_adapter = adapter_cls(config=config_cls())
 
     inference_adapter.__provider_spec__ = MagicMock()
-    inference_adapter.__provider_spec__.provider_data_validator = (
-        "llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator"
-    )
+    inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator
 
     for api_key in ["test1", "test2"]:
         with request_provider_data_context(
             {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
         ):
-            openai_client = inference_adapter.client
-            assert openai_client.api_key == api_key
-
-
-def test_together_provider_openai_client_caching():
-    """Ensure the Together provider does not cache api keys across client requests"""
-
-    config = TogetherImplConfig()
-    inference_adapter = TogetherInferenceAdapter(config)
-
-    inference_adapter.__provider_spec__ = MagicMock()
-    inference_adapter.__provider_spec__.provider_data_validator = (
-        "llama_stack.providers.remote.inference.together.TogetherProviderDataValidator"
-    )
-
-    for api_key in ["test1", "test2"]:
-        with request_provider_data_context({"x-llamastack-provider-data": json.dumps({"together_api_key": api_key})}):
-            together_client = inference_adapter._get_client()
-            assert together_client.client.api_key == api_key
-            openai_client = inference_adapter._get_openai_client()
-            assert openai_client.api_key == api_key
-
-
-def test_llama_compat_provider_openai_client_caching():
-    """Ensure the LlamaCompat provider does not cache api keys across client requests"""
-    config = LlamaCompatConfig()
-    inference_adapter = LlamaCompatInferenceAdapter(config)
-
-    inference_adapter.__provider_spec__ = MagicMock()
-    inference_adapter.__provider_spec__.provider_data_validator = (
-        "llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator"
-    )
-
-    for api_key in ["test1", "test2"]:
-        with request_provider_data_context({"x-llamastack-provider-data": json.dumps({"llama_api_key": api_key})}):
             assert inference_adapter.client.api_key == api_key
diff --git a/tests/unit/providers/inference/test_openai_base_url_config.py b/tests/unit/providers/inference/test_openai_base_url_config.py
index 7c5a5b327..039c3cecd 100644
--- a/tests/unit/providers/inference/test_openai_base_url_config.py
+++ b/tests/unit/providers/inference/test_openai_base_url_config.py
@@ -18,7 +18,7 @@ class TestOpenAIBaseURLConfig:
     def test_default_base_url_without_env_var(self):
         """Test that the adapter uses the default OpenAI base URL when no environment variable is set."""
         config = OpenAIConfig(api_key="test-key")
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         assert adapter.get_base_url() == "https://api.openai.com/v1"
@@ -27,7 +27,7 @@
         """Test that the adapter uses a custom base URL when provided in config."""
         custom_url = "https://custom.openai.com/v1"
         config = OpenAIConfig(api_key="test-key", base_url=custom_url)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         assert adapter.get_base_url() == custom_url
@@ -39,7 +39,7 @@
         config_data = OpenAIConfig.sample_run_config(api_key="test-key")
         processed_config = replace_env_vars(config_data)
         config = OpenAIConfig.model_validate(processed_config)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         assert adapter.get_base_url() == "https://env.openai.com/v1"
@@ -49,7 +49,7 @@
         """Test that explicit config value overrides environment variable."""
         custom_url = "https://config.openai.com/v1"
         config = OpenAIConfig(api_key="test-key", base_url=custom_url)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         # Config should take precedence over environment variable
@@ -60,7 +60,7 @@
         """Test that the OpenAI client is initialized with the configured base URL."""
         custom_url = "https://test.openai.com/v1"
         config = OpenAIConfig(api_key="test-key", base_url=custom_url)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         # Mock the get_api_key method since it's delegated to LiteLLMOpenAIMixin
@@ -80,7 +80,7 @@
         """Test that check_model_availability uses the configured base URL."""
         custom_url = "https://test.openai.com/v1"
         config = OpenAIConfig(api_key="test-key", base_url=custom_url)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         # Mock the get_api_key method
@@ -122,7 +122,7 @@
         config_data = OpenAIConfig.sample_run_config(api_key="test-key")
         processed_config = replace_env_vars(config_data)
         config = OpenAIConfig.model_validate(processed_config)
-        adapter = OpenAIInferenceAdapter(config)
+        adapter = OpenAIInferenceAdapter(config=config)
         adapter.provider_data_api_key_field = None  # Disable provider data for this test
 
         # Mock the get_api_key method
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index cd31e4943..2806f618c 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -5,45 +5,21 @@
 # the root directory of this source tree.
 
 import asyncio
-import json
 import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
-from openai.types.chat.chat_completion_chunk import (
-    ChatCompletionChunk as OpenAIChatCompletionChunk,
-)
-from openai.types.chat.chat_completion_chunk import (
-    Choice as OpenAIChoiceChunk,
-)
-from openai.types.chat.chat_completion_chunk import (
-    ChoiceDelta as OpenAIChoiceDelta,
-)
-from openai.types.chat.chat_completion_chunk import (
-    ChoiceDeltaToolCall as OpenAIChoiceDeltaToolCall,
-)
-from openai.types.chat.chat_completion_chunk import (
-    ChoiceDeltaToolCallFunction as OpenAIChoiceDeltaToolCallFunction,
-)
-from openai.types.model import Model as OpenAIModel
 
 from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    ChatCompletionResponseEventType,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
     ToolChoice,
-    UserMessage,
 )
 from llama_stack.apis.models import Model
-from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.inference.vllm.vllm import (
-    VLLMInferenceAdapter,
-    _process_vllm_chat_completion_stream_response,
-)
+from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
 
 # These are unit test for the remote vllm provider
 # implementation. This should only contain tests which are specific to
@@ -56,37 +32,15 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
 # -v -s --tb=short --disable-warnings
 
 
-@pytest.fixture(scope="module")
-def mock_openai_models_list():
-    with patch("openai.resources.models.AsyncModels.list") as mock_list:
-        yield mock_list
-
-
 @pytest.fixture(scope="function")
 async def vllm_inference_adapter():
     config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
-    inference_adapter = VLLMInferenceAdapter(config)
+    inference_adapter = VLLMInferenceAdapter(config=config)
     inference_adapter.model_store = AsyncMock()
 
-    # Mock the __provider_spec__ attribute that would normally be set by the resolver
-    inference_adapter.__provider_spec__ = MagicMock()
-    inference_adapter.__provider_spec__.provider_type = "vllm-inference"
-    inference_adapter.__provider_spec__.provider_data_validator = MagicMock()
     await inference_adapter.initialize()
     return inference_adapter
 
 
-async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter):
-    async def mock_openai_models():
-        yield OpenAIModel(id="foo", created=1, object="model", owned_by="test")
-
-    mock_openai_models_list.return_value = mock_openai_models()
-
-    foo_model = Model(identifier="foo", provider_resource_id="foo", provider_id="vllm-inference")
-
-    await vllm_inference_adapter.register_model(foo_model)
-    mock_openai_models_list.assert_called()
-
-
 async def test_old_vllm_tool_choice(vllm_inference_adapter):
     """
     Test that we set tool_choice to none when no tools are in use
@@ -115,403 +69,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter):
     assert call_args.kwargs["tool_choice"] == ToolChoice.none.value
 
 
-async def test_tool_call_delta_empty_tool_call_buf():
-    """
-    Test that we don't generate extra chunks when processing a
-    tool call response that didn't call any tools. Previously we would
-    emit chunks with spurious ToolCallParseStatus.succeeded or
-    ToolCallParseStatus.failed when processing chunks that didn't
-    actually make any tool calls.
-    """
-
-    async def mock_stream():
-        delta = OpenAIChoiceDelta(content="", tool_calls=None)
-        choices = [OpenAIChoiceChunk(delta=delta, finish_reason="stop", index=0)]
-        mock_chunk = OpenAIChatCompletionChunk(
-            id="chunk-1",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=choices,
-        )
-        for chunk in [mock_chunk]:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 2
-    assert chunks[0].event.event_type.value == "start"
-    assert chunks[1].event.event_type.value == "complete"
-    assert chunks[1].event.stop_reason == StopReason.end_of_turn
-
-
-async def test_tool_call_delta_streaming_arguments_dict():
-    async def mock_stream():
-        mock_chunk_1 = OpenAIChatCompletionChunk(
-            id="chunk-1",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(
-                        content="",
-                        tool_calls=[
-                            OpenAIChoiceDeltaToolCall(
-                                id="tc_1",
-                                index=1,
-                                function=OpenAIChoiceDeltaToolCallFunction(
-                                    name="power",
-                                    arguments="",
-                                ),
-                            )
-                        ],
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-        )
-        mock_chunk_2 = OpenAIChatCompletionChunk(
-            id="chunk-2",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(
-                        content="",
-                        tool_calls=[
-                            OpenAIChoiceDeltaToolCall(
-                                id="tc_1",
-                                index=1,
-                                function=OpenAIChoiceDeltaToolCallFunction(
-                                    name="power",
-                                    arguments='{"number": 28, "power": 3}',
-                                ),
-                            )
-                        ],
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-        )
-        mock_chunk_3 = OpenAIChatCompletionChunk(
-            id="chunk-3",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
-                )
-            ],
-        )
-        for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 3
-    assert chunks[0].event.event_type.value == "start"
-    assert chunks[1].event.event_type.value == "progress"
-    assert chunks[1].event.delta.type == "tool_call"
-    assert chunks[1].event.delta.parse_status.value == "succeeded"
-    assert chunks[1].event.delta.tool_call.arguments == '{"number": 28, "power": 3}'
-    assert chunks[2].event.event_type.value == "complete"
-
-
-async def test_multiple_tool_calls():
-    async def mock_stream():
-        mock_chunk_1 = OpenAIChatCompletionChunk(
-            id="chunk-1",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(
-                        content="",
-                        tool_calls=[
-                            OpenAIChoiceDeltaToolCall(
-                                id="",
-                                index=1,
-                                function=OpenAIChoiceDeltaToolCallFunction(
-                                    name="power",
-                                    arguments='{"number": 28, "power": 3}',
-                                ),
-                            ),
-                        ],
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-        )
-        mock_chunk_2 = OpenAIChatCompletionChunk(
-            id="chunk-2",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(
-                        content="",
-                        tool_calls=[
-                            OpenAIChoiceDeltaToolCall(
-                                id="",
-                                index=2,
-                                function=OpenAIChoiceDeltaToolCallFunction(
-                                    name="multiple",
-                                    arguments='{"first_number": 4, "second_number": 7}',
-                                ),
-                            ),
-                        ],
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-        )
-        mock_chunk_3 = OpenAIChatCompletionChunk(
-            id="chunk-3",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=[
-                OpenAIChoiceChunk(
-                    delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
-                )
-            ],
-        )
-        for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 4
-    assert chunks[0].event.event_type.value == "start"
-    assert chunks[1].event.event_type.value == "progress"
-    assert chunks[1].event.delta.type == "tool_call"
-    assert chunks[1].event.delta.parse_status.value == "succeeded"
-    assert chunks[1].event.delta.tool_call.arguments == '{"number": 28, "power": 3}'
-    assert chunks[2].event.event_type.value == "progress"
-    assert chunks[2].event.delta.type == "tool_call"
-    assert chunks[2].event.delta.parse_status.value == "succeeded"
-    assert chunks[2].event.delta.tool_call.arguments == '{"first_number": 4, "second_number": 7}'
-    assert chunks[3].event.event_type.value == "complete"
-
-
-async def test_process_vllm_chat_completion_stream_response_no_choices():
-    """
-    Test that we don't error out when vLLM returns no choices for a
-    completion request. This can happen when there's an error thrown
-    in vLLM for example.
-    """
-
-    async def mock_stream():
-        choices = []
-        mock_chunk = OpenAIChatCompletionChunk(
-            id="chunk-1",
-            created=1,
-            model="foo",
-            object="chat.completion.chunk",
-            choices=choices,
-        )
-        for chunk in [mock_chunk]:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 1
-    assert chunks[0].event.event_type.value == "start"
-
-
-async def test_get_params_empty_tools(vllm_inference_adapter):
-    request = ChatCompletionRequest(
-        tools=[],
-        model="test_model",
-        messages=[UserMessage(content="test")],
-    )
-    params = await vllm_inference_adapter._get_params(request)
-    assert "tools" not in params
-
-
-async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk():
-    """
-    Tests the edge case where the model returns the arguments for the tool call in the same chunk that
-    contains the finish reason (i.e., the last one).
-    We want to make sure the tool call is executed in this case, and the parameters are passed correctly.
-    """
-
-    mock_tool_name = "mock_tool"
-    mock_tool_arguments = {"arg1": 0, "arg2": 100}
-    mock_tool_arguments_str = json.dumps(mock_tool_arguments)
-
-    async def mock_stream():
-        mock_chunks = [
-            OpenAIChatCompletionChunk(
-                id="chunk-1",
-                created=1,
-                model="foo",
-                object="chat.completion.chunk",
-                choices=[
-                    {
-                        "delta": {
-                            "content": None,
-                            "tool_calls": [
-                                {
-                                    "index": 0,
-                                    "id": "mock_id",
-                                    "type": "function",
-                                    "function": {
-                                        "name": mock_tool_name,
-                                        "arguments": None,
-                                    },
-                                }
-                            ],
-                        },
-                        "finish_reason": None,
-                        "logprobs": None,
-                        "index": 0,
-                    }
-                ],
-            ),
-            OpenAIChatCompletionChunk(
-                id="chunk-1",
-                created=1,
-                model="foo",
-                object="chat.completion.chunk",
-                choices=[
-                    {
-                        "delta": {
-                            "content": None,
-                            "tool_calls": [
-                                {
-                                    "index": 0,
-                                    "id": None,
-                                    "function": {
-                                        "name": None,
-                                        "arguments": mock_tool_arguments_str,
-                                    },
-                                }
-                            ],
-                        },
-                        "finish_reason": "tool_calls",
-                        "logprobs": None,
-                        "index": 0,
-                    }
-                ],
-            ),
-        ]
-        for chunk in mock_chunks:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 3
-    assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
-    assert chunks[-2].event.delta.type == "tool_call"
-    assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
-    assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments_str
-
-
-async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
-    """
-    Tests the edge case where the model requests a tool call and stays idle without explicitly providing the
-    finish reason.
-    We want to make sure that this case is recognized and handled correctly, i.e., as a valid end of message.
-    """
-
-    mock_tool_name = "mock_tool"
-    mock_tool_arguments = {"arg1": 0, "arg2": 100}
-    mock_tool_arguments_str = json.dumps(mock_tool_arguments)
-
-    async def mock_stream():
-        mock_chunks = [
-            OpenAIChatCompletionChunk(
-                id="chunk-1",
-                created=1,
-                model="foo",
-                object="chat.completion.chunk",
-                choices=[
-                    {
-                        "delta": {
-                            "content": None,
-                            "tool_calls": [
-                                {
-                                    "index": 0,
-                                    "id": "mock_id",
-                                    "type": "function",
-                                    "function": {
-                                        "name": mock_tool_name,
-                                        "arguments": mock_tool_arguments_str,
-                                    },
-                                }
-                            ],
-                        },
-                        "finish_reason": None,
-                        "logprobs": None,
-                        "index": 0,
-                    }
-                ],
-            ),
-        ]
-        for chunk in mock_chunks:
-            yield chunk
-
-    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
-    assert len(chunks) == 3
-    assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
-    assert chunks[-2].event.delta.type == "tool_call"
-    assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
-    assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments_str
-
-
-async def test_process_vllm_chat_completion_stream_response_tool_without_args():
-    """
-    Tests the edge case where no arguments are provided for the tool call.
-    Tool calls with no arguments should be treated as regular tool calls, which was not the case until now.
- """ - mock_tool_name = "mock_tool" - - async def mock_stream(): - mock_chunks = [ - OpenAIChatCompletionChunk( - id="chunk-1", - created=1, - model="foo", - object="chat.completion.chunk", - choices=[ - { - "delta": { - "content": None, - "tool_calls": [ - { - "index": 0, - "id": "mock_id", - "type": "function", - "function": { - "name": mock_tool_name, - "arguments": "", - }, - } - ], - }, - "finish_reason": None, - "logprobs": None, - "index": 0, - } - ], - ), - ] - for chunk in mock_chunks: - yield chunk - - chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] - assert len(chunks) == 3 - assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete - assert chunks[-2].event.delta.type == "tool_call" - assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name - assert chunks[-2].event.delta.tool_call.arguments == "{}" - - async def test_health_status_success(vllm_inference_adapter): """ Test the health method of VLLM InferenceAdapter when the connection is successful. @@ -642,94 +199,30 @@ async def test_should_refresh_models(): # Test case 1: refresh_models is True, api_token is None config1 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token=None, refresh_models=True) - adapter1 = VLLMInferenceAdapter(config1) + adapter1 = VLLMInferenceAdapter(config=config1) result1 = await adapter1.should_refresh_models() assert result1 is True, "should_refresh_models should return True when refresh_models is True" # Test case 2: refresh_models is True, api_token is empty string config2 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="", refresh_models=True) - adapter2 = VLLMInferenceAdapter(config2) + adapter2 = VLLMInferenceAdapter(config=config2) result2 = await adapter2.should_refresh_models() assert result2 is True, "should_refresh_models should return True when refresh_models is True" # Test case 3: refresh_models is True, api_token is "fake" (default) config3 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="fake", refresh_models=True) - adapter3 = VLLMInferenceAdapter(config3) + adapter3 = VLLMInferenceAdapter(config=config3) result3 = await adapter3.should_refresh_models() assert result3 is True, "should_refresh_models should return True when refresh_models is True" # Test case 4: refresh_models is True, api_token is real token config4 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-123", refresh_models=True) - adapter4 = VLLMInferenceAdapter(config4) + adapter4 = VLLMInferenceAdapter(config=config4) result4 = await adapter4.should_refresh_models() assert result4 is True, "should_refresh_models should return True when refresh_models is True" # Test case 5: refresh_models is False, api_token is real token config5 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-456", refresh_models=False) - adapter5 = VLLMInferenceAdapter(config5) + adapter5 = VLLMInferenceAdapter(config=config5) result5 = await adapter5.should_refresh_models() assert result5 is False, "should_refresh_models should return False when refresh_models is False" - - -async def test_provider_data_var_context_propagation(vllm_inference_adapter): - """ - Test that PROVIDER_DATA_VAR context is properly propagated through the vLLM inference adapter. - This ensures that dynamic provider data (like API tokens) can be passed through context. - Note: The base URL is always taken from config.url, not from provider data. 
- """ - # Mock the AsyncOpenAI class to capture provider data - with ( - patch("llama_stack.providers.utils.inference.openai_mixin.AsyncOpenAI") as mock_openai_class, - patch.object(vllm_inference_adapter, "get_request_provider_data") as mock_get_provider_data, - ): - mock_client = AsyncMock() - mock_client.chat.completions.create = AsyncMock() - mock_openai_class.return_value = mock_client - - # Mock provider data to return test data - mock_provider_data = MagicMock() - mock_provider_data.vllm_api_token = "test-token-123" - mock_provider_data.vllm_url = "http://test-server:8000/v1" - mock_get_provider_data.return_value = mock_provider_data - - # Mock the model - mock_model = Model(identifier="test-model", provider_resource_id="test-model", provider_id="vllm-inference") - vllm_inference_adapter.model_store.get_model.return_value = mock_model - - try: - # Execute chat completion - await vllm_inference_adapter.openai_chat_completion( - model="test-model", - messages=[UserMessage(content="Hello")], - stream=False, - ) - - # Verify that ALL client calls were made with the correct parameters - calls = mock_openai_class.call_args_list - incorrect_calls = [] - - for i, call in enumerate(calls): - api_key = call[1]["api_key"] - base_url = call[1]["base_url"] - - if api_key != "test-token-123" or base_url != "http://mocked.localhost:12345": - incorrect_calls.append({"call_index": i, "api_key": api_key, "base_url": base_url}) - - if incorrect_calls: - error_msg = ( - f"Found {len(incorrect_calls)} calls with incorrect parameters out of {len(calls)} total calls:\n" - ) - for incorrect_call in incorrect_calls: - error_msg += f" Call {incorrect_call['call_index']}: api_key='{incorrect_call['api_key']}', base_url='{incorrect_call['base_url']}'\n" - error_msg += "Expected: api_key='test-token-123', base_url='http://mocked.localhost:12345'" - raise AssertionError(error_msg) - - # Ensure at least one call was made - assert len(calls) >= 1, "No AsyncOpenAI client calls were made" - - # Verify that chat completion was called - mock_client.chat.completions.create.assert_called_once() - - finally: - # Clean up context - pass diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 4856f510b..266c15f81 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
import json +from collections.abc import Iterable from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch import pytest @@ -13,6 +14,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import Model, OpenAIUserMessageParam from llama_stack.apis.models import ModelType from llama_stack.core.request_headers import request_provider_data_context +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -29,7 +31,7 @@ class OpenAIMixinImpl(OpenAIMixin): class OpenAIMixinWithEmbeddingsImpl(OpenAIMixinImpl): """Test implementation with embedding model metadata""" - embedding_model_metadata = { + embedding_model_metadata: dict[str, dict[str, int]] = { "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192}, "text-embedding-ada-002": {"embedding_dimension": 1536, "context_length": 8192}, } @@ -38,7 +40,8 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixinImpl): @pytest.fixture def mixin(): """Create a test instance of OpenAIMixin with mocked model_store""" - mixin_instance = OpenAIMixinImpl() + config = RemoteInferenceProviderConfig() + mixin_instance = OpenAIMixinImpl(config=config) # just enough to satisfy _get_provider_model_id calls mock_model_store = MagicMock() @@ -53,7 +56,8 @@ def mixin(): @pytest.fixture def mixin_with_embeddings(): """Create a test instance of OpenAIMixin with embedding model metadata""" - return OpenAIMixinWithEmbeddingsImpl() + config = RemoteInferenceProviderConfig() + return OpenAIMixinWithEmbeddingsImpl(config=config) @pytest.fixture @@ -498,13 +502,296 @@ class OpenAIMixinWithProviderData(OpenAIMixinImpl): return "default-base-url" +class OpenAIMixinWithCustomGetModels(OpenAIMixinImpl): + """Test implementation with custom get_models override""" + + def __init__(self, config, custom_model_ids): + super().__init__(config=config) + self._custom_model_ids = custom_model_ids + + async def get_models(self) -> Iterable[str] | None: + """Return custom model IDs list""" + return self._custom_model_ids + + +class TestOpenAIMixinCustomGetModels: + """Test cases for custom get_models() implementation functionality""" + + @pytest.fixture + def custom_model_ids_list(self): + """Create a list of custom model ID strings""" + return ["custom-model-1", "custom-model-2", "custom-embedding"] + + @pytest.fixture + def mixin_with_custom_get_models(self, custom_model_ids_list): + """Create mixin instance with custom get_models implementation""" + config = RemoteInferenceProviderConfig() + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=custom_model_ids_list) + # Add embedding metadata to test that feature still works + mixin.embedding_model_metadata = {"custom-embedding": {"embedding_dimension": 768, "context_length": 512}} + return mixin + + async def test_custom_get_models_is_used(self, mixin_with_custom_get_models, custom_model_ids_list): + """Test that custom get_models() implementation is used instead of client.models.list()""" + result = await mixin_with_custom_get_models.list_models() + + assert result is not None + assert len(result) == 3 + + # Verify all custom models are present + identifiers = {m.identifier for m in result} + assert "custom-model-1" in identifiers + assert "custom-model-2" in identifiers + assert "custom-embedding" in identifiers + + async def test_custom_get_models_populates_cache(self, mixin_with_custom_get_models): + """Test that custom get_models() results 
are cached""" + assert len(mixin_with_custom_get_models._model_cache) == 0 + + await mixin_with_custom_get_models.list_models() + + assert len(mixin_with_custom_get_models._model_cache) == 3 + assert "custom-model-1" in mixin_with_custom_get_models._model_cache + assert "custom-model-2" in mixin_with_custom_get_models._model_cache + assert "custom-embedding" in mixin_with_custom_get_models._model_cache + + async def test_custom_get_models_respects_allowed_models(self): + """Test that custom get_models() respects allowed_models filtering""" + config = RemoteInferenceProviderConfig() + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=["model-1", "model-2", "model-3"]) + mixin.allowed_models = ["model-1"] + + result = await mixin.list_models() + + assert result is not None + assert len(result) == 1 + assert result[0].identifier == "model-1" + + async def test_custom_get_models_with_embedding_metadata(self, mixin_with_custom_get_models): + """Test that custom get_models() works with embedding_model_metadata""" + result = await mixin_with_custom_get_models.list_models() + + # Find the embedding model + embedding_model = next((m for m in result if m.identifier == "custom-embedding"), None) + assert embedding_model is not None + assert embedding_model.model_type == ModelType.embedding + assert embedding_model.metadata == {"embedding_dimension": 768, "context_length": 512} + + # Verify LLM models + llm_models = [m for m in result if m.model_type == ModelType.llm] + assert len(llm_models) == 2 + + async def test_custom_get_models_with_empty_list(self, mock_client_with_empty_models, mock_client_context): + """Test that custom get_models() handles empty list correctly""" + config = RemoteInferenceProviderConfig() + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=[]) + + # Empty list from get_models() falls back to client.models.list() + with mock_client_context(mixin, mock_client_with_empty_models): + result = await mixin.list_models() + + assert result is not None + assert len(result) == 0 + assert len(mixin._model_cache) == 0 + + async def test_default_get_models_returns_none(self, mixin): + """Test that default get_models() implementation returns None""" + custom_models = await mixin.get_models() + assert custom_models is None + + async def test_fallback_to_client_when_get_models_returns_none( + self, mixin, mock_client_with_models, mock_client_context + ): + """Test that when get_models() returns None, falls back to client.models.list()""" + # Default get_models() returns None, so should use client + with mock_client_context(mixin, mock_client_with_models): + result = await mixin.list_models() + + assert result is not None + assert len(result) == 3 + mock_client_with_models.models.list.assert_called_once() + + async def test_custom_get_models_creates_proper_model_objects(self): + """Test that custom get_models() model IDs are converted to proper Model objects""" + config = RemoteInferenceProviderConfig() + model_ids = ["gpt-4", "gpt-3.5-turbo"] + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=model_ids) + + result = await mixin.list_models() + + assert result is not None + assert len(result) == 2 + + for model in result: + assert isinstance(model, Model) + assert model.provider_id == "test-provider" + assert model.identifier in model_ids + assert model.provider_resource_id in model_ids + assert model.model_type == ModelType.llm + + async def test_custom_get_models_bypasses_client(self, mock_client_context): + """Test that 
providing get_models() means client.models.list() is NOT called""" + config = RemoteInferenceProviderConfig() + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=["model-1", "model-2"]) + + # Create a mock client that should NOT be called + mock_client = MagicMock() + mock_client.models.list = MagicMock(side_effect=AssertionError("client.models.list should not be called!")) + + with mock_client_context(mixin, mock_client): + result = await mixin.list_models() + + # Should succeed without calling client.models.list + assert result is not None + assert len(result) == 2 + mock_client.models.list.assert_not_called() + + async def test_get_models_wrong_type_raises_error(self): + """Test that get_models() returning non-string items results in an error""" + + class BadGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + # Return list with non-string items + return [["nested", "list"], {"key": "value"}] # type: ignore + + config = RemoteInferenceProviderConfig() + mixin = BadGetModelsAdapter(config=config) + + # Should raise ValueError for non-string model ID + with pytest.raises(ValueError, match="Model ID .* from get_models\\(\\) is not a string"): + await mixin.list_models() + + async def test_get_models_non_iterable_raises_error(self): + """Test that get_models() returning non-iterable type raises error""" + + class NonIterableGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + # Return non-iterable type + return 42 # type: ignore + + config = RemoteInferenceProviderConfig() + mixin = NonIterableGetModelsAdapter(config=config) + + # Should raise TypeError when trying to convert to list + with pytest.raises(TypeError, match="'int' object is not iterable"): + await mixin.list_models() + + async def test_get_models_with_none_items_raises_error(self): + """Test that get_models() returning list with None items causes error""" + + class NoneItemsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + # Return list with None items + return [None, "valid-model", None] # type: ignore + + config = RemoteInferenceProviderConfig() + mixin = NoneItemsAdapter(config=config) + + # Should raise ValueError for non-string model ID + with pytest.raises(ValueError, match="Model ID .* from get_models\\(\\) is not a string"): + await mixin.list_models() + + async def test_get_models_with_non_string_items_raises_error(self): + """Test that get_models() returning non-string items raises ValueError""" + + class NonStringItemsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + # Return list with non-string items (integers) + return ["valid-model", 123, "another-model"] # type: ignore + + config = RemoteInferenceProviderConfig() + mixin = NonStringItemsAdapter(config=config) + + # Should raise ValueError for non-string model ID + with pytest.raises(ValueError, match="Model ID 123 from get_models\\(\\) is not a string"): + await mixin.list_models() + + async def test_embedding_models_from_custom_get_models_have_correct_type(self, mixin_with_custom_get_models): + """Test that embedding models from custom get_models() are properly typed as embedding""" + result = await mixin_with_custom_get_models.list_models() + + # Verify we have both LLM and embedding models + llm_models = [m for m in result if m.model_type == ModelType.llm] + embedding_models = [m for m in result if m.model_type == ModelType.embedding] + + assert len(llm_models) == 2 + assert len(embedding_models) == 
1 + assert embedding_models[0].identifier == "custom-embedding" + + async def test_llm_models_from_custom_get_models_have_correct_type(self): + """Test that LLM models from custom get_models() are properly typed as llm""" + config = RemoteInferenceProviderConfig() + mixin = OpenAIMixinWithCustomGetModels(config=config, custom_model_ids=["gpt-4", "claude-3"]) + + result = await mixin.list_models() + + assert result is not None + assert len(result) == 2 + for model in result: + assert model.model_type == ModelType.llm + + async def test_get_models_accepts_various_iterables(self): + """Test that get_models() accepts tuples, sets, generators, etc.""" + + # Test with tuple + class TupleGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + return ("model-1", "model-2", "model-3") + + config = RemoteInferenceProviderConfig() + mixin = TupleGetModelsAdapter(config=config) + result = await mixin.list_models() + assert result is not None + assert len(result) == 3 + + # Test with generator + class GeneratorGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + def gen(): + yield "gen-model-1" + yield "gen-model-2" + + return gen() + + mixin = GeneratorGetModelsAdapter(config=config) + result = await mixin.list_models() + assert result is not None + assert len(result) == 2 + + # Test with set (order may vary) + class SetGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + return {"set-model-1", "set-model-2"} + + mixin = SetGetModelsAdapter(config=config) + result = await mixin.list_models() + assert result is not None + assert len(result) == 2 + + async def test_get_models_exception_propagates(self): + """Test that when get_models() raises an exception, it propagates to the caller""" + + class FailingGetModelsAdapter(OpenAIMixinImpl): + async def get_models(self) -> Iterable[str] | None: + # Simulate an exception during custom model listing + raise RuntimeError("Failed to fetch custom models") + + config = RemoteInferenceProviderConfig() + mixin = FailingGetModelsAdapter(config=config) + + # Exception should propagate and not fall back to client.models.list() + with pytest.raises(RuntimeError, match="Failed to fetch custom models"): + await mixin.list_models() + + class TestOpenAIMixinProviderDataApiKey: """Test cases for provider_data_api_key_field functionality""" @pytest.fixture def mixin_with_provider_data_field(self): """Mixin instance with provider_data_api_key_field set""" - mixin_instance = OpenAIMixinWithProviderData() + config = RemoteInferenceProviderConfig() + mixin_instance = OpenAIMixinWithProviderData(config=config) # Mock provider_spec for provider data validation mock_provider_spec = MagicMock()
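Taken together, the new `TestOpenAIMixinCustomGetModels` cases pin down a small contract for the `get_models()` hook: return an iterable of model ID strings (list, tuple, set, or generator) to bypass `client.models.list()`, return `None` to fall back to it, and expect non-string items to raise `ValueError`, with `allowed_models` filtering and `embedding_model_metadata` typing still applied by `list_models()`. A minimal sketch of an adapter built on that contract follows; the class name, endpoint, and model IDs are illustrative only, and the `get_api_key()`/`get_base_url()` overrides assume the same `OpenAIMixin` interface exercised by these tests.

```python
from collections.abc import Iterable

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleInferenceAdapter(OpenAIMixin):
    """Hypothetical adapter demonstrating the get_models() override hook."""

    config: RemoteInferenceProviderConfig

    # IDs listed here are registered as embedding models by list_models();
    # all other IDs default to ModelType.llm.
    embedding_model_metadata: dict[str, dict[str, int]] = {
        "example-embedding": {"embedding_dimension": 768, "context_length": 512},
    }

    def get_api_key(self) -> str:
        return "example-api-key"  # placeholder; a real adapter would read config/provider data

    def get_base_url(self) -> str:
        return "https://api.example.test/v1"  # placeholder endpoint

    async def get_models(self) -> Iterable[str] | None:
        # Returning an iterable of ID strings short-circuits client.models.list();
        # returning None would fall back to it instead. allowed_models filtering
        # is still applied by list_models() afterwards.
        return ["example-llm", "example-embedding"]
```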