diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index b6f201fae..241218dca 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -451,7 +451,7 @@ class ReferenceBatchesImpl(Batches):
                 ]
 
                 for param, expected_type, type_string in required_params:
-                    if param not in body:
+                    if param not in request_body:
                         errors.append(
                             BatchError(
                                 code="invalid_request",
@@ -461,7 +461,7 @@ class ReferenceBatchesImpl(Batches):
                             )
                         )
                         valid = False
-                    elif not isinstance(body[param], expected_type):
+                    elif not isinstance(request_body[param], expected_type):
                         errors.append(
                             BatchError(
                                 code="invalid_request",
@@ -472,15 +472,15 @@ class ReferenceBatchesImpl(Batches):
                         )
                         valid = False
 
-                if "model" in body and isinstance(body["model"], str):
+                if "model" in request_body and isinstance(request_body["model"], str):
                     try:
-                        await self.models_api.get_model(body["model"])
+                        await self.models_api.get_model(request_body["model"])
                     except Exception:
                         errors.append(
                             BatchError(
                                 code="model_not_found",
                                 line=line_num,
-                                message=f"Model '{body['model']}' does not exist or is not supported",
+                                message=f"Model '{request_body['model']}' does not exist or is not supported",
                                 param="body.model",
                             )
                         )
@@ -488,14 +488,14 @@ class ReferenceBatchesImpl(Batches):
 
                 if valid:
                     assert isinstance(url, str), "URL must be a string"  # for mypy
-                    assert isinstance(body, dict), "Body must be a dictionary"  # for mypy
+                    assert isinstance(request_body, dict), "Body must be a dictionary"  # for mypy
                     requests.append(
                         BatchRequest(
                             line_num=line_num,
                             url=url,
                             method=request["method"],
                             custom_id=request["custom_id"],
-                            body=body,
+                            body=request_body,
                         ),
                     )
             except json.JSONDecodeError:
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index e31ebf7c5..4caa4004d 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -6,6 +6,7 @@
 
 from collections.abc import Iterable
+from typing import Any, cast
 
 from together import AsyncTogether
 from together.constants import BASE_URL
 
@@ -81,10 +82,11 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         if params.dimensions is not None:
             raise ValueError("Together's embeddings endpoint does not support dimensions param.")
 
+        # Cast encoding_format to match OpenAI SDK's expected Literal type
         response = await self.client.embeddings.create(
             model=await self._get_provider_model_id(params.model),
             input=params.input,
-            encoding_format=params.encoding_format,
+            encoding_format=cast(Any, params.encoding_format),
         )
 
         response.model = (
@@ -97,6 +99,8 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
             logger.warning(
                 f"Together's embedding endpoint for {params.model} did not return usage information, substituting -1s."
            )
-            response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
+            # Cast to allow monkey-patching the response object
+            response.usage = cast(Any, OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1))
 
-        return response  # type: ignore[no-any-return]
+        # Together's CreateEmbeddingResponse is compatible with OpenAIEmbeddingsResponse after monkey-patching
+        return cast(OpenAIEmbeddingsResponse, response)
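
Note on the typing changes in together.py: typing.cast() performs no runtime
conversion and returns its argument unchanged; it only changes the type the
static checker assigns to the value. A minimal sketch of the pattern (the
names below are illustrative, not part of the patch):

    from typing import Any, cast

    value: Any = {"prompt_tokens": -1, "total_tokens": -1}
    usage = cast(dict, value)  # no runtime conversion happens here
    assert usage is value      # cast() hands back the same object

This is why casting response.usage and the return value satisfies mypy without
altering the monkey-patched response object at runtime.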