mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-13 04:22:35 +00:00
feat(api)!: support passing extra_body to embeddings and vector_stores APIs
Applies the same pattern from #3777 to embeddings and vector_stores.create() endpoints. Breaking change: Method signatures now accept a single params object with Pydantic extra="allow" instead of individual parameters. Provider-specific params can be passed via extra_body and accessed through params.model_extra. Updated APIs: openai_embeddings(), openai_create_vector_store(), openai_create_vector_store_file_batch()
This commit is contained in:
parent
cfd2e303db
commit
74e2976c1e
20 changed files with 364 additions and 297 deletions
|
|
@ -20,6 +20,7 @@ from llama_stack.apis.inference import (
|
|||
OpenAICompletion,
|
||||
OpenAICompletionRequestWithExtraBody,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsRequestWithExtraBody,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
ToolChoice,
|
||||
|
|
@ -189,16 +190,12 @@ class LiteLLMOpenAIMixin(
|
|||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
input: str | list[str],
|
||||
encoding_format: str | None = "float",
|
||||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
params: OpenAIEmbeddingsRequestWithExtraBody,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
model_obj = await self.model_store.get_model(params.model)
|
||||
|
||||
# Convert input to list if it's a string
|
||||
input_list = [input] if isinstance(input, str) else input
|
||||
input_list = [params.input] if isinstance(params.input, str) else params.input
|
||||
|
||||
# Call litellm embedding function
|
||||
# litellm.drop_params = True
|
||||
|
|
@ -207,11 +204,11 @@ class LiteLLMOpenAIMixin(
|
|||
input=input_list,
|
||||
api_key=self.get_api_key(),
|
||||
api_base=self.api_base,
|
||||
dimensions=dimensions,
|
||||
dimensions=params.dimensions,
|
||||
)
|
||||
|
||||
# Convert response to OpenAI format
|
||||
data = b64_encode_openai_embeddings_response(response.data, encoding_format)
|
||||
data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
|
||||
|
||||
usage = OpenAIEmbeddingUsage(
|
||||
prompt_tokens=response["usage"]["prompt_tokens"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue