Merge pull request #4156 from BerriAI/litellm_fix_mistral_user_inserted

[Fix] `user` was inserted in Proxy Server embedding requests + added param mapping for mistral
Ishaan Jaff 2024-06-12 16:03:36 -07:00 committed by GitHub
commit 469e920a69
8 changed files with 72 additions and 11 deletions

@@ -202,6 +202,7 @@ jobs:
           -e REDIS_PORT=$REDIS_PORT \
           -e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \
           -e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \
+          -e MISTRAL_API_KEY=$MISTRAL_API_KEY \
           -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
           -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
           -e AWS_REGION_NAME=$AWS_REGION_NAME \

@@ -787,6 +787,7 @@ from .llms.openai import (
     OpenAIConfig,
     OpenAITextCompletionConfig,
     MistralConfig,
+    MistralEmbeddingConfig,
     DeepInfraConfig,
 )
 from .llms.azure import (

@@ -164,6 +164,49 @@ class MistralConfig:
         return optional_params
 
 
+class MistralEmbeddingConfig:
+    """
+    Reference: https://docs.mistral.ai/api/#operation/createEmbedding
+    """
+
+    def __init__(
+        self,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "encoding_format",
+        ]
+
+    def map_openai_params(self, non_default_params: dict, optional_params: dict):
+        for param, value in non_default_params.items():
+            if param == "encoding_format":
+                optional_params["encoding_format"] = value
+        return optional_params
+
+
 class DeepInfraConfig:
     """
     Reference: https://deepinfra.com/docs/advanced/openai_api
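A quick usage sketch of the new config in isolation (illustration only, not part of the diff; it assumes MistralEmbeddingConfig is re-exported at the litellm top level, which the import hunk above and the litellm.MistralEmbeddingConfig() call in the utils hunk below both suggest):

import litellm

config = litellm.MistralEmbeddingConfig()

# Only encoding_format is advertised for Mistral embeddings.
print(config.get_supported_openai_params())  # ['encoding_format']

# map_openai_params copies supported params into optional_params and ignores the rest.
mapped = config.map_openai_params(
    non_default_params={"encoding_format": "float"},
    optional_params={},
)
print(mapped)  # {'encoding_format': 'float'}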

@@ -79,10 +79,6 @@ async def add_litellm_data_to_request(
             data["cache"][k] = v
 
     verbose_proxy_logger.debug("receiving data: %s", data)
-    # users can pass in 'user' param to /chat/completions. Don't override it
-    if data.get("user", None) is None and user_api_key_dict.user_id is not None:
-        # if users are using user_api_key_auth, set `user` in `data`
-        data["user"] = user_api_key_dict.user_id
 
     if "metadata" not in data:
         data["metadata"] = {}

@@ -14,10 +14,9 @@ model_list:
     litellm_params:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: my-triton-model
+  - model_name: mistral-embed
     litellm_params:
-      model: triton/any"
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/triton/embeddings
+      model: mistral/mistral-embed
 
 general_settings:
   master_key: sk-1234
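With that model_list entry, a client can reach the new deployment through the proxy's OpenAI-compatible endpoint. A minimal sketch, assuming the proxy is running locally on port 4000 with the sk-1234 master key and MISTRAL_API_KEY set in its environment:

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# model_name "mistral-embed" routes to mistral/mistral-embed per the config above.
response = client.embeddings.create(model="mistral-embed", input=["hello world"])
print(len(response.data[0].embedding))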

@@ -4943,7 +4943,18 @@ def get_optional_params_embeddings(
                 message=f"Setting user/encoding format is not supported by {custom_llm_provider}. To drop it from the call, set `litellm.drop_params = True`.",
             )
         return {**non_default_params, **kwargs}
+    if custom_llm_provider == "mistral":
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider="mistral",
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.MistralEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params, optional_params={}
+        )
+        final_params = {**optional_params, **kwargs}
+        return final_params
     if (
         custom_llm_provider != "openai"
         and custom_llm_provider != "azure"
@@ -6352,7 +6363,10 @@ def get_supported_openai_params(
             "max_retries",
         ]
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig().get_supported_openai_params()
+        if request_type == "chat_completion":
+            return litellm.MistralConfig().get_supported_openai_params()
+        elif request_type == "embeddings":
+            return litellm.MistralEmbeddingConfig().get_supported_openai_params()
     elif custom_llm_provider == "replicate":
         return [
             "stream",

@@ -85,6 +85,9 @@ model_list:
     litellm_params:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: mistral-embed
+    litellm_params:
+      model: mistral/mistral-embed
   - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
     litellm_params:
       model: text-completion-openai/gpt-3.5-turbo-instruct

@@ -22,6 +22,7 @@ async def generate_key(
         "text-embedding-ada-002",
         "dall-e-2",
         "fake-openai-endpoint-2",
+        "mistral-embed",
     ],
 ):
     url = "http://0.0.0.0:4000/key/generate"
@@ -197,14 +198,14 @@ async def completion(session, key):
     return response
 
 
-async def embeddings(session, key):
+async def embeddings(session, key, model="text-embedding-ada-002"):
     url = "http://0.0.0.0:4000/embeddings"
     headers = {
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
     data = {
-        "model": "text-embedding-ada-002",
+        "model": model,
         "input": ["hello world"],
     }
@@ -408,6 +409,9 @@ async def test_embeddings():
         key_2 = key_gen["key"]
         await embeddings(session=session, key=key_2)
 
+        # embedding request with non OpenAI model
+        await embeddings(session=session, key=key, model="mistral-embed")
+
 
 @pytest.mark.asyncio
 async def test_image_generation():