forked from phoenix/litellm-mirror

LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893)

* fix(langfuse.py): support new langfuse prompt_chat class init params
* fix(langfuse.py): handle new init values on prompt chat + prompt text templates; fixes error caused during langfuse logging
* docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route. Fixes https://github.com/BerriAI/litellm/issues/5876
* fix(utils.py): handle unmapped gemini model optional param translation. Fixes https://github.com/BerriAI/litellm/issues/5888
* fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1. Fixes https://github.com/BerriAI/litellm/issues/5884
* fix(prisma_client.py): refresh iam token. Fixes https://github.com/BerriAI/litellm/issues/5896
* fix: pass drop params where required
* fix(utils.py): pass drop_params correctly
* fix(types/vertex_ai.py): fix generation config
* test(test_max_completion_tokens.py): fix test
* fix(vertex_and_google_ai_studio_gemini.py): fix map openai params

Parent: 16c0307eab
Commit: a1d9e96b31
22 changed files with 755 additions and 292 deletions
@@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes:

 1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint.

-2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint.
+2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route].

 2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints.
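Note: a hedged sketch of the routing rule this doc change describes. The model name, proxy URL, and prompt below are placeholders, not values from the commit:

```python
import litellm

# `openai/` prefix -> routed through the openai client's /chat/completions endpoint
chat_resp = litellm.completion(
    model="openai/my-hosted-model",          # placeholder model name
    api_base="http://localhost:8080/v1",     # placeholder proxy base url (no extra path appended)
    messages=[{"role": "user", "content": "hi"}],
)

# `text-completion-openai/` prefix -> routed through /completions
text_resp = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    api_base="http://localhost:8080/v1",
    prompt="hi",
)
```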
@@ -1,6 +1,7 @@
 #### What this does ####
 # On success, logs events to Langfuse
 import copy
+import inspect
 import os
 import traceback

@@ -676,21 +677,37 @@ def _add_prompt_to_generation_params(
     elif "version" in user_prompt and "prompt" in user_prompt:
         # prompts
         if isinstance(user_prompt["prompt"], str):
-            _prompt_obj = Prompt_Text(
-                name=user_prompt["name"],
-                prompt=user_prompt["prompt"],
-                version=user_prompt["version"],
-                config=user_prompt.get("config", None),
-            )
+            prompt_text_params = getattr(
+                Prompt_Text, "model_fields", Prompt_Text.__fields__
+            )
+            _data = {
+                "name": user_prompt["name"],
+                "prompt": user_prompt["prompt"],
+                "version": user_prompt["version"],
+                "config": user_prompt.get("config", None),
+            }
+            if "labels" in prompt_text_params and "tags" in prompt_text_params:
+                _data["labels"] = user_prompt.get("labels", []) or []
+                _data["tags"] = user_prompt.get("tags", []) or []
+            _prompt_obj = Prompt_Text(**_data)  # type: ignore
             generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj)

         elif isinstance(user_prompt["prompt"], list):
-            _prompt_obj = Prompt_Chat(
-                name=user_prompt["name"],
-                prompt=user_prompt["prompt"],
-                version=user_prompt["version"],
-                config=user_prompt.get("config", None),
-            )
+            prompt_chat_params = getattr(
+                Prompt_Chat, "model_fields", Prompt_Chat.__fields__
+            )
+            _data = {
+                "name": user_prompt["name"],
+                "prompt": user_prompt["prompt"],
+                "version": user_prompt["version"],
+                "config": user_prompt.get("config", None),
+            }
+            if "labels" in prompt_chat_params and "tags" in prompt_chat_params:
+                _data["labels"] = user_prompt.get("labels", []) or []
+                _data["tags"] = user_prompt.get("tags", []) or []
+
+            _prompt_obj = Prompt_Chat(**_data)  # type: ignore
+
             generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj)
         else:
             verbose_logger.error(
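Note: the `getattr(Prompt_Text, "model_fields", Prompt_Text.__fields__)` pattern above lets the logger introspect the installed langfuse prompt class (pydantic v2 exposes `model_fields`, v1 exposes `__fields__`) and only pass `labels`/`tags` when that class actually declares them. A minimal standalone sketch of the same feature-detection idea, using a hypothetical model rather than the langfuse class:

```python
from typing import Optional
from pydantic import BaseModel


class ExamplePrompt(BaseModel):  # hypothetical stand-in for Prompt_Text / Prompt_Chat
    name: str
    prompt: str
    version: int
    labels: Optional[list] = None


# pydantic v2 exposes `model_fields`; v1 exposes `__fields__`
fields = getattr(ExamplePrompt, "model_fields", ExamplePrompt.__fields__)

data = {"name": "greeting", "prompt": "hi {{name}}", "version": 1}
if "labels" in fields:  # only pass the kwarg if this model version knows it
    data["labels"] = ["staging"]

print(ExamplePrompt(**data))
```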
@@ -125,7 +125,11 @@ class OpenAIGPTConfig:
         return base_params + model_specific_params

     def _map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         supported_openai_params = self.get_supported_openai_params(model)
         for param, value in non_default_params.items():

@@ -134,10 +138,15 @@ class OpenAIGPTConfig:
         return optional_params

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         return self._map_openai_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
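Note: every provider config touched by this commit gains an explicit `drop_params` argument on `map_openai_params` instead of relying only on the global `litellm.drop_params`. A hedged sketch of calling the updated OpenAI mapping directly (`litellm.OpenAIGPTConfig` is referenced elsewhere in this diff; the parameter values are illustrative):

```python
import litellm

# map_openai_params now requires an explicit drop_params flag (see hunk above)
optional_params = litellm.OpenAIGPTConfig().map_openai_params(
    non_default_params={"temperature": 0.2, "max_tokens": 100},  # illustrative values
    optional_params={},
    model="gpt-4o",
    drop_params=True,
)
print(optional_params)
```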
@@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig):
             "parallel_tool_calls",
             "function_call",
             "functions",
-            "temperature",
             "top_p",
             "n",
             "presence_penalty",

@@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig):
         ]

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_tokens" in non_default_params:
             optional_params["max_completion_tokens"] = non_default_params.pop(
                 "max_tokens"
             )
-        return super()._map_openai_params(non_default_params, optional_params, model)
+        if "temperature" in non_default_params:
+            temperature_value: Optional[float] = non_default_params.pop("temperature")
+            if temperature_value is not None:
+                if temperature_value == 0 or temperature_value == 1:
+                    optional_params["temperature"] = temperature_value
+                else:
+                    ## UNSUPPORTED TOOL CHOICE VALUE
+                    if litellm.drop_params is True or drop_params is True:
+                        pass
+                    else:
+                        raise litellm.utils.UnsupportedParamsError(
+                            message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
+                                temperature_value
+                            ),
+                            status_code=400,
+                        )
+
+        return super()._map_openai_params(
+            non_default_params, optional_params, model, drop_params
+        )

     def is_model_o1_reasoning_model(self, model: str) -> bool:
         if model in litellm.open_ai_chat_completion_models and "o1" in model:
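Note: the practical effect of the o1 change above is that `temperature=1` (the only value o1 models accept besides 0) now passes validation, while other values either raise `UnsupportedParamsError` or are dropped when `drop_params` is set. A hedged end-to-end sketch (model name and messages are placeholders; an OpenAI API key is assumed to be configured):

```python
import litellm

# temperature=1 is forwarded as-is; no UnsupportedParamsError is raised anymore
resp = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "hello"}],
    temperature=1,
)

# temperature=0.7 is not supported by o1; with drop_params=True it is dropped
# from the request instead of raising an error
resp = litellm.completion(
    model="o1-preview",
    messages=[{"role": "user", "content": "hello"}],
    temperature=0.7,
    drop_params=True,
)
```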
@@ -413,7 +413,11 @@ class OpenAIConfig:
         return optional_params

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ) -> dict:
         """ """
         if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):

@@ -421,11 +425,13 @@ class OpenAIConfig:
                 non_default_params=non_default_params,
                 optional_params=optional_params,
                 model=model,
+                drop_params=drop_params,
             )
         return litellm.OpenAIGPTConfig().map_openai_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import (
     Tools,
 )

-from ..common_utils import get_supports_system_message, get_supports_response_schema
+from ..common_utils import get_supports_response_schema, get_supports_system_message
 from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history

@@ -73,8 +73,14 @@ def _transform_request_body(
     safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop(
         "safety_settings", None
     )  # type: ignore
+    config_fields = GenerationConfig.__annotations__.keys()
+
+    filtered_params = {
+        k: v for k, v in optional_params.items() if k in config_fields
+    }
+
     generation_config: Optional[GenerationConfig] = GenerationConfig(
-        **optional_params
+        **filtered_params
     )
     data = RequestBody(contents=content)
     if system_instructions is not None:

@@ -104,7 +110,7 @@ def sync_transform_request_body(
     timeout: Optional[Union[float, httpx.Timeout]],
     extra_headers: Optional[dict],
     optional_params: dict,
-    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
     custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
     litellm_params: dict,
 ) -> RequestBody:

@@ -146,7 +152,7 @@ async def async_transform_request_body(
     timeout: Optional[Union[float, httpx.Timeout]],
     extra_headers: Optional[dict],
     optional_params: dict,
-    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
+    logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,  # type: ignore
     custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
     litellm_params: dict,
 ) -> RequestBody:

@@ -199,6 +205,7 @@ def _transform_system_message(
     if supports_system_message is True:
         for idx, message in enumerate(messages):
             if message["role"] == "system":
+                _system_content_block: Optional[PartType] = None
                 if isinstance(message["content"], str):
                     _system_content_block = PartType(text=message["content"])
                 elif isinstance(message["content"], list):

@@ -206,6 +213,7 @@ def _transform_system_message(
                     for content in message["content"]:
                         system_text += content.get("text") or ""
                     _system_content_block = PartType(text=system_text)
+                if _system_content_block is not None:
                     system_content_blocks.append(_system_content_block)
                 system_prompt_indices.append(idx)
         if len(system_prompt_indices) > 0:
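Note: the `_transform_request_body` change filters `optional_params` down to the keys that the `GenerationConfig` TypedDict actually declares, so an unmapped provider param no longer breaks the Gemini request. A small self-contained sketch of the same filtering idea, using a hypothetical TypedDict rather than litellm's own:

```python
from typing import TypedDict


class GenConfig(TypedDict, total=False):  # hypothetical stand-in for GenerationConfig
    temperature: float
    max_output_tokens: int
    top_p: float


optional_params = {"temperature": 0.3, "max_output_tokens": 128, "custom_flag": True}

# keep only keys the TypedDict declares, mirroring the hunk above
config_fields = GenConfig.__annotations__.keys()
filtered = {k: v for k, v in optional_params.items() if k in config_fields}

assert "custom_flag" not in filtered
```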
@@ -252,233 +252,6 @@ class VertexAIConfig:
         ]

-
-class GoogleAIStudioGeminiConfig:  # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
-    """
-    Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
-
-    The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
-
-    - `temperature` (float): This controls the degree of randomness in token selection.
-
-    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
-
-    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
-
-    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
-
-    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
-
-    - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
-
-    - `candidate_count` (int): Number of generated responses to return.
-
-    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
-
-    Note: Please make sure to modify the default parameters as required for your use case.
-    """
-
-    temperature: Optional[float] = None
-    max_output_tokens: Optional[int] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    response_mime_type: Optional[str] = None
-    response_schema: Optional[dict] = None
-    candidate_count: Optional[int] = None
-    stop_sequences: Optional[list] = None
-
-    def __init__(
-        self,
-        temperature: Optional[float] = None,
-        max_output_tokens: Optional[int] = None,
-        top_p: Optional[float] = None,
-        top_k: Optional[int] = None,
-        response_mime_type: Optional[str] = None,
-        response_schema: Optional[dict] = None,
-        candidate_count: Optional[int] = None,
-        stop_sequences: Optional[list] = None,
-    ) -> None:
-        locals_ = locals()
-        for key, value in locals_.items():
-            if key != "self" and value is not None:
-                setattr(self.__class__, key, value)
-
-    @classmethod
-    def get_config(cls):
-        return {
-            k: v
-            for k, v in cls.__dict__.items()
-            if not k.startswith("__")
-            and not isinstance(
-                v,
-                (
-                    types.FunctionType,
-                    types.BuiltinFunctionType,
-                    classmethod,
-                    staticmethod,
-                ),
-            )
-            and v is not None
-        }
-
-    def get_supported_openai_params(self):
-        return [
-            "temperature",
-            "top_p",
-            "max_tokens",
-            "max_completion_tokens",
-            "stream",
-            "tools",
-            "tool_choice",
-            "functions",
-            "response_format",
-            "n",
-            "stop",
-        ]
-
-    def _map_function(self, value: List[dict]) -> List[Tools]:
-        gtool_func_declarations = []
-        googleSearchRetrieval: Optional[dict] = None
-
-        for tool in value:
-            openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
-                None
-            )
-            if "function" in tool:  # tools list
-                openai_function_object = ChatCompletionToolParamFunctionChunk(  # type: ignore
-                    **tool["function"]
-                )
-            elif "name" in tool:  # functions list
-                openai_function_object = ChatCompletionToolParamFunctionChunk(**tool)  # type: ignore
-
-            # check if grounding
-            if tool.get("googleSearchRetrieval", None) is not None:
-                googleSearchRetrieval = tool["googleSearchRetrieval"]
-            elif openai_function_object is not None:
-                gtool_func_declaration = FunctionDeclaration(
-                    name=openai_function_object["name"],
-                    description=openai_function_object.get("description", ""),
-                    parameters=openai_function_object.get("parameters", {}),
-                )
-                gtool_func_declarations.append(gtool_func_declaration)
-            else:
-                # assume it's a provider-specific param
-                verbose_logger.warning(
-                    "Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request."
-                )
-
-        _tools = Tools(
-            function_declarations=gtool_func_declarations,
-        )
-        if googleSearchRetrieval is not None:
-            _tools["googleSearchRetrieval"] = googleSearchRetrieval
-        return [_tools]
-
-    def map_tool_choice_values(
-        self, model: str, tool_choice: Union[str, dict]
-    ) -> Optional[ToolConfig]:
-        if tool_choice == "none":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
-        elif tool_choice == "required":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
-        elif tool_choice == "auto":
-            return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
-        elif isinstance(tool_choice, dict):
-            # only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
-            name = tool_choice.get("function", {}).get("name", "")
-            return ToolConfig(
-                functionCallingConfig=FunctionCallingConfig(
-                    mode="ANY", allowed_function_names=[name]
-                )
-            )
-        else:
-            raise litellm.utils.UnsupportedParamsError(
-                message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
-                    tool_choice
-                ),
-                status_code=400,
-            )
-
-    def map_openai_params(
-        self,
-        model: str,
-        non_default_params: dict,
-        optional_params: dict,
-    ):
-        for param, value in non_default_params.items():
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if (
-                param == "stream" and value is True
-            ):  # sending stream = False, can cause it to get passed unchecked and raise issues
-                optional_params["stream"] = value
-            if param == "n":
-                optional_params["candidate_count"] = value
-            if param == "stop":
-                if isinstance(value, str):
-                    optional_params["stop_sequences"] = [value]
-                elif isinstance(value, list):
-                    optional_params["stop_sequences"] = value
-            if param == "max_tokens" or param == "max_completion_tokens":
-                optional_params["max_output_tokens"] = value
-            if param == "response_format":  # type: ignore
-                if value["type"] == "json_object":  # type: ignore
-                    optional_params["response_mime_type"] = "application/json"
-                elif value["type"] == "text":  # type: ignore
-                    optional_params["response_mime_type"] = "text/plain"
-                if "response_schema" in value:  # type: ignore
-                    optional_params["response_mime_type"] = "application/json"
-                    optional_params["response_schema"] = value["response_schema"]  # type: ignore
-                elif value["type"] == "json_schema":  # type: ignore
-                    if "json_schema" in value and "schema" in value["json_schema"]:  # type: ignore
-                        optional_params["response_mime_type"] = "application/json"
-                        optional_params["response_schema"] = value["json_schema"]["schema"]  # type: ignore
-            if (param == "tools" or param == "functions") and isinstance(value, list):
-                optional_params["tools"] = self._map_function(value=value)
-                optional_params["litellm_param_is_function_call"] = (
-                    True if param == "functions" else False
-                )
-            if param == "tool_choice" and (
-                isinstance(value, str) or isinstance(value, dict)
-            ):
-                _tool_choice_value = self.map_tool_choice_values(
-                    model=model, tool_choice=value  # type: ignore
-                )
-                if _tool_choice_value is not None:
-                    optional_params["tool_choice"] = _tool_choice_value
-        return optional_params
-
-    def get_mapped_special_auth_params(self) -> dict:
-        """
-        Common auth params across bedrock/vertex_ai/azure/watsonx
-        """
-        return {"project": "vertex_project", "region_name": "vertex_location"}
-
-    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
-        mapped_params = self.get_mapped_special_auth_params()
-
-        for param, value in non_default_params.items():
-            if param in mapped_params:
-                optional_params[mapped_params[param]] = value
-        return optional_params
-
-    def get_flagged_finish_reasons(self) -> Dict[str, str]:
-        """
-        Return Dictionary of finish reasons which indicate response was flagged
-
-        and what it means
-        """
-        return {
-            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
-            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
-            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
-            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
-            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
-        }
-
-
 class VertexGeminiConfig:
     """
     Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts

@@ -752,6 +525,108 @@ class VertexGeminiConfig:
         return exception_string


+class GoogleAIStudioGeminiConfig(
+    VertexGeminiConfig
+):  # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
+    """
+    Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
+
+    The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
+
+    - `temperature` (float): This controls the degree of randomness in token selection.
+
+    - `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
+
+    - `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
+
+    - `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
+
+    - `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
+
+    - `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
+
+    - `candidate_count` (int): Number of generated responses to return.
+
+    - `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
+
+    Note: Please make sure to modify the default parameters as required for your use case.
+    """
+
+    temperature: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    response_mime_type: Optional[str] = None
+    response_schema: Optional[dict] = None
+    candidate_count: Optional[int] = None
+    stop_sequences: Optional[list] = None
+
+    def __init__(
+        self,
+        temperature: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        response_mime_type: Optional[str] = None,
+        response_schema: Optional[dict] = None,
+        candidate_count: Optional[int] = None,
+        stop_sequences: Optional[list] = None,
+    ) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "temperature",
+            "top_p",
+            "max_tokens",
+            "max_completion_tokens",
+            "stream",
+            "tools",
+            "tool_choice",
+            "functions",
+            "response_format",
+            "n",
+            "stop",
+        ]
+
+    def map_openai_params(
+        self,
+        model: str,
+        non_default_params: Dict,
+        optional_params: Dict,
+        drop_params: bool,
+    ):
+        # drop frequency_penalty and presence_penalty
+        if "frequency_penalty" in non_default_params:
+            del non_default_params["frequency_penalty"]
+        if "presence_penalty" in non_default_params:
+            del non_default_params["presence_penalty"]
+        return super().map_openai_params(
+            model, non_default_params, optional_params, drop_params
+        )
+
+
 async def make_call(
     client: Optional[AsyncHTTPHandler],
     api_base: str,
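Note: since `GoogleAIStudioGeminiConfig` now subclasses `VertexGeminiConfig`, its `map_openai_params` only strips the two penalty params Google AI Studio rejects and then defers to the shared Vertex mapping. A hedged sketch of what that means for callers, assuming the class is importable from the top-level `litellm` package as elsewhere in this diff (the parameter values are illustrative):

```python
import litellm

config = litellm.GoogleAIStudioGeminiConfig()

mapped = config.map_openai_params(
    model="gemini-1.5-flash",
    non_default_params={"temperature": 0.4, "frequency_penalty": 0.2},
    optional_params={},
    drop_params=True,
)

# frequency_penalty is removed before the shared VertexGeminiConfig mapping runs,
# so it never reaches the Google AI Studio request body
print(mapped)
```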
@@ -44,7 +44,11 @@ class VertexAIAi21Config:
         return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_completion_tokens" in non_default_params:
             non_default_params["max_tokens"] = non_default_params.pop(

@@ -54,4 +58,5 @@ class VertexAIAi21Config:
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -50,7 +50,11 @@ class VertexAILlama3Config:
         return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")

     def map_openai_params(
-        self, non_default_params: dict, optional_params: dict, model: str
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
     ):
         if "max_completion_tokens" in non_default_params:
             non_default_params["max_tokens"] = non_default_params.pop(

@@ -60,4 +64,5 @@ class VertexAILlama3Config:
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
@@ -31,15 +31,21 @@ model_list:
   - model_name: "anthropic/*"
     litellm_params:
       model: "anthropic/*"
-  - model_name: "openai/*"
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
   - model_name: "fireworks_ai/*"
     litellm_params:
       model: "fireworks_ai/*"
       configurable_clientside_auth_params: ["api_base"]
+  - model_name: "gemini-flash-experimental"
+    litellm_params:
+      model: "vertex_ai/gemini-flash-experimental"

 litellm_settings:
-  success_callback: ["langfuse"]
-  cache: true
+  success_callback: ["langfuse", "prometheus"]
+  failure_callback: ["prometheus"]
+
+general_settings:
+  proxy_budget_rescheduler_min_time: 1
+  proxy_budget_rescheduler_max_time: 1
@@ -1,5 +1,5 @@
 import os
-from typing import Optional, Union
+from typing import Any, Optional, Union

 import httpx

@@ -34,7 +34,7 @@ def init_rds_client(
     # Iterate over parameters and update if needed
     for i, param in enumerate(params_to_check):
         if param and param.startswith("os.environ/"):
-            params_to_check[i] = get_secret(param)
+            params_to_check[i] = get_secret(param)  # type: ignore
     # Assign updated values back to parameters
     (
         aws_access_key_id,

@@ -62,13 +62,13 @@ def init_rds_client(
     import boto3

     if isinstance(timeout, float):
-        config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)
+        config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)  # type: ignore
     elif isinstance(timeout, httpx.Timeout):
-        config = boto3.session.Config(
+        config = boto3.session.Config(  # type: ignore
             connect_timeout=timeout.connect, read_timeout=timeout.read
         )
     else:
-        config = boto3.session.Config()
+        config = boto3.session.Config()  # type: ignore

     ### CHECK STS ###
     if (

@@ -105,6 +105,7 @@ def init_rds_client(
             region_name=region_name,
             config=config,
         )
+
     elif aws_role_name is not None and aws_session_name is not None:
         # use sts if role name passed in
         sts_client = boto3.client(

@@ -144,6 +145,7 @@ def init_rds_client(
             region_name=region_name,
             config=config,
         )
+
     else:
         # aws_access_key_id is None, assume user is trying to auth using env variables
         # boto3 automatically reads env variables

@@ -157,11 +159,14 @@ def init_rds_client(
     return client


-def generate_iam_auth_token(db_host, db_port, db_user) -> str:
+def generate_iam_auth_token(
+    db_host, db_port, db_user, client: Optional[Any] = None
+) -> str:
     from urllib.parse import quote

     import boto3

+    if client is None:
         boto_client = init_rds_client(
             aws_region_name=os.getenv("AWS_REGION_NAME"),
             aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),

@@ -173,9 +178,12 @@ def generate_iam_auth_token(db_host, db_port, db_user) -> str:
                 "AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
             ),
         )
+    else:
+        boto_client = client

     token = boto_client.generate_db_auth_token(
         DBHostname=db_host, Port=db_port, DBUsername=db_user
     )
     cleaned_token = quote(token, safe="")

     return cleaned_token
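Note: `generate_iam_auth_token` now accepts an optional pre-built boto3 RDS client, which is what allows the proxy to re-use one client when it refreshes expiring IAM tokens. A hedged usage sketch (the region, host, and user values are placeholders):

```python
import boto3
from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token

# re-use one RDS client instead of rebuilding it on every token refresh
rds_client = boto3.client("rds", region_name="us-west-2")  # placeholder region

token = generate_iam_auth_token(
    db_host="my-db.cluster-xyz.us-west-2.rds.amazonaws.com",  # placeholder host
    db_port="5432",
    db_user="litellm_proxy",                                  # placeholder user
    client=rds_client,                                        # new optional argument
)
print(token[:20], "...")
```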
litellm/proxy/db/prisma_client.py (new file, 106 lines)

@@ -0,0 +1,106 @@
+import asyncio
+import os
+import urllib
+import urllib.parse
+from datetime import datetime, timedelta
+from typing import Any, Callable, Optional
+
+
+class PrismaWrapper:
+    def __init__(self, original_prisma: Any, iam_token_db_auth: bool):
+        self._original_prisma = original_prisma
+        self.iam_token_db_auth = iam_token_db_auth
+
+    def is_token_expired(self, token_url: Optional[str]) -> bool:
+        if token_url is None:
+            return True
+        # Decode the token URL to handle URL-encoded characters
+        decoded_url = urllib.parse.unquote(token_url)
+
+        # Parse the token URL
+        parsed_url = urllib.parse.urlparse(decoded_url)
+
+        # Parse the query parameters from the path component (if they exist there)
+        query_params = urllib.parse.parse_qs(parsed_url.query)
+
+        # Get expiration time from the query parameters
+        expires = query_params.get("X-Amz-Expires", [None])[0]
+        if expires is None:
+            raise ValueError("X-Amz-Expires parameter is missing or invalid.")
+
+        expires_int = int(expires)
+
+        # Get the token's creation time from the X-Amz-Date parameter
+        token_time_str = query_params.get("X-Amz-Date", [""])[0]
+        if not token_time_str:
+            raise ValueError("X-Amz-Date parameter is missing or invalid.")
+
+        # Ensure the token time string is parsed correctly
+        try:
+            token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ")
+        except ValueError as e:
+            raise ValueError(f"Invalid X-Amz-Date format: {e}")
+
+        # Calculate the expiration time
+        expiration_time = token_time + timedelta(seconds=expires_int)
+
+        # Current time in UTC
+        current_time = datetime.utcnow()
+
+        # Check if the token is expired
+        return current_time > expiration_time
+
+    def get_rds_iam_token(self) -> Optional[str]:
+        if self.iam_token_db_auth:
+            from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token
+
+            db_host = os.getenv("DATABASE_HOST")
+            db_port = os.getenv("DATABASE_PORT")
+            db_user = os.getenv("DATABASE_USER")
+            db_name = os.getenv("DATABASE_NAME")
+            db_schema = os.getenv("DATABASE_SCHEMA")
+
+            token = generate_iam_auth_token(
+                db_host=db_host, db_port=db_port, db_user=db_user
+            )
+
+            # print(f"token: {token}")
+            _db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}"
+            if db_schema:
+                _db_url += f"?schema={db_schema}"
+
+            os.environ["DATABASE_URL"] = _db_url
+            return _db_url
+        return None
+
+    async def recreate_prisma_client(
+        self, new_db_url: str, http_client: Optional[Any] = None
+    ):
+        from prisma import Prisma  # type: ignore
+
+        if http_client is not None:
+            self._original_prisma = Prisma(http=http_client)
+        else:
+            self._original_prisma = Prisma()
+
+        await self._original_prisma.connect()
+
+    def __getattr__(self, name: str):
+        original_attr = getattr(self._original_prisma, name)
+        if self.iam_token_db_auth:
+            db_url = os.getenv("DATABASE_URL")
+            if self.is_token_expired(db_url):
+                db_url = self.get_rds_iam_token()
+                loop = asyncio.get_event_loop()
+
+                if db_url:
+                    if loop.is_running():
+                        asyncio.run_coroutine_threadsafe(
+                            self.recreate_prisma_client(db_url), loop
+                        )
+                    else:
+                        asyncio.run(self.recreate_prisma_client(db_url))
+                else:
+                    raise ValueError("Failed to get RDS IAM token")
+
+        return original_attr
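Note: the expiry check in `PrismaWrapper.is_token_expired` works purely off the presigned token URL's `X-Amz-Date` and `X-Amz-Expires` query parameters. A small self-contained sketch of that calculation with a fabricated example URL (the host and timestamps below are dummies):

```python
import urllib.parse
from datetime import datetime, timedelta

# dummy presigned-style URL; only the two query params matter for the check
token_url = (
    "https://my-db.example.us-west-2.rds.amazonaws.com:5432/"
    "?Action=connect&X-Amz-Date=20240925T170000Z&X-Amz-Expires=900"
)

query = urllib.parse.parse_qs(urllib.parse.urlparse(token_url).query)
issued_at = datetime.strptime(query["X-Amz-Date"][0], "%Y%m%dT%H%M%SZ")
lifetime = timedelta(seconds=int(query["X-Amz-Expires"][0]))

expired = datetime.utcnow() > issued_at + lifetime
print("token expired:", expired)
```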
@@ -40,7 +40,7 @@ def append_query_params(url, params) -> str:
     parsed_query.update(params)
     encoded_query = urlparse.urlencode(parsed_query, doseq=True)
     modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query))
-    return modified_url
+    return modified_url  # type: ignore


 def run_ollama_serve():

@@ -287,7 +287,7 @@ def run_server(
         save_worker_config,
     )
     if version == True:
-        pkg_version = importlib.metadata.version("litellm")
+        pkg_version = importlib.metadata.version("litellm")  # type: ignore
        click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n")
        return
    if model and "ollama" in model and api_base is None:

@@ -338,14 +338,14 @@ def run_server(
        futures = []
        start_time = time.time()
        # Make concurrent calls
-       with concurrent.futures.ThreadPoolExecutor(
+       with concurrent.futures.ThreadPoolExecutor(  # type: ignore
            max_workers=concurrent_calls
        ) as executor:
            for _ in range(concurrent_calls):
                futures.append(executor.submit(_make_openai_completion))

        # Wait for all futures to complete
-       concurrent.futures.wait(futures)
+       concurrent.futures.wait(futures)  # type: ignore

        # Summarize the results
        successful_calls = 0

@@ -476,6 +476,7 @@ def run_server(
                _db_url += f"?schema={db_schema}"

            os.environ["DATABASE_URL"] = _db_url
+           os.environ["IAM_TOKEN_DB_AUTH"] = "True"

        ### DECRYPT ENV VAR ###

@@ -600,8 +601,9 @@ def run_server(
                    0, os.path.abspath("../..")
                )  # Adds the parent directory to the system path - for litellm local dev
                import litellm
+               from litellm import get_secret_str

-               database_url = litellm.get_secret(database_url, default_value=None)
+               database_url = get_secret_str(database_url, default_value=None)
                os.chdir(original_dir)
                if database_url is not None and isinstance(database_url, str):
                    os.environ["DATABASE_URL"] = database_url

@@ -650,6 +652,8 @@ def run_server(
                        subprocess.run(["prisma", "db", "push", "--accept-data-loss"])
                        break  # Exit the loop if the subprocess succeeds
                    except subprocess.CalledProcessError as e:
+                       import time
+
                        print(f"Error: {e}")  # noqa
                        time.sleep(random.randrange(start=1, stop=5))
                    finally:

@@ -728,12 +732,16 @@ def run_server(

    def load_config(self):
        # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config
+       if self.cfg is not None:
            config = {
                key: value
                for key, value in self.options.items()
                if key in self.cfg.settings and value is not None
            }
+       else:
+           config = {}
        for key, value in config.items():
+           if self.cfg is not None:
                self.cfg.set(key.lower(), value)

    def load(self):
@@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import (
     create_missing_views,
     should_create_missing_views,
 )
+from litellm.proxy.db.prisma_client import PrismaWrapper
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.secret_managers.main import str_to_bool
 from litellm.types.utils import CallTypes, LoggedLiteLLMParams

 if TYPE_CHECKING:

@@ -1017,6 +1019,9 @@ class PrismaClient:
         )
         ## init logging object
         self.proxy_logging_obj = proxy_logging_obj
+        self.iam_token_db_auth: Optional[bool] = str_to_bool(
+            os.getenv("IAM_TOKEN_DB_AUTH")
+        )
         try:
             from prisma import Prisma  # type: ignore
         except Exception as e:

@@ -1043,9 +1048,23 @@ class PrismaClient:
         from prisma import Prisma  # type: ignore

         verbose_proxy_logger.debug("Connecting Prisma Client to DB..")
         if http_client is not None:
-            self.db = Prisma(http=http_client)
+            self.db = PrismaWrapper(
+                original_prisma=Prisma(http=http_client),
+                iam_token_db_auth=(
+                    self.iam_token_db_auth
+                    if self.iam_token_db_auth is not None
+                    else False
+                ),
+            )
         else:
-            self.db = Prisma()  # Client to connect to Prisma db
+            self.db = PrismaWrapper(
+                original_prisma=Prisma(),
+                iam_token_db_auth=(
+                    self.iam_token_db_auth
+                    if self.iam_token_db_auth is not None
+                    else False
+                ),
+            )  # Client to connect to Prisma db
         verbose_proxy_logger.debug("Success - Connected Prisma Client to DB")

     def hash_token(self, token: str):

@@ -1141,9 +1160,9 @@ class PrismaClient:
                     "LiteLLM_VerificationTokenView Created in DB!"
                 )
             else:
-                should_create_views = await should_create_missing_views(db=self.db)
+                should_create_views = await should_create_missing_views(db=self.db.db)  # type: ignore
                 if should_create_views:
-                    await create_missing_views(db=self.db)
+                    await create_missing_views(db=self.db)  # type: ignore
                 else:
                     # don't block execution if these views are missing
                     # Convert lists to sets for efficient difference calculation
@@ -29,7 +29,7 @@ def _is_base64(s):
     return False


-def str_to_bool(value: str) -> Optional[bool]:
+def str_to_bool(value: Optional[str]) -> Optional[bool]:
     """
     Converts a string to a boolean if it's a recognized boolean string.
     Returns None if the string is not a recognized boolean value.

@@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]:
     :param value: The string to be checked.
     :return: True or False if the string is a recognized boolean, otherwise None.
     """
+    if value is None:
+        return None
+
     true_values = {"true"}
     false_values = {"false"}
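Note: with the widened signature, `str_to_bool` can be fed `os.getenv(...)` directly, which is exactly how the proxy reads `IAM_TOKEN_DB_AUTH` in the hunk above; an unset variable now yields `None` instead of raising. A short hedged sketch (assuming the function recognizes the `"True"` value the proxy CLI sets):

```python
import os
from litellm.secret_managers.main import str_to_bool

# unset env var -> os.getenv returns None -> str_to_bool now returns None
print(str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")))  # None when the var is unset

os.environ["IAM_TOKEN_DB_AUTH"] = "True"  # same value proxy_cli.py sets
print(str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")))
```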
@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging():
|
||||||
)
|
)
|
||||||
|
|
||||||
langfuse_client.get_trace(id=trace_id)
|
langfuse_client.get_trace(id=trace_id)
|
||||||
|
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
generation_params = {
|
||||||
|
"name": "litellm-acompletion",
|
||||||
|
"id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX",
|
||||||
|
"start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778),
|
||||||
|
"end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141),
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"model_parameters": {
|
||||||
|
"stream": False,
|
||||||
|
"max_retries": 0,
|
||||||
|
"extra_body": "{}",
|
||||||
|
"system_fingerprint": "fp_52a7f40b0b",
|
||||||
|
},
|
||||||
|
"input": {
|
||||||
|
"messages": [
|
||||||
|
{"content": "<>", "role": "system"},
|
||||||
|
{"content": "<>", "role": "user"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"output": {
|
||||||
|
"content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": None,
|
||||||
|
"function_call": None,
|
||||||
|
},
|
||||||
|
"usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038},
|
||||||
|
"metadata": {
|
||||||
|
"prompt": {
|
||||||
|
"name": "conversational-service-answer_question_restricted_reply",
|
||||||
|
"version": 9,
|
||||||
|
"config": {},
|
||||||
|
"labels": ["latest", "staging", "production"],
|
||||||
|
"tags": ["conversational-service"],
|
||||||
|
"prompt": [
|
                {"role": "system", "content": "<>"},
                {"role": "user", "content": "{{text}}"},
            ],
        },
        "requester_metadata": {
            "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
            "trace_name": "jess",
            "tags": ["conversational-service", "generative-ai-engine", "staging"],
            "prompt": {
                "name": "conversational-service-answer_question_restricted_reply",
                "version": 9,
                "config": {},
                "labels": ["latest", "staging", "production"],
                "tags": ["conversational-service"],
                "prompt": [
                    {"role": "system", "content": "<>"},
                    {"role": "user", "content": "{{text}}"},
                ],
            },
        },
        "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
        "litellm_api_version": "0.0.0",
        "user_api_key_user_id": "default_user_id",
        "user_api_key_spend": 0.0,
        "user_api_key_metadata": {},
        "requester_ip_address": "127.0.0.1",
        "model_group": "gpt-4o",
        "model_group_size": 0,
        "deployment": "gpt-4o",
        "model_info": {
            "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "db_model": False,
        },
        "hidden_params": {
            "headers": {
                "date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "content-type": "application/json",
                "transfer-encoding": "chunked",
                "connection": "keep-alive",
                "access-control-expose-headers": "X-Request-ID",
                "openai-organization": "reliablekeystest",
                "openai-processing-ms": "329",
                "openai-version": "2020-10-01",
                "strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "x-ratelimit-limit-requests": "10000",
                "x-ratelimit-limit-tokens": "30000000",
                "x-ratelimit-remaining-requests": "9999",
                "x-ratelimit-remaining-tokens": "29999980",
                "x-ratelimit-reset-requests": "6ms",
                "x-ratelimit-reset-tokens": "0s",
                "x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "cf-cache-status": "DYNAMIC",
                "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "x-content-type-options": "nosniff",
                "server": "cloudflare",
                "cf-ray": "8c8cc573becb232c-SJC",
                "content-encoding": "gzip",
                "alt-svc": 'h3=":443"; ma=86400',
            },
            "additional_headers": {
                "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "llm_provider-content-type": "application/json",
                "llm_provider-transfer-encoding": "chunked",
                "llm_provider-connection": "keep-alive",
                "llm_provider-access-control-expose-headers": "X-Request-ID",
                "llm_provider-openai-organization": "reliablekeystest",
                "llm_provider-openai-processing-ms": "329",
                "llm_provider-openai-version": "2020-10-01",
                "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "llm_provider-x-ratelimit-limit-requests": "10000",
                "llm_provider-x-ratelimit-limit-tokens": "30000000",
                "llm_provider-x-ratelimit-remaining-requests": "9999",
                "llm_provider-x-ratelimit-remaining-tokens": "29999980",
                "llm_provider-x-ratelimit-reset-requests": "6ms",
                "llm_provider-x-ratelimit-reset-tokens": "0s",
                "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "llm_provider-cf-cache-status": "DYNAMIC",
                "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "llm_provider-x-content-type-options": "nosniff",
                "llm_provider-server": "cloudflare",
                "llm_provider-cf-ray": "8c8cc573becb232c-SJC",
                "llm_provider-content-encoding": "gzip",
                "llm_provider-alt-svc": 'h3=":443"; ma=86400',
            },
            "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
            "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "api_base": "https://api.openai.com",
            "optional_params": {
                "stream": False,
                "max_retries": 0,
                "extra_body": {},
            },
            "response_cost": 0.00038,
        },
        "litellm_response_cost": 0.00038,
        "api_base": "https://api.openai.com/v1/",
        "cache_hit": False,
    },
    "level": "DEFAULT",
    "version": None,
}


@pytest.mark.parametrize(
    "prompt",
    [
        [
            {"role": "system", "content": "<>"},
            {"role": "user", "content": "{{text}}"},
        ],
        "hello world",
    ],
)
def test_langfuse_prompt_type(prompt):

    from litellm.integrations.langfuse import _add_prompt_to_generation_params

    clean_metadata = {
        "prompt": {
            "name": "conversational-service-answer_question_restricted_reply",
            "version": 9,
            "config": {},
            "labels": ["latest", "staging", "production"],
            "tags": ["conversational-service"],
            "prompt": prompt,
        },
        "requester_metadata": {
            "session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
            "trace_name": "jess",
            "tags": ["conversational-service", "generative-ai-engine", "staging"],
            "prompt": {
                "name": "conversational-service-answer_question_restricted_reply",
                "version": 9,
                "config": {},
                "labels": ["latest", "staging", "production"],
                "tags": ["conversational-service"],
                "prompt": [
                    {"role": "system", "content": "<>"},
                    {"role": "user", "content": "{{text}}"},
                ],
            },
        },
        "user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
        "litellm_api_version": "0.0.0",
        "user_api_key_user_id": "default_user_id",
        "user_api_key_spend": 0.0,
        "user_api_key_metadata": {},
        "requester_ip_address": "127.0.0.1",
        "model_group": "gpt-4o",
        "model_group_size": 0,
        "deployment": "gpt-4o",
        "model_info": {
            "id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "db_model": False,
        },
        "hidden_params": {
            "headers": {
                "date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "content-type": "application/json",
                "transfer-encoding": "chunked",
                "connection": "keep-alive",
                "access-control-expose-headers": "X-Request-ID",
                "openai-organization": "reliablekeystest",
                "openai-processing-ms": "329",
                "openai-version": "2020-10-01",
                "strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "x-ratelimit-limit-requests": "10000",
                "x-ratelimit-limit-tokens": "30000000",
                "x-ratelimit-remaining-requests": "9999",
                "x-ratelimit-remaining-tokens": "29999980",
                "x-ratelimit-reset-requests": "6ms",
                "x-ratelimit-reset-tokens": "0s",
                "x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "cf-cache-status": "DYNAMIC",
                "set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "x-content-type-options": "nosniff",
                "server": "cloudflare",
                "cf-ray": "8c8cc573becb232c-SJC",
                "content-encoding": "gzip",
                "alt-svc": 'h3=":443"; ma=86400',
            },
            "additional_headers": {
                "llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
                "llm_provider-content-type": "application/json",
                "llm_provider-transfer-encoding": "chunked",
                "llm_provider-connection": "keep-alive",
                "llm_provider-access-control-expose-headers": "X-Request-ID",
                "llm_provider-openai-organization": "reliablekeystest",
                "llm_provider-openai-processing-ms": "329",
                "llm_provider-openai-version": "2020-10-01",
                "llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
                "llm_provider-x-ratelimit-limit-requests": "10000",
                "llm_provider-x-ratelimit-limit-tokens": "30000000",
                "llm_provider-x-ratelimit-remaining-requests": "9999",
                "llm_provider-x-ratelimit-remaining-tokens": "29999980",
                "llm_provider-x-ratelimit-reset-requests": "6ms",
                "llm_provider-x-ratelimit-reset-tokens": "0s",
                "llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
                "llm_provider-cf-cache-status": "DYNAMIC",
                "llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
                "llm_provider-x-content-type-options": "nosniff",
                "llm_provider-server": "cloudflare",
                "llm_provider-cf-ray": "8c8cc573becb232c-SJC",
                "llm_provider-content-encoding": "gzip",
                "llm_provider-alt-svc": 'h3=":443"; ma=86400',
            },
            "litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
            "model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
            "api_base": "https://api.openai.com",
            "optional_params": {"stream": False, "max_retries": 0, "extra_body": {}},
            "response_cost": 0.00038,
        },
        "litellm_response_cost": 0.00038,
        "api_base": "https://api.openai.com/v1/",
        "cache_hit": False,
    }
    _add_prompt_to_generation_params(
        generation_params=generation_params, clean_metadata=clean_metadata
    )
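
The parametrized test above only checks that `_add_prompt_to_generation_params` accepts both a chat-style prompt (a list of role/content dicts) and a plain-text prompt without raising. A minimal follow-up assertion that could sit at the end of the test body is sketched below; it assumes the helper returns the updated `generation_params` with the resolved Langfuse prompt attached under a "prompt" key, which is an assumption about its return shape rather than something this diff asserts.

    # Sketch only (assumption): the helper returns the updated generation_params
    # dict with the resolved Langfuse prompt object stored under "prompt".
    result = _add_prompt_to_generation_params(
        generation_params=generation_params, clean_metadata=clean_metadata
    )
    assert isinstance(result, dict)
    assert "prompt" in result  # hypothetical check; adjust to the real return shape
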
@@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False):
     presence_penalty: float
     frequency_penalty: float
     response_mime_type: Literal["text/plain", "application/json"]
+    response_schema: dict
     seed: int
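
The new `response_schema` field slots into the existing `GenerationConfig` TypedDict alongside `response_mime_type`. Below is a small, self-contained sketch of how a caller might populate it to request schema-constrained JSON output; the TypedDict is re-declared here with only the fields visible in the hunk above, and the schema contents are purely illustrative.

from typing import Literal, TypedDict


class GenerationConfig(TypedDict, total=False):
    # Only the fields visible in the hunk above are reproduced here.
    presence_penalty: float
    frequency_penalty: float
    response_mime_type: Literal["text/plain", "application/json"]
    response_schema: dict
    seed: int


# Request JSON output that conforms to a schema (illustrative values).
config: GenerationConfig = {
    "response_mime_type": "application/json",
    "response_schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
    "seed": 42,
}
print(config["response_schema"]["required"])  # ["answer"]
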
@@ -3239,8 +3239,15 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
-    elif custom_llm_provider == "vertex_ai_beta":
+    elif custom_llm_provider == "vertex_ai_beta" or (
+        custom_llm_provider == "vertex_ai" and "gemini" in model
+    ):
         supported_params = get_supported_openai_params(
             model=model, custom_llm_provider=custom_llm_provider
         )
@@ -3277,6 +3284,11 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
     elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models:
         supported_params = get_supported_openai_params(
@@ -3301,6 +3313,11 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=(
+                drop_params
+                if drop_params is not None and isinstance(drop_params, bool)
+                else False
+            ),
         )
     elif custom_llm_provider == "sagemaker":
         ## check if unsupported param passed in
@@ -3710,6 +3727,7 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
     elif custom_llm_provider == "openrouter":
         supported_params = get_supported_openai_params(
@@ -3818,6 +3836,7 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
             model=model,
+            drop_params=drop_params,
         )
     elif custom_llm_provider == "azure":
         supported_params = get_supported_openai_params(
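
The same `drop_params` coercion expression is repeated at several of the call sites above. For clarity, here is a stand-alone sketch of that normalization logic with a few checks; the PR itself inlines the expression rather than introducing a helper like this.

from typing import Optional


def normalize_drop_params(drop_params: Optional[bool]) -> bool:
    """Coerce drop_params to a strict bool, defaulting to False.

    Mirrors the inline expression used in the hunks above.
    """
    return (
        drop_params
        if drop_params is not None and isinstance(drop_params, bool)
        else False
    )


assert normalize_drop_params(None) is False
assert normalize_drop_params(True) is True
assert normalize_drop_params(False) is False
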
package-lock.json (generated file, 56 changed lines)

@@ -5,12 +5,53 @@
   "packages": {
     "": {
       "dependencies": {
+        "prisma": "^5.17.0",
         "react-copy-to-clipboard": "^5.1.0"
       },
       "devDependencies": {
         "@types/react-copy-to-clipboard": "^5.0.7"
       }
     },
+    "node_modules/@prisma/debug": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
+      "integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg=="
+    },
+    "node_modules/@prisma/engines": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
+      "integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/fetch-engine": "5.17.0",
+        "@prisma/get-platform": "5.17.0"
+      }
+    },
+    "node_modules/@prisma/engines-version": {
+      "version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+      "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
+      "integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg=="
+    },
+    "node_modules/@prisma/fetch-engine": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
+      "integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
+      "dependencies": {
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/get-platform": "5.17.0"
+      }
+    },
+    "node_modules/@prisma/get-platform": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
+      "integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
+      "dependencies": {
+        "@prisma/debug": "5.17.0"
+      }
+    },
     "node_modules/@types/prop-types": {
       "version": "15.7.12",
       "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz",
@@ -74,6 +115,21 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/prisma": {
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
+      "integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "@prisma/engines": "5.17.0"
+      },
+      "bin": {
+        "prisma": "build/index.js"
+      },
+      "engines": {
+        "node": ">=16.13"
+      }
+    },
     "node_modules/prop-types": {
       "version": "15.8.1",
       "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",

@@ -1,5 +1,6 @@
 {
   "dependencies": {
+    "prisma": "^5.17.0",
     "react-copy-to-clipboard": "^5.1.0"
   },
   "devDependencies": {

@@ -141,12 +141,12 @@ def test_all_model_configs():
         "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
     )
     assert VertexAILlama3Config().map_openai_params(
-        {"max_completion_tokens": 10}, {}, "llama3"
+        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
     ) == {"max_tokens": 10}

     assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
     assert VertexAIAi21Config().map_openai_params(
-        {"max_completion_tokens": 10}, {}, "llama3"
+        {"max_completion_tokens": 10}, {}, "llama3", drop_params=False
     ) == {"max_tokens": 10}

     from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
@@ -332,6 +332,7 @@ def test_all_model_configs():
         model="gemini-1.0-pro",
         non_default_params={"max_completion_tokens": 10},
         optional_params={},
+        drop_params=False,
     ) == {"max_output_tokens": 10}

     assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()
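
These assertions exercise the provider config classes directly; the same mapping is also reachable through `get_optional_params`, which passes `drop_params` through to the provider config mapping in the hunks further up. A hedged sketch of that path is below: the model and provider values are chosen for illustration, and the exact output is an expectation based on the assertion above rather than something this hunk verifies.

from litellm.utils import get_optional_params

# max_completion_tokens should be translated to the provider's native field,
# e.g. max_output_tokens for Gemini on Vertex AI (per the assertion above).
params = get_optional_params(
    model="gemini-1.0-pro",
    custom_llm_provider="vertex_ai_beta",
    max_completion_tokens=10,
)
print(params)  # expected to include {"max_output_tokens": 10}
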
@@ -600,3 +600,35 @@ def test_o1_model_params():
     )
     assert optional_params["seed"] == 10
     assert optional_params["user"] == "John"
+
+
+@pytest.mark.parametrize(
+    "temperature, expected_error",
+    [(0.2, True), (1, False)],
+)
+def test_o1_model_temperature_params(temperature, expected_error):
+    if expected_error:
+        with pytest.raises(litellm.UnsupportedParamsError):
+            get_optional_params(
+                model="o1-preview-2024-09-12",
+                custom_llm_provider="openai",
+                temperature=temperature,
+            )
+    else:
+        get_optional_params(
+            model="o1-preview-2024-09-12",
+            custom_llm_provider="openai",
+            temperature=temperature,
+        )
+
+
+def test_unmapped_gemini_model_params():
+    """
+    Test if unmapped gemini model optional params are translated correctly
+    """
+    optional_params = get_optional_params(
+        model="gemini-new-model",
+        custom_llm_provider="vertex_ai",
+        stop="stop_word",
+    )
+    assert optional_params["stop_sequences"] == ["stop_word"]
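
Taken together, the two new tests document a pair of translation rules: o1 models reject any `temperature` other than `1` with `litellm.UnsupportedParamsError`, and a gemini model name that has no explicit mapping still gets OpenAI-style params translated on `vertex_ai` (`stop` becomes `stop_sequences`). The usage sketch below simply mirrors the calls made in the tests above.

import litellm
from litellm.utils import get_optional_params

# Rule 1: o1 models only accept the default temperature of 1.
try:
    get_optional_params(
        model="o1-preview-2024-09-12",
        custom_llm_provider="openai",
        temperature=0.2,
    )
except litellm.UnsupportedParamsError as e:
    print(f"temperature rejected as expected: {e}")

# Rule 2: an unmapped gemini model still gets OpenAI params translated.
params = get_optional_params(
    model="gemini-new-model",
    custom_llm_provider="vertex_ai",
    stop="stop_word",
)
assert params["stop_sequences"] == ["stop_word"]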