forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (09/25/2024) (#5893)
* fix(langfuse.py): support new langfuse prompt_chat class init params
* fix(langfuse.py): handle new init values on prompt chat + prompt text templates; fixes error caused during langfuse logging
* docs(openai_compatible.md): clarify `openai/` handles correct routing for `/v1/completions` route. Fixes https://github.com/BerriAI/litellm/issues/5876
* fix(utils.py): handle unmapped gemini model optional param translation. Fixes https://github.com/BerriAI/litellm/issues/5888
* fix(o1_transformation.py): fix o-1 validation, to not raise error if temperature=1. Fixes https://github.com/BerriAI/litellm/issues/5884
* fix(prisma_client.py): refresh iam token. Fixes https://github.com/BerriAI/litellm/issues/5896
* fix: pass drop params where required
* fix(utils.py): pass drop_params correctly
* fix(types/vertex_ai.py): fix generation config
* test(test_max_completion_tokens.py): fix test
* fix(vertex_and_google_ai_studio_gemini.py): fix map openai params
This commit is contained in:
parent 16c0307eab
commit a1d9e96b31
22 changed files with 755 additions and 292 deletions
|
@ -7,7 +7,7 @@ To call models hosted behind an openai proxy, make 2 changes:
|
|||
|
||||
1. For `/chat/completions`: Put `openai/` in front of your model name, so litellm knows you're trying to call an openai `/chat/completions` endpoint.
|
||||
|
||||
2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint.
|
||||
2. For `/completions`: Put `text-completion-openai/` in front of your model name, so litellm knows you're trying to call an openai `/completions` endpoint. [NOT REQUIRED for `openai/` endpoints called via `/v1/completions` route].
|
||||
|
||||
2. **Do NOT** add anything additional to the base url e.g. `/v1/embedding`. LiteLLM uses the openai-client to make these calls, and that automatically adds the relevant endpoints.
|
||||
|
||||
|
|
|
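A minimal sketch of the routing described above, assuming a hypothetical OpenAI-compatible server at `http://localhost:8000/v1` (model name, URL, and key are illustrative):

```python
import litellm

# `openai/` prefix -> litellm calls the server's /chat/completions route.
chat_resp = litellm.completion(
    model="openai/my-hosted-model",        # hypothetical model name
    api_base="http://localhost:8000/v1",   # assumed proxy base URL
    api_key="sk-anything",
    messages=[{"role": "user", "content": "hello"}],
)

# `text-completion-openai/` prefix -> litellm calls the /completions route.
text_resp = litellm.text_completion(
    model="text-completion-openai/my-hosted-model",
    api_base="http://localhost:8000/v1",
    api_key="sk-anything",
    prompt="hello",
)
```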
@ -1,6 +1,7 @@
|
|||
#### What this does ####
|
||||
# On success, logs events to Langfuse
|
||||
import copy
|
||||
import inspect
|
||||
import os
|
||||
import traceback
|
||||
|
||||
|
@ -676,21 +677,37 @@ def _add_prompt_to_generation_params(
|
|||
elif "version" in user_prompt and "prompt" in user_prompt:
|
||||
# prompts
|
||||
if isinstance(user_prompt["prompt"], str):
|
||||
_prompt_obj = Prompt_Text(
|
||||
name=user_prompt["name"],
|
||||
prompt=user_prompt["prompt"],
|
||||
version=user_prompt["version"],
|
||||
config=user_prompt.get("config", None),
|
||||
prompt_text_params = getattr(
|
||||
Prompt_Text, "model_fields", Prompt_Text.__fields__
|
||||
)
|
||||
_data = {
|
||||
"name": user_prompt["name"],
|
||||
"prompt": user_prompt["prompt"],
|
||||
"version": user_prompt["version"],
|
||||
"config": user_prompt.get("config", None),
|
||||
}
|
||||
if "labels" in prompt_text_params and "tags" in prompt_text_params:
|
||||
_data["labels"] = user_prompt.get("labels", []) or []
|
||||
_data["tags"] = user_prompt.get("tags", []) or []
|
||||
_prompt_obj = Prompt_Text(**_data) # type: ignore
|
||||
generation_params["prompt"] = TextPromptClient(prompt=_prompt_obj)
|
||||
|
||||
elif isinstance(user_prompt["prompt"], list):
|
||||
_prompt_obj = Prompt_Chat(
|
||||
name=user_prompt["name"],
|
||||
prompt=user_prompt["prompt"],
|
||||
version=user_prompt["version"],
|
||||
config=user_prompt.get("config", None),
|
||||
prompt_chat_params = getattr(
|
||||
Prompt_Chat, "model_fields", Prompt_Chat.__fields__
|
||||
)
|
||||
_data = {
|
||||
"name": user_prompt["name"],
|
||||
"prompt": user_prompt["prompt"],
|
||||
"version": user_prompt["version"],
|
||||
"config": user_prompt.get("config", None),
|
||||
}
|
||||
if "labels" in prompt_chat_params and "tags" in prompt_chat_params:
|
||||
_data["labels"] = user_prompt.get("labels", []) or []
|
||||
_data["tags"] = user_prompt.get("tags", []) or []
|
||||
|
||||
_prompt_obj = Prompt_Chat(**_data) # type: ignore
|
||||
|
||||
generation_params["prompt"] = ChatPromptClient(prompt=_prompt_obj)
|
||||
else:
|
||||
verbose_logger.error(
|
||||
|
|
|
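The `getattr(Prompt_Text, "model_fields", Prompt_Text.__fields__)` lookup above is a pydantic v1/v2 compatibility check: v2 models expose `model_fields`, v1 models expose `__fields__`. A standalone sketch of the same idea, using a hypothetical model rather than the langfuse classes:

```python
from typing import Optional

from pydantic import BaseModel


class ExamplePrompt(BaseModel):
    name: str
    prompt: str
    version: int
    labels: Optional[list] = None  # only present in newer schema versions


def build_prompt_kwargs(user_prompt: dict) -> dict:
    # Works on pydantic v1 (__fields__) and v2 (model_fields).
    known_fields = getattr(ExamplePrompt, "model_fields", ExamplePrompt.__fields__)
    data = {
        "name": user_prompt["name"],
        "prompt": user_prompt["prompt"],
        "version": user_prompt["version"],
    }
    # Only forward optional fields the installed schema actually declares.
    if "labels" in known_fields:
        data["labels"] = user_prompt.get("labels", []) or []
    return data


print(build_prompt_kwargs({"name": "p", "prompt": "hi", "version": 1, "labels": ["prod"]}))
```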
@ -125,7 +125,11 @@ class OpenAIGPTConfig:
|
|||
return base_params + model_specific_params
|
||||
|
||||
def _map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
supported_openai_params = self.get_supported_openai_params(model)
|
||||
for param, value in non_default_params.items():
|
||||
|
@ -134,10 +138,15 @@ class OpenAIGPTConfig:
|
|||
return optional_params
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
return self._map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
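With the widened signature, callers now pass `drop_params` explicitly instead of relying on the global flag alone. A minimal sketch of direct usage (values are illustrative, and the top-level `litellm.OpenAIGPTConfig` export is assumed from this version):

```python
import litellm

# Map OpenAI-style params for a model, stating explicitly whether unsupported
# params should be dropped (drop_params=True) or raise (drop_params=False).
optional_params = litellm.OpenAIGPTConfig().map_openai_params(
    non_default_params={"max_tokens": 100, "temperature": 0.2},
    optional_params={},
    model="gpt-4o",
    drop_params=False,
)
print(optional_params)  # {'max_tokens': 100, 'temperature': 0.2}
```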
@ -57,7 +57,6 @@ class OpenAIO1Config(OpenAIGPTConfig):
|
|||
"parallel_tool_calls",
|
||||
"function_call",
|
||||
"functions",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"n",
|
||||
"presence_penalty",
|
||||
|
@ -73,13 +72,36 @@ class OpenAIO1Config(OpenAIGPTConfig):
|
|||
]
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_tokens" in non_default_params:
|
||||
optional_params["max_completion_tokens"] = non_default_params.pop(
|
||||
"max_tokens"
|
||||
)
|
||||
return super()._map_openai_params(non_default_params, optional_params, model)
|
||||
if "temperature" in non_default_params:
|
||||
temperature_value: Optional[float] = non_default_params.pop("temperature")
|
||||
if temperature_value is not None:
|
||||
if temperature_value == 0 or temperature_value == 1:
|
||||
optional_params["temperature"] = temperature_value
|
||||
else:
|
||||
## UNSUPPORTED TEMPERATURE VALUE
|
||||
if litellm.drop_params is True or drop_params is True:
|
||||
pass
|
||||
else:
|
||||
raise litellm.utils.UnsupportedParamsError(
|
||||
message="O-1 doesn't support temperature={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
|
||||
temperature_value
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
return super()._map_openai_params(
|
||||
non_default_params, optional_params, model, drop_params
|
||||
)
|
||||
|
||||
def is_model_o1_reasoning_model(self, model: str) -> bool:
|
||||
if model in litellm.open_ai_chat_completion_models and "o1" in model:
|
||||
|
|
|
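The effect of the o-1 temperature check above, seen from the caller's side (mirrors the new `test_o1_model_temperature_params` test added later in this diff):

```python
import litellm
from litellm.utils import get_optional_params

# temperature of 0 or 1 is passed through unchanged for o1 models.
params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=1,
)

# Any other value raises UnsupportedParamsError...
try:
    get_optional_params(
        model="o1-preview-2024-09-12",
        custom_llm_provider="openai",
        temperature=0.2,
    )
except litellm.UnsupportedParamsError as e:
    print("rejected:", e)

# ...unless drop_params is set, in which case the param is silently dropped.
params = get_optional_params(
    model="o1-preview-2024-09-12",
    custom_llm_provider="openai",
    temperature=0.2,
    drop_params=True,
)
```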
@ -413,7 +413,11 @@ class OpenAIConfig:
|
|||
return optional_params
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
""" """
|
||||
if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model):
|
||||
|
@ -421,11 +425,13 @@ class OpenAIConfig:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
return litellm.OpenAIGPTConfig().map_openai_params(
|
||||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import (
|
|||
Tools,
|
||||
)
|
||||
|
||||
from ..common_utils import get_supports_system_message, get_supports_response_schema
|
||||
from ..common_utils import get_supports_response_schema, get_supports_system_message
|
||||
from ..vertex_ai_non_gemini import _gemini_convert_messages_with_history
|
||||
|
||||
|
||||
|
@ -73,8 +73,14 @@ def _transform_request_body(
|
|||
safety_settings: Optional[List[SafetSettingsConfig]] = optional_params.pop(
|
||||
"safety_settings", None
|
||||
) # type: ignore
|
||||
config_fields = GenerationConfig.__annotations__.keys()
|
||||
|
||||
filtered_params = {
|
||||
k: v for k, v in optional_params.items() if k in config_fields
|
||||
}
|
||||
|
||||
generation_config: Optional[GenerationConfig] = GenerationConfig(
|
||||
**optional_params
|
||||
**filtered_params
|
||||
)
|
||||
data = RequestBody(contents=content)
|
||||
if system_instructions is not None:
|
||||
|
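The `GenerationConfig.__annotations__.keys()` filter above keeps only the keys the `GenerationConfig` TypedDict declares, so unmapped or provider-specific entries in `optional_params` no longer leak into the request config. The same idea in isolation, with a hypothetical TypedDict:

```python
from typing import TypedDict


class ExampleGenerationConfig(TypedDict, total=False):
    temperature: float
    top_p: float
    max_output_tokens: int
    stop_sequences: list


optional_params = {
    "temperature": 0.1,
    "stop_sequences": ["stop_word"],
    "some_unmapped_param": True,  # not declared by the config, so it gets filtered out
}

# Keep only keys the config type actually declares.
config_fields = ExampleGenerationConfig.__annotations__.keys()
filtered_params = {k: v for k, v in optional_params.items() if k in config_fields}
print(filtered_params)  # {'temperature': 0.1, 'stop_sequences': ['stop_word']}
```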
@ -104,7 +110,7 @@ def sync_transform_request_body(
|
|||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
extra_headers: Optional[dict],
|
||||
optional_params: dict,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
|
||||
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
|
||||
litellm_params: dict,
|
||||
) -> RequestBody:
|
||||
|
@ -146,7 +152,7 @@ async def async_transform_request_body(
|
|||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
extra_headers: Optional[dict],
|
||||
optional_params: dict,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore
|
||||
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
|
||||
litellm_params: dict,
|
||||
) -> RequestBody:
|
||||
|
@ -199,6 +205,7 @@ def _transform_system_message(
|
|||
if supports_system_message is True:
|
||||
for idx, message in enumerate(messages):
|
||||
if message["role"] == "system":
|
||||
_system_content_block: Optional[PartType] = None
|
||||
if isinstance(message["content"], str):
|
||||
_system_content_block = PartType(text=message["content"])
|
||||
elif isinstance(message["content"], list):
|
||||
|
@ -206,6 +213,7 @@ def _transform_system_message(
|
|||
for content in message["content"]:
|
||||
system_text += content.get("text") or ""
|
||||
_system_content_block = PartType(text=system_text)
|
||||
if _system_content_block is not None:
|
||||
system_content_blocks.append(_system_content_block)
|
||||
system_prompt_indices.append(idx)
|
||||
if len(system_prompt_indices) > 0:
|
||||
|
|
|
@ -252,233 +252,6 @@ class VertexAIConfig:
|
|||
]
|
||||
|
||||
|
||||
class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
|
||||
"""
|
||||
Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
|
||||
|
||||
The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
|
||||
|
||||
- `temperature` (float): This controls the degree of randomness in token selection.
|
||||
|
||||
- `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
|
||||
|
||||
- `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
|
||||
|
||||
- `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
|
||||
|
||||
- `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
|
||||
|
||||
- `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
|
||||
|
||||
- `candidate_count` (int): Number of generated responses to return.
|
||||
|
||||
- `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
|
||||
|
||||
Note: Please make sure to modify the default parameters as required for your use case.
|
||||
"""
|
||||
|
||||
temperature: Optional[float] = None
|
||||
max_output_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = None
|
||||
top_k: Optional[int] = None
|
||||
response_mime_type: Optional[str] = None
|
||||
response_schema: Optional[dict] = None
|
||||
candidate_count: Optional[int] = None
|
||||
stop_sequences: Optional[list] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
temperature: Optional[float] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
top_k: Optional[int] = None,
|
||||
response_mime_type: Optional[str] = None,
|
||||
response_schema: Optional[dict] = None,
|
||||
candidate_count: Optional[int] = None,
|
||||
stop_sequences: Optional[list] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stream",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"functions",
|
||||
"response_format",
|
||||
"n",
|
||||
"stop",
|
||||
]
|
||||
|
||||
def _map_function(self, value: List[dict]) -> List[Tools]:
|
||||
gtool_func_declarations = []
|
||||
googleSearchRetrieval: Optional[dict] = None
|
||||
|
||||
for tool in value:
|
||||
openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
|
||||
None
|
||||
)
|
||||
if "function" in tool: # tools list
|
||||
openai_function_object = ChatCompletionToolParamFunctionChunk( # type: ignore
|
||||
**tool["function"]
|
||||
)
|
||||
elif "name" in tool: # functions list
|
||||
openai_function_object = ChatCompletionToolParamFunctionChunk(**tool) # type: ignore
|
||||
|
||||
# check if grounding
|
||||
if tool.get("googleSearchRetrieval", None) is not None:
|
||||
googleSearchRetrieval = tool["googleSearchRetrieval"]
|
||||
elif openai_function_object is not None:
|
||||
gtool_func_declaration = FunctionDeclaration(
|
||||
name=openai_function_object["name"],
|
||||
description=openai_function_object.get("description", ""),
|
||||
parameters=openai_function_object.get("parameters", {}),
|
||||
)
|
||||
gtool_func_declarations.append(gtool_func_declaration)
|
||||
else:
|
||||
# assume it's a provider-specific param
|
||||
verbose_logger.warning(
|
||||
"Invalid tool={}. Use `litellm.set_verbose` or `litellm --detailed_debug` to see raw request."
|
||||
)
|
||||
|
||||
_tools = Tools(
|
||||
function_declarations=gtool_func_declarations,
|
||||
)
|
||||
if googleSearchRetrieval is not None:
|
||||
_tools["googleSearchRetrieval"] = googleSearchRetrieval
|
||||
return [_tools]
|
||||
|
||||
def map_tool_choice_values(
|
||||
self, model: str, tool_choice: Union[str, dict]
|
||||
) -> Optional[ToolConfig]:
|
||||
if tool_choice == "none":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="NONE"))
|
||||
elif tool_choice == "required":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="ANY"))
|
||||
elif tool_choice == "auto":
|
||||
return ToolConfig(functionCallingConfig=FunctionCallingConfig(mode="AUTO"))
|
||||
elif isinstance(tool_choice, dict):
|
||||
# only supported for anthropic + mistral models - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
|
||||
name = tool_choice.get("function", {}).get("name", "")
|
||||
return ToolConfig(
|
||||
functionCallingConfig=FunctionCallingConfig(
|
||||
mode="ANY", allowed_function_names=[name]
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise litellm.utils.UnsupportedParamsError(
|
||||
message="VertexAI doesn't support tool_choice={}. Supported tool_choice values=['auto', 'required', json object]. To drop it from the call, set `litellm.drop_params = True.".format(
|
||||
tool_choice
|
||||
),
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
model: str,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
):
|
||||
for param, value in non_default_params.items():
|
||||
if param == "temperature":
|
||||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
optional_params["top_p"] = value
|
||||
if (
|
||||
param == "stream" and value is True
|
||||
): # sending stream = False, can cause it to get passed unchecked and raise issues
|
||||
optional_params["stream"] = value
|
||||
if param == "n":
|
||||
optional_params["candidate_count"] = value
|
||||
if param == "stop":
|
||||
if isinstance(value, str):
|
||||
optional_params["stop_sequences"] = [value]
|
||||
elif isinstance(value, list):
|
||||
optional_params["stop_sequences"] = value
|
||||
if param == "max_tokens" or param == "max_completion_tokens":
|
||||
optional_params["max_output_tokens"] = value
|
||||
if param == "response_format": # type: ignore
|
||||
if value["type"] == "json_object": # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
elif value["type"] == "text": # type: ignore
|
||||
optional_params["response_mime_type"] = "text/plain"
|
||||
if "response_schema" in value: # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
optional_params["response_schema"] = value["response_schema"] # type: ignore
|
||||
elif value["type"] == "json_schema": # type: ignore
|
||||
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
|
||||
optional_params["response_mime_type"] = "application/json"
|
||||
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
|
||||
if (param == "tools" or param == "functions") and isinstance(value, list):
|
||||
optional_params["tools"] = self._map_function(value=value)
|
||||
optional_params["litellm_param_is_function_call"] = (
|
||||
True if param == "functions" else False
|
||||
)
|
||||
if param == "tool_choice" and (
|
||||
isinstance(value, str) or isinstance(value, dict)
|
||||
):
|
||||
_tool_choice_value = self.map_tool_choice_values(
|
||||
model=model, tool_choice=value # type: ignore
|
||||
)
|
||||
if _tool_choice_value is not None:
|
||||
optional_params["tool_choice"] = _tool_choice_value
|
||||
return optional_params
|
||||
|
||||
def get_mapped_special_auth_params(self) -> dict:
|
||||
"""
|
||||
Common auth params across bedrock/vertex_ai/azure/watsonx
|
||||
"""
|
||||
return {"project": "vertex_project", "region_name": "vertex_location"}
|
||||
|
||||
def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
|
||||
mapped_params = self.get_mapped_special_auth_params()
|
||||
|
||||
for param, value in non_default_params.items():
|
||||
if param in mapped_params:
|
||||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
def get_flagged_finish_reasons(self) -> Dict[str, str]:
|
||||
"""
|
||||
Return Dictionary of finish reasons which indicate response was flagged
|
||||
|
||||
and what it means
|
||||
"""
|
||||
return {
|
||||
"SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
|
||||
"RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
|
||||
"BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
|
||||
"PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
|
||||
"SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
|
||||
}
|
||||
|
||||
|
||||
class VertexGeminiConfig:
|
||||
"""
|
||||
Reference: https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts
|
||||
|
@ -752,6 +525,108 @@ class VertexGeminiConfig:
|
|||
return exception_string
|
||||
|
||||
|
||||
class GoogleAIStudioGeminiConfig(
|
||||
VertexGeminiConfig
|
||||
): # key diff from VertexAI - 'frequency_penalty' and 'presence_penalty' not supported
|
||||
"""
|
||||
Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig
|
||||
|
||||
The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio's Gemini API interface. Below are the parameters:
|
||||
|
||||
- `temperature` (float): This controls the degree of randomness in token selection.
|
||||
|
||||
- `max_output_tokens` (integer): This sets the limitation for the maximum amount of token in the text output. In this case, the default value is 256.
|
||||
|
||||
- `top_p` (float): The tokens are selected from the most probable to the least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.
|
||||
|
||||
- `top_k` (integer): The value of `top_k` determines how many of the most probable tokens are considered in the selection. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. The default value is 40.
|
||||
|
||||
- `response_mime_type` (str): The MIME type of the response. The default value is 'text/plain'. Other values - `application/json`.
|
||||
|
||||
- `response_schema` (dict): Optional. Output response schema of the generated candidate text when response mime type can have schema. Schema can be objects, primitives or arrays and is a subset of OpenAPI schema. If set, a compatible response_mime_type must also be set. Compatible mimetypes: application/json: Schema for JSON response.
|
||||
|
||||
- `candidate_count` (int): Number of generated responses to return.
|
||||
|
||||
- `stop_sequences` (List[str]): The set of character sequences (up to 5) that will stop output generation. If specified, the API will stop at the first appearance of a stop sequence. The stop sequence will not be included as part of the response.
|
||||
|
||||
Note: Please make sure to modify the default parameters as required for your use case.
|
||||
"""
|
||||
|
||||
temperature: Optional[float] = None
|
||||
max_output_tokens: Optional[int] = None
|
||||
top_p: Optional[float] = None
|
||||
top_k: Optional[int] = None
|
||||
response_mime_type: Optional[str] = None
|
||||
response_schema: Optional[dict] = None
|
||||
candidate_count: Optional[int] = None
|
||||
stop_sequences: Optional[list] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
temperature: Optional[float] = None,
|
||||
max_output_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
top_k: Optional[int] = None,
|
||||
response_mime_type: Optional[str] = None,
|
||||
response_schema: Optional[dict] = None,
|
||||
candidate_count: Optional[int] = None,
|
||||
stop_sequences: Optional[list] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def get_supported_openai_params(self):
|
||||
return [
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stream",
|
||||
"tools",
|
||||
"tool_choice",
|
||||
"functions",
|
||||
"response_format",
|
||||
"n",
|
||||
"stop",
|
||||
]
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
model: str,
|
||||
non_default_params: Dict,
|
||||
optional_params: Dict,
|
||||
drop_params: bool,
|
||||
):
|
||||
# drop frequency_penalty and presence_penalty
|
||||
if "frequency_penalty" in non_default_params:
|
||||
del non_default_params["frequency_penalty"]
|
||||
if "presence_penalty" in non_default_params:
|
||||
del non_default_params["presence_penalty"]
|
||||
return super().map_openai_params(
|
||||
model, non_default_params, optional_params, drop_params
|
||||
)
|
||||
|
||||
|
||||
async def make_call(
|
||||
client: Optional[AsyncHTTPHandler],
|
||||
api_base: str,
|
||||
|
|
|
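The new `GoogleAIStudioGeminiConfig` above reuses `VertexGeminiConfig` and only strips the two penalties Google AI Studio rejects before delegating. A generic sketch of that override pattern, with hypothetical classes standing in for the real configs:

```python
class BaseProviderConfig:
    def map_openai_params(self, model, non_default_params, optional_params, drop_params):
        # Base mapping: for this sketch, just copy everything through.
        optional_params.update(non_default_params)
        return optional_params


class RestrictedProviderConfig(BaseProviderConfig):
    # Same API surface, but params the upstream service rejects are removed
    # before the shared mapping logic runs.
    UNSUPPORTED = ("frequency_penalty", "presence_penalty")

    def map_openai_params(self, model, non_default_params, optional_params, drop_params):
        for key in self.UNSUPPORTED:
            non_default_params.pop(key, None)
        return super().map_openai_params(
            model, non_default_params, optional_params, drop_params
        )


print(
    RestrictedProviderConfig().map_openai_params(
        model="gemini-1.5-flash",
        non_default_params={"temperature": 0.2, "frequency_penalty": 0.5},
        optional_params={},
        drop_params=False,
    )
)  # {'temperature': 0.2}
```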
@ -44,7 +44,11 @@ class VertexAIAi21Config:
|
|||
return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_completion_tokens" in non_default_params:
|
||||
non_default_params["max_tokens"] = non_default_params.pop(
|
||||
|
@ -54,4 +58,5 @@ class VertexAIAi21Config:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
@ -50,7 +50,11 @@ class VertexAILlama3Config:
|
|||
return litellm.OpenAIConfig().get_supported_openai_params(model="gpt-3.5-turbo")
|
||||
|
||||
def map_openai_params(
|
||||
self, non_default_params: dict, optional_params: dict, model: str
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
):
|
||||
if "max_completion_tokens" in non_default_params:
|
||||
non_default_params["max_tokens"] = non_default_params.pop(
|
||||
|
@ -60,4 +64,5 @@ class VertexAILlama3Config:
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
|
|
|
@ -31,15 +31,21 @@ model_list:
|
|||
- model_name: "anthropic/*"
|
||||
litellm_params:
|
||||
model: "anthropic/*"
|
||||
- model_name: "openai/*"
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: "openai/*"
|
||||
- model_name: "fireworks_ai/*"
|
||||
litellm_params:
|
||||
model: "fireworks_ai/*"
|
||||
configurable_clientside_auth_params: ["api_base"]
|
||||
|
||||
- model_name: "gemini-flash-experimental"
|
||||
litellm_params:
|
||||
model: "vertex_ai/gemini-flash-experimental"
|
||||
|
||||
litellm_settings:
|
||||
success_callback: ["langfuse"]
|
||||
cache: true
|
||||
success_callback: ["langfuse", "prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
|
||||
general_settings:
|
||||
proxy_budget_rescheduler_min_time: 1
|
||||
proxy_budget_rescheduler_max_time: 1
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from typing import Optional, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -34,7 +34,7 @@ def init_rds_client(
|
|||
# Iterate over parameters and update if needed
|
||||
for i, param in enumerate(params_to_check):
|
||||
if param and param.startswith("os.environ/"):
|
||||
params_to_check[i] = get_secret(param)
|
||||
params_to_check[i] = get_secret(param) # type: ignore
|
||||
# Assign updated values back to parameters
|
||||
(
|
||||
aws_access_key_id,
|
||||
|
@ -62,13 +62,13 @@ def init_rds_client(
|
|||
import boto3
|
||||
|
||||
if isinstance(timeout, float):
|
||||
config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout)
|
||||
config = boto3.session.Config(connect_timeout=timeout, read_timeout=timeout) # type: ignore
|
||||
elif isinstance(timeout, httpx.Timeout):
|
||||
config = boto3.session.Config(
|
||||
config = boto3.session.Config( # type: ignore
|
||||
connect_timeout=timeout.connect, read_timeout=timeout.read
|
||||
)
|
||||
else:
|
||||
config = boto3.session.Config()
|
||||
config = boto3.session.Config() # type: ignore
|
||||
|
||||
### CHECK STS ###
|
||||
if (
|
||||
|
@ -105,6 +105,7 @@ def init_rds_client(
|
|||
region_name=region_name,
|
||||
config=config,
|
||||
)
|
||||
|
||||
elif aws_role_name is not None and aws_session_name is not None:
|
||||
# use sts if role name passed in
|
||||
sts_client = boto3.client(
|
||||
|
@ -144,6 +145,7 @@ def init_rds_client(
|
|||
region_name=region_name,
|
||||
config=config,
|
||||
)
|
||||
|
||||
else:
|
||||
# aws_access_key_id is None, assume user is trying to auth using env variables
|
||||
# boto3 automatically reads env variables
|
||||
|
@ -157,11 +159,14 @@ def init_rds_client(
|
|||
return client
|
||||
|
||||
|
||||
def generate_iam_auth_token(db_host, db_port, db_user) -> str:
|
||||
def generate_iam_auth_token(
|
||||
db_host, db_port, db_user, client: Optional[Any] = None
|
||||
) -> str:
|
||||
from urllib.parse import quote
|
||||
|
||||
import boto3
|
||||
|
||||
if client is None:
|
||||
boto_client = init_rds_client(
|
||||
aws_region_name=os.getenv("AWS_REGION_NAME"),
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
|
@ -173,9 +178,12 @@ def generate_iam_auth_token(db_host, db_port, db_user) -> str:
|
|||
"AWS_WEB_IDENTITY_TOKEN", os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
|
||||
),
|
||||
)
|
||||
else:
|
||||
boto_client = client
|
||||
|
||||
token = boto_client.generate_db_auth_token(
|
||||
DBHostname=db_host, Port=db_port, DBUsername=db_user
|
||||
)
|
||||
cleaned_token = quote(token, safe="")
|
||||
|
||||
return cleaned_token
|
||||
|
|
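The optional `client` argument added above lets callers inject a pre-built RDS client instead of going through `init_rds_client` (handy for tests). A minimal sketch, assuming valid AWS credentials and illustrative connection details:

```python
import boto3

from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token

rds_client = boto3.client("rds", region_name="us-west-2")  # hypothetical region

token = generate_iam_auth_token(
    db_host="mydb.abc123.us-west-2.rds.amazonaws.com",  # hypothetical host
    db_port="5432",
    db_user="db_user",
    client=rds_client,  # bypasses init_rds_client and uses the injected client
)
# `token` is URL-quoted and can be used as the password in a postgres DSN.
```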
litellm/proxy/db/prisma_client.py (new file, 106 lines)
|
@ -0,0 +1,106 @@
|
|||
import asyncio
|
||||
import os
|
||||
import urllib
|
||||
import urllib.parse
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
|
||||
class PrismaWrapper:
|
||||
def __init__(self, original_prisma: Any, iam_token_db_auth: bool):
|
||||
self._original_prisma = original_prisma
|
||||
self.iam_token_db_auth = iam_token_db_auth
|
||||
|
||||
def is_token_expired(self, token_url: Optional[str]) -> bool:
|
||||
if token_url is None:
|
||||
return True
|
||||
# Decode the token URL to handle URL-encoded characters
|
||||
decoded_url = urllib.parse.unquote(token_url)
|
||||
|
||||
# Parse the token URL
|
||||
parsed_url = urllib.parse.urlparse(decoded_url)
|
||||
|
||||
# Parse the query parameters from the path component (if they exist there)
|
||||
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||
|
||||
# Get expiration time from the query parameters
|
||||
expires = query_params.get("X-Amz-Expires", [None])[0]
|
||||
if expires is None:
|
||||
raise ValueError("X-Amz-Expires parameter is missing or invalid.")
|
||||
|
||||
expires_int = int(expires)
|
||||
|
||||
# Get the token's creation time from the X-Amz-Date parameter
|
||||
token_time_str = query_params.get("X-Amz-Date", [""])[0]
|
||||
if not token_time_str:
|
||||
raise ValueError("X-Amz-Date parameter is missing or invalid.")
|
||||
|
||||
# Ensure the token time string is parsed correctly
|
||||
try:
|
||||
token_time = datetime.strptime(token_time_str, "%Y%m%dT%H%M%SZ")
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Invalid X-Amz-Date format: {e}")
|
||||
|
||||
# Calculate the expiration time
|
||||
expiration_time = token_time + timedelta(seconds=expires_int)
|
||||
|
||||
# Current time in UTC
|
||||
current_time = datetime.utcnow()
|
||||
|
||||
# Check if the token is expired
|
||||
return current_time > expiration_time
|
||||
|
||||
def get_rds_iam_token(self) -> Optional[str]:
|
||||
if self.iam_token_db_auth:
|
||||
from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token
|
||||
|
||||
db_host = os.getenv("DATABASE_HOST")
|
||||
db_port = os.getenv("DATABASE_PORT")
|
||||
db_user = os.getenv("DATABASE_USER")
|
||||
db_name = os.getenv("DATABASE_NAME")
|
||||
db_schema = os.getenv("DATABASE_SCHEMA")
|
||||
|
||||
token = generate_iam_auth_token(
|
||||
db_host=db_host, db_port=db_port, db_user=db_user
|
||||
)
|
||||
|
||||
# print(f"token: {token}")
|
||||
_db_url = f"postgresql://{db_user}:{token}@{db_host}:{db_port}/{db_name}"
|
||||
if db_schema:
|
||||
_db_url += f"?schema={db_schema}"
|
||||
|
||||
os.environ["DATABASE_URL"] = _db_url
|
||||
return _db_url
|
||||
return None
|
||||
|
||||
async def recreate_prisma_client(
|
||||
self, new_db_url: str, http_client: Optional[Any] = None
|
||||
):
|
||||
from prisma import Prisma # type: ignore
|
||||
|
||||
if http_client is not None:
|
||||
self._original_prisma = Prisma(http=http_client)
|
||||
else:
|
||||
self._original_prisma = Prisma()
|
||||
|
||||
await self._original_prisma.connect()
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
original_attr = getattr(self._original_prisma, name)
|
||||
if self.iam_token_db_auth:
|
||||
db_url = os.getenv("DATABASE_URL")
|
||||
if self.is_token_expired(db_url):
|
||||
db_url = self.get_rds_iam_token()
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
if db_url:
|
||||
if loop.is_running():
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self.recreate_prisma_client(db_url), loop
|
||||
)
|
||||
else:
|
||||
asyncio.run(self.recreate_prisma_client(db_url))
|
||||
else:
|
||||
raise ValueError("Failed to get RDS IAM token")
|
||||
|
||||
return original_attr
|
|
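`is_token_expired` above treats the RDS IAM token as a presigned URL and compares `X-Amz-Date` plus `X-Amz-Expires` against the current UTC time. A standalone sketch of that parsing, with a hypothetical (truncated) token:

```python
import urllib.parse
from datetime import datetime, timedelta

token_url = (
    "mydb.abc123.us-west-2.rds.amazonaws.com:5432/?Action=connect"
    "&DBUser=db_user&X-Amz-Date=20240925T170000Z&X-Amz-Expires=900"
    "&X-Amz-Signature=abc123"  # hypothetical, truncated signature
)

parsed = urllib.parse.urlparse(urllib.parse.unquote(token_url))
query = urllib.parse.parse_qs(parsed.query)

issued_at = datetime.strptime(query["X-Amz-Date"][0], "%Y%m%dT%H%M%SZ")
expires_in = int(query["X-Amz-Expires"][0])

is_expired = datetime.utcnow() > issued_at + timedelta(seconds=expires_in)
print(is_expired)  # True once the 900s window has elapsed
```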
@ -40,7 +40,7 @@ def append_query_params(url, params) -> str:
|
|||
parsed_query.update(params)
|
||||
encoded_query = urlparse.urlencode(parsed_query, doseq=True)
|
||||
modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query))
|
||||
return modified_url
|
||||
return modified_url # type: ignore
|
||||
|
||||
|
||||
def run_ollama_serve():
|
||||
|
@ -287,7 +287,7 @@ def run_server(
|
|||
save_worker_config,
|
||||
)
|
||||
if version == True:
|
||||
pkg_version = importlib.metadata.version("litellm")
|
||||
pkg_version = importlib.metadata.version("litellm") # type: ignore
|
||||
click.echo(f"\nLiteLLM: Current Version = {pkg_version}\n")
|
||||
return
|
||||
if model and "ollama" in model and api_base is None:
|
||||
|
@ -338,14 +338,14 @@ def run_server(
|
|||
futures = []
|
||||
start_time = time.time()
|
||||
# Make concurrent calls
|
||||
with concurrent.futures.ThreadPoolExecutor(
|
||||
with concurrent.futures.ThreadPoolExecutor( # type: ignore
|
||||
max_workers=concurrent_calls
|
||||
) as executor:
|
||||
for _ in range(concurrent_calls):
|
||||
futures.append(executor.submit(_make_openai_completion))
|
||||
|
||||
# Wait for all futures to complete
|
||||
concurrent.futures.wait(futures)
|
||||
concurrent.futures.wait(futures) # type: ignore
|
||||
|
||||
# Summarize the results
|
||||
successful_calls = 0
|
||||
|
@ -476,6 +476,7 @@ def run_server(
|
|||
_db_url += f"?schema={db_schema}"
|
||||
|
||||
os.environ["DATABASE_URL"] = _db_url
|
||||
os.environ["IAM_TOKEN_DB_AUTH"] = "True"
|
||||
|
||||
### DECRYPT ENV VAR ###
|
||||
|
||||
|
@ -600,8 +601,9 @@ def run_server(
|
|||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path - for litellm local dev
|
||||
import litellm
|
||||
from litellm import get_secret_str
|
||||
|
||||
database_url = litellm.get_secret(database_url, default_value=None)
|
||||
database_url = get_secret_str(database_url, default_value=None)
|
||||
os.chdir(original_dir)
|
||||
if database_url is not None and isinstance(database_url, str):
|
||||
os.environ["DATABASE_URL"] = database_url
|
||||
|
@ -650,6 +652,8 @@ def run_server(
|
|||
subprocess.run(["prisma", "db", "push", "--accept-data-loss"])
|
||||
break # Exit the loop if the subprocess succeeds
|
||||
except subprocess.CalledProcessError as e:
|
||||
import time
|
||||
|
||||
print(f"Error: {e}") # noqa
|
||||
time.sleep(random.randrange(start=1, stop=5))
|
||||
finally:
|
||||
|
@ -728,12 +732,16 @@ def run_server(
|
|||
|
||||
def load_config(self):
|
||||
# note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config
|
||||
if self.cfg is not None:
|
||||
config = {
|
||||
key: value
|
||||
for key, value in self.options.items()
|
||||
if key in self.cfg.settings and value is not None
|
||||
}
|
||||
else:
|
||||
config = {}
|
||||
for key, value in config.items():
|
||||
if self.cfg is not None:
|
||||
self.cfg.set(key.lower(), value)
|
||||
|
||||
def load(self):
|
||||
|
|
|
@ -65,11 +65,13 @@ from litellm.proxy.db.create_views import (
|
|||
create_missing_views,
|
||||
should_create_missing_views,
|
||||
)
|
||||
from litellm.proxy.db.prisma_client import PrismaWrapper
|
||||
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
|
||||
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
_PROXY_MaxParallelRequestsHandler,
|
||||
)
|
||||
from litellm.secret_managers.main import str_to_bool
|
||||
from litellm.types.utils import CallTypes, LoggedLiteLLMParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -1017,6 +1019,9 @@ class PrismaClient:
|
|||
)
|
||||
## init logging object
|
||||
self.proxy_logging_obj = proxy_logging_obj
|
||||
self.iam_token_db_auth: Optional[bool] = str_to_bool(
|
||||
os.getenv("IAM_TOKEN_DB_AUTH")
|
||||
)
|
||||
try:
|
||||
from prisma import Prisma # type: ignore
|
||||
except Exception as e:
|
||||
|
@ -1043,9 +1048,23 @@ class PrismaClient:
|
|||
from prisma import Prisma # type: ignore
|
||||
verbose_proxy_logger.debug("Connecting Prisma Client to DB..")
|
||||
if http_client is not None:
|
||||
self.db = Prisma(http=http_client)
|
||||
self.db = PrismaWrapper(
|
||||
original_prisma=Prisma(http=http_client),
|
||||
iam_token_db_auth=(
|
||||
self.iam_token_db_auth
|
||||
if self.iam_token_db_auth is not None
|
||||
else False
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.db = Prisma() # Client to connect to Prisma db
|
||||
self.db = PrismaWrapper(
|
||||
original_prisma=Prisma(),
|
||||
iam_token_db_auth=(
|
||||
self.iam_token_db_auth
|
||||
if self.iam_token_db_auth is not None
|
||||
else False
|
||||
),
|
||||
) # Client to connect to Prisma db
|
||||
verbose_proxy_logger.debug("Success - Connected Prisma Client to DB")
|
||||
|
||||
def hash_token(self, token: str):
|
||||
|
@ -1141,9 +1160,9 @@ class PrismaClient:
|
|||
"LiteLLM_VerificationTokenView Created in DB!"
|
||||
)
|
||||
else:
|
||||
should_create_views = await should_create_missing_views(db=self.db)
|
||||
should_create_views = await should_create_missing_views(db=self.db.db) # type: ignore
|
||||
if should_create_views:
|
||||
await create_missing_views(db=self.db)
|
||||
await create_missing_views(db=self.db) # type: ignore
|
||||
else:
|
||||
# don't block execution if these views are missing
|
||||
# Convert lists to sets for efficient difference calculation
|
||||
|
|
|
@ -29,7 +29,7 @@ def _is_base64(s):
|
|||
return False
|
||||
|
||||
|
||||
def str_to_bool(value: str) -> Optional[bool]:
|
||||
def str_to_bool(value: Optional[str]) -> Optional[bool]:
|
||||
"""
|
||||
Converts a string to a boolean if it's a recognized boolean string.
|
||||
Returns None if the string is not a recognized boolean value.
|
||||
|
@ -37,6 +37,9 @@ def str_to_bool(value: str) -> Optional[bool]:
|
|||
:param value: The string to be checked.
|
||||
:return: True or False if the string is a recognized boolean, otherwise None.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
true_values = {"true"}
|
||||
false_values = {"false"}
|
||||
|
||||
|
|
|
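`str_to_bool` now accepts `Optional[str]`, so results of `os.getenv` can be passed straight through without a pre-check. A quick usage sketch:

```python
import os

from litellm.secret_managers.main import str_to_bool

# os.getenv returns None when the variable is unset; str_to_bool now returns
# None for that case instead of raising, so a simple default works:
iam_token_db_auth = str_to_bool(os.getenv("IAM_TOKEN_DB_AUTH")) or False
print(iam_token_db_auth)
```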
@ -968,3 +968,259 @@ def test_aaalangfuse_dynamic_logging():
|
|||
)
|
||||
|
||||
langfuse_client.get_trace(id=trace_id)
|
||||
|
||||
|
||||
import datetime
|
||||
|
||||
generation_params = {
|
||||
"name": "litellm-acompletion",
|
||||
"id": "time-10-35-32-316778_chatcmpl-ABQDEzVJS8fziPdvkeTA3tnQaxeMX",
|
||||
"start_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 316778),
|
||||
"end_time": datetime.datetime(2024, 9, 25, 10, 35, 32, 897141),
|
||||
"model": "gpt-4o",
|
||||
"model_parameters": {
|
||||
"stream": False,
|
||||
"max_retries": 0,
|
||||
"extra_body": "{}",
|
||||
"system_fingerprint": "fp_52a7f40b0b",
|
||||
},
|
||||
"input": {
|
||||
"messages": [
|
||||
{"content": "<>", "role": "system"},
|
||||
{"content": "<>", "role": "user"},
|
||||
]
|
||||
},
|
||||
"output": {
|
||||
"content": "Hello! It looks like your message might have been sent by accident. How can I assist you today?",
|
||||
"role": "assistant",
|
||||
"tool_calls": None,
|
||||
"function_call": None,
|
||||
},
|
||||
"usage": {"prompt_tokens": 13, "completion_tokens": 21, "total_cost": 0.00038},
|
||||
"metadata": {
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
"requester_metadata": {
|
||||
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
|
||||
"trace_name": "jess",
|
||||
"tags": ["conversational-service", "generative-ai-engine", "staging"],
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
},
|
||||
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||
"litellm_api_version": "0.0.0",
|
||||
"user_api_key_user_id": "default_user_id",
|
||||
"user_api_key_spend": 0.0,
|
||||
"user_api_key_metadata": {},
|
||||
"requester_ip_address": "127.0.0.1",
|
||||
"model_group": "gpt-4o",
|
||||
"model_group_size": 0,
|
||||
"deployment": "gpt-4o",
|
||||
"model_info": {
|
||||
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"db_model": False,
|
||||
},
|
||||
"hidden_params": {
|
||||
"headers": {
|
||||
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"content-type": "application/json",
|
||||
"transfer-encoding": "chunked",
|
||||
"connection": "keep-alive",
|
||||
"access-control-expose-headers": "X-Request-ID",
|
||||
"openai-organization": "reliablekeystest",
|
||||
"openai-processing-ms": "329",
|
||||
"openai-version": "2020-10-01",
|
||||
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"x-ratelimit-limit-requests": "10000",
|
||||
"x-ratelimit-limit-tokens": "30000000",
|
||||
"x-ratelimit-remaining-requests": "9999",
|
||||
"x-ratelimit-remaining-tokens": "29999980",
|
||||
"x-ratelimit-reset-requests": "6ms",
|
||||
"x-ratelimit-reset-tokens": "0s",
|
||||
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"cf-cache-status": "DYNAMIC",
|
||||
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"x-content-type-options": "nosniff",
|
||||
"server": "cloudflare",
|
||||
"cf-ray": "8c8cc573becb232c-SJC",
|
||||
"content-encoding": "gzip",
|
||||
"alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"additional_headers": {
|
||||
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"llm_provider-content-type": "application/json",
|
||||
"llm_provider-transfer-encoding": "chunked",
|
||||
"llm_provider-connection": "keep-alive",
|
||||
"llm_provider-access-control-expose-headers": "X-Request-ID",
|
||||
"llm_provider-openai-organization": "reliablekeystest",
|
||||
"llm_provider-openai-processing-ms": "329",
|
||||
"llm_provider-openai-version": "2020-10-01",
|
||||
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"llm_provider-x-ratelimit-limit-requests": "10000",
|
||||
"llm_provider-x-ratelimit-limit-tokens": "30000000",
|
||||
"llm_provider-x-ratelimit-remaining-requests": "9999",
|
||||
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
|
||||
"llm_provider-x-ratelimit-reset-requests": "6ms",
|
||||
"llm_provider-x-ratelimit-reset-tokens": "0s",
|
||||
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"llm_provider-cf-cache-status": "DYNAMIC",
|
||||
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"llm_provider-x-content-type-options": "nosniff",
|
||||
"llm_provider-server": "cloudflare",
|
||||
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
|
||||
"llm_provider-content-encoding": "gzip",
|
||||
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
|
||||
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"api_base": "https://api.openai.com",
|
||||
"optional_params": {
|
||||
"stream": False,
|
||||
"max_retries": 0,
|
||||
"extra_body": {},
|
||||
},
|
||||
"response_cost": 0.00038,
|
||||
},
|
||||
"litellm_response_cost": 0.00038,
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"cache_hit": False,
|
||||
},
|
||||
"level": "DEFAULT",
|
||||
"version": None,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"prompt",
|
||||
[
|
||||
[
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
"hello world",
|
||||
],
|
||||
)
|
||||
def test_langfuse_prompt_type(prompt):
|
||||
|
||||
from litellm.integrations.langfuse import _add_prompt_to_generation_params
|
||||
|
||||
clean_metadata = {
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": prompt,
|
||||
},
|
||||
"requester_metadata": {
|
||||
"session_id": "e953a71f-e129-4cf5-ad11-ad18245022f1",
|
||||
"trace_name": "jess",
|
||||
"tags": ["conversational-service", "generative-ai-engine", "staging"],
|
||||
"prompt": {
|
||||
"name": "conversational-service-answer_question_restricted_reply",
|
||||
"version": 9,
|
||||
"config": {},
|
||||
"labels": ["latest", "staging", "production"],
|
||||
"tags": ["conversational-service"],
|
||||
"prompt": [
|
||||
{"role": "system", "content": "<>"},
|
||||
{"role": "user", "content": "{{text}}"},
|
||||
],
|
||||
},
|
||||
},
|
||||
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||
"litellm_api_version": "0.0.0",
|
||||
"user_api_key_user_id": "default_user_id",
|
||||
"user_api_key_spend": 0.0,
|
||||
"user_api_key_metadata": {},
|
||||
"requester_ip_address": "127.0.0.1",
|
||||
"model_group": "gpt-4o",
|
||||
"model_group_size": 0,
|
||||
"deployment": "gpt-4o",
|
||||
"model_info": {
|
||||
"id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"db_model": False,
|
||||
},
|
||||
"hidden_params": {
|
||||
"headers": {
|
||||
"date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"content-type": "application/json",
|
||||
"transfer-encoding": "chunked",
|
||||
"connection": "keep-alive",
|
||||
"access-control-expose-headers": "X-Request-ID",
|
||||
"openai-organization": "reliablekeystest",
|
||||
"openai-processing-ms": "329",
|
||||
"openai-version": "2020-10-01",
|
||||
"strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"x-ratelimit-limit-requests": "10000",
|
||||
"x-ratelimit-limit-tokens": "30000000",
|
||||
"x-ratelimit-remaining-requests": "9999",
|
||||
"x-ratelimit-remaining-tokens": "29999980",
|
||||
"x-ratelimit-reset-requests": "6ms",
|
||||
"x-ratelimit-reset-tokens": "0s",
|
||||
"x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"cf-cache-status": "DYNAMIC",
|
||||
"set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"x-content-type-options": "nosniff",
|
||||
"server": "cloudflare",
|
||||
"cf-ray": "8c8cc573becb232c-SJC",
|
||||
"content-encoding": "gzip",
|
||||
"alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"additional_headers": {
|
||||
"llm_provider-date": "Wed, 25 Sep 2024 17:35:32 GMT",
|
||||
"llm_provider-content-type": "application/json",
|
||||
"llm_provider-transfer-encoding": "chunked",
|
||||
"llm_provider-connection": "keep-alive",
|
||||
"llm_provider-access-control-expose-headers": "X-Request-ID",
|
||||
"llm_provider-openai-organization": "reliablekeystest",
|
||||
"llm_provider-openai-processing-ms": "329",
|
||||
"llm_provider-openai-version": "2020-10-01",
|
||||
"llm_provider-strict-transport-security": "max-age=31536000; includeSubDomains; preload",
|
||||
"llm_provider-x-ratelimit-limit-requests": "10000",
|
||||
"llm_provider-x-ratelimit-limit-tokens": "30000000",
|
||||
"llm_provider-x-ratelimit-remaining-requests": "9999",
|
||||
"llm_provider-x-ratelimit-remaining-tokens": "29999980",
|
||||
"llm_provider-x-ratelimit-reset-requests": "6ms",
|
||||
"llm_provider-x-ratelimit-reset-tokens": "0s",
|
||||
"llm_provider-x-request-id": "req_fdff3bfa11c391545d2042d46473214f",
|
||||
"llm_provider-cf-cache-status": "DYNAMIC",
|
||||
"llm_provider-set-cookie": "__cf_bm=NWwOByRU5dQwDqLRYbbTT.ecfqvnWiBi8aF9rfp1QB8-1727285732-1.0.1.1-.Cm0UGMaQ4qZbY3ZU0F7trjSsNUcIBo04PetRMlCoyoTCTnKTbmwmDCWcHmqHOTuE_bNspSgfQoANswx4BSD.A; path=/; expires=Wed, 25-Sep-24 18:05:32 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None, _cfuvid=1b_nyqBtAs4KHRhFBV2a.8zic1fSRJxT.Jn1npl1_GY-1727285732915-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None",
|
||||
"llm_provider-x-content-type-options": "nosniff",
|
||||
"llm_provider-server": "cloudflare",
|
||||
"llm_provider-cf-ray": "8c8cc573becb232c-SJC",
|
||||
"llm_provider-content-encoding": "gzip",
|
||||
"llm_provider-alt-svc": 'h3=":443"; ma=86400',
|
||||
},
|
||||
"litellm_call_id": "1fa31658-20af-40b5-9ac9-60fd7b5ad98c",
|
||||
"model_id": "5583ac0c3e38cfd381b6cc09bcca6e0db60af48d3f16da325f82eb9df1b6a1e4",
|
||||
"api_base": "https://api.openai.com",
|
||||
"optional_params": {"stream": False, "max_retries": 0, "extra_body": {}},
|
||||
"response_cost": 0.00038,
|
||||
},
|
||||
"litellm_response_cost": 0.00038,
|
||||
"api_base": "https://api.openai.com/v1/",
|
||||
"cache_hit": False,
|
||||
}
|
||||
_add_prompt_to_generation_params(
|
||||
generation_params=generation_params, clean_metadata=clean_metadata
|
||||
)
|
||||
|
|
|
@ -153,6 +153,7 @@ class GenerationConfig(TypedDict, total=False):
|
|||
presence_penalty: float
|
||||
frequency_penalty: float
|
||||
response_mime_type: Literal["text/plain", "application/json"]
|
||||
response_schema: dict
|
||||
seed: int
|
||||
|
||||
|
||||
|
|
|
@ -3239,8 +3239,15 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "vertex_ai_beta":
|
||||
elif custom_llm_provider == "vertex_ai_beta" or (
|
||||
custom_llm_provider == "vertex_ai" and "gemini" in model
|
||||
):
|
||||
supported_params = get_supported_openai_params(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
|
@ -3277,6 +3284,11 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_mistral_models:
|
||||
supported_params = get_supported_openai_params(
|
||||
|
@ -3301,6 +3313,11 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=(
|
||||
drop_params
|
||||
if drop_params is not None and isinstance(drop_params, bool)
|
||||
else False
|
||||
),
|
||||
)
|
||||
elif custom_llm_provider == "sagemaker":
|
||||
## check if unsupported param passed in
|
||||
|
@ -3710,6 +3727,7 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
elif custom_llm_provider == "openrouter":
|
||||
supported_params = get_supported_openai_params(
|
||||
|
@ -3818,6 +3836,7 @@ def get_optional_params(
|
|||
non_default_params=non_default_params,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
drop_params=drop_params,
|
||||
)
|
||||
elif custom_llm_provider == "azure":
|
||||
supported_params = get_supported_openai_params(
|
||||
|
|
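Taken together, the `utils.py` hunks above coerce `drop_params` to a plain bool before handing it to each provider config, and route plain `vertex_ai` Gemini models through the same translation path as `vertex_ai_beta`. A small sketch of the user-facing effect for an unmapped Gemini model (mirrors the new `test_unmapped_gemini_model_params` test added later in this diff):

```python
from litellm.utils import get_optional_params

# Even a Gemini model litellm has no explicit mapping for gets its OpenAI-style
# params translated, e.g. `stop` -> `stop_sequences`.
optional_params = get_optional_params(
    model="gemini-new-model",        # hypothetical, unmapped model name
    custom_llm_provider="vertex_ai",
    stop="stop_word",
)
assert optional_params["stop_sequences"] == ["stop_word"]
```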
package-lock.json (generated, 56 lines)
|
@ -5,12 +5,53 @@
|
|||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"prisma": "^5.17.0",
|
||||
"react-copy-to-clipboard": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react-copy-to-clipboard": "^5.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/debug": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
|
||||
"integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg=="
|
||||
},
|
||||
"node_modules/@prisma/engines": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
|
||||
"integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/fetch-engine": "5.17.0",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/engines-version": {
|
||||
"version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
|
||||
"integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg=="
|
||||
},
|
||||
"node_modules/@prisma/fetch-engine": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
|
||||
"integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/get-platform": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
|
||||
"integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/prop-types": {
|
||||
"version": "15.7.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz",
|
||||
|
@ -74,6 +115,21 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/prisma": {
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
|
||||
"integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@prisma/engines": "5.17.0"
|
||||
},
|
||||
"bin": {
|
||||
"prisma": "build/index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.13"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types": {
|
||||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
{
|
||||
"dependencies": {
|
||||
"prisma": "^5.17.0",
|
||||
"react-copy-to-clipboard": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -141,12 +141,12 @@ def test_all_model_configs():
|
|||
"max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params()
|
||||
)
|
||||
assert VertexAILlama3Config().map_openai_params(
|
||||
{"max_completion_tokens": 10}, {}, "llama3"
|
||||
{"max_completion_tokens": 10}, {}, "llama3", drop_params=False
|
||||
) == {"max_tokens": 10}
|
||||
|
||||
assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params()
|
||||
assert VertexAIAi21Config().map_openai_params(
|
||||
{"max_completion_tokens": 10}, {}, "llama3"
|
||||
{"max_completion_tokens": 10}, {}, "llama3", drop_params=False
|
||||
) == {"max_tokens": 10}
|
||||
|
||||
from litellm.llms.fireworks_ai.chat.fireworks_ai_transformation import (
|
||||
|
@ -332,6 +332,7 @@ def test_all_model_configs():
|
|||
model="gemini-1.0-pro",
|
||||
non_default_params={"max_completion_tokens": 10},
|
||||
optional_params={},
|
||||
drop_params=False,
|
||||
) == {"max_output_tokens": 10}
|
||||
|
||||
assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params()
|
||||
|
|
|
@ -600,3 +600,35 @@ def test_o1_model_params():
|
|||
)
|
||||
assert optional_params["seed"] == 10
|
||||
assert optional_params["user"] == "John"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"temperature, expected_error",
|
||||
[(0.2, True), (1, False)],
|
||||
)
|
||||
def test_o1_model_temperature_params(temperature, expected_error):
|
||||
if expected_error:
|
||||
with pytest.raises(litellm.UnsupportedParamsError):
|
||||
get_optional_params(
|
||||
model="o1-preview-2024-09-12",
|
||||
custom_llm_provider="openai",
|
||||
temperature=temperature,
|
||||
)
|
||||
else:
|
||||
get_optional_params(
|
||||
model="o1-preview-2024-09-12",
|
||||
custom_llm_provider="openai",
|
||||
temperature=temperature,
|
||||
)
|
||||
|
||||
|
||||
def test_unmapped_gemini_model_params():
|
||||
"""
|
||||
Test if unmapped gemini model optional params are translated correctly
|
||||
"""
|
||||
optional_params = get_optional_params(
|
||||
model="gemini-new-model",
|
||||
custom_llm_provider="vertex_ai",
|
||||
stop="stop_word",
|
||||
)
|
||||
assert optional_params["stop_sequences"] == ["stop_word"]
|
||||
|
|