forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_model_id_fix
This commit is contained in:
commit
5e5179e476
31 changed files with 710 additions and 7234 deletions
|
@ -93,6 +93,7 @@ class LangFuseLogger:
|
||||||
)
|
)
|
||||||
|
|
||||||
litellm_params = kwargs.get("litellm_params", {})
|
litellm_params = kwargs.get("litellm_params", {})
|
||||||
|
litellm_call_id = kwargs.get("litellm_call_id", None)
|
||||||
metadata = (
|
metadata = (
|
||||||
litellm_params.get("metadata", {}) or {}
|
litellm_params.get("metadata", {}) or {}
|
||||||
) # if litellm_params['metadata'] == None
|
) # if litellm_params['metadata'] == None
|
||||||
|
@ -161,6 +162,7 @@ class LangFuseLogger:
|
||||||
response_obj,
|
response_obj,
|
||||||
level,
|
level,
|
||||||
print_verbose,
|
print_verbose,
|
||||||
|
litellm_call_id,
|
||||||
)
|
)
|
||||||
elif response_obj is not None:
|
elif response_obj is not None:
|
||||||
self._log_langfuse_v1(
|
self._log_langfuse_v1(
|
||||||
|
@ -255,6 +257,7 @@ class LangFuseLogger:
|
||||||
response_obj,
|
response_obj,
|
||||||
level,
|
level,
|
||||||
print_verbose,
|
print_verbose,
|
||||||
|
litellm_call_id,
|
||||||
) -> tuple:
|
) -> tuple:
|
||||||
import langfuse
|
import langfuse
|
||||||
|
|
||||||
|
@ -318,7 +321,7 @@ class LangFuseLogger:
|
||||||
|
|
||||||
session_id = clean_metadata.pop("session_id", None)
|
session_id = clean_metadata.pop("session_id", None)
|
||||||
trace_name = clean_metadata.pop("trace_name", None)
|
trace_name = clean_metadata.pop("trace_name", None)
|
||||||
trace_id = clean_metadata.pop("trace_id", None)
|
trace_id = clean_metadata.pop("trace_id", litellm_call_id)
|
||||||
existing_trace_id = clean_metadata.pop("existing_trace_id", None)
|
existing_trace_id = clean_metadata.pop("existing_trace_id", None)
|
||||||
update_trace_keys = clean_metadata.pop("update_trace_keys", [])
|
update_trace_keys = clean_metadata.pop("update_trace_keys", [])
|
||||||
debug = clean_metadata.pop("debug_langfuse", None)
|
debug = clean_metadata.pop("debug_langfuse", None)
|
||||||
|
@ -351,9 +354,13 @@ class LangFuseLogger:
|
||||||
|
|
||||||
# Special keys that are found in the function arguments and not the metadata
|
# Special keys that are found in the function arguments and not the metadata
|
||||||
if "input" in update_trace_keys:
|
if "input" in update_trace_keys:
|
||||||
trace_params["input"] = input if not mask_input else "redacted-by-litellm"
|
trace_params["input"] = (
|
||||||
|
input if not mask_input else "redacted-by-litellm"
|
||||||
|
)
|
||||||
if "output" in update_trace_keys:
|
if "output" in update_trace_keys:
|
||||||
trace_params["output"] = output if not mask_output else "redacted-by-litellm"
|
trace_params["output"] = (
|
||||||
|
output if not mask_output else "redacted-by-litellm"
|
||||||
|
)
|
||||||
else: # don't overwrite an existing trace
|
else: # don't overwrite an existing trace
|
||||||
trace_params = {
|
trace_params = {
|
||||||
"id": trace_id,
|
"id": trace_id,
|
||||||
|
@ -375,7 +382,9 @@ class LangFuseLogger:
|
||||||
if level == "ERROR":
|
if level == "ERROR":
|
||||||
trace_params["status_message"] = output
|
trace_params["status_message"] = output
|
||||||
else:
|
else:
|
||||||
trace_params["output"] = output if not mask_output else "redacted-by-litellm"
|
trace_params["output"] = (
|
||||||
|
output if not mask_output else "redacted-by-litellm"
|
||||||
|
)
|
||||||
|
|
||||||
if debug == True or (isinstance(debug, str) and debug.lower() == "true"):
|
if debug == True or (isinstance(debug, str) and debug.lower() == "true"):
|
||||||
if "metadata" in trace_params:
|
if "metadata" in trace_params:
|
||||||
|
|
|
@ -164,13 +164,28 @@ class SlackAlerting(CustomLogger):
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Returns langfuse trace url
|
Returns langfuse trace url
|
||||||
|
|
||||||
|
- check:
|
||||||
|
-> existing_trace_id
|
||||||
|
-> trace_id
|
||||||
|
-> litellm_call_id
|
||||||
"""
|
"""
|
||||||
# do nothing for now
|
# do nothing for now
|
||||||
if (
|
if request_data is not None:
|
||||||
request_data is not None
|
trace_id = None
|
||||||
and request_data.get("metadata", {}).get("trace_id", None) is not None
|
if (
|
||||||
):
|
request_data.get("metadata", {}).get("existing_trace_id", None)
|
||||||
trace_id = request_data["metadata"]["trace_id"]
|
is not None
|
||||||
|
):
|
||||||
|
trace_id = request_data["metadata"]["existing_trace_id"]
|
||||||
|
elif request_data.get("metadata", {}).get("trace_id", None) is not None:
|
||||||
|
trace_id = request_data["metadata"]["trace_id"]
|
||||||
|
elif request_data.get("litellm_logging_obj", None) is not None and hasattr(
|
||||||
|
request_data["litellm_logging_obj"], "model_call_details"
|
||||||
|
):
|
||||||
|
trace_id = request_data["litellm_logging_obj"].model_call_details[
|
||||||
|
"litellm_call_id"
|
||||||
|
]
|
||||||
if litellm.utils.langFuseLogger is not None:
|
if litellm.utils.langFuseLogger is not None:
|
||||||
base_url = litellm.utils.langFuseLogger.Langfuse.base_url
|
base_url = litellm.utils.langFuseLogger.Langfuse.base_url
|
||||||
return f"{base_url}/trace/{trace_id}"
|
return f"{base_url}/trace/{trace_id}"
|
||||||
|
@ -671,11 +686,19 @@ class SlackAlerting(CustomLogger):
|
||||||
)
|
)
|
||||||
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
|
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
|
||||||
return
|
return
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
async def model_added_alert(self, model_name: str, litellm_model_name: str):
|
async def model_added_alert(
|
||||||
model_info = litellm.model_cost.get(litellm_model_name, {})
|
self, model_name: str, litellm_model_name: str, passed_model_info: Any
|
||||||
|
):
|
||||||
|
base_model_from_user = getattr(passed_model_info, "base_model", None)
|
||||||
|
model_info = {}
|
||||||
|
base_model = ""
|
||||||
|
if base_model_from_user is not None:
|
||||||
|
model_info = litellm.model_cost.get(base_model_from_user, {})
|
||||||
|
base_model = f"Base Model: `{base_model_from_user}`\n"
|
||||||
|
else:
|
||||||
|
model_info = litellm.model_cost.get(litellm_model_name, {})
|
||||||
model_info_str = ""
|
model_info_str = ""
|
||||||
for k, v in model_info.items():
|
for k, v in model_info.items():
|
||||||
if k == "input_cost_per_token" or k == "output_cost_per_token":
|
if k == "input_cost_per_token" or k == "output_cost_per_token":
|
||||||
|
@ -687,6 +710,7 @@ class SlackAlerting(CustomLogger):
|
||||||
message = f"""
|
message = f"""
|
||||||
*🚅 New Model Added*
|
*🚅 New Model Added*
|
||||||
Model Name: `{model_name}`
|
Model Name: `{model_name}`
|
||||||
|
{base_model}
|
||||||
|
|
||||||
Usage OpenAI Python SDK:
|
Usage OpenAI Python SDK:
|
||||||
```
|
```
|
||||||
|
|
|
@ -21,7 +21,7 @@ class BaseLLM:
|
||||||
messages: list,
|
messages: list,
|
||||||
print_verbose,
|
print_verbose,
|
||||||
encoding,
|
encoding,
|
||||||
) -> litellm.utils.ModelResponse:
|
) -> Union[litellm.utils.ModelResponse, litellm.utils.CustomStreamWrapper]:
|
||||||
"""
|
"""
|
||||||
Helper function to process the response across sync + async completion calls
|
Helper function to process the response across sync + async completion calls
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# What is this?
|
# What is this?
|
||||||
## Initial implementation of calling bedrock via httpx client (allows for async calls).
|
## Initial implementation of calling bedrock via httpx client (allows for async calls).
|
||||||
## V0 - just covers cohere command-r support
|
## V1 - covers cohere + anthropic claude-3 support
|
||||||
|
|
||||||
import os, types
|
import os, types
|
||||||
import json
|
import json
|
||||||
|
@ -29,12 +29,20 @@ from litellm.utils import (
|
||||||
get_secret,
|
get_secret,
|
||||||
Logging,
|
Logging,
|
||||||
)
|
)
|
||||||
import litellm
|
import litellm, uuid
|
||||||
from .prompt_templates.factory import prompt_factory, custom_prompt, cohere_message_pt
|
from .prompt_templates.factory import (
|
||||||
|
prompt_factory,
|
||||||
|
custom_prompt,
|
||||||
|
cohere_message_pt,
|
||||||
|
construct_tool_use_system_prompt,
|
||||||
|
extract_between_tags,
|
||||||
|
parse_xml_params,
|
||||||
|
contains_tag,
|
||||||
|
)
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from .base import BaseLLM
|
from .base import BaseLLM
|
||||||
import httpx # type: ignore
|
import httpx # type: ignore
|
||||||
from .bedrock import BedrockError, convert_messages_to_prompt
|
from .bedrock import BedrockError, convert_messages_to_prompt, ModelResponseIterator
|
||||||
from litellm.types.llms.bedrock import *
|
from litellm.types.llms.bedrock import *
|
||||||
|
|
||||||
|
|
||||||
|
@ -280,7 +288,8 @@ class BedrockLLM(BaseLLM):
|
||||||
messages: List,
|
messages: List,
|
||||||
print_verbose,
|
print_verbose,
|
||||||
encoding,
|
encoding,
|
||||||
) -> ModelResponse:
|
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||||
|
provider = model.split(".")[0]
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging_obj.post_call(
|
logging_obj.post_call(
|
||||||
input=messages,
|
input=messages,
|
||||||
|
@ -297,26 +306,210 @@ class BedrockLLM(BaseLLM):
|
||||||
raise BedrockError(message=response.text, status_code=422)
|
raise BedrockError(message=response.text, status_code=422)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
model_response.choices[0].message.content = completion_response["text"] # type: ignore
|
if provider == "cohere":
|
||||||
|
if "text" in completion_response:
|
||||||
|
outputText = completion_response["text"] # type: ignore
|
||||||
|
elif "generations" in completion_response:
|
||||||
|
outputText = completion_response["generations"][0]["text"]
|
||||||
|
model_response["finish_reason"] = map_finish_reason(
|
||||||
|
completion_response["generations"][0]["finish_reason"]
|
||||||
|
)
|
||||||
|
elif provider == "anthropic":
|
||||||
|
if model.startswith("anthropic.claude-3"):
|
||||||
|
json_schemas: dict = {}
|
||||||
|
_is_function_call = False
|
||||||
|
## Handle Tool Calling
|
||||||
|
if "tools" in optional_params:
|
||||||
|
_is_function_call = True
|
||||||
|
for tool in optional_params["tools"]:
|
||||||
|
json_schemas[tool["function"]["name"]] = tool[
|
||||||
|
"function"
|
||||||
|
].get("parameters", None)
|
||||||
|
outputText = completion_response.get("content")[0].get("text", None)
|
||||||
|
if outputText is not None and contains_tag(
|
||||||
|
"invoke", outputText
|
||||||
|
): # OUTPUT PARSE FUNCTION CALL
|
||||||
|
function_name = extract_between_tags("tool_name", outputText)[0]
|
||||||
|
function_arguments_str = extract_between_tags(
|
||||||
|
"invoke", outputText
|
||||||
|
)[0].strip()
|
||||||
|
function_arguments_str = (
|
||||||
|
f"<invoke>{function_arguments_str}</invoke>"
|
||||||
|
)
|
||||||
|
function_arguments = parse_xml_params(
|
||||||
|
function_arguments_str,
|
||||||
|
json_schema=json_schemas.get(
|
||||||
|
function_name, None
|
||||||
|
), # check if we have a json schema for this function name)
|
||||||
|
)
|
||||||
|
_message = litellm.Message(
|
||||||
|
tool_calls=[
|
||||||
|
{
|
||||||
|
"id": f"call_{uuid.uuid4()}",
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": function_name,
|
||||||
|
"arguments": json.dumps(function_arguments),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
content=None,
|
||||||
|
)
|
||||||
|
model_response.choices[0].message = _message # type: ignore
|
||||||
|
model_response._hidden_params["original_response"] = (
|
||||||
|
outputText # allow user to access raw anthropic tool calling response
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
_is_function_call == True
|
||||||
|
and stream is not None
|
||||||
|
and stream == True
|
||||||
|
):
|
||||||
|
print_verbose(
|
||||||
|
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"
|
||||||
|
)
|
||||||
|
# return an iterator
|
||||||
|
streaming_model_response = ModelResponse(stream=True)
|
||||||
|
streaming_model_response.choices[0].finish_reason = getattr(
|
||||||
|
model_response.choices[0], "finish_reason", "stop"
|
||||||
|
)
|
||||||
|
# streaming_model_response.choices = [litellm.utils.StreamingChoices()]
|
||||||
|
streaming_choice = litellm.utils.StreamingChoices()
|
||||||
|
streaming_choice.index = model_response.choices[0].index
|
||||||
|
_tool_calls = []
|
||||||
|
print_verbose(
|
||||||
|
f"type of model_response.choices[0]: {type(model_response.choices[0])}"
|
||||||
|
)
|
||||||
|
print_verbose(
|
||||||
|
f"type of streaming_choice: {type(streaming_choice)}"
|
||||||
|
)
|
||||||
|
if isinstance(model_response.choices[0], litellm.Choices):
|
||||||
|
if getattr(
|
||||||
|
model_response.choices[0].message, "tool_calls", None
|
||||||
|
) is not None and isinstance(
|
||||||
|
model_response.choices[0].message.tool_calls, list
|
||||||
|
):
|
||||||
|
for tool_call in model_response.choices[
|
||||||
|
0
|
||||||
|
].message.tool_calls:
|
||||||
|
_tool_call = {**tool_call.dict(), "index": 0}
|
||||||
|
_tool_calls.append(_tool_call)
|
||||||
|
delta_obj = litellm.utils.Delta(
|
||||||
|
content=getattr(
|
||||||
|
model_response.choices[0].message, "content", None
|
||||||
|
),
|
||||||
|
role=model_response.choices[0].message.role,
|
||||||
|
tool_calls=_tool_calls,
|
||||||
|
)
|
||||||
|
streaming_choice.delta = delta_obj
|
||||||
|
streaming_model_response.choices = [streaming_choice]
|
||||||
|
completion_stream = ModelResponseIterator(
|
||||||
|
model_response=streaming_model_response
|
||||||
|
)
|
||||||
|
print_verbose(
|
||||||
|
f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||||
|
)
|
||||||
|
return litellm.CustomStreamWrapper(
|
||||||
|
completion_stream=completion_stream,
|
||||||
|
model=model,
|
||||||
|
custom_llm_provider="cached_response",
|
||||||
|
logging_obj=logging_obj,
|
||||||
|
)
|
||||||
|
|
||||||
|
model_response["finish_reason"] = map_finish_reason(
|
||||||
|
completion_response.get("stop_reason", "")
|
||||||
|
)
|
||||||
|
_usage = litellm.Usage(
|
||||||
|
prompt_tokens=completion_response["usage"]["input_tokens"],
|
||||||
|
completion_tokens=completion_response["usage"]["output_tokens"],
|
||||||
|
total_tokens=completion_response["usage"]["input_tokens"]
|
||||||
|
+ completion_response["usage"]["output_tokens"],
|
||||||
|
)
|
||||||
|
setattr(model_response, "usage", _usage)
|
||||||
|
else:
|
||||||
|
outputText = completion_response["completion"]
|
||||||
|
|
||||||
|
model_response["finish_reason"] = completion_response["stop_reason"]
|
||||||
|
elif provider == "ai21":
|
||||||
|
outputText = (
|
||||||
|
completion_response.get("completions")[0].get("data").get("text")
|
||||||
|
)
|
||||||
|
elif provider == "meta":
|
||||||
|
outputText = completion_response["generation"]
|
||||||
|
elif provider == "mistral":
|
||||||
|
outputText = completion_response["outputs"][0]["text"]
|
||||||
|
model_response["finish_reason"] = completion_response["outputs"][0][
|
||||||
|
"stop_reason"
|
||||||
|
]
|
||||||
|
else: # amazon titan
|
||||||
|
outputText = completion_response.get("results")[0].get("outputText")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise BedrockError(message=response.text, status_code=422)
|
raise BedrockError(
|
||||||
|
message="Error processing={}, Received error={}".format(
|
||||||
|
response.text, str(e)
|
||||||
|
),
|
||||||
|
status_code=422,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if (
|
||||||
|
len(outputText) > 0
|
||||||
|
and hasattr(model_response.choices[0], "message")
|
||||||
|
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||||
|
is None
|
||||||
|
):
|
||||||
|
model_response["choices"][0]["message"]["content"] = outputText
|
||||||
|
elif (
|
||||||
|
hasattr(model_response.choices[0], "message")
|
||||||
|
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||||
|
is not None
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise Exception()
|
||||||
|
except:
|
||||||
|
raise BedrockError(
|
||||||
|
message=json.dumps(outputText), status_code=response.status_code
|
||||||
|
)
|
||||||
|
|
||||||
|
if stream and provider == "ai21":
|
||||||
|
streaming_model_response = ModelResponse(stream=True)
|
||||||
|
streaming_model_response.choices[0].finish_reason = model_response.choices[ # type: ignore
|
||||||
|
0
|
||||||
|
].finish_reason
|
||||||
|
# streaming_model_response.choices = [litellm.utils.StreamingChoices()]
|
||||||
|
streaming_choice = litellm.utils.StreamingChoices()
|
||||||
|
streaming_choice.index = model_response.choices[0].index
|
||||||
|
delta_obj = litellm.utils.Delta(
|
||||||
|
content=getattr(model_response.choices[0].message, "content", None),
|
||||||
|
role=model_response.choices[0].message.role,
|
||||||
|
)
|
||||||
|
streaming_choice.delta = delta_obj
|
||||||
|
streaming_model_response.choices = [streaming_choice]
|
||||||
|
mri = ModelResponseIterator(model_response=streaming_model_response)
|
||||||
|
return CustomStreamWrapper(
|
||||||
|
completion_stream=mri,
|
||||||
|
model=model,
|
||||||
|
custom_llm_provider="cached_response",
|
||||||
|
logging_obj=logging_obj,
|
||||||
|
)
|
||||||
|
|
||||||
## CALCULATING USAGE - bedrock returns usage in the headers
|
## CALCULATING USAGE - bedrock returns usage in the headers
|
||||||
prompt_tokens = int(
|
bedrock_input_tokens = response.headers.get(
|
||||||
response.headers.get(
|
"x-amzn-bedrock-input-token-count", None
|
||||||
"x-amzn-bedrock-input-token-count",
|
|
||||||
len(encoding.encode("".join(m.get("content", "") for m in messages))),
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
bedrock_output_tokens = response.headers.get(
|
||||||
|
"x-amzn-bedrock-output-token-count", None
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt_tokens = int(
|
||||||
|
bedrock_input_tokens or litellm.token_counter(messages=messages)
|
||||||
|
)
|
||||||
|
|
||||||
completion_tokens = int(
|
completion_tokens = int(
|
||||||
response.headers.get(
|
bedrock_output_tokens
|
||||||
"x-amzn-bedrock-output-token-count",
|
or litellm.token_counter(
|
||||||
len(
|
text=model_response.choices[0].message.content, # type: ignore
|
||||||
encoding.encode(
|
count_response_tokens=True,
|
||||||
model_response.choices[0].message.content, # type: ignore
|
|
||||||
disallowed_special=(),
|
|
||||||
)
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -359,6 +552,7 @@ class BedrockLLM(BaseLLM):
|
||||||
|
|
||||||
## SETUP ##
|
## SETUP ##
|
||||||
stream = optional_params.pop("stream", None)
|
stream = optional_params.pop("stream", None)
|
||||||
|
provider = model.split(".")[0]
|
||||||
|
|
||||||
## CREDENTIALS ##
|
## CREDENTIALS ##
|
||||||
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
||||||
|
@ -414,19 +608,18 @@ class BedrockLLM(BaseLLM):
|
||||||
else:
|
else:
|
||||||
endpoint_url = f"https://bedrock-runtime.{aws_region_name}.amazonaws.com"
|
endpoint_url = f"https://bedrock-runtime.{aws_region_name}.amazonaws.com"
|
||||||
|
|
||||||
if stream is not None and stream == True:
|
if (stream is not None and stream == True) and provider != "ai21":
|
||||||
endpoint_url = f"{endpoint_url}/model/{model}/invoke-with-response-stream"
|
endpoint_url = f"{endpoint_url}/model/{model}/invoke-with-response-stream"
|
||||||
else:
|
else:
|
||||||
endpoint_url = f"{endpoint_url}/model/{model}/invoke"
|
endpoint_url = f"{endpoint_url}/model/{model}/invoke"
|
||||||
|
|
||||||
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
|
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
|
||||||
|
|
||||||
provider = model.split(".")[0]
|
|
||||||
prompt, chat_history = self.convert_messages_to_prompt(
|
prompt, chat_history = self.convert_messages_to_prompt(
|
||||||
model, messages, provider, custom_prompt_dict
|
model, messages, provider, custom_prompt_dict
|
||||||
)
|
)
|
||||||
inference_params = copy.deepcopy(optional_params)
|
inference_params = copy.deepcopy(optional_params)
|
||||||
|
json_schemas: dict = {}
|
||||||
if provider == "cohere":
|
if provider == "cohere":
|
||||||
if model.startswith("cohere.command-r"):
|
if model.startswith("cohere.command-r"):
|
||||||
## LOAD CONFIG
|
## LOAD CONFIG
|
||||||
|
@ -453,8 +646,114 @@ class BedrockLLM(BaseLLM):
|
||||||
True # cohere requires stream = True in inference params
|
True # cohere requires stream = True in inference params
|
||||||
)
|
)
|
||||||
data = json.dumps({"prompt": prompt, **inference_params})
|
data = json.dumps({"prompt": prompt, **inference_params})
|
||||||
|
elif provider == "anthropic":
|
||||||
|
if model.startswith("anthropic.claude-3"):
|
||||||
|
# Separate system prompt from rest of message
|
||||||
|
system_prompt_idx: list[int] = []
|
||||||
|
system_messages: list[str] = []
|
||||||
|
for idx, message in enumerate(messages):
|
||||||
|
if message["role"] == "system":
|
||||||
|
system_messages.append(message["content"])
|
||||||
|
system_prompt_idx.append(idx)
|
||||||
|
if len(system_prompt_idx) > 0:
|
||||||
|
inference_params["system"] = "\n".join(system_messages)
|
||||||
|
messages = [
|
||||||
|
i for j, i in enumerate(messages) if j not in system_prompt_idx
|
||||||
|
]
|
||||||
|
# Format rest of message according to anthropic guidelines
|
||||||
|
messages = prompt_factory(
|
||||||
|
model=model, messages=messages, custom_llm_provider="anthropic_xml"
|
||||||
|
) # type: ignore
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonAnthropicClaude3Config.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
## Handle Tool Calling
|
||||||
|
if "tools" in inference_params:
|
||||||
|
_is_function_call = True
|
||||||
|
for tool in inference_params["tools"]:
|
||||||
|
json_schemas[tool["function"]["name"]] = tool["function"].get(
|
||||||
|
"parameters", None
|
||||||
|
)
|
||||||
|
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||||
|
tools=inference_params["tools"]
|
||||||
|
)
|
||||||
|
inference_params["system"] = (
|
||||||
|
inference_params.get("system", "\n")
|
||||||
|
+ tool_calling_system_prompt
|
||||||
|
) # add the anthropic tool calling prompt to the system prompt
|
||||||
|
inference_params.pop("tools")
|
||||||
|
data = json.dumps({"messages": messages, **inference_params})
|
||||||
|
else:
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonAnthropicConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
data = json.dumps({"prompt": prompt, **inference_params})
|
||||||
|
elif provider == "ai21":
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonAI21Config.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
|
||||||
|
data = json.dumps({"prompt": prompt, **inference_params})
|
||||||
|
elif provider == "mistral":
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonMistralConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
|
||||||
|
data = json.dumps({"prompt": prompt, **inference_params})
|
||||||
|
elif provider == "amazon": # amazon titan
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonTitanConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
|
||||||
|
data = json.dumps(
|
||||||
|
{
|
||||||
|
"inputText": prompt,
|
||||||
|
"textGenerationConfig": inference_params,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
elif provider == "meta":
|
||||||
|
## LOAD CONFIG
|
||||||
|
config = litellm.AmazonLlamaConfig.get_config()
|
||||||
|
for k, v in config.items():
|
||||||
|
if (
|
||||||
|
k not in inference_params
|
||||||
|
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||||
|
inference_params[k] = v
|
||||||
|
data = json.dumps({"prompt": prompt, **inference_params})
|
||||||
else:
|
else:
|
||||||
raise Exception("UNSUPPORTED PROVIDER")
|
## LOGGING
|
||||||
|
logging_obj.pre_call(
|
||||||
|
input=messages,
|
||||||
|
api_key="",
|
||||||
|
additional_args={
|
||||||
|
"complete_input_dict": inference_params,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
raise Exception(
|
||||||
|
"Bedrock HTTPX: Unsupported provider={}, model={}".format(
|
||||||
|
provider, model
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
## COMPLETION CALL
|
## COMPLETION CALL
|
||||||
|
|
||||||
|
@ -482,7 +781,7 @@ class BedrockLLM(BaseLLM):
|
||||||
if acompletion:
|
if acompletion:
|
||||||
if isinstance(client, HTTPHandler):
|
if isinstance(client, HTTPHandler):
|
||||||
client = None
|
client = None
|
||||||
if stream:
|
if stream == True and provider != "ai21":
|
||||||
return self.async_streaming(
|
return self.async_streaming(
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
@ -511,7 +810,7 @@ class BedrockLLM(BaseLLM):
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
optional_params=optional_params,
|
optional_params=optional_params,
|
||||||
stream=False,
|
stream=stream, # type: ignore
|
||||||
litellm_params=litellm_params,
|
litellm_params=litellm_params,
|
||||||
logger_fn=logger_fn,
|
logger_fn=logger_fn,
|
||||||
headers=prepped.headers,
|
headers=prepped.headers,
|
||||||
|
@ -528,7 +827,7 @@ class BedrockLLM(BaseLLM):
|
||||||
self.client = HTTPHandler(**_params) # type: ignore
|
self.client = HTTPHandler(**_params) # type: ignore
|
||||||
else:
|
else:
|
||||||
self.client = client
|
self.client = client
|
||||||
if stream is not None and stream == True:
|
if (stream is not None and stream == True) and provider != "ai21":
|
||||||
response = self.client.post(
|
response = self.client.post(
|
||||||
url=prepped.url,
|
url=prepped.url,
|
||||||
headers=prepped.headers, # type: ignore
|
headers=prepped.headers, # type: ignore
|
||||||
|
@ -541,7 +840,7 @@ class BedrockLLM(BaseLLM):
|
||||||
status_code=response.status_code, message=response.text
|
status_code=response.status_code, message=response.text
|
||||||
)
|
)
|
||||||
|
|
||||||
decoder = AWSEventStreamDecoder()
|
decoder = AWSEventStreamDecoder(model=model)
|
||||||
|
|
||||||
completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
|
completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
|
||||||
streaming_response = CustomStreamWrapper(
|
streaming_response = CustomStreamWrapper(
|
||||||
|
@ -550,15 +849,24 @@ class BedrockLLM(BaseLLM):
|
||||||
custom_llm_provider="bedrock",
|
custom_llm_provider="bedrock",
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
## LOGGING
|
||||||
|
logging_obj.post_call(
|
||||||
|
input=messages,
|
||||||
|
api_key="",
|
||||||
|
original_response=streaming_response,
|
||||||
|
additional_args={"complete_input_dict": data},
|
||||||
|
)
|
||||||
return streaming_response
|
return streaming_response
|
||||||
|
|
||||||
response = self.client.post(url=prepped.url, headers=prepped.headers, data=data) # type: ignore
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
response = self.client.post(url=prepped.url, headers=prepped.headers, data=data) # type: ignore
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except httpx.HTTPStatusError as err:
|
except httpx.HTTPStatusError as err:
|
||||||
error_code = err.response.status_code
|
error_code = err.response.status_code
|
||||||
raise BedrockError(status_code=error_code, message=response.text)
|
raise BedrockError(status_code=error_code, message=response.text)
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
raise BedrockError(status_code=408, message="Timeout error occurred.")
|
||||||
|
|
||||||
return self.process_response(
|
return self.process_response(
|
||||||
model=model,
|
model=model,
|
||||||
|
@ -591,7 +899,7 @@ class BedrockLLM(BaseLLM):
|
||||||
logger_fn=None,
|
logger_fn=None,
|
||||||
headers={},
|
headers={},
|
||||||
client: Optional[AsyncHTTPHandler] = None,
|
client: Optional[AsyncHTTPHandler] = None,
|
||||||
) -> ModelResponse:
|
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||||
if client is None:
|
if client is None:
|
||||||
_params = {}
|
_params = {}
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
|
@ -602,12 +910,20 @@ class BedrockLLM(BaseLLM):
|
||||||
else:
|
else:
|
||||||
self.client = client # type: ignore
|
self.client = client # type: ignore
|
||||||
|
|
||||||
response = await self.client.post(api_base, headers=headers, data=data) # type: ignore
|
try:
|
||||||
|
response = await self.client.post(api_base, headers=headers, data=data) # type: ignore
|
||||||
|
response.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as err:
|
||||||
|
error_code = err.response.status_code
|
||||||
|
raise BedrockError(status_code=error_code, message=response.text)
|
||||||
|
except httpx.TimeoutException as e:
|
||||||
|
raise BedrockError(status_code=408, message="Timeout error occurred.")
|
||||||
|
|
||||||
return self.process_response(
|
return self.process_response(
|
||||||
model=model,
|
model=model,
|
||||||
response=response,
|
response=response,
|
||||||
model_response=model_response,
|
model_response=model_response,
|
||||||
stream=stream,
|
stream=stream if isinstance(stream, bool) else False,
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
api_key="",
|
api_key="",
|
||||||
data=data,
|
data=data,
|
||||||
|
@ -650,7 +966,7 @@ class BedrockLLM(BaseLLM):
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise BedrockError(status_code=response.status_code, message=response.text)
|
raise BedrockError(status_code=response.status_code, message=response.text)
|
||||||
|
|
||||||
decoder = AWSEventStreamDecoder()
|
decoder = AWSEventStreamDecoder(model=model)
|
||||||
|
|
||||||
completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024))
|
completion_stream = decoder.aiter_bytes(response.aiter_bytes(chunk_size=1024))
|
||||||
streaming_response = CustomStreamWrapper(
|
streaming_response = CustomStreamWrapper(
|
||||||
|
@ -659,6 +975,15 @@ class BedrockLLM(BaseLLM):
|
||||||
custom_llm_provider="bedrock",
|
custom_llm_provider="bedrock",
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
## LOGGING
|
||||||
|
logging_obj.post_call(
|
||||||
|
input=messages,
|
||||||
|
api_key="",
|
||||||
|
original_response=streaming_response,
|
||||||
|
additional_args={"complete_input_dict": data},
|
||||||
|
)
|
||||||
|
|
||||||
return streaming_response
|
return streaming_response
|
||||||
|
|
||||||
def embedding(self, *args, **kwargs):
|
def embedding(self, *args, **kwargs):
|
||||||
|
@ -676,11 +1001,70 @@ def get_response_stream_shape():
|
||||||
|
|
||||||
|
|
||||||
class AWSEventStreamDecoder:
|
class AWSEventStreamDecoder:
|
||||||
def __init__(self) -> None:
|
def __init__(self, model: str) -> None:
|
||||||
from botocore.parsers import EventStreamJSONParser
|
from botocore.parsers import EventStreamJSONParser
|
||||||
|
|
||||||
|
self.model = model
|
||||||
self.parser = EventStreamJSONParser()
|
self.parser = EventStreamJSONParser()
|
||||||
|
|
||||||
|
def _chunk_parser(self, chunk_data: dict) -> GenericStreamingChunk:
|
||||||
|
text = ""
|
||||||
|
is_finished = False
|
||||||
|
finish_reason = ""
|
||||||
|
if "outputText" in chunk_data:
|
||||||
|
text = chunk_data["outputText"]
|
||||||
|
# ai21 mapping
|
||||||
|
if "ai21" in self.model: # fake ai21 streaming
|
||||||
|
text = chunk_data.get("completions")[0].get("data").get("text") # type: ignore
|
||||||
|
is_finished = True
|
||||||
|
finish_reason = "stop"
|
||||||
|
######## bedrock.anthropic mappings ###############
|
||||||
|
elif "completion" in chunk_data: # not claude-3
|
||||||
|
text = chunk_data["completion"] # bedrock.anthropic
|
||||||
|
stop_reason = chunk_data.get("stop_reason", None)
|
||||||
|
if stop_reason != None:
|
||||||
|
is_finished = True
|
||||||
|
finish_reason = stop_reason
|
||||||
|
elif "delta" in chunk_data:
|
||||||
|
if chunk_data["delta"].get("text", None) is not None:
|
||||||
|
text = chunk_data["delta"]["text"]
|
||||||
|
stop_reason = chunk_data["delta"].get("stop_reason", None)
|
||||||
|
if stop_reason != None:
|
||||||
|
is_finished = True
|
||||||
|
finish_reason = stop_reason
|
||||||
|
######## bedrock.mistral mappings ###############
|
||||||
|
elif "outputs" in chunk_data:
|
||||||
|
if (
|
||||||
|
len(chunk_data["outputs"]) == 1
|
||||||
|
and chunk_data["outputs"][0].get("text", None) is not None
|
||||||
|
):
|
||||||
|
text = chunk_data["outputs"][0]["text"]
|
||||||
|
stop_reason = chunk_data.get("stop_reason", None)
|
||||||
|
if stop_reason != None:
|
||||||
|
is_finished = True
|
||||||
|
finish_reason = stop_reason
|
||||||
|
######## bedrock.cohere mappings ###############
|
||||||
|
# meta mapping
|
||||||
|
elif "generation" in chunk_data:
|
||||||
|
text = chunk_data["generation"] # bedrock.meta
|
||||||
|
# cohere mapping
|
||||||
|
elif "text" in chunk_data:
|
||||||
|
text = chunk_data["text"] # bedrock.cohere
|
||||||
|
# cohere mapping for finish reason
|
||||||
|
elif "finish_reason" in chunk_data:
|
||||||
|
finish_reason = chunk_data["finish_reason"]
|
||||||
|
is_finished = True
|
||||||
|
elif chunk_data.get("completionReason", None):
|
||||||
|
is_finished = True
|
||||||
|
finish_reason = chunk_data["completionReason"]
|
||||||
|
return GenericStreamingChunk(
|
||||||
|
**{
|
||||||
|
"text": text,
|
||||||
|
"is_finished": is_finished,
|
||||||
|
"finish_reason": finish_reason,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:
|
def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[GenericStreamingChunk]:
|
||||||
"""Given an iterator that yields lines, iterate over it & yield every event encountered"""
|
"""Given an iterator that yields lines, iterate over it & yield every event encountered"""
|
||||||
from botocore.eventstream import EventStreamBuffer
|
from botocore.eventstream import EventStreamBuffer
|
||||||
|
@ -693,12 +1077,7 @@ class AWSEventStreamDecoder:
|
||||||
if message:
|
if message:
|
||||||
# sse_event = ServerSentEvent(data=message, event="completion")
|
# sse_event = ServerSentEvent(data=message, event="completion")
|
||||||
_data = json.loads(message)
|
_data = json.loads(message)
|
||||||
streaming_chunk: GenericStreamingChunk = GenericStreamingChunk(
|
yield self._chunk_parser(chunk_data=_data)
|
||||||
text=_data.get("text", ""),
|
|
||||||
is_finished=_data.get("is_finished", False),
|
|
||||||
finish_reason=_data.get("finish_reason", ""),
|
|
||||||
)
|
|
||||||
yield streaming_chunk
|
|
||||||
|
|
||||||
async def aiter_bytes(
|
async def aiter_bytes(
|
||||||
self, iterator: AsyncIterator[bytes]
|
self, iterator: AsyncIterator[bytes]
|
||||||
|
@ -713,12 +1092,7 @@ class AWSEventStreamDecoder:
|
||||||
message = self._parse_message_from_event(event)
|
message = self._parse_message_from_event(event)
|
||||||
if message:
|
if message:
|
||||||
_data = json.loads(message)
|
_data = json.loads(message)
|
||||||
streaming_chunk: GenericStreamingChunk = GenericStreamingChunk(
|
yield self._chunk_parser(chunk_data=_data)
|
||||||
text=_data.get("text", ""),
|
|
||||||
is_finished=_data.get("is_finished", False),
|
|
||||||
finish_reason=_data.get("finish_reason", ""),
|
|
||||||
)
|
|
||||||
yield streaming_chunk
|
|
||||||
|
|
||||||
def _parse_message_from_event(self, event) -> Optional[str]:
|
def _parse_message_from_event(self, event) -> Optional[str]:
|
||||||
response_dict = event.to_response_dict()
|
response_dict = event.to_response_dict()
|
||||||
|
|
|
@ -326,7 +326,7 @@ async def acompletion(
|
||||||
or custom_llm_provider == "sagemaker"
|
or custom_llm_provider == "sagemaker"
|
||||||
or custom_llm_provider == "anthropic"
|
or custom_llm_provider == "anthropic"
|
||||||
or custom_llm_provider == "predibase"
|
or custom_llm_provider == "predibase"
|
||||||
or (custom_llm_provider == "bedrock" and "cohere" in model)
|
or custom_llm_provider == "bedrock"
|
||||||
or custom_llm_provider in litellm.openai_compatible_providers
|
or custom_llm_provider in litellm.openai_compatible_providers
|
||||||
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
|
): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
|
||||||
init_response = await loop.run_in_executor(None, func_with_context)
|
init_response = await loop.run_in_executor(None, func_with_context)
|
||||||
|
@ -368,6 +368,8 @@ async def acompletion(
|
||||||
async def _async_streaming(response, model, custom_llm_provider, args):
|
async def _async_streaming(response, model, custom_llm_provider, args):
|
||||||
try:
|
try:
|
||||||
print_verbose(f"received response in _async_streaming: {response}")
|
print_verbose(f"received response in _async_streaming: {response}")
|
||||||
|
if asyncio.iscoroutine(response):
|
||||||
|
response = await response
|
||||||
async for line in response:
|
async for line in response:
|
||||||
print_verbose(f"line in async streaming: {line}")
|
print_verbose(f"line in async streaming: {line}")
|
||||||
yield line
|
yield line
|
||||||
|
@ -1979,23 +1981,9 @@ def completion(
|
||||||
# boto3 reads keys from .env
|
# boto3 reads keys from .env
|
||||||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||||||
|
|
||||||
if "cohere" in model:
|
if (
|
||||||
response = bedrock_chat_completion.completion(
|
"aws_bedrock_client" in optional_params
|
||||||
model=model,
|
): # use old bedrock flow for aws_bedrock_client users.
|
||||||
messages=messages,
|
|
||||||
custom_prompt_dict=litellm.custom_prompt_dict,
|
|
||||||
model_response=model_response,
|
|
||||||
print_verbose=print_verbose,
|
|
||||||
optional_params=optional_params,
|
|
||||||
litellm_params=litellm_params,
|
|
||||||
logger_fn=logger_fn,
|
|
||||||
encoding=encoding,
|
|
||||||
logging_obj=logging,
|
|
||||||
extra_headers=extra_headers,
|
|
||||||
timeout=timeout,
|
|
||||||
acompletion=acompletion,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
response = bedrock.completion(
|
response = bedrock.completion(
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
@ -2031,7 +2019,22 @@ def completion(
|
||||||
custom_llm_provider="bedrock",
|
custom_llm_provider="bedrock",
|
||||||
logging_obj=logging,
|
logging_obj=logging,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
response = bedrock_chat_completion.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
custom_prompt_dict=custom_prompt_dict,
|
||||||
|
model_response=model_response,
|
||||||
|
print_verbose=print_verbose,
|
||||||
|
optional_params=optional_params,
|
||||||
|
litellm_params=litellm_params,
|
||||||
|
logger_fn=logger_fn,
|
||||||
|
encoding=encoding,
|
||||||
|
logging_obj=logging,
|
||||||
|
extra_headers=extra_headers,
|
||||||
|
timeout=timeout,
|
||||||
|
acompletion=acompletion,
|
||||||
|
)
|
||||||
if optional_params.get("stream", False):
|
if optional_params.get("stream", False):
|
||||||
## LOGGING
|
## LOGGING
|
||||||
logging.post_call(
|
logging.post_call(
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-495003b4fc3648e1.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"jE-EC3LDs6Y8P0wmind3t\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-f20fdea77aed85ba.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"l-0LDfSCdaUCAbcLIx_QC\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-495003b4fc3648e1.js"],""]
|
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-f20fdea77aed85ba.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["jE-EC3LDs6Y8P0wmind3t",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["l-0LDfSCdaUCAbcLIx_QC",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -1,4 +1,9 @@
|
||||||
model_list:
|
model_list:
|
||||||
|
- model_name: gpt-3.5-turbo-fake-model
|
||||||
|
litellm_params:
|
||||||
|
model: openai/my-fake-model
|
||||||
|
api_base: http://0.0.0.0:8080
|
||||||
|
api_key: ""
|
||||||
- model_name: gpt-3.5-turbo
|
- model_name: gpt-3.5-turbo
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: azure/gpt-35-turbo
|
model: azure/gpt-35-turbo
|
||||||
|
@ -13,6 +18,3 @@ model_list:
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
enable_pre_call_checks: true
|
enable_pre_call_checks: true
|
||||||
|
|
||||||
# general_settings:
|
|
||||||
# master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
|
|
||||||
|
|
|
@ -671,15 +671,21 @@ async def user_api_key_auth(
|
||||||
_end_user_object = None
|
_end_user_object = None
|
||||||
end_user_params = {}
|
end_user_params = {}
|
||||||
if "user" in request_data:
|
if "user" in request_data:
|
||||||
_end_user_object = await get_end_user_object(
|
try:
|
||||||
end_user_id=request_data["user"],
|
_end_user_object = await get_end_user_object(
|
||||||
prisma_client=prisma_client,
|
end_user_id=request_data["user"],
|
||||||
user_api_key_cache=user_api_key_cache,
|
prisma_client=prisma_client,
|
||||||
)
|
user_api_key_cache=user_api_key_cache,
|
||||||
if _end_user_object is not None:
|
|
||||||
end_user_params["allowed_model_region"] = (
|
|
||||||
_end_user_object.allowed_model_region
|
|
||||||
)
|
)
|
||||||
|
if _end_user_object is not None:
|
||||||
|
end_user_params["allowed_model_region"] = (
|
||||||
|
_end_user_object.allowed_model_region
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
"Unable to find user in db. Error - {}".format(str(e))
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
is_master_key_valid = secrets.compare_digest(api_key, master_key) # type: ignore
|
is_master_key_valid = secrets.compare_digest(api_key, master_key) # type: ignore
|
||||||
|
@ -4920,7 +4926,7 @@ async def token_counter(request: TokenCountRequest):
|
||||||
litellm_model_name or request.model
|
litellm_model_name or request.model
|
||||||
) # use litellm model name, if it's not avalable then fallback to request.model
|
) # use litellm model name, if it's not avalable then fallback to request.model
|
||||||
_tokenizer_used = litellm.utils._select_tokenizer(model=model_to_use)
|
_tokenizer_used = litellm.utils._select_tokenizer(model=model_to_use)
|
||||||
tokenizer_used = _tokenizer_used["type"]
|
tokenizer_used = str(_tokenizer_used["type"])
|
||||||
total_tokens = token_counter(
|
total_tokens = token_counter(
|
||||||
model=model_to_use,
|
model=model_to_use,
|
||||||
text=prompt,
|
text=prompt,
|
||||||
|
@ -8134,6 +8140,7 @@ async def add_new_model(
|
||||||
await proxy_logging_obj.slack_alerting_instance.model_added_alert(
|
await proxy_logging_obj.slack_alerting_instance.model_added_alert(
|
||||||
model_name=model_params.model_name,
|
model_name=model_params.model_name,
|
||||||
litellm_model_name=_orignal_litellm_model_name,
|
litellm_model_name=_orignal_litellm_model_name,
|
||||||
|
passed_model_info=model_params.model_info,
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -242,12 +242,24 @@ async def test_langfuse_masked_input_output(langfuse_client):
|
||||||
response = await create_async_task(
|
response = await create_async_task(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
messages=[{"role": "user", "content": "This is a test"}],
|
messages=[{"role": "user", "content": "This is a test"}],
|
||||||
metadata={"trace_id": _unique_trace_name, "mask_input": mask_value, "mask_output": mask_value},
|
metadata={
|
||||||
mock_response="This is a test response"
|
"trace_id": _unique_trace_name,
|
||||||
|
"mask_input": mask_value,
|
||||||
|
"mask_output": mask_value,
|
||||||
|
},
|
||||||
|
mock_response="This is a test response",
|
||||||
)
|
)
|
||||||
print(response)
|
print(response)
|
||||||
expected_input = "redacted-by-litellm" if mask_value else {'messages': [{'content': 'This is a test', 'role': 'user'}]}
|
expected_input = (
|
||||||
expected_output = "redacted-by-litellm" if mask_value else {'content': 'This is a test response', 'role': 'assistant'}
|
"redacted-by-litellm"
|
||||||
|
if mask_value
|
||||||
|
else {"messages": [{"content": "This is a test", "role": "user"}]}
|
||||||
|
)
|
||||||
|
expected_output = (
|
||||||
|
"redacted-by-litellm"
|
||||||
|
if mask_value
|
||||||
|
else {"content": "This is a test response", "role": "assistant"}
|
||||||
|
)
|
||||||
langfuse_client.flush()
|
langfuse_client.flush()
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
@ -262,6 +274,7 @@ async def test_langfuse_masked_input_output(langfuse_client):
|
||||||
assert generations[0].input == expected_input
|
assert generations[0].input == expected_input
|
||||||
assert generations[0].output == expected_output
|
assert generations[0].output == expected_output
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_langfuse_logging_metadata(langfuse_client):
|
async def test_langfuse_logging_metadata(langfuse_client):
|
||||||
"""
|
"""
|
||||||
|
@ -523,7 +536,7 @@ def test_langfuse_logging_function_calling():
|
||||||
# test_langfuse_logging_function_calling()
|
# test_langfuse_logging_function_calling()
|
||||||
|
|
||||||
|
|
||||||
def test_langfuse_existing_trace_id():
|
def test_aaalangfuse_existing_trace_id():
|
||||||
"""
|
"""
|
||||||
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
|
When existing trace id is passed, don't set trace params -> prevents overwriting the trace
|
||||||
|
|
||||||
|
@ -577,7 +590,7 @@ def test_langfuse_existing_trace_id():
|
||||||
"verbose": False,
|
"verbose": False,
|
||||||
"custom_llm_provider": "openai",
|
"custom_llm_provider": "openai",
|
||||||
"api_base": "https://api.openai.com/v1/",
|
"api_base": "https://api.openai.com/v1/",
|
||||||
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
|
"litellm_call_id": None,
|
||||||
"model_alias_map": {},
|
"model_alias_map": {},
|
||||||
"completion_call_id": None,
|
"completion_call_id": None,
|
||||||
"metadata": None,
|
"metadata": None,
|
||||||
|
@ -593,7 +606,7 @@ def test_langfuse_existing_trace_id():
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"user": None,
|
"user": None,
|
||||||
"call_type": "completion",
|
"call_type": "completion",
|
||||||
"litellm_call_id": "508113a1-c6f1-48ce-a3e1-01c6cce9330e",
|
"litellm_call_id": None,
|
||||||
"completion_start_time": "2024-05-01 07:31:29.903685",
|
"completion_start_time": "2024-05-01 07:31:29.903685",
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
"extra_body": {},
|
"extra_body": {},
|
||||||
|
@ -633,6 +646,8 @@ def test_langfuse_existing_trace_id():
|
||||||
|
|
||||||
trace_id = langfuse_response_object["trace_id"]
|
trace_id = langfuse_response_object["trace_id"]
|
||||||
|
|
||||||
|
assert trace_id is not None
|
||||||
|
|
||||||
langfuse_client.flush()
|
langfuse_client.flush()
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
|
@ -7,7 +7,7 @@ import os, io
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
) # Adds the parent directory to the, system path
|
) # Adds the parent directory to the system path
|
||||||
import pytest
|
import pytest
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import embedding, completion, completion_cost, Timeout
|
from litellm import embedding, completion, completion_cost, Timeout
|
||||||
|
@ -2301,6 +2301,8 @@ def test_completion_azure_deployment_id():
|
||||||
|
|
||||||
# test_completion_azure_deployment_id()
|
# test_completion_azure_deployment_id()
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("sync_mode", [False, True])
|
@pytest.mark.parametrize("sync_mode", [False, True])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@ -2663,14 +2665,29 @@ def response_format_tests(response: litellm.ModelResponse):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model",
|
||||||
|
[
|
||||||
|
"bedrock/cohere.command-r-plus-v1:0",
|
||||||
|
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
"anthropic.claude-instant-v1",
|
||||||
|
"bedrock/ai21.j2-mid",
|
||||||
|
"mistral.mistral-7b-instruct-v0:2",
|
||||||
|
"bedrock/amazon.titan-tg1-large",
|
||||||
|
"meta.llama3-8b-instruct-v1:0",
|
||||||
|
"cohere.command-text-v14",
|
||||||
|
],
|
||||||
|
)
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_completion_bedrock_command_r(sync_mode):
|
async def test_completion_bedrock_httpx_models(sync_mode, model):
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
|
||||||
if sync_mode:
|
if sync_mode:
|
||||||
response = completion(
|
response = completion(
|
||||||
model="bedrock/cohere.command-r-plus-v1:0",
|
model=model,
|
||||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||||
|
temperature=0.2,
|
||||||
|
max_tokens=200,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert isinstance(response, litellm.ModelResponse)
|
assert isinstance(response, litellm.ModelResponse)
|
||||||
|
@ -2678,8 +2695,10 @@ async def test_completion_bedrock_command_r(sync_mode):
|
||||||
response_format_tests(response=response)
|
response_format_tests(response=response)
|
||||||
else:
|
else:
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model="bedrock/cohere.command-r-plus-v1:0",
|
model=model,
|
||||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||||
|
temperature=0.2,
|
||||||
|
max_tokens=100,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert isinstance(response, litellm.ModelResponse)
|
assert isinstance(response, litellm.ModelResponse)
|
||||||
|
@ -2715,69 +2734,12 @@ def test_completion_bedrock_titan_null_response():
|
||||||
pytest.fail(f"An error occurred - {str(e)}")
|
pytest.fail(f"An error occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_titan():
|
|
||||||
try:
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/amazon.titan-tg1-large",
|
|
||||||
messages=messages,
|
|
||||||
temperature=0.2,
|
|
||||||
max_tokens=200,
|
|
||||||
top_p=0.8,
|
|
||||||
logger_fn=logger_fn,
|
|
||||||
)
|
|
||||||
# Add any assertions here to check the response
|
|
||||||
print(response)
|
|
||||||
except RateLimitError:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"Error occurred: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_titan()
|
# test_completion_bedrock_titan()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude():
|
|
||||||
print("calling claude")
|
|
||||||
try:
|
|
||||||
response = completion(
|
|
||||||
model="anthropic.claude-instant-v1",
|
|
||||||
messages=messages,
|
|
||||||
max_tokens=10,
|
|
||||||
temperature=0.1,
|
|
||||||
logger_fn=logger_fn,
|
|
||||||
)
|
|
||||||
# Add any assertions here to check the response
|
|
||||||
print(response)
|
|
||||||
except RateLimitError:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"Error occurred: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_claude()
|
# test_completion_bedrock_claude()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_cohere():
|
|
||||||
print("calling bedrock cohere")
|
|
||||||
litellm.set_verbose = True
|
|
||||||
try:
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/cohere.command-text-v14",
|
|
||||||
messages=[{"role": "user", "content": "hi"}],
|
|
||||||
temperature=0.1,
|
|
||||||
max_tokens=10,
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
# Add any assertions here to check the response
|
|
||||||
print(response)
|
|
||||||
for chunk in response:
|
|
||||||
print(chunk)
|
|
||||||
except RateLimitError:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
pytest.fail(f"Error occurred: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
# test_completion_bedrock_cohere()
|
# test_completion_bedrock_cohere()
|
||||||
|
|
||||||
|
|
||||||
|
@ -2800,23 +2762,6 @@ def test_completion_bedrock_cohere():
|
||||||
# pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
# test_completion_bedrock_claude_stream()
|
# test_completion_bedrock_claude_stream()
|
||||||
|
|
||||||
# def test_completion_bedrock_ai21():
|
|
||||||
# try:
|
|
||||||
# litellm.set_verbose = False
|
|
||||||
# response = completion(
|
|
||||||
# model="bedrock/ai21.j2-mid",
|
|
||||||
# messages=messages,
|
|
||||||
# temperature=0.2,
|
|
||||||
# top_p=0.2,
|
|
||||||
# max_tokens=20
|
|
||||||
# )
|
|
||||||
# # Add any assertions here to check the response
|
|
||||||
# print(response)
|
|
||||||
# except RateLimitError:
|
|
||||||
# pass
|
|
||||||
# except Exception as e:
|
|
||||||
# pytest.fail(f"Error occurred: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
######## Test VLLM ########
|
######## Test VLLM ########
|
||||||
# def test_completion_vllm():
|
# def test_completion_vllm():
|
||||||
|
|
|
@ -558,7 +558,7 @@ async def test_async_chat_bedrock_stream():
|
||||||
continue
|
continue
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
time.sleep(1)
|
await asyncio.sleep(1)
|
||||||
print(f"customHandler.errors: {customHandler.errors}")
|
print(f"customHandler.errors: {customHandler.errors}")
|
||||||
assert len(customHandler.errors) == 0
|
assert len(customHandler.errors) == 0
|
||||||
litellm.callbacks = []
|
litellm.callbacks = []
|
||||||
|
|
|
@ -1041,14 +1041,27 @@ async def test_completion_replicate_llama3_streaming(sync_mode):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model",
|
||||||
|
[
|
||||||
|
# "bedrock/cohere.command-r-plus-v1:0",
|
||||||
|
# "anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
# "anthropic.claude-instant-v1",
|
||||||
|
# "bedrock/ai21.j2-mid",
|
||||||
|
# "mistral.mistral-7b-instruct-v0:2",
|
||||||
|
# "bedrock/amazon.titan-tg1-large",
|
||||||
|
# "meta.llama3-8b-instruct-v1:0",
|
||||||
|
"cohere.command-text-v14"
|
||||||
|
],
|
||||||
|
)
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bedrock_cohere_command_r_streaming(sync_mode):
|
async def test_bedrock_httpx_streaming(sync_mode, model):
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
if sync_mode:
|
if sync_mode:
|
||||||
final_chunk: Optional[litellm.ModelResponse] = None
|
final_chunk: Optional[litellm.ModelResponse] = None
|
||||||
response: litellm.CustomStreamWrapper = completion( # type: ignore
|
response: litellm.CustomStreamWrapper = completion( # type: ignore
|
||||||
model="bedrock/cohere.command-r-plus-v1:0",
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
max_tokens=10, # type: ignore
|
max_tokens=10, # type: ignore
|
||||||
stream=True,
|
stream=True,
|
||||||
|
@ -1069,7 +1082,7 @@ async def test_bedrock_cohere_command_r_streaming(sync_mode):
|
||||||
raise Exception("Empty response received")
|
raise Exception("Empty response received")
|
||||||
else:
|
else:
|
||||||
response: litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore
|
response: litellm.CustomStreamWrapper = await litellm.acompletion( # type: ignore
|
||||||
model="bedrock/cohere.command-r-plus-v1:0",
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
max_tokens=100, # type: ignore
|
max_tokens=100, # type: ignore
|
||||||
stream=True,
|
stream=True,
|
||||||
|
|
|
@ -76,6 +76,9 @@ class ModelInfo(BaseModel):
|
||||||
db_model: bool = (
|
db_model: bool = (
|
||||||
False # used for proxy - to separate models which are stored in the db vs. config.
|
False # used for proxy - to separate models which are stored in the db vs. config.
|
||||||
)
|
)
|
||||||
|
base_model: Optional[str] = (
|
||||||
|
None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, id: Optional[Union[str, int]] = None, **params):
|
def __init__(self, id: Optional[Union[str, int]] = None, **params):
|
||||||
if id is None:
|
if id is None:
|
||||||
|
|
136
litellm/utils.py
136
litellm/utils.py
|
@ -3853,7 +3853,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
|
||||||
)
|
)
|
||||||
if total_time == 0.0: # total time is in ms
|
if total_time == 0.0: # total time is in ms
|
||||||
start_time = completion_response["created"]
|
start_time = completion_response["created"]
|
||||||
end_time = completion_response["ended"]
|
end_time = getattr(completion_response, "ended", time.time())
|
||||||
total_time = end_time - start_time
|
total_time = end_time - start_time
|
||||||
|
|
||||||
return a100_80gb_price_per_second_public * total_time / 1000
|
return a100_80gb_price_per_second_public * total_time / 1000
|
||||||
|
@ -8676,7 +8676,7 @@ def exception_type(
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
if "Malformed input request" in error_str:
|
elif "Malformed input request" in error_str:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise BadRequestError(
|
raise BadRequestError(
|
||||||
message=f"BedrockException - {error_str}",
|
message=f"BedrockException - {error_str}",
|
||||||
|
@ -8684,7 +8684,7 @@ def exception_type(
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
if (
|
elif (
|
||||||
"Unable to locate credentials" in error_str
|
"Unable to locate credentials" in error_str
|
||||||
or "The security token included in the request is invalid"
|
or "The security token included in the request is invalid"
|
||||||
in error_str
|
in error_str
|
||||||
|
@ -8696,7 +8696,7 @@ def exception_type(
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
if "AccessDeniedException" in error_str:
|
elif "AccessDeniedException" in error_str:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise PermissionDeniedError(
|
raise PermissionDeniedError(
|
||||||
message=f"BedrockException PermissionDeniedError - {error_str}",
|
message=f"BedrockException PermissionDeniedError - {error_str}",
|
||||||
|
@ -8704,7 +8704,7 @@ def exception_type(
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
if (
|
elif (
|
||||||
"throttlingException" in error_str
|
"throttlingException" in error_str
|
||||||
or "ThrottlingException" in error_str
|
or "ThrottlingException" in error_str
|
||||||
):
|
):
|
||||||
|
@ -8715,14 +8715,17 @@ def exception_type(
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
if "Connect timeout on endpoint URL" in error_str:
|
elif (
|
||||||
|
"Connect timeout on endpoint URL" in error_str
|
||||||
|
or "timed out" in error_str
|
||||||
|
):
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise Timeout(
|
raise Timeout(
|
||||||
message=f"BedrockException: Timeout Error - {error_str}",
|
message=f"BedrockException: Timeout Error - {error_str}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider="bedrock",
|
llm_provider="bedrock",
|
||||||
)
|
)
|
||||||
if hasattr(original_exception, "status_code"):
|
elif hasattr(original_exception, "status_code"):
|
||||||
if original_exception.status_code == 500:
|
if original_exception.status_code == 500:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise ServiceUnavailableError(
|
raise ServiceUnavailableError(
|
||||||
|
@ -8760,6 +8763,49 @@ def exception_type(
|
||||||
model=model,
|
model=model,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
|
elif original_exception.status_code == 408:
|
||||||
|
exception_mapping_worked = True
|
||||||
|
raise Timeout(
|
||||||
|
message=f"BedrockException - {original_exception.message}",
|
||||||
|
model=model,
|
||||||
|
llm_provider=custom_llm_provider,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
|
)
|
||||||
|
elif original_exception.status_code == 422:
|
||||||
|
exception_mapping_worked = True
|
||||||
|
raise BadRequestError(
|
||||||
|
message=f"BedrockException - {original_exception.message}",
|
||||||
|
model=model,
|
||||||
|
llm_provider=custom_llm_provider,
|
||||||
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
|
)
|
||||||
|
elif original_exception.status_code == 429:
|
||||||
|
exception_mapping_worked = True
|
||||||
|
raise RateLimitError(
|
||||||
|
message=f"BedrockException - {original_exception.message}",
|
||||||
|
model=model,
|
||||||
|
llm_provider=custom_llm_provider,
|
||||||
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
|
)
|
||||||
|
elif original_exception.status_code == 503:
|
||||||
|
exception_mapping_worked = True
|
||||||
|
raise ServiceUnavailableError(
|
||||||
|
message=f"BedrockException - {original_exception.message}",
|
||||||
|
model=model,
|
||||||
|
llm_provider=custom_llm_provider,
|
||||||
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
|
)
|
||||||
|
elif original_exception.status_code == 504: # gateway timeout error
|
||||||
|
exception_mapping_worked = True
|
||||||
|
raise Timeout(
|
||||||
|
message=f"BedrockException - {original_exception.message}",
|
||||||
|
model=model,
|
||||||
|
llm_provider=custom_llm_provider,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
|
)
|
||||||
elif custom_llm_provider == "sagemaker":
|
elif custom_llm_provider == "sagemaker":
|
||||||
if "Unable to locate credentials" in error_str:
|
if "Unable to locate credentials" in error_str:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
|
@ -10639,75 +10685,11 @@ class CustomStreamWrapper:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
def handle_bedrock_stream(self, chunk):
|
def handle_bedrock_stream(self, chunk):
|
||||||
if "cohere" in self.model:
|
return {
|
||||||
return {
|
"text": chunk["text"],
|
||||||
"text": chunk["text"],
|
"is_finished": chunk["is_finished"],
|
||||||
"is_finished": chunk["is_finished"],
|
"finish_reason": chunk["finish_reason"],
|
||||||
"finish_reason": chunk["finish_reason"],
|
}
|
||||||
}
|
|
||||||
if hasattr(chunk, "get"):
|
|
||||||
chunk = chunk.get("chunk")
|
|
||||||
chunk_data = json.loads(chunk.get("bytes").decode())
|
|
||||||
else:
|
|
||||||
chunk_data = json.loads(chunk.decode())
|
|
||||||
if chunk_data:
|
|
||||||
text = ""
|
|
||||||
is_finished = False
|
|
||||||
finish_reason = ""
|
|
||||||
if "outputText" in chunk_data:
|
|
||||||
text = chunk_data["outputText"]
|
|
||||||
# ai21 mapping
|
|
||||||
if "ai21" in self.model: # fake ai21 streaming
|
|
||||||
text = chunk_data.get("completions")[0].get("data").get("text")
|
|
||||||
is_finished = True
|
|
||||||
finish_reason = "stop"
|
|
||||||
######## bedrock.anthropic mappings ###############
|
|
||||||
elif "completion" in chunk_data: # not claude-3
|
|
||||||
text = chunk_data["completion"] # bedrock.anthropic
|
|
||||||
stop_reason = chunk_data.get("stop_reason", None)
|
|
||||||
if stop_reason != None:
|
|
||||||
is_finished = True
|
|
||||||
finish_reason = stop_reason
|
|
||||||
elif "delta" in chunk_data:
|
|
||||||
if chunk_data["delta"].get("text", None) is not None:
|
|
||||||
text = chunk_data["delta"]["text"]
|
|
||||||
stop_reason = chunk_data["delta"].get("stop_reason", None)
|
|
||||||
if stop_reason != None:
|
|
||||||
is_finished = True
|
|
||||||
finish_reason = stop_reason
|
|
||||||
######## bedrock.mistral mappings ###############
|
|
||||||
elif "outputs" in chunk_data:
|
|
||||||
if (
|
|
||||||
len(chunk_data["outputs"]) == 1
|
|
||||||
and chunk_data["outputs"][0].get("text", None) is not None
|
|
||||||
):
|
|
||||||
text = chunk_data["outputs"][0]["text"]
|
|
||||||
stop_reason = chunk_data.get("stop_reason", None)
|
|
||||||
if stop_reason != None:
|
|
||||||
is_finished = True
|
|
||||||
finish_reason = stop_reason
|
|
||||||
######## bedrock.cohere mappings ###############
|
|
||||||
# meta mapping
|
|
||||||
elif "generation" in chunk_data:
|
|
||||||
text = chunk_data["generation"] # bedrock.meta
|
|
||||||
# cohere mapping
|
|
||||||
elif "text" in chunk_data:
|
|
||||||
text = chunk_data["text"] # bedrock.cohere
|
|
||||||
# cohere mapping for finish reason
|
|
||||||
elif "finish_reason" in chunk_data:
|
|
||||||
finish_reason = chunk_data["finish_reason"]
|
|
||||||
is_finished = True
|
|
||||||
elif chunk_data.get("completionReason", None):
|
|
||||||
is_finished = True
|
|
||||||
finish_reason = chunk_data["completionReason"]
|
|
||||||
elif chunk.get("error", None):
|
|
||||||
raise Exception(chunk["error"])
|
|
||||||
return {
|
|
||||||
"text": text,
|
|
||||||
"is_finished": is_finished,
|
|
||||||
"finish_reason": finish_reason,
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def handle_sagemaker_stream(self, chunk):
|
def handle_sagemaker_stream(self, chunk):
|
||||||
if "data: [DONE]" in chunk:
|
if "data: [DONE]" in chunk:
|
||||||
|
@ -11510,7 +11492,7 @@ class CustomStreamWrapper:
|
||||||
or self.custom_llm_provider == "replicate"
|
or self.custom_llm_provider == "replicate"
|
||||||
or self.custom_llm_provider == "cached_response"
|
or self.custom_llm_provider == "cached_response"
|
||||||
or self.custom_llm_provider == "predibase"
|
or self.custom_llm_provider == "predibase"
|
||||||
or (self.custom_llm_provider == "bedrock" and "cohere" in self.model)
|
or self.custom_llm_provider == "bedrock"
|
||||||
or self.custom_llm_provider in litellm.openai_compatible_endpoints
|
or self.custom_llm_provider in litellm.openai_compatible_endpoints
|
||||||
):
|
):
|
||||||
async for chunk in self.completion_stream:
|
async for chunk in self.completion_stream:
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "1.37.15"
|
version = "1.37.16"
|
||||||
description = "Library to easily interface with LLM API providers"
|
description = "Library to easily interface with LLM API providers"
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[tool.commitizen]
|
[tool.commitizen]
|
||||||
version = "1.37.15"
|
version = "1.37.16"
|
||||||
version_files = [
|
version_files = [
|
||||||
"pyproject.toml:^version"
|
"pyproject.toml:^version"
|
||||||
]
|
]
|
||||||
|
|
|
@ -129,7 +129,7 @@ async def test_check_num_callbacks():
|
||||||
set(all_litellm_callbacks_1) - set(all_litellm_callbacks_2),
|
set(all_litellm_callbacks_1) - set(all_litellm_callbacks_2),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2
|
assert abs(num_callbacks_1 - num_callbacks_2) <= 4
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ async def test_check_num_callbacks():
|
||||||
set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2),
|
set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
assert abs(num_callbacks_3 - num_callbacks_2) <= 4
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@ -183,7 +183,7 @@ async def test_check_num_callbacks_on_lowest_latency():
|
||||||
set(all_litellm_callbacks_2) - set(all_litellm_callbacks_1),
|
set(all_litellm_callbacks_2) - set(all_litellm_callbacks_1),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2
|
assert abs(num_callbacks_1 - num_callbacks_2) <= 4
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
|
@ -196,7 +196,7 @@ async def test_check_num_callbacks_on_lowest_latency():
|
||||||
set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2),
|
set(all_litellm_callbacks_3) - set(all_litellm_callbacks_2),
|
||||||
)
|
)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
assert abs(num_callbacks_2 - num_callbacks_3) <= 4
|
||||||
|
|
||||||
assert num_alerts_1 == num_alerts_2 == num_alerts_3
|
assert num_alerts_1 == num_alerts_2 == num_alerts_3
|
||||||
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-495003b4fc3648e1.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"jE-EC3LDs6Y8P0wmind3t\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-f20fdea77aed85ba.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"l-0LDfSCdaUCAbcLIx_QC\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-495003b4fc3648e1.js"],""]
|
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-f20fdea77aed85ba.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["jE-EC3LDs6Y8P0wmind3t",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["l-0LDfSCdaUCAbcLIx_QC",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -121,6 +121,7 @@ const handleSubmit = async (formValues: Record<string, any>, accessToken: string
|
||||||
// Iterate through the key-value pairs in formValues
|
// Iterate through the key-value pairs in formValues
|
||||||
litellmParamsObj["model"] = litellm_model
|
litellmParamsObj["model"] = litellm_model
|
||||||
let modelName: string = "";
|
let modelName: string = "";
|
||||||
|
console.log("formValues add deployment:", formValues);
|
||||||
for (const [key, value] of Object.entries(formValues)) {
|
for (const [key, value] of Object.entries(formValues)) {
|
||||||
if (value === '') {
|
if (value === '') {
|
||||||
continue;
|
continue;
|
||||||
|
@ -628,6 +629,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
let input_cost = "Undefined";
|
let input_cost = "Undefined";
|
||||||
let output_cost = "Undefined";
|
let output_cost = "Undefined";
|
||||||
let max_tokens = "Undefined";
|
let max_tokens = "Undefined";
|
||||||
|
let max_input_tokens = "Undefined";
|
||||||
let cleanedLitellmParams = {};
|
let cleanedLitellmParams = {};
|
||||||
|
|
||||||
const getProviderFromModel = (model: string) => {
|
const getProviderFromModel = (model: string) => {
|
||||||
|
@ -664,6 +666,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
input_cost = model_info?.input_cost_per_token;
|
input_cost = model_info?.input_cost_per_token;
|
||||||
output_cost = model_info?.output_cost_per_token;
|
output_cost = model_info?.output_cost_per_token;
|
||||||
max_tokens = model_info?.max_tokens;
|
max_tokens = model_info?.max_tokens;
|
||||||
|
max_input_tokens = model_info?.max_input_tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (curr_model?.litellm_params) {
|
if (curr_model?.litellm_params) {
|
||||||
|
@ -689,6 +692,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
}
|
}
|
||||||
|
|
||||||
modelData.data[i].max_tokens = max_tokens;
|
modelData.data[i].max_tokens = max_tokens;
|
||||||
|
modelData.data[i].max_input_tokens = max_input_tokens;
|
||||||
modelData.data[i].api_base = curr_model?.litellm_params?.api_base;
|
modelData.data[i].api_base = curr_model?.litellm_params?.api_base;
|
||||||
modelData.data[i].cleanedLitellmParams = cleanedLitellmParams;
|
modelData.data[i].cleanedLitellmParams = cleanedLitellmParams;
|
||||||
|
|
||||||
|
@ -936,7 +940,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
<TableHeaderCell style={{ maxWidth: '200px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Extra litellm Params</TableHeaderCell>
|
<TableHeaderCell style={{ maxWidth: '200px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Extra litellm Params</TableHeaderCell>
|
||||||
<TableHeaderCell style={{ maxWidth: '85px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Input Price <p style={{ fontSize: '10px', color: 'gray' }}>/1M Tokens ($)</p></TableHeaderCell>
|
<TableHeaderCell style={{ maxWidth: '85px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Input Price <p style={{ fontSize: '10px', color: 'gray' }}>/1M Tokens ($)</p></TableHeaderCell>
|
||||||
<TableHeaderCell style={{ maxWidth: '85px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Output Price <p style={{ fontSize: '10px', color: 'gray' }}>/1M Tokens ($)</p></TableHeaderCell>
|
<TableHeaderCell style={{ maxWidth: '85px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Output Price <p style={{ fontSize: '10px', color: 'gray' }}>/1M Tokens ($)</p></TableHeaderCell>
|
||||||
<TableHeaderCell style={{ maxWidth: '85px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Max Tokens</TableHeaderCell>
|
<TableHeaderCell style={{ maxWidth: '120px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Max Tokens</TableHeaderCell>
|
||||||
<TableHeaderCell style={{ maxWidth: '50px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Status</TableHeaderCell>
|
<TableHeaderCell style={{ maxWidth: '50px', whiteSpace: 'normal', wordBreak: 'break-word' }}>Status</TableHeaderCell>
|
||||||
</TableRow>
|
</TableRow>
|
||||||
</TableHead>
|
</TableHead>
|
||||||
|
@ -970,7 +974,12 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell style={{ maxWidth: '80px', whiteSpace: 'normal', wordBreak: 'break-word' }}>{model.input_cost || model.litellm_params.input_cost_per_token || null}</TableCell>
|
<TableCell style={{ maxWidth: '80px', whiteSpace: 'normal', wordBreak: 'break-word' }}>{model.input_cost || model.litellm_params.input_cost_per_token || null}</TableCell>
|
||||||
<TableCell style={{ maxWidth: '80px', whiteSpace: 'normal', wordBreak: 'break-word' }}>{model.output_cost || model.litellm_params.output_cost_per_token || null}</TableCell>
|
<TableCell style={{ maxWidth: '80px', whiteSpace: 'normal', wordBreak: 'break-word' }}>{model.output_cost || model.litellm_params.output_cost_per_token || null}</TableCell>
|
||||||
<TableCell style={{ maxWidth: '100px', whiteSpace: 'normal', wordBreak: 'break-word' }}>{model.max_tokens}</TableCell>
|
<TableCell style={{ maxWidth: '120px', whiteSpace: 'normal', wordBreak: 'break-word' }}>
|
||||||
|
<p style={{ fontSize: '10px' }}>
|
||||||
|
Max Tokens: {model.max_tokens} <br></br>
|
||||||
|
Max Input Tokens: {model.max_input_tokens}
|
||||||
|
</p>
|
||||||
|
</TableCell>
|
||||||
<TableCell style={{ maxWidth: '100px', whiteSpace: 'normal', wordBreak: 'break-word' }}>
|
<TableCell style={{ maxWidth: '100px', whiteSpace: 'normal', wordBreak: 'break-word' }}>
|
||||||
{model.model_info.db_model ? (
|
{model.model_info.db_model ? (
|
||||||
<Badge icon={CheckCircleIcon} size="xs" className="text-white">
|
<Badge icon={CheckCircleIcon} size="xs" className="text-white">
|
||||||
|
@ -1114,13 +1123,22 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
selectedProvider == Providers.Azure && <Form.Item
|
selectedProvider == Providers.Azure &&
|
||||||
label="Base Model"
|
|
||||||
name="base_model"
|
<div>
|
||||||
>
|
<Form.Item
|
||||||
<TextInput placeholder="azure/gpt-3.5-turbo"/>
|
label="Base Model"
|
||||||
<Text>The actual model your azure deployment uses. Used for accurate cost tracking. Select name from <Link href="https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" target="_blank">here</Link></Text>
|
name="base_model"
|
||||||
</Form.Item>
|
className="mb-0"
|
||||||
|
>
|
||||||
|
<TextInput placeholder="azure/gpt-3.5-turbo"/>
|
||||||
|
</Form.Item>
|
||||||
|
<Row>
|
||||||
|
<Col span={10}></Col>
|
||||||
|
<Col span={10}><Text className="mb-2">The actual model your azure deployment uses. Used for accurate cost tracking. Select name from <Link href="https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" target="_blank">here</Link></Text></Col>
|
||||||
|
</Row>
|
||||||
|
|
||||||
|
</div>
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
selectedProvider == Providers.Bedrock && <Form.Item
|
selectedProvider == Providers.Bedrock && <Form.Item
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue