LiteLLM Minor Fixes and Improvements (09/10/2024) (#5618)

* fix(cost_calculator.py): log the noisy cost-calculation error message at debug level instead of warning

Fixes https://github.com/BerriAI/litellm/issues/5610

* fix(databricks/cost_calculator.py): handle model-name variants for Databricks models

* fix(main.py): fix stream chunk builder for multiple tool calls

Fixes https://github.com/BerriAI/litellm/issues/5591

* fix: correctly set user_alias when passed in

Fixes https://github.com/BerriAI/litellm/issues/5612

* fix(types/utils.py): allow passing role for message object

Fixes https://github.com/BerriAI/litellm/issues/5621

* fix(litellm_logging.py): Fix langfuse logging across multiple projects

Fixes an issue where the Langfuse logger re-used the old logging object instead of the project-specific one

* feat(proxy/_types.py): support adding key-based tags for tag-based routing

Enables tag-based routing at the key level (see the usage sketch after this change list)

* fix(proxy/_types.py): fix inheritance

* test(test_key_generate_prisma.py): fix test

* test: fix test

* fix(litellm_logging.py): return used callback object
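
As a quick illustration of the new key-level tags, here is a minimal sketch of generating a key that carries routing tags. The proxy URL and master key are placeholders, not values from this commit; the tags land on the key's metadata (see the key management changes below).

import requests

resp = requests.post(
    "http://localhost:4000/key/generate",         # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
    json={"tags": ["teamA"]},                     # stored as metadata["tags"]
)
print(resp.json()["key"])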
Krish Dholakia authored on 2024-09-11 11:30:29 -07:00; committed by GitHub
commit 7f47c48b35 (parent d6e0d5d234)
15 changed files with 673 additions and 96 deletions

View file

@@ -829,18 +829,11 @@ def response_cost_calculator(
         )
         return None
     except Exception as e:
-        if litellm.suppress_debug_info:  # allow cli tools to suppress this information.
-            verbose_logger.debug(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
-        else:
-            verbose_logger.warning(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
+        verbose_logger.debug(
+            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
+                str(e), traceback.format_exc()
+            )
+        )
         return None

View file

@@ -269,10 +269,11 @@ class SlackAlerting(CustomLogger):
                 break
             await asyncio.sleep(3)  # wait 3s before retrying for trace id
-        if litellm.litellm_core_utils.litellm_logging.langFuseLogger is not None:
-            base_url = (
-                litellm.litellm_core_utils.litellm_logging.langFuseLogger.Langfuse.base_url
-            )
+        _langfuse_object = litellm_logging_obj._get_callback_object(
+            service_name="langfuse"
+        )
+        if _langfuse_object is not None:
+            base_url = _langfuse_object.Langfuse.base_url
             return f"{base_url}/trace/{trace_id}"
         return None

View file

@@ -924,6 +924,7 @@ class Logging:
                 else:
                     print_verbose("reaches langfuse for streaming logging!")
                     result = kwargs["complete_streaming_response"]
+                temp_langfuse_logger = langFuseLogger
                 if langFuseLogger is None or (
                     (
                         self.langfuse_public_key is not None
@@ -940,12 +941,12 @@ class Logging:
                         and self.langfuse_host != langFuseLogger.langfuse_host
                     )
                 ):
-                    langFuseLogger = LangFuseLogger(
+                    temp_langfuse_logger = LangFuseLogger(
                         langfuse_public_key=self.langfuse_public_key,
                         langfuse_secret=self.langfuse_secret,
                         langfuse_host=self.langfuse_host,
                     )
-                _response = langFuseLogger.log_event(
+                _response = temp_langfuse_logger.log_event(
                     kwargs=kwargs,
                     response_obj=result,
                     start_time=start_time,
@@ -1925,6 +1926,38 @@ class Logging:
         return trace_id

+    def _get_callback_object(self, service_name: Literal["langfuse"]) -> Optional[Any]:
+        """
+        Return dynamic callback object.
+
+        Meant to solve issue when doing key-based/team-based logging
+        """
+        global langFuseLogger
+
+        if service_name == "langfuse":
+            if langFuseLogger is None or (
+                (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_host is not None
+                    and self.langfuse_host != langFuseLogger.langfuse_host
+                )
+            ):
+                return LangFuseLogger(
+                    langfuse_public_key=self.langfuse_public_key,
+                    langfuse_secret=self.langfuse_secret,
+                    langfuse_host=self.langfuse_host,
+                )
+            return langFuseLogger
+        return None
+

 def set_callbacks(callback_list, function_id=None):
     """

View file

@@ -25,7 +25,30 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
         "dbrx-instruct"
     ):
         base_model = "databricks-dbrx-instruct"
+    elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
+        "meta-llama-3.1-70b-instruct"
+    ):
+        base_model = "databricks-meta-llama-3-1-70b-instruct"
+    elif model.startswith(
+        "databricks/meta-llama-3.1-405b-instruct"
+    ) or model.startswith("meta-llama-3.1-405b-instruct"):
+        base_model = "databricks-meta-llama-3-1-405b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/bge-large-en") or model.startswith(
+        "bge-large-en"
+    ):
+        base_model = "databricks-bge-large-en"
+    elif model.startswith("databricks/gte-large-en") or model.startswith(
+        "gte-large-en"
+    ):
+        base_model = "databricks-gte-large-en"

     ## GET MODEL INFO
     model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
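
A short sketch of what the new prefix handling enables: the `databricks/`-prefixed and bare spellings of a model name should now resolve to the same pricing entry. `cost_per_token(model, usage)` is the signature shown in the hunk above; the import paths are assumptions, not part of this commit.

from litellm.types.utils import Usage
from litellm.llms.databricks.cost_calculator import cost_per_token  # path assumed

usage = Usage(prompt_tokens=1000, completion_tokens=100, total_tokens=1100)
for name in ("databricks/meta-llama-3.1-70b-instruct", "meta-llama-3.1-70b-instruct"):
    # both spellings map to base_model "databricks-meta-llama-3-1-70b-instruct"
    prompt_cost, completion_cost = cost_per_token(model=name, usage=usage)
    print(name, prompt_cost, completion_cost)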

View file

@@ -5310,7 +5310,7 @@ def stream_chunk_builder(
         ]
         if len(tool_call_chunks) > 0:
-            argument_list = []
+            argument_list: List = []
             delta = tool_call_chunks[0]["choices"][0]["delta"]
             message = response["choices"][0]["message"]
             message["tool_calls"] = []
@@ -5319,6 +5319,7 @@ def stream_chunk_builder(
             type = None
             tool_calls_list = []
             prev_index = None
+            prev_name = None
             prev_id = None
             curr_id = None
             curr_index = 0
@@ -5346,27 +5347,32 @@ def stream_chunk_builder(
                         type = tool_calls[0].type
                 if prev_index is None:
                     prev_index = curr_index
+                if prev_name is None:
+                    prev_name = name
                 if curr_index != prev_index:  # new tool call
                     combined_arguments = "".join(argument_list)
                     tool_calls_list.append(
                         {
                             "id": prev_id,
-                            "index": prev_index,
-                            "function": {"arguments": combined_arguments, "name": name},
+                            "function": {
+                                "arguments": combined_arguments,
+                                "name": prev_name,
+                            },
                             "type": type,
                         }
                     )
                     argument_list = []  # reset
                     prev_index = curr_index
                     prev_id = curr_id
+                    prev_name = name
             combined_arguments = (
                 "".join(argument_list) or "{}"
             )  # base case, return empty dict
             tool_calls_list.append(
                 {
                     "id": id,
-                    "index": curr_index,
                     "function": {"arguments": combined_arguments, "name": name},
                     "type": type,
                 }
@@ -5422,7 +5428,7 @@ def stream_chunk_builder(
         for choice in choices:
             delta = choice.get("delta", {})
             content = delta.get("content", "")
-            if content == None:
+            if content is None:
                 continue  # openai v1.0.0 sets content = None for chunks
             content_list.append(content)
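
The essence of the multi-tool-call fix, reduced to a standalone sketch (a hypothetical helper, not code from this commit): argument fragments are accumulated per tool-call index, and the function name recorded when an index first appears is the one emitted for that call, instead of whatever name the last chunk carried.

from collections import OrderedDict

def combine_tool_call_deltas(deltas):
    """deltas: [{"index": int, "id": str | None, "name": str | None, "arguments": str}, ...]"""
    calls = OrderedDict()
    for d in deltas:
        call = calls.setdefault(d["index"], {"id": None, "name": None, "arguments": ""})
        call["id"] = call["id"] or d.get("id")
        call["name"] = call["name"] or d.get("name")  # keep the first name seen per index
        call["arguments"] += d.get("arguments") or ""
    return [
        {"id": c["id"], "type": "function",
         "function": {"name": c["name"], "arguments": c["arguments"] or "{}"}}
        for c in calls.values()
    ]

# two interleaved calls rebuild independently, each with its own name
print(combine_tool_call_deltas([
    {"index": 0, "id": "a", "name": "exponentiate", "arguments": '{"base": 3'},
    {"index": 1, "id": "b", "name": "add", "arguments": '{"first_int": 12'},
    {"index": 0, "id": None, "name": None, "arguments": ', "exponent": 5}'},
    {"index": 1, "id": None, "name": None, "arguments": ', "second_int": 3}'},
]))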

View file

@@ -5461,90 +5461,129 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
         "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_dbu_cost_per_token": 0.000071429,
+        "output_cost_per_token": 0.00001500002,
+        "output_db_cost_per_token": 0.000214286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-1-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }
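
Spot-checking the conversion described in the metadata note against the 70B entry: 0.000014286 DBU per input token × $0.070 per DBU = $0.00000100002, and 0.000042857 × $0.070 = $0.00000299999, matching `input_cost_per_token` and `output_cost_per_token` above. Per the note, the dollar figures are for reference only; the `*_dbu_cost_per_token` fields drive the actual calculation.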

View file

@@ -1,9 +1,9 @@
 model_list:
-  - model_name: "gpt-turbo"
+  - model_name: "gpt-4o"
     litellm_params:
-      model: azure/chatgpt-v-2
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
+      model: gpt-4o

-router_settings:
-  model_group_alias: {"gpt-4": "gpt-turbo"}
+litellm_settings:
+  cache: true
+  cache_params:
+    type: local

View file

@@ -600,7 +600,7 @@ class GenerateRequestBase(LiteLLMBase):
     soft_budget: Optional[float] = None


-class GenerateKeyRequest(GenerateRequestBase):
+class _GenerateKeyRequest(GenerateRequestBase):
     key_alias: Optional[str] = None
     key: Optional[str] = None
     duration: Optional[str] = None
@@ -618,7 +618,11 @@ class GenerateKeyRequest(GenerateRequestBase):
     guardrails: Optional[List[str]] = None


-class GenerateKeyResponse(GenerateKeyRequest):
+class GenerateKeyRequest(_GenerateKeyRequest):
+    tags: Optional[List[str]] = None
+
+
+class GenerateKeyResponse(_GenerateKeyRequest):
     key: str
     key_name: Optional[str] = None
     expires: Optional[datetime]
@@ -677,9 +681,10 @@ class LiteLLM_ModelTable(LiteLLMBase):
     model_config = ConfigDict(protected_namespaces=())


-class NewUserRequest(GenerateKeyRequest):
+class NewUserRequest(_GenerateKeyRequest):
     max_budget: Optional[float] = None
     user_email: Optional[str] = None
+    user_alias: Optional[str] = None
     user_role: Optional[
         Literal[
             LitellmUserRoles.PROXY_ADMIN,
@@ -713,6 +718,7 @@ class NewUserResponse(GenerateKeyResponse):
     ] = None
     teams: Optional[list] = None
     organization_id: Optional[str] = None
+    user_alias: Optional[str] = None


 class UpdateUserRequest(GenerateRequestBase):
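
Condensed, the reshaped hierarchy looks like this (a sketch with the field lists abbreviated and `GenerateRequestBase` replaced by a bare BaseModel; the full definitions are in the diff above):

from typing import List, Optional
from pydantic import BaseModel

class _GenerateKeyRequest(BaseModel):            # shared key fields (abbreviated)
    key_alias: Optional[str] = None

class GenerateKeyRequest(_GenerateKeyRequest):   # public request: adds tags
    tags: Optional[List[str]] = None

class GenerateKeyResponse(_GenerateKeyRequest):  # response: no tags field
    key: str

class NewUserRequest(_GenerateKeyRequest):       # user creation: alias, no tags
    user_alias: Optional[str] = None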

View file

@@ -156,6 +156,7 @@ async def new_user(
         user_id=response["user_id"],
         user_role=response.get("user_role", None),
         user_email=response.get("user_email", None),
+        user_alias=response.get("user_alias", None),
         teams=response.get("teams", None),
         team_id=response.get("team_id", None),
         metadata=response.get("metadata", None),

View file

@@ -202,6 +202,15 @@ async def generate_key_fn(
         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

+        # Set tags on the new key
+        if "tags" in data_json:
+            if data_json["metadata"] is None:
+                data_json["metadata"] = {"tags": data_json["tags"]}
+            else:
+                data_json["metadata"]["tags"] = data_json["tags"]
+
+            data_json.pop("tags")
+
         response = await generate_key_helper_fn(
             request_type="key", **data_json, table_name="key"
         )
@@ -257,12 +266,11 @@ async def generate_key_fn(
         return GenerateKeyResponse(**response)
     except Exception as e:
-        verbose_proxy_logger.error(
+        verbose_proxy_logger.exception(
             "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
                 str(e)
             )
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -731,6 +739,7 @@ async def generate_key_helper_fn(
         str
     ] = None,  # dev-friendly alt param for 'token'. Exposed on `/key/generate` for setting key value yourself.
     user_id: Optional[str] = None,
+    user_alias: Optional[str] = None,
     team_id: Optional[str] = None,
     user_email: Optional[str] = None,
     user_role: Optional[str] = None,
@@ -816,6 +825,7 @@ async def generate_key_helper_fn(
             "max_budget": max_budget,
             "user_email": user_email,
             "user_id": user_id,
+            "user_alias": user_alias,
             "team_id": team_id,
             "organization_id": organization_id,
             "user_role": user_role,

View file

@@ -1221,11 +1221,37 @@ def test_completion_cost_anthropic_prompt_caching():
     assert cost_1 > cost_2


-def test_completion_cost_databricks():
-    model, messages = "databricks/databricks-dbrx-instruct", [
-        {"role": "user", "content": "What is 2+2?"}
-    ]
-
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-meta-llama-3-1-70b-instruct",
+        "databricks/databricks-meta-llama-3-70b-instruct",
+        "databricks/databricks-dbrx-instruct",
+        "databricks/databricks-mixtral-8x7b-instruct",
+    ],
+)
+def test_completion_cost_databricks(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model, messages = model, [{"role": "user", "content": "What is 2+2?"}]
+
     resp = litellm.completion(model=model, messages=messages)  # works fine
+
+    print(resp)
+    cost = completion_cost(completion_response=resp)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-bge-large-en",
+        "databricks/databricks-gte-large-en",
+    ],
+)
+def test_completion_cost_databricks_embedding(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    resp = litellm.embedding(model=model, input=["hey, how's it going?"])  # works fine

     print(resp)
     cost = completion_cost(completion_response=resp)

View file

@@ -2762,6 +2762,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 100},
         "model_rpm_limit": {"gpt-4": 2},
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2782,6 +2783,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 200},
         "model_rpm_limit": {"gpt-4": 3},
+        "tags": None,
     }
@@ -2818,6 +2820,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call"],
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2836,6 +2839,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call", "aporia-post-call"],
+        "tags": None,
     }

View file

@@ -210,7 +210,6 @@ def test_stream_chunk_builder_litellm_mixed_calls():
     assert len(response.choices[0].message.tool_calls) == 1
     assert response.choices[0].message.tool_calls[0].to_dict() == {
-        "index": 1,
         "function": {
             "arguments": '{"query": "SELECT COUNT(*) FROM users;"}',
             "name": "sql_query",
@@ -226,3 +225,400 @@ def test_stream_chunk_builder_litellm_empty_chunks():
     response = stream_chunk_builder(chunks=[])
     assert response is None

def test_stream_chunk_builder_multiple_tool_calls():
init_chunks = [
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"id": "call_X9P9B6STj7ze8OsJCGkfoN94",
"function": {"arguments": "", "name": "exponentiate"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '{"ba'},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'se": '},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '3, "ex'},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "pone"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'nt": '},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "5}"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"id": "call_Qq8yDeRx7v276abRcLrYORdW",
"function": {"arguments": "", "name": "add"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '{"fi'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "rst_i"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'nt": 1'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '2, "'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "secon"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'd_int"'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": ": 3}"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [{"finish_reason": "tool_calls", "index": 0, "delta": {}}],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
]
chunks = []
for chunk in init_chunks:
chunks.append(litellm.ModelResponse(**chunk, stream=True))
response = stream_chunk_builder(chunks=chunks)
print(f"Returned response: {response}")
completed_response = {
"id": "chatcmpl-A61mXjvcRX0Xr2IiojN9TPiy1P3Fm",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"message": {
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": '{"base": 3, "exponent": 5}',
"name": "exponentiate",
},
"id": "call_X9P9B6STj7ze8OsJCGkfoN94",
"type": "function",
},
{
"function": {
"arguments": '{"first_int": 12, "second_int": 3}',
"name": "add",
},
"id": "call_Qq8yDeRx7v276abRcLrYORdW",
"type": "function",
},
],
"function_call": None,
},
}
],
"created": 1726000181,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion",
"system_fingerprint": "fp_25624ae3a5",
"usage": {"completion_tokens": 55, "prompt_tokens": 127, "total_tokens": 182},
"service_tier": None,
}
expected_response = litellm.ModelResponse(**completed_response)
print(f"\n\nexpected_response:\n{expected_response}\n\n")
assert (
expected_response.choices == response.choices
), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices)

View file

@@ -325,7 +325,7 @@ class Message(OpenAIObject):
     ):
         init_values = {
             "content": content,
-            "role": "assistant",
+            "role": role or "assistant",  # handle null input
             "function_call": (
                 FunctionCall(**function_call) if function_call is not None else None
             ),
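
What the one-line change permits, sketched (the import path is an assumption; the behavior follows from `role or "assistant"` above):

from litellm.types.utils import Message  # import path assumed

tool_msg = Message(content="4", role="tool")    # previously forced to "assistant"
assert tool_msg.role == "tool"

default_msg = Message(content="hi", role=None)  # null input still falls back
assert default_msg.role == "assistant"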

View file

@@ -5492,90 +5492,129 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
         "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_dbu_cost_per_token": 0.000071429,
+        "output_cost_per_token": 0.00001500002,
+        "output_db_cost_per_token": 0.000214286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-1-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }