Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
LiteLLM Minor Fixes and Improvements (09/10/2024) (#5618)
* fix(cost_calculator.py): move noisy warning message on cost-calculation errors to debug. Fixes https://github.com/BerriAI/litellm/issues/5610
* fix(databricks/cost_calculator.py): handle model-name issues for Databricks models
* fix(main.py): fix stream chunk builder for multiple tool calls. Fixes https://github.com/BerriAI/litellm/issues/5591
* fix: correctly set user_alias when passed in. Fixes https://github.com/BerriAI/litellm/issues/5612
* fix(types/utils.py): allow passing role for message object. https://github.com/BerriAI/litellm/issues/5621
* fix(litellm_logging.py): fix Langfuse logging across multiple projects. Fixes an issue where the Langfuse logger re-used the old logging object
* feat(proxy/_types.py): support adding key-based tags for tag-based routing. Enables tag-based routing at the key level
* fix(proxy/_types.py): fix inheritance
* test(test_key_generate_prisma.py): fix test
* test: fix test
* fix(litellm_logging.py): return used callback object
parent d6e0d5d234
commit 7f47c48b35

15 changed files with 673 additions and 96 deletions
@@ -829,18 +829,11 @@ def response_cost_calculator(
             )
         return None
     except Exception as e:
-        if litellm.suppress_debug_info:  # allow cli tools to suppress this information.
-            verbose_logger.debug(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
-        else:
-            verbose_logger.warning(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
+        verbose_logger.debug(
+            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
+                str(e), traceback.format_exc()
+            )
+        )
         return None
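Since the message above is now emitted at debug level, it only shows up when debug logging is enabled. A minimal sketch, assuming LiteLLM's verbose_logger is a standard stdlib logger registered under the "LiteLLM" name:

```python
# Surface the downgraded cost-calculation message during local debugging.
# Assumes litellm's verbose logger is the stdlib logger named "LiteLLM".
import logging

logging.basicConfig()
logging.getLogger("LiteLLM").setLevel(logging.DEBUG)
```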
@@ -269,10 +269,11 @@ class SlackAlerting(CustomLogger):
                 break
             await asyncio.sleep(3)  # wait 3s before retrying for trace id

-        if litellm.litellm_core_utils.litellm_logging.langFuseLogger is not None:
-            base_url = (
-                litellm.litellm_core_utils.litellm_logging.langFuseLogger.Langfuse.base_url
-            )
+        _langfuse_object = litellm_logging_obj._get_callback_object(
+            service_name="langfuse"
+        )
+        if _langfuse_object is not None:
+            base_url = _langfuse_object.Langfuse.base_url
             return f"{base_url}/trace/{trace_id}"
         return None
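For reference, the alert link the code above builds is just the Langfuse base URL plus the trace id; a sketch with illustrative values (not from this commit):

```python
# Hypothetical values; in the hunk above, base_url comes from the Langfuse
# client returned by litellm_logging_obj._get_callback_object(service_name="langfuse").
base_url = "https://cloud.langfuse.com"
trace_id = "litellm-alert-trace-123"
print(f"{base_url}/trace/{trace_id}")  # https://cloud.langfuse.com/trace/litellm-alert-trace-123
```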
@@ -924,6 +924,7 @@ class Logging:
                 else:
                     print_verbose("reaches langfuse for streaming logging!")
                     result = kwargs["complete_streaming_response"]
+                temp_langfuse_logger = langFuseLogger
                 if langFuseLogger is None or (
                     (
                         self.langfuse_public_key is not None
@@ -940,12 +941,12 @@ class Logging:
                         and self.langfuse_host != langFuseLogger.langfuse_host
                     )
                 ):
-                    langFuseLogger = LangFuseLogger(
+                    temp_langfuse_logger = LangFuseLogger(
                         langfuse_public_key=self.langfuse_public_key,
                         langfuse_secret=self.langfuse_secret,
                         langfuse_host=self.langfuse_host,
                     )
-                _response = langFuseLogger.log_event(
+                _response = temp_langfuse_logger.log_event(
                     kwargs=kwargs,
                     response_obj=result,
                     start_time=start_time,
@@ -1925,6 +1926,38 @@ class Logging:

         return trace_id

+    def _get_callback_object(self, service_name: Literal["langfuse"]) -> Optional[Any]:
+        """
+        Return dynamic callback object.
+
+        Meant to solve issue when doing key-based/team-based logging
+        """
+        global langFuseLogger
+
+        if service_name == "langfuse":
+            if langFuseLogger is None or (
+                (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_host is not None
+                    and self.langfuse_host != langFuseLogger.langfuse_host
+                )
+            ):
+                return LangFuseLogger(
+                    langfuse_public_key=self.langfuse_public_key,
+                    langfuse_secret=self.langfuse_secret,
+                    langfuse_host=self.langfuse_host,
+                )
+            return langFuseLogger
+
+        return None
+

 def set_callbacks(callback_list, function_id=None):
     """
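The method above is the heart of the multi-project Langfuse fix. A minimal, self-contained sketch of the pattern it implements, with hypothetical names (FakeLangfuseClient and get_client are illustrative, not litellm APIs): reuse the module-level client when the requested credentials match it, otherwise hand back a fresh client instead of clobbering the global one.

```python
from typing import Optional


class FakeLangfuseClient:
    def __init__(self, public_key: str, host: str):
        self.public_key = public_key
        self.host = host


_global_client: Optional[FakeLangfuseClient] = None


def get_client(public_key: str, host: str) -> FakeLangfuseClient:
    global _global_client
    if _global_client is None:
        _global_client = FakeLangfuseClient(public_key, host)
        return _global_client
    if public_key != _global_client.public_key or host != _global_client.host:
        # key-/team-scoped credentials differ -> hand out a per-request client
        return FakeLangfuseClient(public_key, host)
    return _global_client


a = get_client("pk-project-a", "https://cloud.langfuse.com")
b = get_client("pk-project-b", "https://cloud.langfuse.com")
assert a is not b and a is _global_client  # project B no longer hijacks project A's logger
```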
@@ -25,7 +25,30 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
         "dbrx-instruct"
     ):
         base_model = "databricks-dbrx-instruct"
+    elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
+        "meta-llama-3.1-70b-instruct"
+    ):
+        base_model = "databricks-meta-llama-3-1-70b-instruct"
+    elif model.startswith(
+        "databricks/meta-llama-3.1-405b-instruct"
+    ) or model.startswith("meta-llama-3.1-405b-instruct"):
+        base_model = "databricks-meta-llama-3-1-405b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/bge-large-en") or model.startswith(
+        "bge-large-en"
+    ):
+        base_model = "databricks-bge-large-en"
+    elif model.startswith("databricks/gte-large-en") or model.startswith(
+        "gte-large-en"
+    ):
+        base_model = "databricks-gte-large-en"
     ## GET MODEL INFO
     model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
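A hedged usage sketch of the normalization above: both short and fully-qualified Databricks names should now resolve to the same priced base model. `litellm.cost_per_token` is the public entry point, though exact routing of the `databricks/` prefix may differ by version. (Note the mixtral branch appears twice in the committed code; the duplicate is redundant but harmless.)

```python
# With the prefix mapping above, a 3.1-style Databricks model name should
# resolve to the "databricks-meta-llama-3-1-70b-instruct" pricing entry.
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="databricks/meta-llama-3.1-70b-instruct",
    prompt_tokens=1000,
    completion_tokens=200,
)
print(prompt_cost, completion_cost)  # ~1000 * 1.00002e-06 and ~200 * 2.99999e-06 USD
```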
@@ -5310,7 +5310,7 @@ def stream_chunk_builder(
         ]

         if len(tool_call_chunks) > 0:
-            argument_list = []
+            argument_list: List = []
             delta = tool_call_chunks[0]["choices"][0]["delta"]
             message = response["choices"][0]["message"]
             message["tool_calls"] = []
@@ -5319,6 +5319,7 @@ def stream_chunk_builder(
             type = None
             tool_calls_list = []
             prev_index = None
+            prev_name = None
             prev_id = None
             curr_id = None
             curr_index = 0
@@ -5346,27 +5347,32 @@ def stream_chunk_builder(
                 type = tool_calls[0].type
             if prev_index is None:
                 prev_index = curr_index
+            if prev_name is None:
+                prev_name = name
             if curr_index != prev_index:  # new tool call
                 combined_arguments = "".join(argument_list)
                 tool_calls_list.append(
                     {
                         "id": prev_id,
-                        "index": prev_index,
-                        "function": {"arguments": combined_arguments, "name": name},
+                        "function": {
+                            "arguments": combined_arguments,
+                            "name": prev_name,
+                        },
                         "type": type,
                     }
                 )
                 argument_list = []  # reset
                 prev_index = curr_index
                 prev_id = curr_id
+                prev_name = name

         combined_arguments = (
             "".join(argument_list) or "{}"
         )  # base case, return empty dict

         tool_calls_list.append(
             {
                 "id": id,
-                "index": curr_index,
                 "function": {"arguments": combined_arguments, "name": name},
                 "type": type,
             }
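Why `prev_name` matters: the chunk that opens the second tool call already carries the new function name, so flushing the first call's arguments under the latest `name` (the old behavior) mislabels them. A self-contained sketch with made-up fragments:

```python
# Sketch of the bug this hunk fixes: arguments accumulated for tool call 0
# must be flushed under the *previous* call's name, not under the name
# carried by the chunk that starts call 1.
fragments = [  # (tool_call_index, name, argument_fragment) - hypothetical data
    (0, "exponentiate", '{"base": '),
    (0, None, "3}"),
    (1, "add", '{"first_int": 1, '),
    (1, None, '"second_int": 2}'),
]

tool_calls, argument_list = [], []
prev_index = prev_name = name = None
for curr_index, curr_name, chunk in fragments:
    if curr_name is not None:
        name = curr_name
    if prev_index is None:
        prev_index, prev_name = curr_index, name
    if curr_index != prev_index:  # new tool call: flush the finished one
        tool_calls.append({"name": prev_name, "arguments": "".join(argument_list)})
        argument_list = []
        prev_index, prev_name = curr_index, name
    argument_list.append(chunk)

tool_calls.append({"name": name, "arguments": "".join(argument_list)})
print(tool_calls)
# [{'name': 'exponentiate', 'arguments': '{"base": 3}'},
#  {'name': 'add', 'arguments': '{"first_int": 1, "second_int": 2}'}]
```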
@@ -5422,7 +5428,7 @@ def stream_chunk_builder(
         for choice in choices:
             delta = choice.get("delta", {})
             content = delta.get("content", "")
-            if content == None:
+            if content is None:
                 continue  # openai v1.0.0 sets content = None for chunks
             content_list.append(content)
@@ -5461,90 +5461,129 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
         "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_dbu_cost_per_token": 0.000071429,
+        "output_cost_per_token": 0.00001500002,
+        "output_db_cost_per_token": 0.000214286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-1-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }
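The metadata notes state that the dollar figures are derived from the DBU figures at $0.070 per DBU, and the numbers above check out. (One aside: the first entry writes "output_db_cost_per_token" where every other entry uses "output_dbu_cost_per_token"; that looks like a typo carried in the committed JSON.) A quick arithmetic check:

```python
# Verify the dbu -> dollar conversion from the metadata notes, using the
# databricks-meta-llama-3-1-70b-instruct entry above.
dollars_per_dbu = 0.070

print(round(0.000014286 * dollars_per_dbu, 12))  # 1.00002e-06, matches input_cost_per_token
print(round(0.000042857 * dollars_per_dbu, 12))  # 2.99999e-06, matches output_cost_per_token
```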
@@ -1,9 +1,9 @@
 model_list:
-  - model_name: "gpt-turbo"
+  - model_name: "gpt-4o"
     litellm_params:
-      model: azure/chatgpt-v-2
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
+      model: gpt-4o

-router_settings:
-  model_group_alias: {"gpt-4": "gpt-turbo"}
+litellm_settings:
+  cache: true
+  cache_params:
+    type: local
@@ -600,7 +600,7 @@ class GenerateRequestBase(LiteLLMBase):
     soft_budget: Optional[float] = None


-class GenerateKeyRequest(GenerateRequestBase):
+class _GenerateKeyRequest(GenerateRequestBase):
     key_alias: Optional[str] = None
     key: Optional[str] = None
     duration: Optional[str] = None
@@ -618,7 +618,11 @@ class GenerateKeyRequest(GenerateRequestBase):
     guardrails: Optional[List[str]] = None


-class GenerateKeyResponse(GenerateKeyRequest):
+class GenerateKeyRequest(_GenerateKeyRequest):
+    tags: Optional[List[str]] = None
+
+
+class GenerateKeyResponse(_GenerateKeyRequest):
     key: str
     key_name: Optional[str] = None
     expires: Optional[datetime]
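A short usage sketch of the new split (assumes litellm is installed; the field names are exactly those in the hunk): `tags` lives only on the request model, so GenerateKeyResponse, which is also built on _GenerateKeyRequest, does not inherit it.

```python
# /key/generate's request model now accepts `tags` for key-based tag routing.
from litellm.proxy._types import GenerateKeyRequest

req = GenerateKeyRequest(key_alias="tag-routed-key", tags=["teamA", "paid"])
print(req.tags)  # ['teamA', 'paid']
```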
@@ -677,9 +681,10 @@ class LiteLLM_ModelTable(LiteLLMBase):
     model_config = ConfigDict(protected_namespaces=())


-class NewUserRequest(GenerateKeyRequest):
+class NewUserRequest(_GenerateKeyRequest):
     max_budget: Optional[float] = None
     user_email: Optional[str] = None
+    user_alias: Optional[str] = None
     user_role: Optional[
         Literal[
             LitellmUserRoles.PROXY_ADMIN,
@@ -713,6 +718,7 @@ class NewUserResponse(GenerateKeyResponse):
     ] = None
     teams: Optional[list] = None
     organization_id: Optional[str] = None
+    user_alias: Optional[str] = None


 class UpdateUserRequest(GenerateRequestBase):
@@ -156,6 +156,7 @@ async def new_user(
         user_id=response["user_id"],
         user_role=response.get("user_role", None),
         user_email=response.get("user_email", None),
+        user_alias=response.get("user_alias", None),
         teams=response.get("teams", None),
         team_id=response.get("team_id", None),
         metadata=response.get("metadata", None),
@@ -202,6 +202,15 @@ async def generate_key_fn(
         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

+        # Set tags on the new key
+        if "tags" in data_json:
+            if data_json["metadata"] is None:
+                data_json["metadata"] = {"tags": data_json["tags"]}
+            else:
+                data_json["metadata"]["tags"] = data_json["tags"]
+
+            data_json.pop("tags")
+
         response = await generate_key_helper_fn(
             request_type="key", **data_json, table_name="key"
         )
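The tag handling above is self-contained enough to illustrate directly; this sketch reproduces it on a toy `data_json` to show the resulting key metadata:

```python
# Tags passed on /key/generate are folded into the key's metadata under "tags"
# before the key is persisted.
data_json = {"metadata": None, "tags": ["prod", "teamA"]}

if "tags" in data_json:
    if data_json["metadata"] is None:
        data_json["metadata"] = {"tags": data_json["tags"]}
    else:
        data_json["metadata"]["tags"] = data_json["tags"]
    data_json.pop("tags")

print(data_json)  # {'metadata': {'tags': ['prod', 'teamA']}}
```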
@@ -257,12 +266,11 @@ async def generate_key_fn(

         return GenerateKeyResponse(**response)
     except Exception as e:
-        verbose_proxy_logger.error(
+        verbose_proxy_logger.exception(
             "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
                 str(e)
             )
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),
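Switching `verbose_proxy_logger.error(...)` plus a separate `debug(traceback.format_exc())` to a single `.exception(...)` works because the stdlib's `Logger.exception` records the active traceback itself. A stdlib-only demo:

```python
# Logger.exception() logs at ERROR level and appends the current traceback,
# so no explicit traceback.format_exc() call is needed.
import logging

logging.basicConfig(level=logging.ERROR)
log = logging.getLogger("demo")

try:
    raise ValueError("boom")
except ValueError as e:
    log.exception("generate_key_fn(): Exception occured - %s", str(e))  # traceback included
```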
@@ -731,6 +739,7 @@ async def generate_key_helper_fn(
         str
     ] = None,  # dev-friendly alt param for 'token'. Exposed on `/key/generate` for setting key value yourself.
     user_id: Optional[str] = None,
+    user_alias: Optional[str] = None,
     team_id: Optional[str] = None,
     user_email: Optional[str] = None,
     user_role: Optional[str] = None,
|
@ -816,6 +825,7 @@ async def generate_key_helper_fn(
|
||||||
"max_budget": max_budget,
|
"max_budget": max_budget,
|
||||||
"user_email": user_email,
|
"user_email": user_email,
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
"user_alias": user_alias,
|
||||||
"team_id": team_id,
|
"team_id": team_id,
|
||||||
"organization_id": organization_id,
|
"organization_id": organization_id,
|
||||||
"user_role": user_role,
|
"user_role": user_role,
|
||||||
|
|
|
@@ -1221,11 +1221,37 @@ def test_completion_cost_anthropic_prompt_caching():
     assert cost_1 > cost_2


-def test_completion_cost_databricks():
-    model, messages = "databricks/databricks-dbrx-instruct", [
-        {"role": "user", "content": "What is 2+2?"}
-    ]
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-meta-llama-3-1-70b-instruct",
+        "databricks/databricks-meta-llama-3-70b-instruct",
+        "databricks/databricks-dbrx-instruct",
+        "databricks/databricks-mixtral-8x7b-instruct",
+    ],
+)
+def test_completion_cost_databricks(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model, messages = model, [{"role": "user", "content": "What is 2+2?"}]

     resp = litellm.completion(model=model, messages=messages)  # works fine

+    print(resp)
+    cost = completion_cost(completion_response=resp)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-bge-large-en",
+        "databricks/databricks-gte-large-en",
+    ],
+)
+def test_completion_cost_databricks_embedding(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    resp = litellm.embedding(model=model, input=["hey, how's it going?"])  # works fine
+
+    print(resp)
     cost = completion_cost(completion_response=resp)
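The two setup lines the new tests start with make LiteLLM read its bundled local price map instead of fetching the hosted one, so the new Databricks entries are exercised; a sketch (assumes the commit's JSON entries are present in the installed package):

```python
# Point LiteLLM at its bundled local model-cost map (no network fetch).
import os
import litellm

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
print("databricks/databricks-gte-large-en" in litellm.model_cost)  # True once this commit lands
```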
@@ -2762,6 +2762,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 100},
         "model_rpm_limit": {"gpt-4": 2},
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2782,6 +2783,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 200},
         "model_rpm_limit": {"gpt-4": 3},
+        "tags": None,
     }

@@ -2818,6 +2820,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call"],
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2836,6 +2839,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call", "aporia-post-call"],
+        "tags": None,
     }

@@ -210,7 +210,6 @@ def test_stream_chunk_builder_litellm_mixed_calls():

     assert len(response.choices[0].message.tool_calls) == 1
     assert response.choices[0].message.tool_calls[0].to_dict() == {
-        "index": 1,
         "function": {
             "arguments": '{"query": "SELECT COUNT(*) FROM users;"}',
             "name": "sql_query",
@@ -226,3 +225,400 @@ def test_stream_chunk_builder_litellm_empty_chunks():

     response = stream_chunk_builder(chunks=[])
     assert response is None
+
+
+def test_stream_chunk_builder_multiple_tool_calls():
+    init_chunks = [
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"id": "call_X9P9B6STj7ze8OsJCGkfoN94", "function": {"arguments": "", "name": "exponentiate"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '{"ba'}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'se": '}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '3, "ex'}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "pone"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'nt": '}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "5}"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"id": "call_Qq8yDeRx7v276abRcLrYORdW", "function": {"arguments": "", "name": "add"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '{"fi'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "rst_i"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'nt": 1'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '2, "'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "secon"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'd_int"'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": ": 3}"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"finish_reason": "tool_calls", "index": 0, "delta": {}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+    ]
+
+    chunks = []
+    for chunk in init_chunks:
+        chunks.append(litellm.ModelResponse(**chunk, stream=True))
+    response = stream_chunk_builder(chunks=chunks)
+
+    print(f"Returned response: {response}")
+    completed_response = {
+        "id": "chatcmpl-A61mXjvcRX0Xr2IiojN9TPiy1P3Fm",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "index": 0,
+                "message": {
+                    "content": None,
+                    "role": "assistant",
+                    "tool_calls": [
+                        {
+                            "function": {
+                                "arguments": '{"base": 3, "exponent": 5}',
+                                "name": "exponentiate",
+                            },
+                            "id": "call_X9P9B6STj7ze8OsJCGkfoN94",
+                            "type": "function",
+                        },
+                        {
+                            "function": {
+                                "arguments": '{"first_int": 12, "second_int": 3}',
+                                "name": "add",
+                            },
+                            "id": "call_Qq8yDeRx7v276abRcLrYORdW",
+                            "type": "function",
+                        },
+                    ],
+                    "function_call": None,
+                },
+            }
+        ],
+        "created": 1726000181,
+        "model": "gpt-4o-2024-05-13",
+        "object": "chat.completion",
+        "system_fingerprint": "fp_25624ae3a5",
+        "usage": {"completion_tokens": 55, "prompt_tokens": 127, "total_tokens": 182},
+        "service_tier": None,
+    }
+
+    expected_response = litellm.ModelResponse(**completed_response)
+
+    print(f"\n\nexpected_response:\n{expected_response}\n\n")
+    assert (
+        expected_response.choices == response.choices
+    ), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices)
@@ -325,7 +325,7 @@ class Message(OpenAIObject):
     ):
         init_values = {
             "content": content,
-            "role": "assistant",
+            "role": role or "assistant",  # handle null input
             "function_call": (
                 FunctionCall(**function_call) if function_call is not None else None
             ),
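A minimal check of the `Message` change (module path per the commit message, types/utils.py): a caller-supplied role is preserved, while None still falls back to "assistant".

```python
from litellm.types.utils import Message

print(Message(content="ok", role="function").role)  # "function"
print(Message(content="ok").role)                   # "assistant"
```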
@@ -5492,90 +5492,129 @@
(Identical to the @@ -5461,90 +5461,129 @@ hunk above: the same Databricks pricing changes, dbu cost fields, metadata notes, and new "databricks/databricks-gte-large-en" entry, applied to the repo's second copy of the model price map.)