diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index bcec062de1..598de09be3 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -829,18 +829,11 @@ def response_cost_calculator(
         )
         return None
     except Exception as e:
-        if litellm.suppress_debug_info:  # allow cli tools to suppress this information.
-            verbose_logger.debug(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
-        else:
-            verbose_logger.warning(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
+        verbose_logger.debug(
+            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
+                str(e), traceback.format_exc()
+            )
+        )
         return None
diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 42e289b871..3d757fa3dc 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -269,10 +269,11 @@ class SlackAlerting(CustomLogger):
                     break
                 await asyncio.sleep(3)  # wait 3s before retrying for trace id

-        if litellm.litellm_core_utils.litellm_logging.langFuseLogger is not None:
-            base_url = (
-                litellm.litellm_core_utils.litellm_logging.langFuseLogger.Langfuse.base_url
-            )
+        _langfuse_object = litellm_logging_obj._get_callback_object(
+            service_name="langfuse"
+        )
+        if _langfuse_object is not None:
+            base_url = _langfuse_object.Langfuse.base_url
             return f"{base_url}/trace/{trace_id}"

         return None
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 43273224cb..0d3e59db5d 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -924,6 +924,7 @@ class Logging:
                     else:
                         print_verbose("reaches langfuse for streaming logging!")
                         result = kwargs["complete_streaming_response"]
+                    temp_langfuse_logger = langFuseLogger
                     if langFuseLogger is None or (
                         (
                             self.langfuse_public_key is not None
@@ -940,12 +941,12 @@ class Logging:
                             and self.langfuse_host != langFuseLogger.langfuse_host
                         )
                     ):
-                        langFuseLogger = LangFuseLogger(
+                        temp_langfuse_logger = LangFuseLogger(
                             langfuse_public_key=self.langfuse_public_key,
                             langfuse_secret=self.langfuse_secret,
                             langfuse_host=self.langfuse_host,
                         )
-                    _response = langFuseLogger.log_event(
+                    _response = temp_langfuse_logger.log_event(
                         kwargs=kwargs,
                         response_obj=result,
                         start_time=start_time,
@@ -1925,6 +1926,38 @@ class Logging:

         return trace_id

+    def _get_callback_object(self, service_name: Literal["langfuse"]) -> Optional[Any]:
+        """
+        Return dynamic callback object.
+
+        Meant to solve the issue of key-based/team-based logging.
+        """
+        global langFuseLogger
+
+        if service_name == "langfuse":
+            if langFuseLogger is None or (
+                (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_secret is not None
+                    and self.langfuse_secret != langFuseLogger.secret_key
+                )
+                or (
+                    self.langfuse_host is not None
+                    and self.langfuse_host != langFuseLogger.langfuse_host
+                )
+            ):
+                return LangFuseLogger(
+                    langfuse_public_key=self.langfuse_public_key,
+                    langfuse_secret=self.langfuse_secret,
+                    langfuse_host=self.langfuse_host,
+                )
+            return langFuseLogger
+
+        return None
+

 def set_callbacks(callback_list, function_id=None):
     """
diff --git a/litellm/llms/databricks/cost_calculator.py b/litellm/llms/databricks/cost_calculator.py
index 3d40f2aa62..695a6055f8 100644
--- a/litellm/llms/databricks/cost_calculator.py
+++ b/litellm/llms/databricks/cost_calculator.py
@@ -25,7 +25,26 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
         "dbrx-instruct"
     ):
         base_model = "databricks-dbrx-instruct"
-
+    elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
+        "meta-llama-3.1-70b-instruct"
+    ):
+        base_model = "databricks-meta-llama-3-1-70b-instruct"
+    elif model.startswith(
+        "databricks/meta-llama-3.1-405b-instruct"
+    ) or model.startswith("meta-llama-3.1-405b-instruct"):
+        base_model = "databricks-meta-llama-3-1-405b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/bge-large-en") or model.startswith(
+        "bge-large-en"
+    ):
+        base_model = "databricks-bge-large-en"
+    elif model.startswith("databricks/gte-large-en") or model.startswith(
+        "gte-large-en"
+    ):
+        base_model = "databricks-gte-large-en"

     ## GET MODEL INFO
     model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
diff --git a/litellm/main.py b/litellm/main.py
index 1d20cf4240..8df5d604d8 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -5310,7 +5310,7 @@ def stream_chunk_builder(
     ]

     if len(tool_call_chunks) > 0:
-        argument_list = []
+        argument_list: List = []
         delta = tool_call_chunks[0]["choices"][0]["delta"]
         message = response["choices"][0]["message"]
         message["tool_calls"] = []
@@ -5319,6 +5319,7 @@
         type = None
         tool_calls_list = []
         prev_index = None
+        prev_name = None
         prev_id = None
         curr_id = None
         curr_index = 0
@@ -5346,27 +5347,32 @@
                     type = tool_calls[0].type
             if prev_index is None:
                 prev_index = curr_index
+            if prev_name is None:
+                prev_name = name
             if curr_index != prev_index:  # new tool call
                 combined_arguments = "".join(argument_list)
                 tool_calls_list.append(
                     {
                         "id": prev_id,
-                        "index": prev_index,
-                        "function": {"arguments": combined_arguments, "name": name},
+                        "function": {
+                            "arguments": combined_arguments,
+                            "name": prev_name,
+                        },
                         "type": type,
                     }
                 )
                 argument_list = []  # reset
                 prev_index = curr_index
                 prev_id = curr_id
+                prev_name = name

         combined_arguments = (
             "".join(argument_list) or "{}"
         )  # base case, return empty dict
+
         tool_calls_list.append(
             {
                 "id": id,
-                "index": curr_index,
                 "function": {"arguments": combined_arguments, "name": name},
                 "type": type,
             }
         )
@@ -5422,7 +5428,7 @@ def stream_chunk_builder(
     for choice in choices:
         delta = choice.get("delta", {})
         content = delta.get("content", "")
-        if content == None:
+        if content is None:
             continue  # openai v1.0.0 sets content = None for chunks
         content_list.append(content)
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 3bb65a666a..61aa5183e6 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -5461,90 +5461,129 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
         "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_dbu_cost_per_token": 0.000071429,
+        "output_cost_per_token": 0.00001500002,
+        "output_dbu_cost_per_token": 0.000214286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-1-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
-
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index bf86da1e12..9a3ce96921 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,9 +1,9 @@
 model_list:
-  - model_name: "gpt-turbo"
+  - model_name: "gpt-4o"
     litellm_params:
-      model: azure/chatgpt-v-2
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
+      model: gpt-4o

-router_settings:
-  model_group_alias: {"gpt-4": "gpt-turbo"}
\ No newline at end of file
+litellm_settings:
+  cache: true
+  cache_params:
+    type: local
\ No newline at end of file
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 3559a4792f..da98b6a101 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -600,7 +600,7 @@ class GenerateRequestBase(LiteLLMBase):
     soft_budget: Optional[float] = None


-class GenerateKeyRequest(GenerateRequestBase):
+class _GenerateKeyRequest(GenerateRequestBase):
     key_alias: Optional[str] = None
     key: Optional[str] = None
     duration: Optional[str] = None
@@ -618,7 +618,11 @@ class GenerateKeyRequest(GenerateRequestBase):
     guardrails: Optional[List[str]] = None


-class GenerateKeyResponse(GenerateKeyRequest):
+class GenerateKeyRequest(_GenerateKeyRequest):
+    tags: Optional[List[str]] = None
+
+
+class GenerateKeyResponse(_GenerateKeyRequest):
     key: str
     key_name: Optional[str] = None
     expires: Optional[datetime]
@@ -677,9 +681,10 @@ class LiteLLM_ModelTable(LiteLLMBase):
     model_config = ConfigDict(protected_namespaces=())


-class NewUserRequest(GenerateKeyRequest):
+class NewUserRequest(_GenerateKeyRequest):
     max_budget: Optional[float] = None
     user_email: Optional[str] = None
+    user_alias: Optional[str] = None
     user_role: Optional[
         Literal[
             LitellmUserRoles.PROXY_ADMIN,
@@ -713,6 +718,7 @@ class NewUserResponse(GenerateKeyResponse):
     ] = None
     teams: Optional[list] = None
     organization_id: Optional[str] = None
+    user_alias: Optional[str] = None


 class UpdateUserRequest(GenerateRequestBase):
diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py
index 8774136272..859c8aeb84 100644
--- a/litellm/proxy/management_endpoints/internal_user_endpoints.py
+++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py
@@ -156,6 +156,7 @@ async def new_user(
         user_id=response["user_id"],
         user_role=response.get("user_role", None),
         user_email=response.get("user_email", None),
+        user_alias=response.get("user_alias", None),
         teams=response.get("teams", None),
         team_id=response.get("team_id", None),
         metadata=response.get("metadata", None),
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index 3173e38864..90e7728d0a 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -202,6 +202,15 @@ async def generate_key_fn(
         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

+        # Set tags on the new key
+        if "tags" in data_json:
+            if data_json["metadata"] is None:
+                data_json["metadata"] = {"tags": data_json["tags"]}
+            else:
+                data_json["metadata"]["tags"] = data_json["tags"]
+
+            data_json.pop("tags")
+
         response = await generate_key_helper_fn(
             request_type="key", **data_json, table_name="key"
         )
@@ -257,12 +266,11 @@ async def generate_key_fn(
         return GenerateKeyResponse(**response)
     except Exception as e:
-        verbose_proxy_logger.error(
+        verbose_proxy_logger.exception(
             "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
                 str(e)
             )
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -731,6 +739,7 @@ async def generate_key_helper_fn(
         str
     ] = None,  # dev-friendly alt param for 'token'. Exposed on `/key/generate` for setting key value yourself.
     user_id: Optional[str] = None,
+    user_alias: Optional[str] = None,
     team_id: Optional[str] = None,
     user_email: Optional[str] = None,
     user_role: Optional[str] = None,
@@ -816,6 +825,7 @@ async def generate_key_helper_fn(
             "max_budget": max_budget,
             "user_email": user_email,
             "user_id": user_id,
+            "user_alias": user_alias,
             "team_id": team_id,
             "organization_id": organization_id,
             "user_role": user_role,
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index ed9eebedb1..835cced457 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -1221,11 +1221,37 @@ def test_completion_cost_anthropic_prompt_caching():
     assert cost_1 > cost_2


-def test_completion_cost_databricks():
-    model, messages = "databricks/databricks-dbrx-instruct", [
-        {"role": "user", "content": "What is 2+2?"}
-    ]
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-meta-llama-3-1-70b-instruct",
+        "databricks/databricks-meta-llama-3-70b-instruct",
+        "databricks/databricks-dbrx-instruct",
+        "databricks/databricks-mixtral-8x7b-instruct",
+    ],
+)
+def test_completion_cost_databricks(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model, messages = model, [{"role": "user", "content": "What is 2+2?"}]

     resp = litellm.completion(model=model, messages=messages)  # works fine
+    print(resp)
     cost = completion_cost(completion_response=resp)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-bge-large-en",
+        "databricks/databricks-gte-large-en",
+    ],
+)
+def test_completion_cost_databricks_embedding(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    resp = litellm.embedding(model=model, input=["hey, how's it going?"])  # works fine
+
+    print(resp)
+    cost = completion_cost(completion_response=resp)
diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index 995d5c0f75..09f12442a0 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -2762,6 +2762,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 100},
         "model_rpm_limit": {"gpt-4": 2},
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2782,6 +2783,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 200},
         "model_rpm_limit": {"gpt-4": 3},
+        "tags": None,
     }


@@ -2818,6 +2820,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call"],
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2836,6 +2839,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
"litellm-team3", "guardrails": ["aporia-pre-call", "aporia-post-call"], + "tags": None, } diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py index 100cf4ece9..477f28f2ed 100644 --- a/litellm/tests/test_stream_chunk_builder.py +++ b/litellm/tests/test_stream_chunk_builder.py @@ -210,7 +210,6 @@ def test_stream_chunk_builder_litellm_mixed_calls(): assert len(response.choices[0].message.tool_calls) == 1 assert response.choices[0].message.tool_calls[0].to_dict() == { - "index": 1, "function": { "arguments": '{"query": "SELECT COUNT(*) FROM users;"}', "name": "sql_query", @@ -226,3 +225,400 @@ def test_stream_chunk_builder_litellm_empty_chunks(): response = stream_chunk_builder(chunks=[]) assert response is None + + +def test_stream_chunk_builder_multiple_tool_calls(): + init_chunks = [ + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_X9P9B6STj7ze8OsJCGkfoN94", + "function": {"arguments": "", "name": "exponentiate"}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": '{"ba'}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": 'se": '}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": '3, "ex'}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": "pone"}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": 'nt": '}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": "5}"}, + "type": "function", + "index": 0, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": 
"fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_Qq8yDeRx7v276abRcLrYORdW", + "function": {"arguments": "", "name": "add"}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": '{"fi'}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": "rst_i"}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": 'nt": 1'}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": '2, "'}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": "secon"}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": 'd_int"'}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": ": 3}"}, + "type": "function", + "index": 1, + } + ], + }, + } + ], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + { + "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt", + "choices": [{"finish_reason": "tool_calls", "index": 0, "delta": {}}], + "created": 1725932618, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_b2ffeb16ee", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + response = stream_chunk_builder(chunks=chunks) + 
+ print(f"Returned response: {response}") + completed_response = { + "id": "chatcmpl-A61mXjvcRX0Xr2IiojN9TPiy1P3Fm", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "message": { + "content": None, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": '{"base": 3, "exponent": 5}', + "name": "exponentiate", + }, + "id": "call_X9P9B6STj7ze8OsJCGkfoN94", + "type": "function", + }, + { + "function": { + "arguments": '{"first_int": 12, "second_int": 3}', + "name": "add", + }, + "id": "call_Qq8yDeRx7v276abRcLrYORdW", + "type": "function", + }, + ], + "function_call": None, + }, + } + ], + "created": 1726000181, + "model": "gpt-4o-2024-05-13", + "object": "chat.completion", + "system_fingerprint": "fp_25624ae3a5", + "usage": {"completion_tokens": 55, "prompt_tokens": 127, "total_tokens": 182}, + "service_tier": None, + } + + expected_response = litellm.ModelResponse(**completed_response) + + print(f"\n\nexpected_response:\n{expected_response}\n\n") + assert ( + expected_response.choices == response.choices + ), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices) diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 96c25e5b51..c2a708663b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -325,7 +325,7 @@ class Message(OpenAIObject): ): init_values = { "content": content, - "role": "assistant", + "role": role or "assistant", # handle null input "function_call": ( FunctionCall(**function_call) if function_call is not None else None ), diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7883ddc81a..37ac239664 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5492,90 +5492,129 @@ "max_input_tokens": 128000, "max_output_tokens": 128000, "input_cost_per_token": 0.000005, - "output_cost_per_token": 0.000015, + "input_dbu_cost_per_token": 0.000071429, + "output_cost_per_token": 0.00001500002, + "output_db_cost_per_token": 0.000214286, "litellm_provider": "databricks", "mode": "chat", - "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."} }, "databricks/databricks-meta-llama-3-1-70b-instruct": { "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000003, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, "litellm_provider": "databricks", "mode": "chat", - "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
-
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }
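
The new `*_dbu_cost_per_token` fields record Databricks DBU rates next to the USD-per-token prices; per the `metadata.notes` on each entry, the USD figures are the DBU rate times $0.070 (the Llama 3.1 70B based conversion). A minimal sketch of that conversion, assuming a plain dict shaped like the JSON entries above (the `DBU_TO_USD` constant and `databricks_cost_usd` helper are illustrative names, not part of this PR):

```python
from typing import Dict, Tuple

# Assumed conversion factor, taken from the metadata notes in this diff:
# USD cost per token = DBU cost per token * $0.070.
DBU_TO_USD = 0.070

def databricks_cost_usd(
    entry: Dict[str, float], prompt_tokens: int, completion_tokens: int
) -> Tuple[float, float]:
    """Illustrative only: derive USD cost from the '*_dbu_cost_per_token' fields."""
    input_usd = entry["input_dbu_cost_per_token"] * DBU_TO_USD * prompt_tokens
    output_usd = entry["output_dbu_cost_per_token"] * DBU_TO_USD * completion_tokens
    return input_usd, output_usd

# The databricks-meta-llama-3-1-70b-instruct rates from this diff:
entry = {
    "input_dbu_cost_per_token": 0.000014286,
    "output_dbu_cost_per_token": 0.000042857,
}
print(databricks_cost_usd(entry, prompt_tokens=1000, completion_tokens=500))
# ~(0.00100002, 0.00149999), i.e. the listed input/output_cost_per_token * tokens
```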
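The `stream_chunk_builder` change in litellm/main.py tracks `prev_name` so that when a stream interleaves several tool calls (distinguished by `index`), each assembled call keeps the function name from its own chunks rather than whichever name was seen last, and the non-OpenAI `index` key is dropped from the final objects. A standalone sketch of the same grouping idea, simplified to plain dicts rather than litellm's `ModelResponse` types:

```python
from typing import Any, Dict, List

def group_tool_call_deltas(deltas: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Group streamed tool-call deltas by 'index', concatenating argument
    fragments; 'id', 'name', and 'type' come from each call's first chunks."""
    calls: Dict[int, Dict[str, Any]] = {}
    for delta in deltas:
        idx = delta.get("index", 0)
        call = calls.setdefault(
            idx,
            {"id": None, "function": {"arguments": "", "name": None}, "type": None},
        )
        call["id"] = call["id"] or delta.get("id")
        call["type"] = call["type"] or delta.get("type")
        fn = delta.get("function") or {}
        call["function"]["name"] = call["function"]["name"] or fn.get("name")
        call["function"]["arguments"] += fn.get("arguments", "")
    for call in calls.values():
        # mirrors the '"".join(argument_list) or "{}"' base case in the diff
        call["function"]["arguments"] = call["function"]["arguments"] or "{}"
    return [calls[i] for i in sorted(calls)]
```

Fed the tool-call deltas from the new `test_stream_chunk_builder_multiple_tool_calls` fixture, this yields the two expected calls: `exponentiate` with `{"base": 3, "exponent": 5}` and `add` with `{"first_int": 12, "second_int": 3}`.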
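The slack_alerting.py change stops reading the module-level `langFuseLogger` global and instead asks the request's `Logging` object for a client whose public key, secret, and host match that request, which is what makes key-based and team-based Langfuse credentials produce the right trace URL. A hedged sketch of the lookup pattern; only `_get_callback_object` and the `Langfuse.base_url` attribute come from this diff, the wrapper name is illustrative:

```python
from typing import Optional

def langfuse_trace_url(litellm_logging_obj, trace_id: str) -> Optional[str]:
    # litellm_logging_obj is assumed to be the request's Logging instance.
    # _get_callback_object returns a per-request LangFuseLogger when the
    # key/team credentials differ from the module-level default, otherwise
    # the shared global client, and None for unknown service names.
    _langfuse_object = litellm_logging_obj._get_callback_object(
        service_name="langfuse"
    )
    if _langfuse_object is None:
        return None
    base_url = _langfuse_object.Langfuse.base_url
    return f"{base_url}/trace/{trace_id}"
```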
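On `/key/generate`, the new `tags` field is not stored as its own column; `generate_key_fn` folds it into the key's `metadata` dict before calling `generate_key_helper_fn`, which is why the prisma tests now expect `"tags": None` inside `metadata`. The same folding in isolation (a sketch over a plain request dict; `fold_tags_into_metadata` is an illustrative name):

```python
from typing import Any, Dict

def fold_tags_into_metadata(data_json: Dict[str, Any]) -> Dict[str, Any]:
    """Move a top-level 'tags' value into metadata['tags'], as generate_key_fn does."""
    if "tags" in data_json:
        if data_json.get("metadata") is None:
            data_json["metadata"] = {"tags": data_json["tags"]}
        else:
            data_json["metadata"]["tags"] = data_json["tags"]
        data_json.pop("tags")
    return data_json

print(fold_tags_into_metadata({"key_alias": "k1", "tags": ["prod"], "metadata": None}))
# {'key_alias': 'k1', 'metadata': {'tags': ['prod']}}
```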