From f681f0f2b26ceea97f1d2fd9267d4ee92dc000e0 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 5 Jan 2024 13:11:23 +0530
Subject: [PATCH] (feat) completion_cost - embeddings + raise Exception

---
 litellm/__init__.py             |  7 ++++-
 litellm/tests/test_embedding.py |  7 ++++-
 litellm/utils.py                | 47 ++++++++++++++++++++++-----------
 3 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index 8668fe850..f848dd324 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -338,7 +338,8 @@ baseten_models: List = [
 ]  # FALCON 7B # WizardLM # Mosaic ML
 
-# used for token counting
+# used for Cost Tracking & Token counting
+# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
 # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting
 azure_llms = {
     "gpt-35-turbo": "azure/gpt-35-turbo",
     "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
@@ -346,6 +347,10 @@ azure_llms = {
     "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
 }
 
+azure_embedding_models = {
+    "ada": "azure/ada",
+}
+
 petals_models = [
     "petals-team/StableBeluga2",
 ]
diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 2a86f79d7..ae59424f6 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -59,6 +59,7 @@ def test_openai_embedding():
 
 def test_openai_azure_embedding_simple():
     try:
+        litellm.set_verbose = True
         response = embedding(
             model="azure/azure-embedding-model",
             input=["good morning from litellm"],
@@ -70,11 +71,15 @@ def test_openai_azure_embedding_simple():
             response_keys
         )  # assert litellm response has expected keys from OpenAI embedding response
 
+        request_cost = litellm.completion_cost(completion_response=response)
+
+        print("Calculated request cost=", request_cost)
+
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
 
-# test_openai_azure_embedding_simple()
+test_openai_azure_embedding_simple()
 
 
 def test_openai_azure_embedding_timeouts():
diff --git a/litellm/utils.py b/litellm/utils.py
index 42c9b4157..3f3978dd2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2740,6 +2740,8 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
     completion_tokens_cost_usd_dollar = 0
     model_cost_ref = litellm.model_cost
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
+    print_verbose(f"Looking up model={model} in model_cost_map")
+
     if model in model_cost_ref:
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
@@ -2749,6 +2751,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-3.5-turbo" in model:
+        print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
@@ -2759,6 +2762,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_llms:
+        print_verbose(f"Cost Tracking: {model} is an Azure LLM")
         model = litellm.azure_llms[model]
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
@@ -2767,19 +2771,29 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
             model_cost_ref[model]["output_cost_per_token"] * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
-    else:
-        # calculate average input cost, azure/gpt-deployments can potentially go here if users don't specify, gpt-4, gpt-3.5-turbo. LLMs litellm knows
-        input_cost_sum = 0
-        output_cost_sum = 0
-        model_cost_ref = litellm.model_cost
-        for model in model_cost_ref:
-            input_cost_sum += model_cost_ref[model]["input_cost_per_token"]
-            output_cost_sum += model_cost_ref[model]["output_cost_per_token"]
-        avg_input_cost = input_cost_sum / len(model_cost_ref.keys())
-        avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
-        prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
-        completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
+    elif model in litellm.azure_embedding_models:
+        print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model")
+        model = litellm.azure_embedding_models[model]
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    else:
+        # model is not in model_prices_and_context_window.json - raise an exception to let users know
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}\n"
+        raise litellm.exceptions.NotFoundError(  # type: ignore
+            message=error_str,
+            model=model,
+            response=httpx.Response(
+                status_code=404,
+                content=error_str,
+                request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"),  # type: ignore
+            ),
+            llm_provider="",
+        )
 
 
 def completion_cost(
@@ -2821,8 +2835,10 @@ def completion_cost(
     completion_tokens = 0
     if completion_response is not None:
         # get input/output tokens from completion_response
-        prompt_tokens = completion_response["usage"]["prompt_tokens"]
-        completion_tokens = completion_response["usage"]["completion_tokens"]
+        prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
+        completion_tokens = completion_response.get("usage", {}).get(
+            "completion_tokens", 0
+        )
         model = (
             model or completion_response["model"]
         )  # check if user passed an override for model, if it's none check completion_response['model']
@@ -2852,8 +2868,7 @@ def completion_cost(
         )
         return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
     except Exception as e:
-        print_verbose(f"LiteLLM: Excepton when cost calculating {str(e)}")
-        return 0.0  # this should not block a users execution path
+        raise e
 
 
 ####### HELPER FUNCTIONS ################
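
Usage sketch (not part of the patch): the new azure_embedding_models dict lets cost_per_token resolve the bare "ada" deployment name that Azure returns to the priced "azure/ada" entry, mirroring the existing azure_llms remapping for chat models. A minimal illustration, assuming this patch is applied and "azure/ada" has per-token prices in model_prices_and_context_window.json:

    from litellm.utils import cost_per_token

    # "ada" (as Azure reports it) is remapped to "azure/ada" before the
    # price lookup, the same way azure_llms remaps chat deployments
    prompt_usd, completion_usd = cost_per_token(
        model="ada", prompt_tokens=1000, completion_tokens=0
    )
    print(f"cost for 1000 embedding tokens: ${prompt_usd + completion_usd:.8f}")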
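
End to end, the updated test drives the same path through completion_cost(); the defensive completion_response.get("usage", {}).get(...) reads matter here because embedding responses may not populate completion_tokens. A sketch of that flow, assuming Azure OpenAI credentials are configured and an "azure-embedding-model" deployment exists (the deployment name comes from the test and is not guaranteed in your account):

    import litellm
    from litellm import embedding

    response = embedding(
        model="azure/azure-embedding-model",
        input=["good morning from litellm"],
    )
    # embedding responses may omit usage.completion_tokens; completion_cost
    # now defaults missing counts to 0 instead of raising a KeyError
    request_cost = litellm.completion_cost(completion_response=response)
    print("Calculated request cost=", request_cost)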
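
Behavioral note: for unrecognized model names, cost_per_token previously fell back to an input/output price averaged across every known model, and completion_cost swallowed errors and returned 0.0; after this patch both raise instead. A sketch of catching the new error (the model name is illustrative):

    import litellm
    from litellm.utils import cost_per_token

    try:
        cost_per_token(model="my-unknown-model", prompt_tokens=10, completion_tokens=5)
    except litellm.exceptions.NotFoundError as e:
        # the error wraps a synthetic 404 httpx.Response naming the missing model
        print(f"Cost lookup failed: {e}")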