mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
add api manager
This commit is contained in:
parent
15c40625c6
commit
a39756bfda
10 changed files with 110 additions and 19 deletions
|
@ -35,6 +35,11 @@ caching = False # deprecated soon
|
|||
caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
|
||||
cache: Optional[Cache] = None # cache object
|
||||
model_alias_map: Dict[str, str] = {}
|
||||
|
||||
####### APIManager ###################
|
||||
from .apimanager import APIManager
|
||||
apiManager = APIManager()
|
||||
|
||||
def get_model_cost_map():
|
||||
url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
25
litellm/apimanager.py
Normal file
25
litellm/apimanager.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import litellm
|
||||
from litellm.utils import ModelResponse
|
||||
class APIManager:
    """Tracks per-user spending budgets across LLM completion calls."""

    def __init__(self):
        # Maps user id -> {"total_budget": float, "current_cost": float}.
        # "current_cost" is created lazily the first time spend is recorded.
        self.user_dict = {}

    def create_budget(self, total_budget: float, user: str):
        """Register (or reset) *user* with the given total budget.

        Returns the stored budget record for that user.
        """
        record = {"total_budget": total_budget}
        self.user_dict[user] = record
        return record

    def projected_cost(self, model: str, messages: list, user: str):
        """Estimate *user*'s total spend if *messages* were sent to *model*.

        Completion length is unknown ahead of time, so only the prompt side
        is priced (completion_tokens=0).
        """
        prompt_text = "".join(message["content"] for message in messages)
        prompt_tokens = litellm.token_counter(model=model, text=prompt_text)
        prompt_cost, _ = litellm.cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=0)
        spent_so_far = self.user_dict[user].get("current_cost", 0)
        return prompt_cost + spent_so_far

    def get_total_budget(self, user: str):
        """Return the total budget previously registered for *user*."""
        return self.user_dict[user]["total_budget"]

    def update_cost(self, completion_obj: ModelResponse, user: str):
        """Add the cost of a finished completion to *user*'s running total.

        Returns the user's updated cumulative cost.
        """
        call_cost = litellm.completion_cost(completion_response=completion_obj)
        record = self.user_dict[user]
        record["current_cost"] = call_cost + record.get("current_cost", 0)
        return record["current_cost"]
|
|
@ -168,7 +168,7 @@ def completion(
|
|||
model_alias_map=litellm.model_alias_map,
|
||||
completion_call_id=id
|
||||
)
|
||||
logging.update_environment_variables(model=model, optional_params=optional_params, litellm_params=litellm_params)
|
||||
logging.update_environment_variables(model=model, user=user, optional_params=optional_params, litellm_params=litellm_params)
|
||||
if custom_llm_provider == "azure":
|
||||
# azure configs
|
||||
openai.api_type = "azure"
|
||||
|
|
60
litellm/tests/test_api_manager.py
Normal file
60
litellm/tests/test_api_manager.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
#### What this tests ####
|
||||
# This tests calling batch_completions by running 100 messages together
|
||||
|
||||
import sys, os
|
||||
import traceback
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import litellm
|
||||
from litellm import apiManager, completion
|
||||
|
||||
litellm.success_callback = ["api_manager"]
|
||||
|
||||
|
||||
## Scenario 1: User budget enough to make call
|
||||
## Scenario 1: User budget enough to make call
def test_user_budget_enough():
    """User's budget covers the projected cost, so the completion is placed."""
    user = "1234"
    # create a budget for a user
    apiManager.create_budget(total_budget=10, user=user)

    # check if a given call can be made
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    }
    # Only place the call when the projected spend stays within budget.
    if apiManager.projected_cost(**data, user=user) <= apiManager.get_total_budget(user):
        response = completion(**data)
    else:
        response = "Sorry - no budget!"

    print(f"response: {response}")
|
||||
|
||||
## Scenario 2: User budget not enough to make call
|
||||
## Scenario 2: User budget not enough to make call
def test_user_budget_not_enough():
    """User's budget (0) cannot cover the projected cost, so the call is refused."""
    user = "12345"
    # create a zero budget for a user so any non-trivial prompt exceeds it
    apiManager.create_budget(total_budget=0, user=user)

    # check if a given call can be made
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    }
    projectedCost = apiManager.projected_cost(**data, user=user)
    print(f"projectedCost: {projectedCost}")
    totalBudget = apiManager.get_total_budget(user)
    print(f"totalBudget: {totalBudget}")
    if projectedCost <= totalBudget:
        response = completion(**data)
    else:
        response = "Sorry - no budget!"

    print(f"response: {response}")
|
||||
|
||||
test_user_budget_not_enough()
|
|
@ -117,9 +117,6 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th
|
|||
os.environ["TOGETHERAI_API_KEY"] = temporary_key
|
||||
return
|
||||
|
||||
|
||||
invalid_auth(test_model)
|
||||
|
||||
# Test 3: Rate Limit Errors
|
||||
# def test_model_call(model):
|
||||
# try:
|
||||
|
|
|
@ -144,9 +144,10 @@ class Logging:
|
|||
self.litellm_call_id = litellm_call_id
|
||||
self.function_id = function_id
|
||||
|
||||
def update_environment_variables(self, model, optional_params, litellm_params):
|
||||
def update_environment_variables(self, model, user, optional_params, litellm_params):
|
||||
self.optional_params = optional_params
|
||||
self.model = model
|
||||
self.user = user
|
||||
self.litellm_params = litellm_params
|
||||
self.logger_fn = litellm_params["logger_fn"]
|
||||
print_verbose(f"self.optional_params: {self.optional_params}")
|
||||
|
@ -311,6 +312,9 @@ class Logging:
|
|||
call_type = self.call_type,
|
||||
stream = self.stream,
|
||||
)
|
||||
if callback == "api_manager":
|
||||
print_verbose("reaches api manager for updating model cost")
|
||||
litellm.apiManager.update_cost(completion_obj=result, user=self.user)
|
||||
if callback == "cache":
|
||||
# print("entering logger first time")
|
||||
# print(self.litellm_params["stream_response"])
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "0.1.576"
|
||||
version = "0.1.577"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT License"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue