forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_add_semantic_cache
Commit 7cb69c72c8
25 changed files with 1499 additions and 342 deletions
@@ -80,7 +80,7 @@ jobs:
       command: |
         pwd
         ls
-        python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
+        python -m pytest -vv -s litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
       no_output_timeout: 120m

   # Store test results
.gitignore (vendored, 1 line added)
@@ -43,3 +43,4 @@ ui/litellm-dashboard/package-lock.json
 deploy/charts/litellm-helm/*.tgz
 deploy/charts/litellm-helm/charts/*
 deploy/charts/*.tgz
+litellm/proxy/vertex_key.json
@@ -10,6 +10,12 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/integrations/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+  - repo: local
+    hooks:
+      - id: check-files-match
+        name: Check if files match
+        entry: python3 ci_cd/check_files_match.py
+        language: system
   - repo: local
     hooks:
       - id: mypy
ci_cd/check_files_match.py (new file, 32 lines added)
@@ -0,0 +1,32 @@
+import sys
+import filecmp
+import shutil
+
+
+def main(argv=None):
+    print(
+        "Comparing model_prices_and_context_window and litellm/model_prices_and_context_window_backup.json files... checking if they match."
+    )
+
+    file1 = "model_prices_and_context_window.json"
+    file2 = "litellm/model_prices_and_context_window_backup.json"
+
+    cmp_result = filecmp.cmp(file1, file2, shallow=False)
+
+    if cmp_result:
+        print(f"Passed! Files {file1} and {file2} match.")
+        return 0
+    else:
+        print(
+            f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}."
+        )
+        copy_content(file1, file2)
+        return 1
+
+
+def copy_content(source, destination):
+    shutil.copy2(source, destination)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -122,6 +122,7 @@ response = completion(
         "generation_id": "gen-id22", # set langfuse Generation ID
         "trace_id": "trace-id22", # set langfuse Trace ID
         "trace_user_id": "user-id2", # set langfuse Trace User ID
+        "session_id": "session-1", # set langfuse Session ID
     },
 )
@@ -352,6 +352,22 @@ Request Params:
 }
 ```

+## Upperbound /key/generate params
+Use this, if you need to control the upperbound that users can use for `max_budget`, `budget_duration` or any `key/generate` param per key.
+
+Set `litellm_settings:upperbound_key_generate_params`:
+```yaml
+litellm_settings:
+  upperbound_key_generate_params:
+    max_budget: 100 # upperbound of $100, for all /key/generate requests
+    duration: "30d" # upperbound of 30 days for all /key/generate requests
+```
+
+** Expected Behavior **
+
+- Send a `/key/generate` request with `max_budget=200`
+- Key will be created with `max_budget=100` since 100 is the upper bound
+
 ## Default /key/generate params
 Use this, if you need to control the default `max_budget` or any `key/generate` param per key.
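For readers trying out the upperbound settings documented above, here is a minimal client-side sketch. It assumes a locally running proxy at http://0.0.0.0:4000 with master key sk-1234; both are illustrative values, not taken from this diff.

```python
import requests

# Hypothetical local proxy and master key; adjust to your deployment.
PROXY_BASE_URL = "http://0.0.0.0:4000"
MASTER_KEY = "sk-1234"

# Ask for a key with limits above the configured upperbounds.
resp = requests.post(
    f"{PROXY_BASE_URL}/key/generate",
    headers={"Authorization": f"Bearer {MASTER_KEY}"},
    json={"max_budget": 200, "duration": "60d"},
)
print(resp.json())
# With upperbound_key_generate_params of max_budget=100 and duration="30d",
# the created key should be capped at those values rather than rejected.
```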
@@ -146,6 +146,7 @@ suppress_debug_info = False
 dynamodb_table_name: Optional[str] = None
 s3_callback_params: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
+upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
@@ -55,8 +55,21 @@ class LangFuseLogger:
         else:
             self.upstream_langfuse = None

+    # def log_error(kwargs, response_obj, start_time, end_time):
+    #     generation = trace.generation(
+    #         level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
+    #         status_message='error' # can be any string (e.g. stringified stack trace or error body)
+    #     )
     def log_event(
-        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+        self,
+        kwargs,
+        response_obj,
+        start_time,
+        end_time,
+        user_id,
+        print_verbose,
+        level="DEFAULT",
+        status_message=None,
     ):
         # Method definition

@@ -84,15 +97,24 @@ class LangFuseLogger:
                 pass

             # end of processing langfuse ########################
-            if kwargs.get("call_type", None) == "embedding" or isinstance(
-                response_obj, litellm.EmbeddingResponse
+            if (
+                level == "ERROR"
+                and status_message is not None
+                and isinstance(status_message, str)
+            ):
+                input = prompt
+                output = status_message
+            elif response_obj is not None and (
+                kwargs.get("call_type", None) == "embedding"
+                or isinstance(response_obj, litellm.EmbeddingResponse)
             ):
                 input = prompt
                 output = response_obj["data"]
-            else:
+            elif response_obj is not None:
                 input = prompt
                 output = response_obj["choices"][0]["message"].json()
-            print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+            print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+            if self._is_langfuse_v2():
                 self._log_langfuse_v2(
                     user_id,
                     metadata,

@@ -103,8 +125,11 @@ class LangFuseLogger:
                     optional_params,
                     input,
                     response_obj,
+                    level,
                     print_verbose,
-                ) if self._is_langfuse_v2() else self._log_langfuse_v1(
+                )
+            elif response_obj is not None:
+                self._log_langfuse_v1(
                     user_id,
                     metadata,
                     output,

@@ -123,15 +148,15 @@ class LangFuseLogger:
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
         except:
             traceback.print_exc()
-            print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
+            print(f"Langfuse Layer Error - {traceback.format_exc()}")
             pass

     async def _async_log_event(
         self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
     ):
-        self.log_event(
-            kwargs, response_obj, start_time, end_time, user_id, print_verbose
-        )
+        """
+        TODO: support async calls when langfuse is truly async
+        """

     def _is_langfuse_v2(self):
         import langfuse

@@ -193,10 +218,12 @@ class LangFuseLogger:
         optional_params,
         input,
         response_obj,
+        level,
         print_verbose,
     ):
         import langfuse

+        try:
             tags = []
             supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3")
             supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3")

@@ -211,11 +238,17 @@ class LangFuseLogger:
             trace_params = {
                 "name": generation_name,
                 "input": input,
-                "output": output,
                 "user_id": metadata.get("trace_user_id", user_id),
                 "id": metadata.get("trace_id", None),
+                "session_id": metadata.get("session_id", None),
             }
-            cost = kwargs["response_cost"]
+            if level == "ERROR":
+                trace_params["status_message"] = output
+            else:
+                trace_params["output"] = output
+
+            cost = kwargs.get("response_cost", None)
             print_verbose(f"trace: {cost}")
             if supports_tags:
                 for key, value in metadata.items():

@@ -226,10 +259,22 @@ class LangFuseLogger:

             trace = self.Langfuse.trace(**trace_params)

+            if level == "ERROR":
+                trace.generation(
+                    level="ERROR",  # can be any of DEBUG, DEFAULT, WARNING or ERROR
+                    status_message=output,  # can be any string (e.g. stringified stack trace or error body)
+                )
+                print(f"SUCCESSFULLY LOGGED ERROR")
+            else:
                 # get generation_id
                 generation_id = None
-            if response_obj.get("id", None) is not None:
-                generation_id = litellm.utils.get_logging_id(start_time, response_obj)
+                if (
+                    response_obj is not None
+                    and response_obj.get("id", None) is not None
+                ):
+                    generation_id = litellm.utils.get_logging_id(
+                        start_time, response_obj
+                    )
                 trace.generation(
                     name=generation_name,
                     id=metadata.get("generation_id", generation_id),

@@ -246,3 +291,5 @@ class LangFuseLogger:
                     },
                     metadata=metadata,
                 )
+        except Exception as e:
+            print(f"Langfuse Layer Error - {traceback.format_exc()}")
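The error path added to LangFuseLogger above ends up calling trace.generation(level="ERROR", status_message=...). As a point of reference, here is a minimal standalone sketch of that Langfuse pattern, assuming Langfuse credentials are set in the environment; nothing here beyond the level/status_message kwargs is taken from this diff.

```python
from langfuse import Langfuse

# Assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY (and optionally LANGFUSE_HOST)
# are set in the environment; values and names here are illustrative.
langfuse = Langfuse()

trace = langfuse.trace(
    name="litellm-acompletion",
    user_id="user-id2",
)
# Mirror of the failure path above: no output, just an ERROR-level generation
# carrying the stringified exception as the status message.
trace.generation(
    level="ERROR",  # can be any of DEBUG, DEFAULT, WARNING or ERROR
    status_message="stringified stack trace or error body",
)
langfuse.flush()  # make sure the event is sent before the process exits
```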
@@ -146,7 +146,15 @@ def get_ollama_response(
             optional_params[k] = v

     stream = optional_params.pop("stream", False)
-    data = {"model": model, "prompt": prompt, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "prompt": prompt,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format

     ## LOGGING
     logging_obj.pre_call(
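The Ollama change above pops an optional format parameter and forwards it in the request body. A hedged sketch of how a caller might exercise it through litellm.completion, assuming a local Ollama server; the model name and api_base are illustrative assumptions.

```python
import litellm

# Hypothetical local model; any model served by your Ollama instance works.
response = litellm.completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Return a JSON object with a 'city' key."}],
    format="json",  # forwarded to Ollama as data["format"], per the hunk above
    api_base="http://localhost:11434",
)
print(response["choices"][0]["message"]["content"])
```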
@@ -146,7 +146,15 @@ def get_ollama_response(
             optional_params[k] = v

     stream = optional_params.pop("stream", False)
-    data = {"model": model, "messages": messages, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "messages": messages,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format
     ## LOGGING
     logging_obj.pre_call(
         input=None,

@@ -320,11 +328,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             model_response["choices"][0]["message"] = message
         else:
             model_response["choices"][0]["message"] = response_json["message"]

         model_response["created"] = int(time.time())
-        model_response["model"] = "ollama/" + data["model"]
+        model_response["model"] = "ollama_chat/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
         completion_tokens = response_json.get(
-            "eval_count", litellm.token_counter(text=response_json["message"])
+            "eval_count",
+            litellm.token_counter(
+                text=response_json["message"]["content"], count_response_tokens=True
+            ),
         )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
@@ -263,6 +263,7 @@ async def acompletion(
         or custom_llm_provider == "ollama"
         or custom_llm_provider == "ollama_chat"
         or custom_llm_provider == "vertex_ai"
+        or custom_llm_provider in litellm.openai_compatible_providers
     ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
         init_response = await loop.run_in_executor(None, func_with_context)
         if isinstance(init_response, dict) or isinstance(

@@ -3319,6 +3320,10 @@ async def ahealth_check(
         response = {}  # args like remaining ratelimit etc.
         return response
     except Exception as e:
+        if model not in litellm.model_cost and mode is None:
+            raise Exception(
+                "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models"
+            )
         return {"error": str(e)}

(One file's diff was suppressed because it is too large.)
@@ -78,7 +78,9 @@ litellm_settings:
       type: "redis-semantic"
       similarity_threshold: 0.8
       redis_semantic_cache_embedding_model: azure-embedding-model
-  # cache: True
+  upperbound_key_generate_params:
+    max_budget: 100
+    duration: "30d"
   # setting callback class
   # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
@@ -636,6 +636,36 @@ async def user_api_key_auth(
                 raise Exception(
                     f"Only master key can be used to generate, delete, update or get info for new keys/users. Value of allow_user_auth={allow_user_auth}"
                 )
+
+        # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. These keys can only be used for LiteLLM UI functions
+        # sso/login, ui/login, /key functions and /user functions
+        # this will never be allowed to call /chat/completions
+        token_team = getattr(valid_token, "team_id", None)
+        if token_team is not None:
+            if token_team == "litellm-dashboard":
+                # this token is only used for managing the ui
+                allowed_routes = [
+                    "/sso",
+                    "/login",
+                    "/key",
+                    "/spend",
+                    "/user",
+                ]
+                # check if the current route startswith any of the allowed routes
+                if (
+                    route is not None
+                    and isinstance(route, str)
+                    and any(
+                        route.startswith(allowed_route)
+                        for allowed_route in allowed_routes
+                    )
+                ):
+                    # Do something if the current route starts with any of the allowed routes
+                    pass
+                else:
+                    raise Exception(
+                        f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
+                    )
         return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
     else:
         raise Exception(f"Invalid Key Passed to LiteLLM Proxy")

@@ -758,9 +788,10 @@ async def _PROXY_track_cost_callback(
             verbose_proxy_logger.info(
                 f"response_cost {response_cost}, for user_id {user_id}"
             )
-            if user_api_key and (
-                prisma_client is not None or custom_db_client is not None
-            ):
+            verbose_proxy_logger.debug(
+                f"user_api_key {user_api_key}, prisma_client: {prisma_client}, custom_db_client: {custom_db_client}"
+            )
+            if user_api_key is not None:
                 await update_database(
                     token=user_api_key,
                     response_cost=response_cost,

@@ -770,6 +801,8 @@ async def _PROXY_track_cost_callback(
                     start_time=start_time,
                     end_time=end_time,
                 )
+            else:
+                raise Exception("User API key missing from custom callback.")
         else:
             if kwargs["stream"] != True or (
                 kwargs["stream"] == True
@@ -1361,6 +1394,26 @@ class ProxyConfig:
 proxy_config = ProxyConfig()


+def _duration_in_seconds(duration: str):
+    match = re.match(r"(\d+)([smhd]?)", duration)
+    if not match:
+        raise ValueError("Invalid duration format")
+
+    value, unit = match.groups()
+    value = int(value)
+
+    if unit == "s":
+        return value
+    elif unit == "m":
+        return value * 60
+    elif unit == "h":
+        return value * 3600
+    elif unit == "d":
+        return value * 86400
+    else:
+        raise ValueError("Unsupported duration unit")
+
+
 async def generate_key_helper_fn(
     duration: Optional[str],
     models: list,
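A quick standalone sketch of the _duration_in_seconds helper added above, with a few worked conversions; the function body mirrors the hunk, while the asserts are illustrative.

```python
import re


def _duration_in_seconds(duration: str):
    # Same logic as the helper above: "<int><unit>" where unit is s/m/h/d.
    match = re.match(r"(\d+)([smhd]?)", duration)
    if not match:
        raise ValueError("Invalid duration format")
    value, unit = match.groups()
    value = int(value)
    if unit == "s":
        return value
    elif unit == "m":
        return value * 60
    elif unit == "h":
        return value * 3600
    elif unit == "d":
        return value * 86400
    else:
        raise ValueError("Unsupported duration unit")


assert _duration_in_seconds("30s") == 30
assert _duration_in_seconds("30m") == 1800
assert _duration_in_seconds("30d") == 2_592_000  # 30 * 86400
```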
@@ -1395,25 +1448,6 @@ async def generate_key_helper_fn(
     if token is None:
         token = f"sk-{secrets.token_urlsafe(16)}"

-    def _duration_in_seconds(duration: str):
-        match = re.match(r"(\d+)([smhd]?)", duration)
-        if not match:
-            raise ValueError("Invalid duration format")
-
-        value, unit = match.groups()
-        value = int(value)
-
-        if unit == "s":
-            return value
-        elif unit == "m":
-            return value * 60
-        elif unit == "h":
-            return value * 3600
-        elif unit == "d":
-            return value * 86400
-        else:
-            raise ValueError("Unsupported duration unit")
-
     if duration is None:  # allow tokens that never expire
         expires = None
     else:
@@ -2630,6 +2664,36 @@ async def generate_key_fn(
             elif key == "metadata" and value == {}:
                 setattr(data, key, litellm.default_key_generate_params.get(key, {}))

+    # check if user set default key/generate params on config.yaml
+    if litellm.upperbound_key_generate_params is not None:
+        for elem in data:
+            # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
+            key, value = elem
+            if value is not None and key in litellm.upperbound_key_generate_params:
+                # if value is float/int
+                if key in [
+                    "max_budget",
+                    "max_parallel_requests",
+                    "tpm_limit",
+                    "rpm_limit",
+                ]:
+                    if value > litellm.upperbound_key_generate_params[key]:
+                        # directly compare floats/ints
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+                elif key == "budget_duration":
+                    # budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m)
+                    # compare the duration in seconds and max duration in seconds
+                    upperbound_budget_duration = _duration_in_seconds(
+                        duration=litellm.upperbound_key_generate_params[key]
+                    )
+                    user_set_budget_duration = _duration_in_seconds(duration=value)
+                    if user_set_budget_duration > upperbound_budget_duration:
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+
     data_json = data.json()  # type: ignore

     # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
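The block above clamps numeric params directly and compares budget_duration values after converting them to seconds. Below is a simplified, self-contained restatement of that rule, not the proxy's actual code path; the example values mirror the test_upperbound_key_params test added later in this diff.

```python
import re

UNIT_SECONDS = {"s": 1, "m": 60, "h": 3600, "d": 86400}


def _seconds(duration: str) -> int:
    # Compact stand-in for the _duration_in_seconds helper shown earlier.
    value, unit = re.match(r"(\d+)([smhd])", duration).groups()
    return int(value) * UNIT_SECONDS[unit]


def clamp_key_params(requested: dict, upperbounds: dict) -> dict:
    """Simplified sketch of the clamping rules above (illustrative only)."""
    clamped = dict(requested)
    for key, value in requested.items():
        if value is None or key not in upperbounds:
            continue
        if key in ("max_budget", "max_parallel_requests", "tpm_limit", "rpm_limit"):
            # Numeric params: cap at the configured upper bound.
            if value > upperbounds[key]:
                clamped[key] = upperbounds[key]
        elif key == "budget_duration":
            # Durations: compare after converting "30d"-style strings to seconds.
            if _seconds(value) > _seconds(upperbounds[key]):
                clamped[key] = upperbounds[key]
    return clamped


print(clamp_key_params(
    {"max_budget": 200000, "budget_duration": "30d"},
    {"max_budget": 0.001, "budget_duration": "1m"},
))
# -> {'max_budget': 0.001, 'budget_duration': '1m'}
```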
@@ -9,21 +9,11 @@ model_list:
     api_key: os.environ/AZURE_CANADA_API_KEY
     model: azure/gpt-35-turbo
   model_name: azure-model
-- litellm_params:
-    api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
-    api_key: os.environ/AZURE_API_KEY
-    model: azure/chatgpt-v-2
-  model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
     api_key: os.environ/AZURE_FRANCE_API_KEY
     model: azure/gpt-turbo
   model_name: azure-model
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-  model_name: test_openai_models
 - litellm_params:
     model: gpt-3.5-turbo
   model_info:

@@ -36,93 +26,8 @@ model_list:
     description: this is a test openai model
     id: 4d1ee26c-abca-450c-8744-8e87fd6755e9
   model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 00e19c0f-b63d-42bb-88e9-016fb0c60764
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 79fc75bf-8e1b-47d5-8d24-9365a854af03
-  model_name: test_openai_models
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-  model_info:
-    mode: embedding
-  model_name: azure-embedding-model
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 55848c55-4162-40f9-a6e2-9a722b9ef404
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 34339b1e-e030-4bcc-a531-c48559f10ce4
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: f6f74e14-ac64-4403-9365-319e584dcdc5
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 9b1ef341-322c-410a-8992-903987fef439
-  model_name: test_openai_models
 - litellm_params:
     model: bedrock/amazon.titan-embed-text-v1
   model_info:
     mode: embedding
   model_name: amazon-embeddings
-- litellm_params:
-    model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
-  model_info:
-    mode: embedding
-  model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
-- litellm_params:
-    model: dall-e-3
-  model_info:
-    mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_SWEDEN_API_BASE
-    api_key: os.environ/AZURE_SWEDEN_API_KEY
-    api_version: 2023-12-01-preview
-    model: azure/dall-e-3-test
-  model_info:
-    mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-06-01-preview
-    model: azure/
-  model_info:
-    mode: image_generation
-  model_name: dall-e-2
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-  model_info:
-    base_model: text-embedding-ada-002
-    mode: embedding
-  model_name: text-embedding-ada-002
-- litellm_params:
-    model: gpt-3.5-turbo
-  model_info:
-    description: this is a test openai model
-    id: 34cb2419-7c63-44ae-a189-53f1d1ce5953
-  model_name: test_openai_models
@@ -490,8 +490,13 @@ def test_dynamo_db_migration(custom_db_client):
     try:

         async def test():
+            request = GenerateKeyRequest(max_budget=1)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
             bearer_token = (
-                "Bearer " + "sk-elJDL2pOEjcAuC7zD4psAg"
+                "Bearer " + generated_key
             )  # this works with ishaan's db, it's a never expiring key

             request = Request(scope={"type": "http"})
@@ -44,6 +44,7 @@ from litellm.proxy.proxy_server import (
     info_key_fn,
     update_key_fn,
     generate_key_fn,
+    generate_key_helper_fn,
     spend_user_fn,
     spend_key_fn,
     view_spend_logs,

@@ -1278,6 +1279,40 @@ async def test_default_key_params(prisma_client):
         pytest.fail(f"Got exception {e}")


+@pytest.mark.asyncio()
+async def test_upperbound_key_params(prisma_client):
+    """
+    - create key
+    - get key info
+    - assert key_name is not null
+    """
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    litellm.upperbound_key_generate_params = {
+        "max_budget": 0.001,
+        "budget_duration": "1m",
+    }
+    await litellm.proxy.proxy_server.prisma_client.connect()
+    try:
+        request = GenerateKeyRequest(
+            max_budget=200000,
+            budget_duration="30d",
+        )
+        key = await generate_key_fn(request)
+        generated_key = key.key
+
+        result = await info_key_fn(key=generated_key)
+        key_info = result["info"]
+        # assert it used the upper bound for max_budget, and budget_duration
+        assert key_info["max_budget"] == 0.001
+        assert key_info["budget_duration"] == "1m"
+
+        print(result)
+    except Exception as e:
+        print("Got Exception", e)
+        pytest.fail(f"Got exception {e}")
+
+
 def test_get_bearer_token():
     from litellm.proxy.proxy_server import _get_bearer_token

@@ -1378,3 +1413,35 @@ async def test_user_api_key_auth_without_master_key(prisma_client):
     except Exception as e:
         print("Got Exception", e)
         pytest.fail(f"Got exception {e}")
+
+
+@pytest.mark.asyncio
+async def test_key_with_no_permissions(prisma_client):
+    """
+    - create key
+    - get key info
+    - assert key_name is null
+    """
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False})
+    await litellm.proxy.proxy_server.prisma_client.connect()
+    try:
+        response = await generate_key_helper_fn(
+            **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"}  # type: ignore
+        )
+
+        print(response)
+        key = response["token"]
+
+        # make a /chat/completions call -> it should fail
+        request = Request(scope={"type": "http"})
+        request._url = URL(url="/chat/completions")
+
+        # use generated key to auth in
+        result = await user_api_key_auth(request=request, api_key="Bearer " + key)
+        print("result from user auth with new key", result)
+        pytest.fail(f"This should have failed!. IT's an invalid key")
+    except Exception as e:
+        print("Got Exception", e)
+        print(e.message)
@@ -379,6 +379,7 @@ async def test_normal_router_tpm_limit():
         )

     except Exception as e:
+        print("Exception on test_normal_router_tpm_limit", e)
         assert e.status_code == 429

@@ -33,6 +33,11 @@ def test_proxy_gunicorn_startup_direct_config():
     Test both approaches
     """
     try:
+        from litellm._logging import verbose_proxy_logger, verbose_router_logger
+        import logging
+
+        verbose_proxy_logger.setLevel(level=logging.DEBUG)
+        verbose_router_logger.setLevel(level=logging.DEBUG)
         filepath = os.path.dirname(os.path.abspath(__file__))
         # test with worker_config = config yaml
         config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml"

@@ -48,6 +53,11 @@ def test_proxy_gunicorn_startup_direct_config():

 def test_proxy_gunicorn_startup_config_dict():
     try:
+        from litellm._logging import verbose_proxy_logger, verbose_router_logger
+        import logging
+
+        verbose_proxy_logger.setLevel(level=logging.DEBUG)
+        verbose_router_logger.setLevel(level=logging.DEBUG)
         filepath = os.path.dirname(os.path.abspath(__file__))
         # test with worker_config = config yaml
         config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml"
@@ -980,12 +980,9 @@ class Logging:
             self.model_call_details["log_event_type"] = "post_api_call"

             # User Logging -> if you pass in a custom logging function
-            verbose_logger.debug(
+            print_verbose(
                 f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
             )
-            verbose_logger.debug(
-                f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
-            )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(

@@ -1636,34 +1633,6 @@ class Logging:
                             end_time=end_time,
                             print_verbose=print_verbose,
                         )
-                    if callback == "langfuse":
-                        global langFuseLogger
-                        print_verbose("reaches Async langfuse for logging!")
-                        kwargs = {}
-                        for k, v in self.model_call_details.items():
-                            if (
-                                k != "original_response"
-                            ):  # copy.deepcopy raises errors as this could be a coroutine
-                                kwargs[k] = v
-                        # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                        if self.stream:
-                            if "complete_streaming_response" not in kwargs:
-                                return
-                            else:
-                                print_verbose(
-                                    "reaches Async langfuse for streaming logging!"
-                                )
-                                result = kwargs["complete_streaming_response"]
-                        if langFuseLogger is None:
-                            langFuseLogger = LangFuseLogger()
-                        await langFuseLogger._async_log_event(
-                            kwargs=kwargs,
-                            response_obj=result,
-                            start_time=start_time,
-                            end_time=end_time,
-                            user_id=kwargs.get("user", None),
-                            print_verbose=print_verbose,
-                        )
                 except:
                     print_verbose(
                         f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"

@@ -1788,9 +1757,37 @@ class Logging:
                             response_obj=result,
                             kwargs=self.model_call_details,
                         )
+                    elif callback == "langfuse":
+                        global langFuseLogger
+                        verbose_logger.debug("reaches langfuse for logging!")
+                        kwargs = {}
+                        for k, v in self.model_call_details.items():
+                            if (
+                                k != "original_response"
+                            ):  # copy.deepcopy raises errors as this could be a coroutine
+                                kwargs[k] = v
+                        # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+                        if langFuseLogger is None or (
+                            self.langfuse_public_key != langFuseLogger.public_key
+                            and self.langfuse_secret != langFuseLogger.secret_key
+                        ):
+                            langFuseLogger = LangFuseLogger(
+                                langfuse_public_key=self.langfuse_public_key,
+                                langfuse_secret=self.langfuse_secret,
+                            )
+                        langFuseLogger.log_event(
+                            start_time=start_time,
+                            end_time=end_time,
+                            response_obj=None,
+                            user_id=kwargs.get("user", None),
+                            print_verbose=print_verbose,
+                            status_message=str(exception),
+                            level="ERROR",
+                            kwargs=self.model_call_details,
+                        )
                 except Exception as e:
                     print_verbose(
-                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
                     )
                     print_verbose(
                         f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"

@@ -3860,6 +3857,8 @@ def get_optional_params(
         and custom_llm_provider != "text-completion-openai"
         and custom_llm_provider != "azure"
         and custom_llm_provider != "vertex_ai"
+        and custom_llm_provider != "anyscale"
+        and custom_llm_provider != "together_ai"
     ):
         if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
             # ollama actually supports json output

@@ -3878,11 +3877,6 @@ def get_optional_params(
                 optional_params[
                     "functions_unsupported_model"
                 ] = non_default_params.pop("functions")
-        elif (
-            custom_llm_provider == "anyscale"
-            and model == "mistralai/Mistral-7B-Instruct-v0.1"
-        ):  # anyscale just supports function calling with mistral
-            pass
         elif (
             litellm.add_function_to_prompt
         ):  # if user opts to add it to prompt instead

@@ -4095,6 +4089,8 @@ def get_optional_params(
             "top_p",
             "stop",
             "frequency_penalty",
+            "tools",
+            "tool_choice",
         ]
         _check_valid_arg(supported_params=supported_params)

@@ -4112,6 +4108,10 @@ def get_optional_params(
             ] = frequency_penalty  # https://docs.together.ai/reference/inference
         if stop is not None:
             optional_params["stop"] = stop
+        if tools is not None:
+            optional_params["tools"] = tools
+        if tool_choice is not None:
+            optional_params["tool_choice"] = tool_choice
     elif custom_llm_provider == "ai21":
         ## check if unsupported param passed in
         supported_params = [
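The last two hunks add tools and tool_choice to the supported OpenAI params for together_ai. A hedged sketch of what that enables via litellm.completion; the model name, tool schema, and environment variable are illustrative assumptions, not taken from this diff.

```python
import litellm

# Assumes a Together AI key is configured in the environment (e.g. TOGETHERAI_API_KEY);
# the model name below is illustrative.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = litellm.completion(
    model="together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,         # now passed through for together_ai
    tool_choice="auto",  # now passed through for together_ai
)
print(response["choices"][0]["message"])
```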
@@ -156,8 +156,8 @@
     "max_tokens": 4097,
     "max_input_tokens": 4097,
     "max_output_tokens": 4096,
-    "input_cost_per_token": 0.000012,
-    "output_cost_per_token": 0.000016,
+    "input_cost_per_token": 0.000003,
+    "output_cost_per_token": 0.000006,
     "litellm_provider": "openai",
     "mode": "chat"
 },
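The pricing change above can be sanity-checked with simple arithmetic; a minimal sketch (token counts are illustrative):

```python
# Cost of a hypothetical call against the updated entry.
input_cost_per_token = 0.000003
output_cost_per_token = 0.000006

prompt_tokens = 1000
completion_tokens = 500

cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # 1000*0.000003 + 500*0.000006 = $0.006, versus $0.02 at the old prices
```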
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.22.4"
+version = "1.22.8"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.22.4"
+version = "1.22.8"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -13,7 +13,7 @@ redisvl==0.0.7 # semantic caching
 numpy==1.24.3 # semantic caching
 prisma==0.11.0 # for db
 mangum==0.17.0 # for aws lambda functions
-google-generativeai==0.1.0 # for vertex ai calls
+google-generativeai==0.3.2 # for vertex ai calls
 async_generator==1.10.0 # for async ollama calls
 traceloop-sdk==0.5.3 # for open telemetry logging
 langfuse>=2.6.3 # for langfuse self-hosted logging
@@ -5,8 +5,8 @@ import "./globals.css";
 const inter = Inter({ subsets: ["latin"] });

 export const metadata: Metadata = {
-  title: "Create Next App",
-  description: "Generated by create next app",
+  title: "🚅 LiteLLM",
+  description: "LiteLLM Proxy Admin UI",
 };

 export default function RootLayout({