mirror of https://github.com/BerriAI/litellm.git
* fix(azure/): support passing headers to azure openai endpoints
Fixes https://github.com/BerriAI/litellm/issues/6217
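A minimal sketch of the intended usage, given that the fix forwards a per-request headers argument to the Azure OpenAI endpoint (deployment name, api_base, and the header value here are illustrative, not taken from the PR):

import litellm

# Sketch: forward per-request headers to an Azure OpenAI endpoint.
# Deployment name, endpoint, and the header itself are hypothetical;
# the API key is assumed to come from the environment.
response = litellm.completion(
    model="azure/my-gpt-4o-deployment",
    api_base="https://my-resource.openai.azure.com",
    messages=[{"role": "user", "content": "hello"}],
    headers={"x-custom-trace-id": "abc-123"},
)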
* fix(utils.py): move default tokenizer to just openai
the HF tokenizer makes network calls when fetching the tokenizer - this slows down completion calls
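A quick way to exercise the new default (a sketch; model and text are arbitrary): token counting should now resolve to the local OpenAI/tiktoken tokenizer rather than downloading a Hugging Face one.

import litellm

# Sketch: count tokens with the default (OpenAI/tiktoken) tokenizer,
# with no network call to fetch an HF tokenizer.
n = litellm.token_counter(model="gpt-3.5-turbo", text="how many tokens is this?")
print(n)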
* fix(router.py): fix pattern matching router - add generic "*" to it as well
Fixes issue where generic "*" model access group wouldn't show up
* fix(pattern_match_deployments.py): match to the more specific pattern
makes it easier to set a generic wildcard model access group while excluding specific models
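A rough illustration of the routing behavior these two router fixes target, assuming the Router wildcard syntax (all model names below are illustrative):

from litellm import Router

# Sketch: a generic "*" deployment plus a more specific "anthropic/*" pattern.
# With this fix, a request for "anthropic/claude-3-opus" matches the more
# specific "anthropic/*" deployment, while anything else falls through to
# "*", which now also works as a model access group.
router = Router(
    model_list=[
        {"model_name": "*", "litellm_params": {"model": "openai/*"}},
        {"model_name": "anthropic/*", "litellm_params": {"model": "anthropic/*"}},
    ]
)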
* fix(proxy_server.py): fix _delete_deployment to handle the base case where the db_model list is empty
don't delete all router models because of an empty list
Fixes https://github.com/BerriAI/litellm/issues/7196
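The base case being guarded against, as a sketch (function and field names are hypothetical, not the actual proxy_server.py code):

# Sketch of the guard: an empty DB result used to look like "every router
# model was removed from the DB", so every router model got deleted.
def _prune_router_models(router_models: list, db_models: list) -> list:
    if len(db_models) == 0:
        return router_models  # empty list -> keep everything, delete nothing
    db_ids = {m["model_id"] for m in db_models}
    return [m for m in router_models if m["model_id"] in db_ids]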
* fix(anthropic/): fix handling response_format for anthropic messages with anthropic api
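From the caller's side, the anthropic response_format path looks roughly like this (a sketch; the model name is illustrative). Internally the response format is converted to a tool call, which is what the RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" constant further down refers to.

import litellm

# Sketch: request JSON output from an Anthropic model through litellm.
# litellm converts the response format into a tool call under the hood
# (see RESPONSE_FORMAT_TOOL_NAME in the constants file below).
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model name
    messages=[{"role": "user", "content": "Return a JSON object with a 'city' key."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)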
* fix(fireworks_ai/): support passing response_format + tool call in same message
Addresses https://github.com/BerriAI/litellm/issues/7135
* Revert "fix(fireworks_ai/): support passing response_format + tool call in same message"
This reverts commit 6a30dc6929
.
* test: fix test
* fix(replicate/): fix replicate default retry/polling logic
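The default retry/polling behavior, sketched with the constants defined in the file below (the loop itself is illustrative, not the actual replicate handler code):

import time

DEFAULT_REPLICATE_POLLING_RETRIES = 5  # from the constants file below
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1

def poll_prediction(get_status):
    # Sketch: poll a Replicate prediction until it reaches a terminal state
    # or the retry budget is exhausted. `get_status` is a caller-supplied
    # callable (an assumption for this sketch).
    for _ in range(DEFAULT_REPLICATE_POLLING_RETRIES):
        status = get_status()
        if status in ("succeeded", "failed", "canceled"):
            return status
        time.sleep(DEFAULT_REPLICATE_POLLING_DELAY_SECONDS)
    return "timed_out"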
* test: add unit testing for router pattern matching
* test: update test to use default oai tokenizer
* test: mark flaky test
* test: skip flaky test
79 lines · 1.7 KiB · Python
ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2
DEFAULT_REPLICATE_POLLING_RETRIES = 5
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
LITELLM_CHAT_PROVIDERS = [
    "openai",
    "openai_like",
    "xai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "cohere_chat",
    "clarifai",
    "anthropic",
    "anthropic_text",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "vertex_ai_beta",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "sagemaker_chat",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "ai21_chat",
    "volcengine",
    "codestral",
    "text-completion-codestral",
    "deepseek",
    "sambanova",
    "maritalk",
    "cloudflare",
    "fireworks_ai",
    "friendliai",
    "watsonx",
    "watsonx_text",
    "triton",
    "predibase",
    "databricks",
    "empower",
    "github",
    "custom",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]

RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"  # default tool name used when converting response format to tool call

########################### LiteLLM Proxy Specific Constants ###########################
MAX_SPENDLOG_ROWS_TO_QUERY = (
    1_000_000  # if spendLogs has more than 1M rows, do not query the DB
)
# makes it clear this is a rate limit error for a litellm virtual key
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"
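Where these constants are needed elsewhere, they can be imported from the module directly (the litellm/constants.py path is assumed from context):

from litellm.constants import DEFAULT_REPLICATE_POLLING_RETRIES, ROUTER_MAX_FALLBACKS

print(ROUTER_MAX_FALLBACKS, DEFAULT_REPLICATE_POLLING_RETRIES)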