mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
* fix(o_series_transformation.py): add 'reasoning_effort' as o series model param Closes https://github.com/BerriAI/litellm/issues/8182 * fix(main.py): ensure `reasoning_effort` is a mapped openai param * refactor(azure/): rename o1_[x] files to o_series_[x] * refactor(base_llm_unit_tests.py): refactor testing for o series reasoning effort * test(test_azure_o_series.py): have azure o series tests correctly inherit from base o series model tests * feat(base_utils.py): support translating 'developer' role to 'system' role for non-openai providers Makes it easy to switch from openai to anthropic * fix: fix linting errors * fix(base_llm_unit_tests.py): fix test * fix(main.py): add missing param
370 lines
11 KiB
Python
370 lines
11 KiB
Python
from typing import List
|
|
|
|
# Default values for routing, batching, retries, cooldown and image handling.
# The consumers of these values are not visible in this file; the names and
# the inline notes below are the only contract shown here.
ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2
DEFAULT_FAILURE_THRESHOLD_PERCENT = (
    0.5  # by default, cool down a deployment if 50% of requests fail in a given minute
)
DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.

#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.

#### Networking settings ####
request_timeout: float = 6000  # time in seconds

# Provider identifiers grouped under the "chat providers" name. Order is kept
# exactly as originally declared.
LITELLM_CHAT_PROVIDERS = [
    "openai",
    "openai_like",
    "xai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "cohere_chat",
    "clarifai",
    "anthropic",
    "anthropic_text",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "vertex_ai_beta",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "sagemaker_chat",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "ai21_chat",
    "volcengine",
    "codestral",
    "text-completion-codestral",
    "deepseek",
    "sambanova",
    "maritalk",
    "cloudflare",
    "fireworks_ai",
    "friendliai",
    "watsonx",
    "watsonx_text",
    "triton",
    "predibase",
    "databricks",
    "empower",
    "github",
    "custom",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]

# Parameter names grouped under the "OpenAI chat completion params" name.
# Each name appears exactly once; the original list carried "temperature"
# twice, which is redundant for membership checks.
OPENAI_CHAT_COMPLETION_PARAMS = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stream_options",
    "stop",
    "max_completion_tokens",
    "modalities",
    "prediction",
    "audio",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
    "parallel_tool_calls",
    "logprobs",
    "top_logprobs",
    "reasoning_effort",
    "extra_headers",
]

# Endpoint strings grouped under the "OpenAI compatible" name. Some entries
# carry an "https://" scheme and others do not; they are reproduced verbatim
# because how they are matched is not visible in this file.
openai_compatible_endpoints: List[str] = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
    "codestral.mistral.ai/v1/chat/completions",
    "codestral.mistral.ai/v1/fim/completions",
    "api.groq.com/openai/v1",
    "https://integrate.api.nvidia.com/v1",
    "api.deepseek.com/v1",
    "api.together.xyz/v1",
    "app.empower.dev/api/v1",
    "https://api.friendli.ai/serverless/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
    "api.galadriel.ai/v1",
]

# Provider identifiers grouped under the "OpenAI compatible" name.
openai_compatible_providers: List[str] = [
    "anyscale",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "sambanova",
    "ai21_chat",
    "ai21",
    "volcengine",
    "codestral",
    "deepseek",
    "deepinfra",
    "perplexity",
    "xinference",
    "xai",
    "together_ai",
    "fireworks_ai",
    "empower",
    "friendliai",
    "azure_ai",
    "github",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]

# providers that support `/v1/completions`
openai_text_completion_compatible_providers: List[str] = [
    "together_ai",
    "fireworks_ai",
    "hosted_vllm",
]

# private helper. similar to openai but require some custom auth / endpoint
# handling, so can't use the openai sdk
_openai_like_providers: List[str] = [
    "predibase",
    "databricks",
    "watsonx",
]

# well supported replicate llms
# Each entry has the form "<owner>/<model>:<version hash>".
replicate_models: List[str] = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]

# Clarifai model identifiers; every entry is prefixed with "clarifai/".
clarifai_models: List[str] = [
    "clarifai/meta.Llama-3.Llama-3-8B-Instruct",
    "clarifai/gcp.generate.gemma-1_1-7b-it",
    "clarifai/mistralai.completion.mixtral-8x22B",
    "clarifai/cohere.generate.command-r-plus",
    "clarifai/databricks.drbx.dbrx-instruct",
    "clarifai/mistralai.completion.mistral-large",
    "clarifai/mistralai.completion.mistral-medium",
    "clarifai/mistralai.completion.mistral-small",
    "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1",
    "clarifai/gcp.generate.gemma-2b-it",
    "clarifai/gcp.generate.gemma-7b-it",
    "clarifai/deci.decilm.deciLM-7B-instruct",
    "clarifai/mistralai.completion.mistral-7B-Instruct",
    "clarifai/gcp.generate.gemini-pro",
    "clarifai/anthropic.completion.claude-v1",
    "clarifai/anthropic.completion.claude-instant-1_2",
    "clarifai/anthropic.completion.claude-instant",
    "clarifai/anthropic.completion.claude-v2",
    "clarifai/anthropic.completion.claude-2_1",
    "clarifai/meta.Llama-2.codeLlama-70b-Python",
    "clarifai/meta.Llama-2.codeLlama-70b-Instruct",
    "clarifai/openai.completion.gpt-3_5-turbo-instruct",
    "clarifai/meta.Llama-2.llama2-7b-chat",
    "clarifai/meta.Llama-2.llama2-13b-chat",
    "clarifai/meta.Llama-2.llama2-70b-chat",
    "clarifai/openai.chat-completion.gpt-4-turbo",
    "clarifai/microsoft.text-generation.phi-2",
    "clarifai/meta.Llama-2.llama2-7b-chat-vllm",
    "clarifai/upstage.solar.solar-10_7b-instruct",
    "clarifai/openchat.openchat.openchat-3_5-1210",
    "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B",
    "clarifai/gcp.generate.text-bison",
    "clarifai/meta.Llama-2.llamaGuard-7b",
    "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2",
    "clarifai/openai.chat-completion.GPT-4",
    "clarifai/openai.chat-completion.GPT-3_5-turbo",
    "clarifai/ai21.complete.Jurassic2-Grande",
    "clarifai/ai21.complete.Jurassic2-Grande-Instruct",
    "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct",
    "clarifai/ai21.complete.Jurassic2-Jumbo",
    "clarifai/ai21.complete.Jurassic2-Large",
    "clarifai/cohere.generate.cohere-generate-command",
    "clarifai/wizardlm.generate.wizardCoder-Python-34B",
    "clarifai/wizardlm.generate.wizardLM-70B",
    "clarifai/tiiuae.falcon.falcon-40b-instruct",
    "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat",
    "clarifai/gcp.generate.code-gecko",
    "clarifai/gcp.generate.code-bison",
    "clarifai/mistralai.completion.mistral-7B-OpenOrca",
    "clarifai/mistralai.completion.openHermes-2-mistral-7B",
    "clarifai/wizardlm.generate.wizardLM-13B",
    "clarifai/huggingface-research.zephyr.zephyr-7B-alpha",
    "clarifai/wizardlm.generate.wizardCoder-15B",
    "clarifai/microsoft.text-generation.phi-1_5",
    "clarifai/databricks.Dolly-v2.dolly-v2-12b",
    "clarifai/bigcode.code.StarCoder",
    "clarifai/salesforce.xgen.xgen-7b-8k-instruct",
    "clarifai/mosaicml.mpt.mpt-7b-instruct",
    "clarifai/anthropic.completion.claude-3-opus",
    "clarifai/anthropic.completion.claude-3-sonnet",
    "clarifai/gcp.generate.gemini-1_5-pro",
    "clarifai/gcp.generate.imagen-2",
    "clarifai/salesforce.blip.general-english-image-caption-blip-2",
]

# These have been tested extensively. But by default all text2text-generation
# and text-generation models are supported by liteLLM.
# - https://docs.litellm.ai/docs/providers
huggingface_models: List[str] = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]

# Empower model identifiers. Annotated like the sibling model lists in this
# module.
empower_models: List[str] = [
    "empower/empower-functions",
    "empower/empower-functions-small",
]

# supports all together ai models, just pass in the model id
# e.g. completion(model="together_computer/replit_code_3b",...)
together_ai_models: List[str] = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]

# Baseten model ids.
baseten_models: List[str] = [
    "qvv0xeq",
    "q841o8w",
    "31dxrj3",
]  # FALCON 7B # WizardLM # Mosaic ML

# OpenAI embedding model names listed by this module.
open_ai_embedding_models: List[str] = ["text-embedding-ada-002"]
# Cohere embedding model names listed by this module (v3.0 then v2.0).
cohere_embedding_models: List[str] = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
# Bedrock embedding model ids listed by this module.
bedrock_embedding_models: List[str] = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]

# Finish-reason strings; note that "null" here is the literal string, not None.
OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"]
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60  # 1 minute
RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"  # default tool name used when converting response format to tool call

########################### Logging Callback Constants ###########################
AZURE_STORAGE_MSFT_VERSION = "2019-07-07"

########################### LiteLLM Proxy Specific Constants ###########################
########################################################################################
MAX_SPENDLOG_ROWS_TO_QUERY = (
    1_000_000  # if spendLogs has more than 1M rows, do not query the DB
)
# makes it clear this is a rate limit error for a litellm virtual key
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"

# pass through route constants
# Every entry ends with a trailing "/".
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
    "agents/",
    "knowledgebases/",
    "flows/",
    "retrieveAndGenerate/",
    "rerank/",
    "generateQuery/",
    "optimize-prompt/",
]

# Batch status polling: 24 attempts at a 1-hour interval, i.e. 24 hours.
BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600  # 1 hour
BATCH_STATUS_POLL_MAX_ATTEMPTS = 24  # for 24 hours

HEALTH_CHECK_TIMEOUT_SECONDS = 60  # 60 seconds

UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"