Mirror of https://github.com/BerriAI/litellm.git
Doc updates + management endpoint fixes (#8138)
* Litellm dev 01 29 2025 p4 (#8107)
  * fix(key_management_endpoints.py): always get db team. Fixes https://github.com/BerriAI/litellm/issues/7983
  * test(test_key_management.py): add unit test enforcing check_db_only is always true on key-generate checks
  * test: fix test
  * test: skip gemini thinking
* Litellm dev 01 29 2025 p3 (#8106)
  * fix(__init__.py): reduce the size of __init__.py and narrow the scope for errors by using the correct param
  * refactor(__init__.py): refactor init by cleaning up redundant params
  * refactor(__init__.py): move more constants into constants.py; clean up the root
  * refactor(__init__.py): more cleanup
  * feat(__init__.py): expose new 'disable_hf_tokenizer_download' param; enables HF model usage in offline environments (usage sketch below)
  * docs(config_settings.md): document the new disable_hf_tokenizer_download param
  * fix: fix linting error
  * fix: fix unsafe comparison
  * test: fix test
* docs(public_teams.md): add doc showing how to expose public teams for users to join
* docs: add beta disclaimer on public teams
* test: update tests
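The 'disable_hf_tokenizer_download' bullet above is the main user-facing change. A minimal usage sketch, assuming the param is exposed as a module-level flag in litellm/__init__.py as the commit message implies (the exact attribute path is an assumption, not confirmed by the diff below):

import litellm

# assumption: 'disable_hf_tokenizer_download' is a module-level flag in litellm/__init__.py.
# When set, litellm skips downloading HuggingFace tokenizers (useful in offline
# environments) and token counting falls back to the default tokenizer.
litellm.disable_hf_tokenizer_download = True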
This commit is contained in:
parent 2eee7f978f
commit 16b5de07af

16 changed files with 428 additions and 349 deletions
@@ -1,3 +1,5 @@
from typing import List

ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
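For context on the constants above: DEFAULT_BATCH_SIZE and DEFAULT_FLUSH_INTERVAL_SECONDS are the classic knobs of a buffered batch flusher. A hedged sketch of that pattern (illustrative only, not litellm's logging code; the BatchLogger class is hypothetical):

import time

DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5


class BatchLogger:
    """Hypothetical buffered logger: flush on a full batch or an elapsed interval."""

    def __init__(self) -> None:
        self.buffer: list = []
        self.last_flush = time.monotonic()

    def add(self, event: dict) -> None:
        self.buffer.append(event)
        full = len(self.buffer) >= DEFAULT_BATCH_SIZE
        stale = time.monotonic() - self.last_flush >= DEFAULT_FLUSH_INTERVAL_SECONDS
        if full or stale:
            self.flush()

    def flush(self) -> None:
        if self.buffer:
            print(f"shipping {len(self.buffer)} events")  # stand-in for a real log sink
            self.buffer.clear()
        self.last_flush = time.monotonic()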
@@ -12,6 +14,11 @@ DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
request_timeout: float = 6000  # time in seconds

LITELLM_CHAT_PROVIDERS = [
    "openai",
    "openai_like",
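SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD above is easiest to read with an example: with only one deployment, cooling it down removes all capacity, so a traffic floor keeps a handful of failures on low volume from tripping cooldown. A minimal sketch, assuming a hypothetical helper and failure-rate cutoff (litellm's actual router logic is not part of this diff):

SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000


def should_cooldown_single_deployment(total_requests: int, failed_requests: int) -> bool:
    # require "reasonable traffic" before trusting the failure rate
    if total_requests < SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD:
        return False
    return failed_requests / total_requests > 0.5  # hypothetical cutoff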
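Similarly, REPEATED_STREAMING_CHUNK_LIMIT enables the kind of guard sketched below: count consecutive identical chunks and abort once the limit is hit, so a model looping on the same output doesn't stream forever (illustrative; not the code from this commit):

REPEATED_STREAMING_CHUNK_LIMIT = 100


def guard_stream(stream):
    """Yield chunks, raising if the model repeats the same chunk too many times."""
    repeats, last = 0, None
    for chunk in stream:
        if chunk == last:
            repeats += 1
            if repeats >= REPEATED_STREAMING_CHUNK_LIMIT:
                raise RuntimeError("repeated streaming chunk limit exceeded")
        else:
            repeats, last = 0, chunk
        yield chunk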
@@ -113,6 +120,222 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
    "top_logprobs",
    "extra_headers",
]

openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
    "codestral.mistral.ai/v1/chat/completions",
    "codestral.mistral.ai/v1/fim/completions",
    "api.groq.com/openai/v1",
    "https://integrate.api.nvidia.com/v1",
    "api.deepseek.com/v1",
    "api.together.xyz/v1",
    "app.empower.dev/api/v1",
    "https://api.friendli.ai/serverless/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
    "api.galadriel.ai/v1",
]


openai_compatible_providers: List = [
    "anyscale",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "sambanova",
    "ai21_chat",
    "ai21",
    "volcengine",
    "codestral",
    "deepseek",
    "deepinfra",
    "perplexity",
    "xinference",
    "xai",
    "together_ai",
    "fireworks_ai",
    "empower",
    "friendliai",
    "azure_ai",
    "github",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]
openai_text_completion_compatible_providers: List = (
    [  # providers that support `/v1/completions`
        "together_ai",
        "fireworks_ai",
        "hosted_vllm",
    ]
)
_openai_like_providers: List = [
    "predibase",
    "databricks",
    "watsonx",
]  # private helper. Similar to openai, but these require custom auth / endpoint handling, so the openai sdk can't be used
# well-supported replicate llms
replicate_models: List = [
    # llama replicate supported LLMs
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan T-5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]

clarifai_models: List = [
    "clarifai/meta.Llama-3.Llama-3-8B-Instruct",
    "clarifai/gcp.generate.gemma-1_1-7b-it",
    "clarifai/mistralai.completion.mixtral-8x22B",
    "clarifai/cohere.generate.command-r-plus",
    "clarifai/databricks.drbx.dbrx-instruct",
    "clarifai/mistralai.completion.mistral-large",
    "clarifai/mistralai.completion.mistral-medium",
    "clarifai/mistralai.completion.mistral-small",
    "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1",
    "clarifai/gcp.generate.gemma-2b-it",
    "clarifai/gcp.generate.gemma-7b-it",
    "clarifai/deci.decilm.deciLM-7B-instruct",
    "clarifai/mistralai.completion.mistral-7B-Instruct",
    "clarifai/gcp.generate.gemini-pro",
    "clarifai/anthropic.completion.claude-v1",
    "clarifai/anthropic.completion.claude-instant-1_2",
    "clarifai/anthropic.completion.claude-instant",
    "clarifai/anthropic.completion.claude-v2",
    "clarifai/anthropic.completion.claude-2_1",
    "clarifai/meta.Llama-2.codeLlama-70b-Python",
    "clarifai/meta.Llama-2.codeLlama-70b-Instruct",
    "clarifai/openai.completion.gpt-3_5-turbo-instruct",
    "clarifai/meta.Llama-2.llama2-7b-chat",
    "clarifai/meta.Llama-2.llama2-13b-chat",
    "clarifai/meta.Llama-2.llama2-70b-chat",
    "clarifai/openai.chat-completion.gpt-4-turbo",
    "clarifai/microsoft.text-generation.phi-2",
    "clarifai/meta.Llama-2.llama2-7b-chat-vllm",
    "clarifai/upstage.solar.solar-10_7b-instruct",
    "clarifai/openchat.openchat.openchat-3_5-1210",
    "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B",
    "clarifai/gcp.generate.text-bison",
    "clarifai/meta.Llama-2.llamaGuard-7b",
    "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2",
    "clarifai/openai.chat-completion.GPT-4",
    "clarifai/openai.chat-completion.GPT-3_5-turbo",
    "clarifai/ai21.complete.Jurassic2-Grande",
    "clarifai/ai21.complete.Jurassic2-Grande-Instruct",
    "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct",
    "clarifai/ai21.complete.Jurassic2-Jumbo",
    "clarifai/ai21.complete.Jurassic2-Large",
    "clarifai/cohere.generate.cohere-generate-command",
    "clarifai/wizardlm.generate.wizardCoder-Python-34B",
    "clarifai/wizardlm.generate.wizardLM-70B",
    "clarifai/tiiuae.falcon.falcon-40b-instruct",
    "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat",
    "clarifai/gcp.generate.code-gecko",
    "clarifai/gcp.generate.code-bison",
    "clarifai/mistralai.completion.mistral-7B-OpenOrca",
    "clarifai/mistralai.completion.openHermes-2-mistral-7B",
    "clarifai/wizardlm.generate.wizardLM-13B",
    "clarifai/huggingface-research.zephyr.zephyr-7B-alpha",
    "clarifai/wizardlm.generate.wizardCoder-15B",
    "clarifai/microsoft.text-generation.phi-1_5",
    "clarifai/databricks.Dolly-v2.dolly-v2-12b",
    "clarifai/bigcode.code.StarCoder",
    "clarifai/salesforce.xgen.xgen-7b-8k-instruct",
    "clarifai/mosaicml.mpt.mpt-7b-instruct",
    "clarifai/anthropic.completion.claude-3-opus",
    "clarifai/anthropic.completion.claude-3-sonnet",
    "clarifai/gcp.generate.gemini-1_5-pro",
    "clarifai/gcp.generate.imagen-2",
    "clarifai/salesforce.blip.general-english-image-caption-blip-2",
]


huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]  # these have been tested extensively. But by default, all text2text-generation and text-generation models are supported by liteLLM - https://docs.litellm.ai/docs/providers
empower_models = [
    "empower/empower-functions",
    "empower/empower-functions-small",
]

together_ai_models: List = [
    # llama llms - chat
    "togethercomputer/llama-2-70b-chat",
    # llama llms - language / instruct
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # falcon llms
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # alpaca
    "togethercomputer/alpaca-7b",
    # chat llms
    "HuggingFaceH4/starchat-alpha",
    # code llms
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # language llms
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]  # supports all together ai models; just pass in the model id, e.g. completion(model="together_computer/replit_code_3b",...)


baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]


open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]


OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"]
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60  # 1 minute
RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"  # default tool name used when converting response format to tool call
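The _openai_like_providers comment above captures the split these lists encode: OpenAI-compatible providers can reuse the OpenAI SDK against a different base URL, while "openai-like" ones need custom auth / endpoint handling. A hedged sketch of that routing decision, with trimmed stand-ins for the lists (pick_client_path is hypothetical, not litellm's dispatch code):

# trimmed stand-ins for the full lists defined in the diff above
openai_compatible_providers = ["groq", "mistral", "deepseek"]
_openai_like_providers = ["predibase", "databricks", "watsonx"]


def pick_client_path(provider: str) -> str:
    if provider in openai_compatible_providers:
        return "openai_sdk"  # point the standard OpenAI client at the provider's base_url
    if provider in _openai_like_providers:
        return "custom_http_handler"  # same wire format, but custom auth/endpoints rule out the SDK
    return "provider_specific_integration"


print(pick_client_path("groq"))  # -> openai_sdk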
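And RESPONSE_FORMAT_TOOL_NAME names the synthetic tool used when a JSON response_format is lowered to a forced tool call for providers without native support. A minimal sketch of that conversion, assuming the OpenAI-style json_schema request shape (the helper is hypothetical):

RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"


def response_format_to_tool_call(response_format: dict) -> dict:
    schema = response_format.get("json_schema", {}).get("schema", {})
    tool = {
        "type": "function",
        "function": {"name": RESPONSE_FORMAT_TOOL_NAME, "parameters": schema},
    }
    # force the model to call the synthetic tool; its arguments become the JSON output
    return {
        "tools": [tool],
        "tool_choice": {"type": "function", "function": {"name": RESPONSE_FORMAT_TOOL_NAME}},
    }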