Merge branch 'main' into litellm_bedrock_command_r_support
Commit 1d651c6049
82 changed files with 3661 additions and 605 deletions
187
cookbook/liteLLM_clarifai_Demo.ipynb
vendored
Normal file
|
@ -0,0 +1,187 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LiteLLM Clarifai \n",
|
||||
"This notebook walks you through on how to use liteLLM integration of Clarifai and call LLM model from clarifai with response in openAI output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pre-Requisites"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#install necessary packages\n",
|
||||
"!pip install litellm\n",
|
||||
"!pip install clarifai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To obtain Clarifai Personal Access Token follow the steps mentioned in the [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Set Clarifai Credentials\n",
|
||||
"import os\n",
|
||||
"os.environ[\"CLARIFAI_API_KEY\"]= \"YOUR_CLARIFAI_PAT\" # Clarifai PAT"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Mistral-large"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import litellm\n",
|
||||
"\n",
|
||||
"litellm.set_verbose=False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Mistral large response : ModelResponse(id='chatcmpl-6eed494d-7ae2-4870-b9c2-6a64d50a6151', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\"In the grand tapestry of time, where tales unfold,\\nLies the chronicle of ages, a sight to behold.\\nA tale of empires rising, and kings of old,\\nOf civilizations lost, and stories untold.\\n\\nOnce upon a yesterday, in a time so vast,\\nHumans took their first steps, casting shadows in the past.\\nFrom the cradle of mankind, a journey they embarked,\\nThrough stone and bronze and iron, their skills they sharpened and marked.\\n\\nEgyptians built pyramids, reaching for the skies,\\nWhile Greeks sought wisdom, truth, in philosophies that lie.\\nRoman legions marched, their empire to expand,\\nAnd in the East, the Silk Road joined the world, hand in hand.\\n\\nThe Middle Ages came, with knights in shining armor,\\nFeudal lords and serfs, a time of both clamor and calm order.\\nThen Renaissance bloomed, like a flower in the sun,\\nA rebirth of art and science, a new age had begun.\\n\\nAcross the vast oceans, explorers sailed with courage bold,\\nDiscovering new lands, stories of adventure, untold.\\nIndustrial Revolution churned, progress in its wake,\\nMachines and factories, a whole new world to make.\\n\\nTwo World Wars raged, a testament to man's strife,\\nYet from the ashes rose hope, a renewed will for life.\\nInto the modern era, technology took flight,\\nConnecting every corner, bathed in digital light.\\n\\nHistory, a symphony, a melody of time,\\nA testament to human will, resilience so sublime.\\nIn every page, a lesson, in every tale, a guide,\\nFor understanding our past, shapes our future's tide.\", role='assistant'))], created=1713896412, model='https://api.clarifai.com/v2/users/mistralai/apps/completion/models/mistral-large/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=13, completion_tokens=338, total_tokens=351))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response=completion(\n",
|
||||
" model=\"clarifai/mistralai.completion.mistral-large\",\n",
|
||||
" messages=messages,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(f\"Mistral large response : {response}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Claude-2.1 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Claude-2.1 response : ModelResponse(id='chatcmpl-d126c919-4db4-4aa3-ac8f-7edea41e0b93', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\" Here's a poem I wrote about history:\\n\\nThe Tides of Time\\n\\nThe tides of time ebb and flow,\\nCarrying stories of long ago.\\nFigures and events come into light,\\nShaping the future with all their might.\\n\\nKingdoms rise, empires fall, \\nLeaving traces that echo down every hall.\\nRevolutions bring change with a fiery glow,\\nToppling structures from long ago.\\n\\nExplorers traverse each ocean and land,\\nSeeking treasures they don't understand.\\nWhile artists and writers try to make their mark,\\nHoping their works shine bright in the dark.\\n\\nThe cycle repeats again and again,\\nAs humanity struggles to learn from its pain.\\nThough the players may change on history's stage,\\nThe themes stay the same from age to age.\\n\\nWar and peace, life and death,\\nLove and strife with every breath.\\nThe tides of time continue their dance,\\nAs we join in, by luck or by chance.\\n\\nSo we study the past to light the way forward, \\nHeeding warnings from stories told and heard.\\nThe future unfolds from this unending flow -\\nWhere the tides of time ultimately go.\", role='assistant'))], created=1713896579, model='https://api.clarifai.com/v2/users/anthropic/apps/completion/models/claude-2_1/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=12, completion_tokens=232, total_tokens=244))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response=completion(\n",
|
||||
" model=\"clarifai/anthropic.completion.claude-2_1\",\n",
|
||||
" messages=messages,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(f\"Claude-2.1 response : {response}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### OpenAI GPT-4 (Streaming)\n",
|
||||
"Though clarifai doesn't support streaming, still you can call stream and get the response in standard StreamResponse format of liteLLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"In the quiet corners of time's grand hall,\\nLies the tale of rise and fall.\\nFrom ancient ruins to modern sprawl,\\nHistory, the greatest story of them all.\\n\\nEmpires have risen, empires have decayed,\\nThrough the eons, memories have stayed.\\nIn the book of time, history is laid,\\nA tapestry of events, meticulously displayed.\\n\\nThe pyramids of Egypt, standing tall,\\nThe Roman Empire's mighty sprawl.\\nFrom Alexander's conquest, to the Berlin Wall,\\nHistory, a silent witness to it all.\\n\\nIn the shadow of the past we tread,\\nWhere once kings and prophets led.\\nTheir stories in our hearts are spread,\\nEchoes of their words, in our minds are read.\\n\\nBattles fought and victories won,\\nActs of courage under the sun.\\nTales of love, of deeds done,\\nIn history's grand book, they all run.\\n\\nHeroes born, legends made,\\nIn the annals of time, they'll never fade.\\nTheir triumphs and failures all displayed,\\nIn the eternal march of history's parade.\\n\\nThe ink of the past is forever dry,\\nBut its lessons, we cannot deny.\\nIn its stories, truths lie,\\nIn its wisdom, we rely.\\n\\nHistory, a mirror to our past,\\nA guide for the future vast.\\nThrough its lens, we're ever cast,\\nIn the drama of life, forever vast.\", role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n",
|
||||
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response = completion(\n",
|
||||
" model=\"clarifai/openai.chat-completion.GPT-4\",\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True,\n",
|
||||
" api_key = \"c75cc032415e45368be331fdd2c06db0\")\n",
|
||||
"\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -4,6 +4,12 @@ LiteLLM allows you to:
|
|||
* Send 1 completion call to many models: Return Fastest Response
|
||||
* Send 1 completion call to many models: Return All Responses
|
||||
|
||||
:::info
|
||||
|
||||
Trying to do batch completion on the LiteLLM Proxy? Go here: https://docs.litellm.ai/docs/proxy/user_keys#beta-batch-completions---pass-model-as-list
|
||||
|
||||
:::
|
||||
|
||||
## Send multiple completion calls to 1 model
|
||||
|
||||
In the batch_completion method, you provide a list of `messages` where each sub-list of messages is passed to `litellm.completion()`, allowing you to process multiple prompts efficiently in a single API call.
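A minimal sketch of this usage (assuming `gpt-3.5-turbo` and an `OPENAI_API_KEY` in the environment; the prompts are illustrative):

```python
import litellm

# Each inner list is one conversation; batch_completion fans them out to the same model
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",
    messages=[
        [{"role": "user", "content": "What is the capital of France?"}],
        [{"role": "user", "content": "Write a haiku about the sea."}],
    ],
)

for r in responses:
    print(r.choices[0].message.content)
```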
|
||||
|
|
|
@ -136,6 +136,7 @@ response = completion(
|
|||
"existing_trace_id": "trace-id22",
|
||||
"trace_metadata": {"key": "updated_trace_value"}, # The new value to use for the langfuse Trace Metadata
|
||||
"update_trace_keys": ["input", "output", "trace_metadata"], # Updates the trace input & output to be this generations input & output also updates the Trace Metadata to match the passed in value
|
||||
"debug_langfuse": True, # Will log the exact metadata sent to litellm for the trace/generation as `metadata_passed_to_litellm`
|
||||
},
|
||||
)
|
||||
|
||||
|
|
177
docs/my-website/docs/providers/clarifai.md
Normal file
|
@ -0,0 +1,177 @@
|
|||
|
||||
# Clarifai
|
||||
Anthropic, OpenAI, Mistral, Llama, and Gemini LLMs are supported on Clarifai.
|
||||
|
||||
## Pre-Requisites
|
||||
|
||||
`pip install clarifai`
|
||||
|
||||
`pip install litellm`
|
||||
|
||||
## Required Environment Variables
|
||||
To obtain your Clarifai Personal Access Token, follow this [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/). Optionally, the PAT can also be passed to the `completion` function.
|
||||
|
||||
```python
|
||||
os.environ["CALRIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
|
||||
```
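As an alternative sketch, the PAT can be passed per call via the `api_key` argument (the model name here is one of the Clarifai models listed below):

```python
from litellm import completion

# Pass the Clarifai PAT directly instead of relying on the environment variable
response = completion(
    model="clarifai/mistralai.completion.mistral-large",
    messages=[{"role": "user", "content": "Tell me a joke about physics?"}],
    api_key="YOUR_CLARIFAI_PAT",
)
```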
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
import os
|
||||
from litellm import completion
|
||||
|
||||
os.environ["CLARIFAI_API_KEY"] = ""
|
||||
|
||||
response = completion(
|
||||
model="clarifai/mistralai.completion.mistral-large",
|
||||
messages=[{ "content": "Tell me a joke about physics?","role": "user"}]
|
||||
)
|
||||
```
|
||||
|
||||
**Output**
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-572701ee-9ab2-411c-ac75-46c1ba18e781",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 1,
|
||||
"message": {
|
||||
"content": "Sure, here's a physics joke for you:\n\nWhy can't you trust an atom?\n\nBecause they make up everything!",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1714410197,
|
||||
"model": "https://api.clarifai.com/v2/users/mistralai/apps/completion/models/mistral-large/outputs",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"prompt_tokens": 14,
|
||||
"completion_tokens": 24,
|
||||
"total_tokens": 38
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Clarifai models
|
||||
liteLLM supports non-streaming requests to all models on the [Clarifai Community](https://clarifai.com/explore/models?filterData=%5B%7B%22field%22%3A%22use_cases%22%2C%22value%22%3A%5B%22llm%22%5D%7D%5D&page=1&perPage=24).
|
||||
|
||||
Example Usage - Note: liteLLM supports all models deployed on Clarifai
|
||||
|
||||
## Llama LLMs
|
||||
| Model Name | Function Call |
|
||||
|---------------------------|---------------------------------|
|
||||
| clarifai/meta.Llama-2.llama2-7b-chat | `completion('clarifai/meta.Llama-2.llama2-7b-chat', messages)` |
| clarifai/meta.Llama-2.llama2-13b-chat | `completion('clarifai/meta.Llama-2.llama2-13b-chat', messages)` |
|
||||
| clarifai/meta.Llama-2.llama2-70b-chat | `completion('clarifai/meta.Llama-2.llama2-70b-chat', messages)` |
|
||||
| clarifai/meta.Llama-2.codeLlama-70b-Python | `completion('clarifai/meta.Llama-2.codeLlama-70b-Python', messages)`|
|
||||
| clarifai/meta.Llama-2.codeLlama-70b-Instruct | `completion('clarifai/meta.Llama-2.codeLlama-70b-Instruct', messages)` |
|
||||
|
||||
## Mistral LLMs
|
||||
| Model Name | Function Call |
|
||||
|---------------------------------------------|------------------------------------------------------------------------|
|
||||
| clarifai/mistralai.completion.mixtral-8x22B | `completion('clarifai/mistralai.completion.mixtral-8x22B', messages)` |
|
||||
| clarifai/mistralai.completion.mistral-large | `completion('clarifai/mistralai.completion.mistral-large', messages)` |
|
||||
| clarifai/mistralai.completion.mistral-medium | `completion('clarifai/mistralai.completion.mistral-medium', messages)` |
|
||||
| clarifai/mistralai.completion.mistral-small | `completion('clarifai/mistralai.completion.mistral-small', messages)` |
|
||||
| clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1 | `completion('clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1', messages)` |
|
||||
| clarifai/mistralai.completion.mistral-7B-OpenOrca | `completion('clarifai/mistralai.completion.mistral-7B-OpenOrca', messages)` |
|
||||
| clarifai/mistralai.completion.openHermes-2-mistral-7B | `completion('clarifai/mistralai.completion.openHermes-2-mistral-7B', messages)` |
|
||||
|
||||
|
||||
## Jurassic LLMs
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/ai21.complete.Jurassic2-Grande | `completion('clarifai/ai21.complete.Jurassic2-Grande', messages)` |
|
||||
| clarifai/ai21.complete.Jurassic2-Grande-Instruct | `completion('clarifai/ai21.complete.Jurassic2-Grande-Instruct', messages)` |
|
||||
| clarifai/ai21.complete.Jurassic2-Jumbo-Instruct | `completion('clarifai/ai21.complete.Jurassic2-Jumbo-Instruct', messages)` |
|
||||
| clarifai/ai21.complete.Jurassic2-Jumbo | `completion('clarifai/ai21.complete.Jurassic2-Jumbo', messages)` |
|
||||
| clarifai/ai21.complete.Jurassic2-Large | `completion('clarifai/ai21.complete.Jurassic2-Large', messages)` |
|
||||
|
||||
## Wizard LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/wizardlm.generate.wizardCoder-Python-34B | `completion('clarifai/wizardlm.generate.wizardCoder-Python-34B', messages)` |
|
||||
| clarifai/wizardlm.generate.wizardLM-70B | `completion('clarifai/wizardlm.generate.wizardLM-70B', messages)` |
|
||||
| clarifai/wizardlm.generate.wizardLM-13B | `completion('clarifai/wizardlm.generate.wizardLM-13B', messages)` |
|
||||
| clarifai/wizardlm.generate.wizardCoder-15B | `completion('clarifai/wizardlm.generate.wizardCoder-15B', messages)` |
|
||||
|
||||
## Anthropic models
|
||||
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/anthropic.completion.claude-v1 | `completion('clarifai/anthropic.completion.claude-v1', messages)` |
|
||||
| clarifai/anthropic.completion.claude-instant-1_2 | `completion('clarifai/anthropic.completion.claude-instant-1_2', messages)` |
|
||||
| clarifai/anthropic.completion.claude-instant | `completion('clarifai/anthropic.completion.claude-instant', messages)` |
|
||||
| clarifai/anthropic.completion.claude-v2 | `completion('clarifai/anthropic.completion.claude-v2', messages)` |
|
||||
| clarifai/anthropic.completion.claude-2_1 | `completion('clarifai/anthropic.completion.claude-2_1', messages)` |
|
||||
| clarifai/anthropic.completion.claude-3-opus | `completion('clarifai/anthropic.completion.claude-3-opus', messages)` |
|
||||
| clarifai/anthropic.completion.claude-3-sonnet | `completion('clarifai/anthropic.completion.claude-3-sonnet', messages)` |
|
||||
|
||||
## OpenAI GPT LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/openai.chat-completion.GPT-4 | `completion('clarifai/openai.chat-completion.GPT-4', messages)` |
|
||||
| clarifai/openai.chat-completion.GPT-3_5-turbo | `completion('clarifai/openai.chat-completion.GPT-3_5-turbo', messages)` |
|
||||
| clarifai/openai.chat-completion.gpt-4-turbo | `completion('clarifai/openai.chat-completion.gpt-4-turbo', messages)` |
|
||||
| clarifai/openai.completion.gpt-3_5-turbo-instruct | `completion('clarifai/openai.completion.gpt-3_5-turbo-instruct', messages)` |
|
||||
|
||||
## GCP LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/gcp.generate.gemini-1_5-pro | `completion('clarifai/gcp.generate.gemini-1_5-pro', messages)` |
|
||||
| clarifai/gcp.generate.imagen-2 | `completion('clarifai/gcp.generate.imagen-2', messages)` |
|
||||
| clarifai/gcp.generate.code-gecko | `completion('clarifai/gcp.generate.code-gecko', messages)` |
|
||||
| clarifai/gcp.generate.code-bison | `completion('clarifai/gcp.generate.code-bison', messages)` |
|
||||
| clarifai/gcp.generate.text-bison | `completion('clarifai/gcp.generate.text-bison', messages)` |
|
||||
| clarifai/gcp.generate.gemma-2b-it | `completion('clarifai/gcp.generate.gemma-2b-it', messages)` |
|
||||
| clarifai/gcp.generate.gemma-7b-it | `completion('clarifai/gcp.generate.gemma-7b-it', messages)` |
|
||||
| clarifai/gcp.generate.gemini-pro | `completion('clarifai/gcp.generate.gemini-pro', messages)` |
|
||||
| clarifai/gcp.generate.gemma-1_1-7b-it | `completion('clarifai/gcp.generate.gemma-1_1-7b-it', messages)` |
|
||||
|
||||
## Cohere LLMs
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/cohere.generate.cohere-generate-command | `completion('clarifai/cohere.generate.cohere-generate-command', messages)` |
|
||||
| clarifai/cohere.generate.command-r-plus | `completion('clarifai/cohere.generate.command-r-plus', messages)` |
|
||||
|
||||
## Databricks LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|---------------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/databricks.drbx.dbrx-instruct | `completion('clarifai/databricks.drbx.dbrx-instruct', messages)` |
|
||||
| clarifai/databricks.Dolly-v2.dolly-v2-12b | `completion('clarifai/databricks.Dolly-v2.dolly-v2-12b', messages)`|
|
||||
|
||||
## Microsoft LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|---------------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/microsoft.text-generation.phi-2 | `completion('clarifai/microsoft.text-generation.phi-2', messages)` |
|
||||
| clarifai/microsoft.text-generation.phi-1_5 | `completion('clarifai/microsoft.text-generation.phi-1_5', messages)`|
|
||||
|
||||
## Salesforce models
|
||||
|
||||
| Model Name | Function Call |
|
||||
|-----------------------------------------------------------|-------------------------------------------------------------------------------|
|
||||
| clarifai/salesforce.blip.general-english-image-caption-blip-2 | `completion('clarifai/salesforce.blip.general-english-image-caption-blip-2', messages)` |
|
||||
| clarifai/salesforce.xgen.xgen-7b-8k-instruct | `completion('clarifai/salesforce.xgen.xgen-7b-8k-instruct', messages)` |
|
||||
|
||||
|
||||
## Other Top performing LLMs
|
||||
|
||||
| Model Name | Function Call |
|
||||
|---------------------------------------------------|---------------------------------------------------------------------|
|
||||
| clarifai/deci.decilm.deciLM-7B-instruct | `completion('clarifai/deci.decilm.deciLM-7B-instruct', messages)` |
|
||||
| clarifai/upstage.solar.solar-10_7b-instruct | `completion('clarifai/upstage.solar.solar-10_7b-instruct', messages)` |
|
||||
| clarifai/openchat.openchat.openchat-3_5-1210 | `completion('clarifai/openchat.openchat.openchat-3_5-1210', messages)` |
|
||||
| clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B | `completion('clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B', messages)` |
|
||||
| clarifai/fblgit.una-cybertron.una-cybertron-7b-v2 | `completion('clarifai/fblgit.una-cybertron.una-cybertron-7b-v2', messages)` |
|
||||
| clarifai/tiiuae.falcon.falcon-40b-instruct | `completion('clarifai/tiiuae.falcon.falcon-40b-instruct', messages)` |
|
||||
| clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat | `completion('clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat', messages)` |
|
||||
| clarifai/bigcode.code.StarCoder | `completion('clarifai/bigcode.code.StarCoder', messages)` |
|
||||
| clarifai/mosaicml.mpt.mpt-7b-instruct | `completion('clarifai/mosaicml.mpt.mpt-7b-instruct', messages)` |
|
|
@ -3,7 +3,7 @@ import Tabs from '@theme/Tabs';
|
|||
import TabItem from '@theme/TabItem';
|
||||
|
||||
|
||||
# 🔎 Logging - Custom Callbacks, DataDog, Langfuse, s3 Bucket, Sentry, OpenTelemetry, Athina
|
||||
# 🔎 Logging - Custom Callbacks, DataDog, Langfuse, s3 Bucket, Sentry, OpenTelemetry, Athina, Azure Content-Safety
|
||||
|
||||
Log Proxy Input, Output, and Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB, and s3 Bucket
|
||||
|
||||
|
@ -17,6 +17,7 @@ Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTeleme
|
|||
- [Logging to Sentry](#logging-proxy-inputoutput---sentry)
|
||||
- [Logging to Traceloop (OpenTelemetry)](#logging-proxy-inputoutput-traceloop-opentelemetry)
|
||||
- [Logging to Athina](#logging-proxy-inputoutput-athina)
|
||||
- [(BETA) Moderation with Azure Content-Safety](#moderation-with-azure-content-safety)
|
||||
|
||||
## Custom Callback Class [Async]
|
||||
Use this when you want to run custom callbacks in `python`
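A rough sketch of such a handler (assuming the `CustomLogger` base class from `litellm.integrations.custom_logger`; method names follow that interface):

```python
# custom_callbacks.py - sketch of an async custom callback for the proxy
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # kwargs carries request metadata; response_obj is the model response
        print(f"LLM call succeeded in {(end_time - start_time).total_seconds()}s")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print("LLM call failed")


# referenced from the proxy config, e.g. callbacks: custom_callbacks.proxy_handler_instance
proxy_handler_instance = MyCustomHandler()
```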
|
||||
|
@ -1037,3 +1038,86 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
]
|
||||
}'
|
||||
```
|
||||
|
||||
## (BETA) Moderation with Azure Content Safety
|
||||
|
||||
[Azure Content-Safety](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety) is a Microsoft Azure service that provides content moderation APIs to detect potential offensive, harmful, or risky content in text.
|
||||
|
||||
We will use the `--config` to set `litellm.success_callback = ["azure_content_safety"]`. This will moderate all LLM calls using Azure Content Safety.
|
||||
|
||||
**Step 0** Deploy Azure Content Safety
|
||||
|
||||
Deploy an Azure Content-Safety instance from the Azure Portal and get the `endpoint` and `key`.
|
||||
|
||||
**Step 1** Set the Azure Content Safety API key
|
||||
|
||||
```shell
|
||||
export AZURE_CONTENT_SAFETY_KEY="<your-azure-content-safety-key>"
|
||||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
litellm_settings:
|
||||
callbacks: ["azure_content_safety"]
|
||||
azure_content_safety_params:
|
||||
endpoint: "<your-azure-content-safety-endpoint>"
|
||||
key: "os.environ/AZURE_CONTENT_SAFETY_KEY"
|
||||
```
|
||||
|
||||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
```shell
|
||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data ' {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hi, how are you?"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
An HTTP 400 error is returned if content is detected with a severity greater than the threshold set in the `config.yaml`.
The response details will describe:
- The `source`: input text or LLM-generated text
- The `category`: the category of the content that triggered the moderation
- The `severity`: the severity, from 0 to 10
|
||||
|
||||
**Step 4**: Customizing Azure Content Safety Thresholds
|
||||
|
||||
You can customize the thresholds for each category by setting the `thresholds` in the `config.yaml`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
litellm_settings:
|
||||
callbacks: ["azure_content_safety"]
|
||||
azure_content_safety_params:
|
||||
endpoint: "<your-azure-content-safety-endpoint>"
|
||||
key: "os.environ/AZURE_CONTENT_SAFETY_KEY"
|
||||
thresholds:
|
||||
Hate: 6
|
||||
SelfHarm: 8
|
||||
Sexual: 6
|
||||
Violence: 4
|
||||
```
|
||||
|
||||
:::info
|
||||
`thresholds` are not required by default, but you can tune the values to your needs.
The default value is `4` for all categories.
|
||||
:::
|
|
@ -151,7 +151,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
}'
|
||||
```
|
||||
|
||||
## Advanced - Context Window Fallbacks
|
||||
## Advanced - Context Window Fallbacks (Pre-Call Checks + Fallbacks)
|
||||
|
||||
**Before the call is made**, check if the call is within the model's context window with **`enable_pre_call_checks: true`**.
|
||||
|
||||
|
@ -232,16 +232,16 @@ model_list:
|
|||
- model_name: gpt-3.5-turbo-small
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-07-01-preview"
|
||||
model_info:
|
||||
base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-07-01-preview"
|
||||
model_info:
|
||||
base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL
|
||||
|
||||
- model_name: gpt-3.5-turbo-large
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo-1106
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
model: gpt-3.5-turbo-1106
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
- model_name: claude-opus
|
||||
litellm_params:
|
||||
|
@ -287,6 +287,69 @@ print(response)
|
|||
</Tabs>
|
||||
|
||||
|
||||
## Advanced - EU-Region Filtering (Pre-Call Checks)
|
||||
|
||||
**Before the call is made**, filter out deployments outside the allowed region with **`enable_pre_call_checks: true`**.
|
||||
|
||||
Set the `region_name` of the deployment.
|
||||
|
||||
**Note:** LiteLLM can automatically infer region_name for Vertex AI, Bedrock, and IBM WatsonxAI based on your litellm params. For Azure, set `litellm.enable_preview_features = True`.
|
||||
|
||||
**1. Set Config**
|
||||
|
||||
```yaml
|
||||
router_settings:
|
||||
enable_pre_call_checks: true # 1. Enable pre-call checks
|
||||
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: azure/chatgpt-v-2
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-07-01-preview"
|
||||
region_name: "eu" # 👈 SET EU-REGION
|
||||
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo-1106
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
- model_name: gemini-pro
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-pro-1.5
|
||||
vertex_project: adroit-crow-1234
|
||||
vertex_location: us-east1 # 👈 AUTOMATICALLY INFERS 'region_name'
|
||||
```
|
||||
|
||||
**2. Start proxy**
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
|
||||
# RUNNING on http://0.0.0.0:4000
|
||||
```
|
||||
|
||||
**3. Test it!**
|
||||
|
||||
```python
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="anything",
|
||||
base_url="http://0.0.0.0:4000"
|
||||
)
|
||||
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.with_raw_response.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages = [{"role": "user", "content": "Who was Alexander?"}]
|
||||
)
|
||||
|
||||
print(response)
|
||||
|
||||
print(f"response.headers.get('x-litellm-model-api-base')")
|
||||
```
|
||||
|
||||
## Advanced - Custom Timeouts, Stream Timeouts - Per Model
|
||||
For each model you can set `timeout` & `stream_timeout` under `litellm_params`
|
||||
```yaml
|
||||
|
|
|
@ -365,6 +365,90 @@ curl --location 'http://0.0.0.0:4000/moderations' \
|
|||
|
||||
## Advanced
|
||||
|
||||
### (BETA) Batch Completions - pass `model` as List
|
||||
|
||||
Use this when you want to send 1 request to N Models
|
||||
|
||||
#### Expected Request Format
|
||||
|
||||
This same request will be sent to the following model groups on the [litellm proxy config.yaml](https://docs.litellm.ai/docs/proxy/configs) (a sample config sketch follows the list)
|
||||
- `model_name="llama3"`
|
||||
- `model_name="gpt-3.5-turbo"`
|
||||
|
||||
```shell
|
||||
curl --location 'http://localhost:4000/chat/completions' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"model": ["llama3", "gpt-3.5-turbo"],
|
||||
"max_tokens": 10,
|
||||
"user": "litellm2",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "is litellm getting better"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
|
||||
#### Expected Response Format
|
||||
|
||||
Get a list of responses when `model` is passed as a list
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "chatcmpl-3dbd5dd8-7c82-4ca3-bf1f-7c26f497cf2b",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": "The Elder Scrolls IV: Oblivion!\n\nReleased",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1715459876,
|
||||
"model": "groq/llama3-8b-8192",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "fp_179b0f92c9",
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 12,
|
||||
"total_tokens": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "chatcmpl-9NnldUfFLmVquFHSX4yAtjCw8PGei",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": "TES4 could refer to The Elder Scrolls IV:",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1715459877,
|
||||
"model": "gpt-3.5-turbo-0125",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 10,
|
||||
"prompt_tokens": 9,
|
||||
"total_tokens": 19
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
### Pass User LLM API Keys, Fallbacks
|
||||
Allow your end-users to pass their model list, api base, OpenAI API key (any LiteLLM supported provider) to make requests
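A rough client-side sketch (assuming the proxy reads a per-request `api_key` from the request body via the OpenAI client's `extra_body`):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Forward the end-user's own provider key for just this request (illustrative key name)
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={"api_key": "end-users-provider-api-key"},
)
print(response)
```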
|
||||
|
||||
|
|
|
@ -879,13 +879,11 @@ router = Router(model_list: Optional[list] = None,
|
|||
cache_responses=True)
|
||||
```
|
||||
|
||||
## Pre-Call Checks (Context Window)
|
||||
## Pre-Call Checks (Context Window, EU-Regions)
|
||||
|
||||
Enable pre-call checks to filter out:
|
||||
1. deployments with context window limit < messages for a call.
|
||||
2. deployments that have exceeded rate limits when making concurrent calls. (eg. `asyncio.gather(*[
|
||||
router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages
|
||||
])`)
|
||||
2. deployments outside of eu-region
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
@ -900,10 +898,14 @@ router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set t
|
|||
|
||||
**2. Set Model List**
|
||||
|
||||
For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.
|
||||
For context window checks on Azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="same-group" label="Same Group">
|
||||
For 'eu-region' filtering, set the `region_name` of the deployment.
|
||||
|
||||
**Note:** We automatically infer region_name for Vertex AI, Bedrock, and IBM WatsonxAI based on your litellm params. For Azure, set `litellm.enable_preview_features = True`.
|
||||
|
||||
|
||||
[**See Code**](https://github.com/BerriAI/litellm/blob/d33e49411d6503cb634f9652873160cd534dec96/litellm/router.py#L2958)
|
||||
|
||||
```python
|
||||
model_list = [
|
||||
|
@ -914,10 +916,9 @@ model_list = [
|
|||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
},
|
||||
"model_info": {
|
||||
"region_name": "eu" # 👈 SET 'EU' REGION NAME
|
||||
"base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo", # model group name
|
||||
|
@ -926,54 +927,26 @@ model_list = [
|
|||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "gemini-pro",
|
||||
"litellm_params: {
|
||||
"model": "vertex_ai/gemini-pro-1.5",
|
||||
"vertex_project": "adroit-crow-1234",
|
||||
"vertex_location": "us-east1" # 👈 AUTOMATICALLY INFERS 'region_name'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
router = Router(model_list=model_list, enable_pre_call_checks=True)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="different-group" label="Context Window Fallbacks (Different Groups)">
|
||||
|
||||
```python
|
||||
model_list = [
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo-small", # model group name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
},
|
||||
"model_info": {
|
||||
"base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL
|
||||
}
|
||||
},
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo-large", # model group name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "claude-opus",
|
||||
"litellm_params": { call
|
||||
"model": "claude-3-opus-20240229",
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
router = Router(model_list=model_list, enable_pre_call_checks=True, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}])
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
</Tabs>
|
||||
|
||||
**3. Test it!**
|
||||
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="context-window-check" label="Context Window Check">
|
||||
|
||||
```python
|
||||
"""
|
||||
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
|
||||
|
@ -983,7 +956,6 @@ router = Router(model_list=model_list, enable_pre_call_checks=True, context_wind
|
|||
from litellm import Router
|
||||
import os
|
||||
|
||||
try:
|
||||
model_list = [
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo", # model group name
|
||||
|
@ -992,6 +964,7 @@ model_list = [
|
|||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"base_model": "azure/gpt-35-turbo",
|
||||
},
|
||||
"model_info": {
|
||||
"base_model": "azure/gpt-35-turbo",
|
||||
|
@ -1021,6 +994,59 @@ response = router.completion(
|
|||
print(f"response: {response}")
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="eu-region-check" label="EU Region Check">
|
||||
|
||||
```python
|
||||
"""
|
||||
- Give 2 gpt-3.5-turbo deployments, in eu + non-eu regions
|
||||
- Make a call
|
||||
- Assert it picks the eu-region model
|
||||
"""
|
||||
|
||||
from litellm import Router
|
||||
import os
|
||||
|
||||
model_list = [
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo", # model group name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"region_name": "eu"
|
||||
},
|
||||
"model_info": {
|
||||
"id": "1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo", # model group name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
},
|
||||
"model_info": {
|
||||
"id": "2"
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
router = Router(model_list=model_list, enable_pre_call_checks=True)
|
||||
|
||||
response = router.completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": "Who was Alexander?"}],
|
||||
)
|
||||
|
||||
print(f"response: {response}")
|
||||
|
||||
print(f"response id: {response._hidden_params['model_id']}")
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="Proxy">
|
||||
|
||||
:::info
|
||||
|
|
|
@ -71,6 +71,7 @@ azure_key: Optional[str] = None
|
|||
anthropic_key: Optional[str] = None
|
||||
replicate_key: Optional[str] = None
|
||||
cohere_key: Optional[str] = None
|
||||
clarifai_key: Optional[str] = None
|
||||
maritalk_key: Optional[str] = None
|
||||
ai21_key: Optional[str] = None
|
||||
ollama_key: Optional[str] = None
|
||||
|
@ -101,6 +102,9 @@ blocked_user_list: Optional[Union[str, List]] = None
|
|||
banned_keywords_list: Optional[Union[str, List]] = None
|
||||
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
|
||||
##################
|
||||
### PREVIEW FEATURES ###
|
||||
enable_preview_features: bool = False
|
||||
##################
|
||||
logging: bool = True
|
||||
caching: bool = (
|
||||
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
|
@ -401,6 +405,73 @@ replicate_models: List = [
|
|||
"replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
|
||||
]
|
||||
|
||||
clarifai_models: List = [
|
||||
'clarifai/meta.Llama-3.Llama-3-8B-Instruct',
|
||||
'clarifai/gcp.generate.gemma-1_1-7b-it',
|
||||
'clarifai/mistralai.completion.mixtral-8x22B',
|
||||
'clarifai/cohere.generate.command-r-plus',
|
||||
'clarifai/databricks.drbx.dbrx-instruct',
|
||||
'clarifai/mistralai.completion.mistral-large',
|
||||
'clarifai/mistralai.completion.mistral-medium',
|
||||
'clarifai/mistralai.completion.mistral-small',
|
||||
'clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1',
|
||||
'clarifai/gcp.generate.gemma-2b-it',
|
||||
'clarifai/gcp.generate.gemma-7b-it',
|
||||
'clarifai/deci.decilm.deciLM-7B-instruct',
|
||||
'clarifai/mistralai.completion.mistral-7B-Instruct',
|
||||
'clarifai/gcp.generate.gemini-pro',
|
||||
'clarifai/anthropic.completion.claude-v1',
|
||||
'clarifai/anthropic.completion.claude-instant-1_2',
|
||||
'clarifai/anthropic.completion.claude-instant',
|
||||
'clarifai/anthropic.completion.claude-v2',
|
||||
'clarifai/anthropic.completion.claude-2_1',
|
||||
'clarifai/meta.Llama-2.codeLlama-70b-Python',
|
||||
'clarifai/meta.Llama-2.codeLlama-70b-Instruct',
|
||||
'clarifai/openai.completion.gpt-3_5-turbo-instruct',
|
||||
'clarifai/meta.Llama-2.llama2-7b-chat',
|
||||
'clarifai/meta.Llama-2.llama2-13b-chat',
|
||||
'clarifai/meta.Llama-2.llama2-70b-chat',
|
||||
'clarifai/openai.chat-completion.gpt-4-turbo',
|
||||
'clarifai/microsoft.text-generation.phi-2',
|
||||
'clarifai/meta.Llama-2.llama2-7b-chat-vllm',
|
||||
'clarifai/upstage.solar.solar-10_7b-instruct',
|
||||
'clarifai/openchat.openchat.openchat-3_5-1210',
|
||||
'clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B',
|
||||
'clarifai/gcp.generate.text-bison',
|
||||
'clarifai/meta.Llama-2.llamaGuard-7b',
|
||||
'clarifai/fblgit.una-cybertron.una-cybertron-7b-v2',
|
||||
'clarifai/openai.chat-completion.GPT-4',
|
||||
'clarifai/openai.chat-completion.GPT-3_5-turbo',
|
||||
'clarifai/ai21.complete.Jurassic2-Grande',
|
||||
'clarifai/ai21.complete.Jurassic2-Grande-Instruct',
|
||||
'clarifai/ai21.complete.Jurassic2-Jumbo-Instruct',
|
||||
'clarifai/ai21.complete.Jurassic2-Jumbo',
|
||||
'clarifai/ai21.complete.Jurassic2-Large',
|
||||
'clarifai/cohere.generate.cohere-generate-command',
|
||||
'clarifai/wizardlm.generate.wizardCoder-Python-34B',
|
||||
'clarifai/wizardlm.generate.wizardLM-70B',
|
||||
'clarifai/tiiuae.falcon.falcon-40b-instruct',
|
||||
'clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat',
|
||||
'clarifai/gcp.generate.code-gecko',
|
||||
'clarifai/gcp.generate.code-bison',
|
||||
'clarifai/mistralai.completion.mistral-7B-OpenOrca',
|
||||
'clarifai/mistralai.completion.openHermes-2-mistral-7B',
|
||||
'clarifai/wizardlm.generate.wizardLM-13B',
|
||||
'clarifai/huggingface-research.zephyr.zephyr-7B-alpha',
|
||||
'clarifai/wizardlm.generate.wizardCoder-15B',
|
||||
'clarifai/microsoft.text-generation.phi-1_5',
|
||||
'clarifai/databricks.Dolly-v2.dolly-v2-12b',
|
||||
'clarifai/bigcode.code.StarCoder',
|
||||
'clarifai/salesforce.xgen.xgen-7b-8k-instruct',
|
||||
'clarifai/mosaicml.mpt.mpt-7b-instruct',
|
||||
'clarifai/anthropic.completion.claude-3-opus',
|
||||
'clarifai/anthropic.completion.claude-3-sonnet',
|
||||
'clarifai/gcp.generate.gemini-1_5-pro',
|
||||
'clarifai/gcp.generate.imagen-2',
|
||||
'clarifai/salesforce.blip.general-english-image-caption-blip-2',
|
||||
]
|
||||
|
||||
|
||||
huggingface_models: List = [
|
||||
"meta-llama/Llama-2-7b-hf",
|
||||
"meta-llama/Llama-2-7b-chat-hf",
|
||||
|
@ -506,6 +577,7 @@ provider_list: List = [
|
|||
"text-completion-openai",
|
||||
"cohere",
|
||||
"cohere_chat",
|
||||
"clarifai",
|
||||
"anthropic",
|
||||
"replicate",
|
||||
"huggingface",
|
||||
|
@ -656,6 +728,7 @@ from .llms.predibase import PredibaseConfig
|
|||
from .llms.anthropic_text import AnthropicTextConfig
|
||||
from .llms.replicate import ReplicateConfig
|
||||
from .llms.cohere import CohereConfig
|
||||
from .llms.clarifai import ClarifaiConfig
|
||||
from .llms.ai21 import AI21Config
|
||||
from .llms.together_ai import TogetherAIConfig
|
||||
from .llms.cloudflare import CloudflareConfig
|
||||
|
|
|
@ -9,25 +9,12 @@
|
|||
|
||||
## LiteLLM versions of the OpenAI Exception Types
|
||||
|
||||
from openai import (
|
||||
AuthenticationError,
|
||||
BadRequestError,
|
||||
NotFoundError,
|
||||
RateLimitError,
|
||||
APIStatusError,
|
||||
OpenAIError,
|
||||
APIError,
|
||||
APITimeoutError,
|
||||
APIConnectionError,
|
||||
APIResponseValidationError,
|
||||
UnprocessableEntityError,
|
||||
PermissionDeniedError,
|
||||
)
|
||||
import openai
|
||||
import httpx
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class AuthenticationError(AuthenticationError): # type: ignore
|
||||
class AuthenticationError(openai.AuthenticationError): # type: ignore
|
||||
def __init__(self, message, llm_provider, model, response: httpx.Response):
|
||||
self.status_code = 401
|
||||
self.message = message
|
||||
|
@ -39,7 +26,7 @@ class AuthenticationError(AuthenticationError): # type: ignore
|
|||
|
||||
|
||||
# raise when invalid models passed, example gpt-8
|
||||
class NotFoundError(NotFoundError): # type: ignore
|
||||
class NotFoundError(openai.NotFoundError): # type: ignore
|
||||
def __init__(self, message, model, llm_provider, response: httpx.Response):
|
||||
self.status_code = 404
|
||||
self.message = message
|
||||
|
@ -50,7 +37,7 @@ class NotFoundError(NotFoundError): # type: ignore
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class BadRequestError(BadRequestError): # type: ignore
|
||||
class BadRequestError(openai.BadRequestError): # type: ignore
|
||||
def __init__(
|
||||
self, message, model, llm_provider, response: Optional[httpx.Response] = None
|
||||
):
|
||||
|
@ -69,7 +56,7 @@ class BadRequestError(BadRequestError): # type: ignore
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class UnprocessableEntityError(UnprocessableEntityError): # type: ignore
|
||||
class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore
|
||||
def __init__(self, message, model, llm_provider, response: httpx.Response):
|
||||
self.status_code = 422
|
||||
self.message = message
|
||||
|
@ -80,7 +67,7 @@ class UnprocessableEntityError(UnprocessableEntityError): # type: ignore
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class Timeout(APITimeoutError): # type: ignore
|
||||
class Timeout(openai.APITimeoutError): # type: ignore
|
||||
def __init__(self, message, model, llm_provider):
|
||||
request = httpx.Request(method="POST", url="https://api.openai.com/v1")
|
||||
super().__init__(
|
||||
|
@ -96,7 +83,7 @@ class Timeout(APITimeoutError): # type: ignore
|
|||
return str(self.message)
|
||||
|
||||
|
||||
class PermissionDeniedError(PermissionDeniedError): # type:ignore
|
||||
class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore
|
||||
def __init__(self, message, llm_provider, model, response: httpx.Response):
|
||||
self.status_code = 403
|
||||
self.message = message
|
||||
|
@ -107,7 +94,7 @@ class PermissionDeniedError(PermissionDeniedError): # type:ignore
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class RateLimitError(RateLimitError): # type: ignore
|
||||
class RateLimitError(openai.RateLimitError): # type: ignore
|
||||
def __init__(self, message, llm_provider, model, response: httpx.Response):
|
||||
self.status_code = 429
|
||||
self.message = message
|
||||
|
@ -148,7 +135,7 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
class ServiceUnavailableError(APIStatusError): # type: ignore
|
||||
class ServiceUnavailableError(openai.APIStatusError): # type: ignore
|
||||
def __init__(self, message, llm_provider, model, response: httpx.Response):
|
||||
self.status_code = 503
|
||||
self.message = message
|
||||
|
@ -160,7 +147,7 @@ class ServiceUnavailableError(APIStatusError): # type: ignore
|
|||
|
||||
|
||||
# raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
|
||||
class APIError(APIError): # type: ignore
|
||||
class APIError(openai.APIError): # type: ignore
|
||||
def __init__(
|
||||
self, status_code, message, llm_provider, model, request: httpx.Request
|
||||
):
|
||||
|
@ -172,7 +159,7 @@ class APIError(APIError): # type: ignore
|
|||
|
||||
|
||||
# raised if an invalid request (not get, delete, put, post) is made
|
||||
class APIConnectionError(APIConnectionError): # type: ignore
|
||||
class APIConnectionError(openai.APIConnectionError): # type: ignore
|
||||
def __init__(self, message, llm_provider, model, request: httpx.Request):
|
||||
self.message = message
|
||||
self.llm_provider = llm_provider
|
||||
|
@ -182,7 +169,7 @@ class APIConnectionError(APIConnectionError): # type: ignore
|
|||
|
||||
|
||||
# raised if an invalid request (not get, delete, put, post) is made
|
||||
class APIResponseValidationError(APIResponseValidationError): # type: ignore
|
||||
class APIResponseValidationError(openai.APIResponseValidationError): # type: ignore
|
||||
def __init__(self, message, llm_provider, model):
|
||||
self.message = message
|
||||
self.llm_provider = llm_provider
|
||||
|
@ -192,7 +179,7 @@ class APIResponseValidationError(APIResponseValidationError): # type: ignore
|
|||
super().__init__(response=response, body=None, message=message)
|
||||
|
||||
|
||||
class OpenAIError(OpenAIError): # type: ignore
|
||||
class OpenAIError(openai.OpenAIError): # type: ignore
|
||||
def __init__(self, original_exception):
|
||||
self.status_code = original_exception.http_status
|
||||
super().__init__(
|
||||
|
@ -214,7 +201,7 @@ class BudgetExceededError(Exception):
|
|||
|
||||
|
||||
## DEPRECATED ##
|
||||
class InvalidRequestError(BadRequestError): # type: ignore
|
||||
class InvalidRequestError(openai.BadRequestError): # type: ignore
|
||||
def __init__(self, message, model, llm_provider):
|
||||
self.status_code = 400
|
||||
self.message = message
|
||||
|
|
|
@ -321,6 +321,7 @@ class LangFuseLogger:
|
|||
trace_id = clean_metadata.pop("trace_id", None)
|
||||
existing_trace_id = clean_metadata.pop("existing_trace_id", None)
|
||||
update_trace_keys = clean_metadata.pop("update_trace_keys", [])
|
||||
debug = clean_metadata.pop("debug_langfuse", None)
|
||||
|
||||
if trace_name is None and existing_trace_id is None:
|
||||
# just log `litellm-{call_type}` as the trace name
|
||||
|
@ -374,6 +375,13 @@ class LangFuseLogger:
|
|||
else:
|
||||
trace_params["output"] = output
|
||||
|
||||
if debug == True or (isinstance(debug, str) and debug.lower() == "true"):
|
||||
if "metadata" in trace_params:
|
||||
# log the raw_metadata in the trace
|
||||
trace_params["metadata"]["metadata_passed_to_litellm"] = metadata
|
||||
else:
|
||||
trace_params["metadata"] = {"metadata_passed_to_litellm": metadata}
|
||||
|
||||
cost = kwargs.get("response_cost", None)
|
||||
print_verbose(f"trace: {cost}")
|
||||
|
||||
|
@ -424,7 +432,6 @@ class LangFuseLogger:
|
|||
"url": url,
|
||||
"headers": clean_headers,
|
||||
}
|
||||
|
||||
trace = self.Langfuse.trace(**trace_params)
|
||||
|
||||
generation_id = None
|
||||
|
@ -465,7 +472,29 @@ class LangFuseLogger:
|
|||
}
|
||||
|
||||
if supports_prompt:
|
||||
generation_params["prompt"] = clean_metadata.pop("prompt", None)
|
||||
user_prompt = clean_metadata.pop("prompt", None)
|
||||
if user_prompt is None:
|
||||
pass
|
||||
elif isinstance(user_prompt, dict):
|
||||
from langfuse.model import (
|
||||
TextPromptClient,
|
||||
ChatPromptClient,
|
||||
Prompt_Text,
|
||||
Prompt_Chat,
|
||||
)
|
||||
|
||||
if user_prompt.get("type", "") == "chat":
|
||||
_prompt_chat = Prompt_Chat(**user_prompt)
|
||||
generation_params["prompt"] = ChatPromptClient(
|
||||
prompt=_prompt_chat
|
||||
)
|
||||
elif user_prompt.get("type", "") == "text":
|
||||
_prompt_text = Prompt_Text(**user_prompt)
|
||||
generation_params["prompt"] = TextPromptClient(
|
||||
prompt=_prompt_text
|
||||
)
|
||||
else:
|
||||
generation_params["prompt"] = user_prompt
|
||||
|
||||
if output is not None and isinstance(output, str) and level == "ERROR":
|
||||
generation_params["status_message"] = output
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
from datetime import datetime, timezone
|
||||
import traceback
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
import packaging
|
||||
|
||||
|
@ -15,13 +14,33 @@ def parse_usage(usage):
|
|||
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
|
||||
}
|
||||
|
||||
def parse_tool_calls(tool_calls):
|
||||
if tool_calls is None:
|
||||
return None
|
||||
|
||||
def clean_tool_call(tool_call):
|
||||
|
||||
serialized = {
|
||||
"type": tool_call.type,
|
||||
"id": tool_call.id,
|
||||
"function": {
|
||||
"name": tool_call.function.name,
|
||||
"arguments": tool_call.function.arguments,
|
||||
}
|
||||
}
|
||||
|
||||
return serialized
|
||||
|
||||
return [clean_tool_call(tool_call) for tool_call in tool_calls]
|
||||
|
||||
|
||||
def parse_messages(input):
|
||||
|
||||
if input is None:
|
||||
return None
|
||||
|
||||
def clean_message(message):
|
||||
# if is strin, return as is
|
||||
# if is string, return as is
|
||||
if isinstance(message, str):
|
||||
return message
|
||||
|
||||
|
@ -35,9 +54,7 @@ def parse_messages(input):
|
|||
|
||||
# Only add tool_calls and function_call to res if they are set
|
||||
if message.get("tool_calls"):
|
||||
serialized["tool_calls"] = message.get("tool_calls")
|
||||
if message.get("function_call"):
|
||||
serialized["function_call"] = message.get("function_call")
|
||||
serialized["tool_calls"] = parse_tool_calls(message.get("tool_calls"))
|
||||
|
||||
return serialized
|
||||
|
||||
|
@ -92,8 +109,13 @@ class LunaryLogger:
|
|||
print_verbose(f"Lunary Logging - Logging request for model {model}")
|
||||
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
metadata = litellm_params.get("metadata", {}) or {}
|
||||
|
||||
if optional_params:
|
||||
# merge into extra
|
||||
extra = {**extra, **optional_params}
|
||||
|
||||
tags = litellm_params.pop("tags", None) or []
|
||||
|
||||
if extra:
|
||||
|
@ -103,7 +125,7 @@ class LunaryLogger:
|
|||
|
||||
# keep only serializable types
|
||||
for param, value in extra.items():
|
||||
if not isinstance(value, (str, int, bool, float)):
|
||||
if not isinstance(value, (str, int, bool, float)) and param != "tools":
|
||||
try:
|
||||
extra[param] = str(value)
|
||||
except:
|
||||
|
@ -139,7 +161,7 @@ class LunaryLogger:
|
|||
metadata=metadata,
|
||||
runtime="litellm",
|
||||
tags=tags,
|
||||
extra=extra,
|
||||
params=extra,
|
||||
)
|
||||
|
||||
self.lunary_client.track_event(
|
||||
|
|
|
@ -8,14 +8,16 @@ from litellm.utils import (
|
|||
CustomStreamWrapper,
|
||||
convert_to_model_response_object,
|
||||
TranscriptionResponse,
|
||||
get_secret,
|
||||
)
|
||||
from typing import Callable, Optional, BinaryIO
|
||||
from typing import Callable, Optional, BinaryIO, List
|
||||
from litellm import OpenAIConfig
|
||||
import litellm, json
|
||||
import httpx # type: ignore
|
||||
from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
|
||||
from openai import AzureOpenAI, AsyncAzureOpenAI
|
||||
import uuid
|
||||
import os
|
||||
|
||||
|
||||
class AzureOpenAIError(Exception):
|
||||
|
@ -105,6 +107,12 @@ class AzureOpenAIConfig(OpenAIConfig):
|
|||
optional_params["azure_ad_token"] = value
|
||||
return optional_params
|
||||
|
||||
def get_eu_regions(self) -> List[str]:
|
||||
"""
|
||||
Source: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-model-availability
|
||||
"""
|
||||
return ["europe", "sweden", "switzerland", "france", "uk"]
|
||||
|
||||
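This diff adds a get_eu_regions() helper to several provider configs (Azure here, and Bedrock, Vertex AI, and watsonx later in the changeset). A hedged sketch of one plausible use, keeping traffic pinned to EU regions; the deployment list, its "region" field, and the package-level import are illustrative assumptions, not part of this diff:

```python
from litellm import AzureOpenAIConfig  # assumes the config class is exported at package level

# Hypothetical deployment metadata for illustration only.
deployments = [
    {"model": "azure/gpt-4", "region": "sweden"},
    {"model": "azure/gpt-4", "region": "eastus"},
]

eu_regions = AzureOpenAIConfig().get_eu_regions()
eu_only = [d for d in deployments if d["region"] in eu_regions]
print(eu_only)  # keeps only the deployment pinned to an EU region
```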
|
||||
def select_azure_base_url_or_endpoint(azure_client_params: dict):
|
||||
# azure_client_params = {
|
||||
|
@ -126,6 +134,51 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
|
|||
return azure_client_params
|
||||
|
||||
|
||||
def get_azure_ad_token_from_oidc(azure_ad_token: str):
    azure_client_id = os.getenv("AZURE_CLIENT_ID", None)
    azure_tenant = os.getenv("AZURE_TENANT_ID", None)

    if azure_client_id is None or azure_tenant is None:
        raise AzureOpenAIError(
            status_code=422,
            message="AZURE_CLIENT_ID and AZURE_TENANT_ID must be set",
        )

    oidc_token = get_secret(azure_ad_token)

    if oidc_token is None:
        raise AzureOpenAIError(
            status_code=401,
            message="OIDC token could not be retrieved from secret manager.",
        )

    req_token = httpx.post(
        f"https://login.microsoftonline.com/{azure_tenant}/oauth2/v2.0/token",
        data={
            "client_id": azure_client_id,
            "grant_type": "client_credentials",
            "scope": "https://cognitiveservices.azure.com/.default",
            "client_assertion_type": "urn:ietf:params:oauth:client-assertion-type:jwt-bearer",
            "client_assertion": oidc_token,
        },
    )

    if req_token.status_code != 200:
        raise AzureOpenAIError(
            status_code=req_token.status_code,
            message=req_token.text,
        )

    possible_azure_ad_token = req_token.json().get("access_token", None)

    if possible_azure_ad_token is None:
        raise AzureOpenAIError(
            status_code=422, message="Azure AD Token not returned"
        )

    return possible_azure_ad_token
|
||||
|
||||
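A minimal usage sketch for the helper above, based on the azure_ad_token.startswith("oidc/") checks added throughout this file. The environment values and the secret name are placeholders; the "oidc/..." string is resolved through litellm's get_secret() and then exchanged for an Azure AD access token via the JWT-bearer client-assertion grant.

```python
import os

os.environ["AZURE_CLIENT_ID"] = "<app-registration-client-id>"  # placeholder
os.environ["AZURE_TENANT_ID"] = "<tenant-id>"                   # placeholder

# Hypothetical secret reference holding a federated OIDC JWT (e.g. issued by a CI provider).
azure_ad_token = get_azure_ad_token_from_oidc("oidc/my-federated-token")
headers = {"Authorization": f"Bearer {azure_ad_token}"}
```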
|
||||
class AzureChatCompletion(BaseLLM):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
@ -137,6 +190,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
headers["api-key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
headers["Authorization"] = f"Bearer {azure_ad_token}"
|
||||
return headers
|
||||
|
||||
|
@ -189,6 +244,9 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
|
||||
if acompletion is True:
|
||||
|
@ -276,6 +334,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
if client is None:
|
||||
azure_client = AzureOpenAI(**azure_client_params)
|
||||
|
@ -351,6 +411,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
|
||||
# setting Azure client
|
||||
|
@ -422,6 +484,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
if client is None:
|
||||
azure_client = AzureOpenAI(**azure_client_params)
|
||||
|
@ -478,6 +542,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
if client is None:
|
||||
azure_client = AsyncAzureOpenAI(**azure_client_params)
|
||||
|
@ -599,6 +665,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
|
||||
## LOGGING
|
||||
|
@ -755,6 +823,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
|
||||
if aimg_generation == True:
|
||||
|
@ -833,6 +903,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
if api_key is not None:
|
||||
azure_client_params["api_key"] = api_key
|
||||
elif azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
|
||||
if max_retries is not None:
|
||||
|
|
|
@ -52,6 +52,16 @@ class AmazonBedrockGlobalConfig:
|
|||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
def get_eu_regions(self) -> List[str]:
|
||||
"""
|
||||
Source: https://www.aws-services.info/bedrock.html
|
||||
"""
|
||||
return [
|
||||
"eu-west-1",
|
||||
"eu-west-3",
|
||||
"eu-central-1",
|
||||
]
|
||||
|
||||
|
||||
class AmazonTitanConfig:
|
||||
"""
|
||||
|
@ -551,6 +561,7 @@ def init_bedrock_client(
|
|||
aws_session_name: Optional[str] = None,
|
||||
aws_profile_name: Optional[str] = None,
|
||||
aws_role_name: Optional[str] = None,
|
||||
aws_web_identity_token: Optional[str] = None,
|
||||
extra_headers: Optional[dict] = None,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
):
|
||||
|
@ -567,6 +578,7 @@ def init_bedrock_client(
|
|||
aws_session_name,
|
||||
aws_profile_name,
|
||||
aws_role_name,
|
||||
aws_web_identity_token,
|
||||
]
|
||||
|
||||
# Iterate over parameters and update if needed
|
||||
|
@ -582,6 +594,7 @@ def init_bedrock_client(
|
|||
aws_session_name,
|
||||
aws_profile_name,
|
||||
aws_role_name,
|
||||
aws_web_identity_token,
|
||||
) = params_to_check
|
||||
|
||||
### SET REGION NAME
|
||||
|
@ -620,7 +633,38 @@ def init_bedrock_client(
|
|||
config = boto3.session.Config()
|
||||
|
||||
### CHECK STS ###
|
||||
if aws_role_name is not None and aws_session_name is not None:
|
||||
    if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
        oidc_token = get_secret(aws_web_identity_token)

        if oidc_token is None:
            raise BedrockError(
                message="OIDC token could not be retrieved from secret manager.",
                status_code=401,
            )

        sts_client = boto3.client(
            "sts"
        )

        # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
        sts_response = sts_client.assume_role_with_web_identity(
            RoleArn=aws_role_name,
            RoleSessionName=aws_session_name,
            WebIdentityToken=oidc_token,
            DurationSeconds=3600,
        )

        client = boto3.client(
            service_name="bedrock-runtime",
            aws_access_key_id=sts_response["Credentials"]["AccessKeyId"],
            aws_secret_access_key=sts_response["Credentials"]["SecretAccessKey"],
            aws_session_token=sts_response["Credentials"]["SessionToken"],
            region_name=region_name,
            endpoint_url=endpoint_url,
            config=config,
        )
|
||||
elif aws_role_name is not None and aws_session_name is not None:
|
||||
# use sts if role name passed in
|
||||
sts_client = boto3.client(
|
||||
"sts",
|
||||
|
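A hedged end-to-end sketch of the new web-identity path. The role ARN, session name, secret reference, and model id are placeholders, and it assumes aws_web_identity_token, aws_role_name, aws_session_name, and aws_region_name are accepted as completion kwargs and forwarded to init_bedrock_client, as the optional_params.pop(...) calls later in this file suggest.

```python
import litellm

response = litellm.completion(
    model="bedrock/cohere.command-r-v1:0",  # placeholder Bedrock model id
    messages=[{"role": "user", "content": "Hello from an OIDC-authenticated workload"}],
    aws_web_identity_token="oidc/circleci_v2",  # resolved via get_secret()
    aws_role_name="arn:aws:iam::123456789012:role/my-bedrock-role",
    aws_session_name="my-session",
    aws_region_name="eu-west-1",
)
```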
@ -755,6 +799,7 @@ def completion(
|
|||
aws_bedrock_runtime_endpoint = optional_params.pop(
|
||||
"aws_bedrock_runtime_endpoint", None
|
||||
)
|
||||
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
|
||||
|
||||
# use passed in BedrockRuntime.Client if provided, otherwise create a new one
|
||||
client = optional_params.pop("aws_bedrock_client", None)
|
||||
|
@ -769,6 +814,7 @@ def completion(
|
|||
aws_role_name=aws_role_name,
|
||||
aws_session_name=aws_session_name,
|
||||
aws_profile_name=aws_profile_name,
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
extra_headers=extra_headers,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
@ -1291,6 +1337,7 @@ def embedding(
|
|||
aws_bedrock_runtime_endpoint = optional_params.pop(
|
||||
"aws_bedrock_runtime_endpoint", None
|
||||
)
|
||||
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
|
||||
|
||||
# use passed in BedrockRuntime.Client if provided, otherwise create a new one
|
||||
client = init_bedrock_client(
|
||||
|
@ -1298,6 +1345,7 @@ def embedding(
|
|||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_region_name=aws_region_name,
|
||||
aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name=aws_session_name,
|
||||
)
|
||||
|
@ -1380,6 +1428,7 @@ def image_generation(
|
|||
aws_bedrock_runtime_endpoint = optional_params.pop(
|
||||
"aws_bedrock_runtime_endpoint", None
|
||||
)
|
||||
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
|
||||
|
||||
# use passed in BedrockRuntime.Client if provided, otherwise create a new one
|
||||
client = init_bedrock_client(
|
||||
|
@ -1387,6 +1436,7 @@ def image_generation(
|
|||
aws_secret_access_key=aws_secret_access_key,
|
||||
aws_region_name=aws_region_name,
|
||||
aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name=aws_session_name,
|
||||
timeout=timeout,
|
||||
|
|
328
litellm/llms/clarifai.py
Normal file
328
litellm/llms/clarifai.py
Normal file
|
@ -0,0 +1,328 @@
|
|||
import os, types, traceback
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Usage, Choices, Message, CustomStreamWrapper
|
||||
import litellm
|
||||
import httpx
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
|
||||
class ClarifaiError(Exception):
|
||||
def __init__(self, status_code, message, url):
|
||||
self.status_code = status_code
|
||||
self.message = message
|
||||
self.request = httpx.Request(
|
||||
method="POST", url=url
|
||||
)
|
||||
self.response = httpx.Response(status_code=status_code, request=self.request)
|
||||
super().__init__(
|
||||
self.message
|
||||
)
|
||||
|
||||
class ClarifaiConfig:
|
||||
"""
|
||||
Reference: https://clarifai.com/meta/Llama-2/models/llama2-70b-chat
|
||||
TODO fill in the details
|
||||
"""
|
||||
max_tokens: Optional[int] = None
|
||||
temperature: Optional[int] = None
|
||||
top_k: Optional[int] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_tokens: Optional[int] = None,
|
||||
temperature: Optional[int] = None,
|
||||
top_k: Optional[int] = None,
|
||||
) -> None:
|
||||
locals_ = locals()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return {
|
||||
k: v
|
||||
for k, v in cls.__dict__.items()
|
||||
if not k.startswith("__")
|
||||
and not isinstance(
|
||||
v,
|
||||
(
|
||||
types.FunctionType,
|
||||
types.BuiltinFunctionType,
|
||||
classmethod,
|
||||
staticmethod,
|
||||
),
|
||||
)
|
||||
and v is not None
|
||||
}
|
||||
|
||||
def validate_environment(api_key):
|
||||
headers = {
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json",
|
||||
}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
return headers
|
||||
|
||||
def completions_to_model(payload):
    # if payload["n"] != 1:
    #     raise HTTPException(
    #         status_code=422,
    #         detail="Only one generation is supported. Please set candidate_count to 1.",
    #     )

    params = {}
    if temperature := payload.get("temperature"):
        params["temperature"] = temperature
    if max_tokens := payload.get("max_tokens"):
        params["max_tokens"] = max_tokens
    return {
        "inputs": [{"data": {"text": {"raw": payload["prompt"]}}}],
        "model": {"output_info": {"params": params}},
    }
|
||||
|
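For reference, a small sketch of the request-body translation performed by completions_to_model above; the prompt and parameter values are made up:

```python
payload = {"prompt": "Write a haiku about the sea.", "temperature": 0.7, "max_tokens": 64}

print(completions_to_model(payload))
# {
#     "inputs": [{"data": {"text": {"raw": "Write a haiku about the sea."}}}],
#     "model": {"output_info": {"params": {"temperature": 0.7, "max_tokens": 64}}},
# }
```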
||||
def process_response(
|
||||
model,
|
||||
prompt,
|
||||
response,
|
||||
model_response,
|
||||
api_key,
|
||||
data,
|
||||
encoding,
|
||||
logging_obj
|
||||
):
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key=api_key,
|
||||
original_response=response.text,
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
## RESPONSE OBJECT
|
||||
try:
|
||||
completion_response = response.json()
|
||||
except Exception:
|
||||
raise ClarifaiError(
|
||||
message=response.text, status_code=response.status_code, url=model
|
||||
)
|
||||
# print(completion_response)
|
||||
try:
|
||||
choices_list = []
|
||||
for idx, item in enumerate(completion_response["outputs"]):
|
||||
if len(item["data"]["text"]["raw"]) > 0:
|
||||
message_obj = Message(content=item["data"]["text"]["raw"])
|
||||
else:
|
||||
message_obj = Message(content=None)
|
||||
choice_obj = Choices(
|
||||
finish_reason="stop",
|
||||
index=idx + 1, #check
|
||||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
|
||||
except Exception as e:
|
||||
raise ClarifaiError(
|
||||
message=traceback.format_exc(), status_code=response.status_code, url=model
|
||||
)
|
||||
|
||||
# Calculate Usage
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content"))
|
||||
)
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
return model_response
|
||||
|
||||
def convert_model_to_url(model: str, api_base: str):
    user_id, app_id, model_id = model.split(".")
    return f"{api_base}/users/{user_id}/apps/{app_id}/models/{model_id}/outputs"


def get_prompt_model_name(url: str):
    clarifai_model_name = url.split("/")[-2]
    if "claude" in clarifai_model_name:
        return "anthropic", clarifai_model_name.replace("_", ".")
    if ("llama" in clarifai_model_name) or ("mistral" in clarifai_model_name):
        return "", "meta-llama/llama-2-chat"
    else:
        return "", clarifai_model_name
|
||||
|
||||
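Clarifai model strings are expected in user_id.app_id.model_id form; a quick illustration of the two helpers above with a hypothetical model string and the default api_base:

```python
url = convert_model_to_url("mistralai.completion.mistral-large", "https://api.clarifai.com/v2")
# -> "https://api.clarifai.com/v2/users/mistralai/apps/completion/models/mistral-large/outputs"

provider, prompt_model = get_prompt_model_name(url)
# -> ("", "meta-llama/llama-2-chat"), since the model id contains "mistral"
```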
async def async_completion(
|
||||
model: str,
|
||||
prompt: str,
|
||||
api_base: str,
|
||||
custom_prompt_dict: dict,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
encoding,
|
||||
api_key,
|
||||
logging_obj,
|
||||
data=None,
|
||||
optional_params=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
headers={}):
|
||||
|
||||
async_handler = AsyncHTTPHandler(
|
||||
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
|
||||
)
|
||||
response = await async_handler.post(
|
||||
api_base, headers=headers, data=json.dumps(data)
|
||||
)
|
||||
|
||||
return process_response(
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
response=response,
|
||||
model_response=model_response,
|
||||
api_key=api_key,
|
||||
data=data,
|
||||
encoding=encoding,
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
def completion(
|
||||
model: str,
|
||||
messages: list,
|
||||
api_base: str,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
encoding,
|
||||
api_key,
|
||||
logging_obj,
|
||||
custom_prompt_dict={},
|
||||
acompletion=False,
|
||||
optional_params=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
):
|
||||
headers = validate_environment(api_key)
|
||||
model = convert_model_to_url(model, api_base)
|
||||
prompt = " ".join(message["content"] for message in messages) # TODO
|
||||
|
||||
## Load Config
|
||||
config = litellm.ClarifaiConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
k not in optional_params
|
||||
):
|
||||
optional_params[k] = v
|
||||
|
||||
custom_llm_provider, orig_model_name = get_prompt_model_name(model)
|
||||
if custom_llm_provider == "anthropic":
|
||||
prompt = prompt_factory(
|
||||
model=orig_model_name,
|
||||
messages=messages,
|
||||
api_key=api_key,
|
||||
custom_llm_provider="clarifai"
|
||||
)
|
||||
else:
|
||||
prompt = prompt_factory(
|
||||
model=orig_model_name,
|
||||
messages=messages,
|
||||
api_key=api_key,
|
||||
custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
# print(prompt); exit(0)
|
||||
|
||||
data = {
|
||||
"prompt": prompt,
|
||||
**optional_params,
|
||||
}
|
||||
data = completions_to_model(data)
|
||||
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
api_key=api_key,
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"headers": headers,
|
||||
"api_base": api_base,
|
||||
},
|
||||
)
|
||||
if acompletion==True:
|
||||
return async_completion(
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
api_base=api_base,
|
||||
custom_prompt_dict=custom_prompt_dict,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
encoding=encoding,
|
||||
api_key=api_key,
|
||||
logging_obj=logging_obj,
|
||||
data=data,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
logger_fn=logger_fn,
|
||||
headers=headers,
|
||||
)
|
||||
else:
|
||||
## COMPLETION CALL
|
||||
response = requests.post(
|
||||
model,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
)
|
||||
# print(response.content); exit()
|
||||
|
||||
if response.status_code != 200:
|
||||
raise ClarifaiError(status_code=response.status_code, message=response.text, url=model)
|
||||
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
completion_stream = response.iter_lines()
|
||||
stream_response = CustomStreamWrapper(
|
||||
completion_stream=completion_stream,
|
||||
model=model,
|
||||
custom_llm_provider="clarifai",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
return stream_response
|
||||
|
||||
else:
|
||||
return process_response(
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
response=response,
|
||||
model_response=model_response,
|
||||
api_key=api_key,
|
||||
data=data,
|
||||
encoding=encoding,
|
||||
logging_obj=logging_obj)
|
||||
|
||||
|
||||
class ModelResponseIterator:
|
||||
def __init__(self, model_response):
|
||||
self.model_response = model_response
|
||||
self.is_done = False
|
||||
|
||||
# Sync iterator
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
if self.is_done:
|
||||
raise StopIteration
|
||||
self.is_done = True
|
||||
return self.model_response
|
||||
|
||||
# Async iterator
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
if self.is_done:
|
||||
raise StopAsyncIteration
|
||||
self.is_done = True
|
||||
return self.model_response
|
|
@ -1509,6 +1509,11 @@ def prompt_factory(
|
|||
model="meta-llama/Meta-Llama-3-8B-Instruct",
|
||||
messages=messages,
|
||||
)
|
||||
|
||||
elif custom_llm_provider == "clarifai":
|
||||
if "claude" in model:
|
||||
return anthropic_pt(messages=messages)
|
||||
|
||||
elif custom_llm_provider == "perplexity":
|
||||
for message in messages:
|
||||
message.pop("name", None)
|
||||
|
|
|
@ -198,6 +198,23 @@ class VertexAIConfig:
|
|||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
def get_eu_regions(self) -> List[str]:
|
||||
"""
|
||||
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions
|
||||
"""
|
||||
return [
|
||||
"europe-central2",
|
||||
"europe-north1",
|
||||
"europe-southwest1",
|
||||
"europe-west1",
|
||||
"europe-west2",
|
||||
"europe-west3",
|
||||
"europe-west4",
|
||||
"europe-west6",
|
||||
"europe-west8",
|
||||
"europe-west9",
|
||||
]
|
||||
|
||||
|
||||
import asyncio
|
||||
|
||||
|
|
|
@ -1,12 +1,26 @@
|
|||
from enum import Enum
|
||||
import json, types, time # noqa: E401
|
||||
from contextlib import contextmanager
|
||||
from typing import Callable, Dict, Optional, Any, Union, List
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from typing import (
|
||||
Callable,
|
||||
Dict,
|
||||
Generator,
|
||||
AsyncGenerator,
|
||||
Iterator,
|
||||
AsyncIterator,
|
||||
Optional,
|
||||
Any,
|
||||
Union,
|
||||
List,
|
||||
ContextManager,
|
||||
AsyncContextManager,
|
||||
)
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, get_secret, Usage
|
||||
from litellm.utils import ModelResponse, Usage, get_secret
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates import factory as ptf
|
||||
|
@ -149,6 +163,15 @@ class IBMWatsonXAIConfig:
|
|||
optional_params[mapped_params[param]] = value
|
||||
return optional_params
|
||||
|
||||
def get_eu_regions(self) -> List[str]:
|
||||
"""
|
||||
Source: https://www.ibm.com/docs/en/watsonx/saas?topic=integrations-regional-availability
|
||||
"""
|
||||
return [
|
||||
"eu-de",
|
||||
"eu-gb",
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
|
||||
# handle anthropic prompts and amazon titan prompts
|
||||
|
@ -188,11 +211,12 @@ class WatsonXAIEndpoint(str, Enum):
|
|||
)
|
||||
EMBEDDINGS = "/ml/v1/text/embeddings"
|
||||
PROMPTS = "/ml/v1/prompts"
|
||||
AVAILABLE_MODELS = "/ml/v1/foundation_model_specs"
|
||||
|
||||
|
||||
class IBMWatsonXAI(BaseLLM):
|
||||
"""
|
||||
Class to interface with IBM Watsonx.ai API for text generation and embeddings.
|
||||
Class to interface with IBM watsonx.ai API for text generation and embeddings.
|
||||
|
||||
Reference: https://cloud.ibm.com/apidocs/watsonx-ai
|
||||
"""
|
||||
|
@ -343,7 +367,7 @@ class IBMWatsonXAI(BaseLLM):
|
|||
)
|
||||
if token is None and api_key is not None:
|
||||
# generate the auth token
|
||||
if print_verbose:
|
||||
if print_verbose is not None:
|
||||
print_verbose("Generating IAM token for Watsonx.ai")
|
||||
token = self.generate_iam_token(api_key)
|
||||
elif token is None and api_key is None:
|
||||
|
@ -378,10 +402,11 @@ class IBMWatsonXAI(BaseLLM):
|
|||
print_verbose: Callable,
|
||||
encoding,
|
||||
logging_obj,
|
||||
optional_params: dict,
|
||||
litellm_params: Optional[dict] = None,
|
||||
optional_params=None,
|
||||
acompletion=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
timeout: Optional[float] = None,
|
||||
timeout=None,
|
||||
):
|
||||
"""
|
||||
Send a text generation request to the IBM Watsonx.ai API.
|
||||
|
@ -402,12 +427,12 @@ class IBMWatsonXAI(BaseLLM):
|
|||
model, messages, provider, custom_prompt_dict
|
||||
)
|
||||
|
||||
def process_text_request(request_params: dict) -> ModelResponse:
|
||||
with self._manage_response(
|
||||
request_params, logging_obj=logging_obj, input=prompt, timeout=timeout
|
||||
) as resp:
|
||||
json_resp = resp.json()
|
||||
|
||||
def process_text_gen_response(json_resp: dict) -> ModelResponse:
|
||||
if "results" not in json_resp:
|
||||
raise WatsonXAIError(
|
||||
status_code=500,
|
||||
message=f"Error: Invalid response from Watsonx.ai API: {json_resp}",
|
||||
)
|
||||
generated_text = json_resp["results"][0]["generated_text"]
|
||||
prompt_tokens = json_resp["results"][0]["input_token_count"]
|
||||
completion_tokens = json_resp["results"][0]["generated_token_count"]
|
||||
|
@ -415,36 +440,70 @@ class IBMWatsonXAI(BaseLLM):
|
|||
model_response["finish_reason"] = json_resp["results"][0]["stop_reason"]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
setattr(model_response, "usage", usage)
|
||||
return model_response
|
||||
|
||||
def process_stream_request(
|
||||
request_params: dict,
|
||||
def process_stream_response(
|
||||
stream_resp: Union[Iterator[str], AsyncIterator],
|
||||
) -> litellm.CustomStreamWrapper:
|
||||
streamwrapper = litellm.CustomStreamWrapper(
|
||||
stream_resp,
|
||||
model=model,
|
||||
custom_llm_provider="watsonx",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
return streamwrapper
|
||||
|
||||
# create the function to manage the request to watsonx.ai
|
||||
self.request_manager = RequestManager(logging_obj)
|
||||
|
||||
def handle_text_request(request_params: dict) -> ModelResponse:
|
||||
with self.request_manager.request(
|
||||
request_params,
|
||||
input=prompt,
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
json_resp = resp.json()
|
||||
|
||||
return process_text_gen_response(json_resp)
|
||||
|
||||
async def handle_text_request_async(request_params: dict) -> ModelResponse:
|
||||
async with self.request_manager.async_request(
|
||||
request_params,
|
||||
input=prompt,
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
json_resp = resp.json()
|
||||
return process_text_gen_response(json_resp)
|
||||
|
||||
def handle_stream_request(request_params: dict) -> litellm.CustomStreamWrapper:
|
||||
# stream the response - generated chunks will be handled
|
||||
# by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
|
||||
with self._manage_response(
|
||||
with self.request_manager.request(
|
||||
request_params,
|
||||
logging_obj=logging_obj,
|
||||
stream=True,
|
||||
input=prompt,
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
response = litellm.CustomStreamWrapper(
|
||||
resp.iter_lines(),
|
||||
model=model,
|
||||
custom_llm_provider="watsonx",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
return response
|
||||
streamwrapper = process_stream_response(resp.iter_lines())
|
||||
return streamwrapper
|
||||
|
||||
async def handle_stream_request_async(request_params: dict) -> litellm.CustomStreamWrapper:
|
||||
# stream the response - generated chunks will be handled
|
||||
# by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
|
||||
async with self.request_manager.async_request(
|
||||
request_params,
|
||||
stream=True,
|
||||
input=prompt,
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
streamwrapper = process_stream_response(resp.aiter_lines())
|
||||
return streamwrapper
|
||||
|
||||
try:
|
||||
## Get the response from the model
|
||||
|
@ -455,10 +514,18 @@ class IBMWatsonXAI(BaseLLM):
|
|||
optional_params=optional_params,
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if stream:
|
||||
return process_stream_request(req_params)
|
||||
if stream and (acompletion is True):
|
||||
# stream and async text generation
|
||||
return handle_stream_request_async(req_params)
|
||||
elif stream:
|
||||
# streaming text generation
|
||||
return handle_stream_request(req_params)
|
||||
elif (acompletion is True):
|
||||
# async text generation
|
||||
return handle_text_request_async(req_params)
|
||||
else:
|
||||
return process_text_request(req_params)
|
||||
# regular text generation
|
||||
return handle_text_request(req_params)
|
||||
except WatsonXAIError as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
|
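The branching above maps onto the usual entry points; a hedged sketch of how each handler would be reached (the model id is a placeholder, and the example assumes the watsonx/ provider prefix and the usual WATSONX_* environment variables are configured):

```python
import litellm

messages = [{"role": "user", "content": "Summarize IBM watsonx.ai in one sentence."}]

# sync, non-streaming -> handle_text_request
resp = litellm.completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)

# sync, streaming -> handle_stream_request
for chunk in litellm.completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages, stream=True):
    pass

# async and async+stream -> handle_text_request_async / handle_stream_request_async
# await litellm.acompletion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages, stream=True)
```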
@ -473,6 +540,7 @@ class IBMWatsonXAI(BaseLLM):
|
|||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
aembedding=None,
|
||||
):
|
||||
"""
|
||||
Send a text embedding request to the IBM Watsonx.ai API.
|
||||
|
@ -507,9 +575,6 @@ class IBMWatsonXAI(BaseLLM):
|
|||
}
|
||||
request_params = dict(version=api_params["api_version"])
|
||||
url = api_params["url"].rstrip("/") + WatsonXAIEndpoint.EMBEDDINGS
|
||||
# request = httpx.Request(
|
||||
# "POST", url, headers=headers, json=payload, params=request_params
|
||||
# )
|
||||
req_params = {
|
||||
"method": "POST",
|
||||
"url": url,
|
||||
|
@ -517,25 +582,49 @@ class IBMWatsonXAI(BaseLLM):
|
|||
"json": payload,
|
||||
"params": request_params,
|
||||
}
|
||||
with self._manage_response(
|
||||
req_params, logging_obj=logging_obj, input=input
|
||||
) as resp:
|
||||
json_resp = resp.json()
|
||||
request_manager = RequestManager(logging_obj)
|
||||
|
||||
results = json_resp.get("results", [])
|
||||
embedding_response = []
|
||||
for idx, result in enumerate(results):
|
||||
embedding_response.append(
|
||||
{"object": "embedding", "index": idx, "embedding": result["embedding"]}
|
||||
def process_embedding_response(json_resp: dict) -> ModelResponse:
|
||||
results = json_resp.get("results", [])
|
||||
embedding_response = []
|
||||
for idx, result in enumerate(results):
|
||||
embedding_response.append(
|
||||
{
|
||||
"object": "embedding",
|
||||
"index": idx,
|
||||
"embedding": result["embedding"],
|
||||
}
|
||||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
input_tokens = json_resp.get("input_token_count", 0)
|
||||
model_response.usage = Usage(
|
||||
prompt_tokens=input_tokens,
|
||||
completion_tokens=0,
|
||||
total_tokens=input_tokens,
|
||||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
input_tokens = json_resp.get("input_token_count", 0)
|
||||
model_response.usage = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
)
|
||||
return model_response
|
||||
return model_response
|
||||
|
||||
        def handle_embedding(request_params: dict) -> ModelResponse:
            with request_manager.request(request_params, input=input) as resp:
                json_resp = resp.json()
            return process_embedding_response(json_resp)

        async def handle_aembedding(request_params: dict) -> ModelResponse:
            async with request_manager.async_request(request_params, input=input) as resp:
                json_resp = resp.json()
            return process_embedding_response(json_resp)

        try:
            if aembedding is True:
                # aembedding=True -> async path
                return handle_aembedding(req_params)
            else:
                return handle_embedding(req_params)
        except WatsonXAIError as e:
            raise e
        except Exception as e:
            raise WatsonXAIError(status_code=500, message=str(e))
|
||||
|
||||
def generate_iam_token(self, api_key=None, **params):
|
||||
headers = {}
|
||||
|
@ -558,52 +647,144 @@ class IBMWatsonXAI(BaseLLM):
|
|||
self.token = iam_access_token
|
||||
return iam_access_token
|
||||
|
||||
@contextmanager
|
||||
def _manage_response(
|
||||
self,
|
||||
request_params: dict,
|
||||
logging_obj: Any,
|
||||
stream: bool = False,
|
||||
input: Optional[Any] = None,
|
||||
timeout: Optional[float] = None,
|
||||
):
|
||||
request_str = (
|
||||
f"response = {request_params['method']}(\n"
|
||||
f"\turl={request_params['url']},\n"
|
||||
f"\tjson={request_params['json']},\n"
|
||||
f")"
|
||||
)
|
||||
logging_obj.pre_call(
|
||||
input=input,
|
||||
api_key=request_params["headers"].get("Authorization"),
|
||||
additional_args={
|
||||
"complete_input_dict": request_params["json"],
|
||||
"request_str": request_str,
|
||||
},
|
||||
)
|
||||
if timeout:
|
||||
request_params["timeout"] = timeout
|
||||
try:
|
||||
if stream:
|
||||
resp = requests.request(
|
||||
**request_params,
|
||||
stream=True,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
yield resp
|
||||
else:
|
||||
resp = requests.request(**request_params)
|
||||
resp.raise_for_status()
|
||||
yield resp
|
||||
except Exception as e:
|
||||
raise WatsonXAIError(status_code=500, message=str(e))
|
||||
if not stream:
|
||||
logging_obj.post_call(
|
||||
def get_available_models(self, *, ids_only: bool = True, **params):
|
||||
api_params = self._get_api_params(params)
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_params['token']}",
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
request_params = dict(version=api_params["api_version"])
|
||||
url = api_params["url"].rstrip("/") + WatsonXAIEndpoint.AVAILABLE_MODELS
|
||||
req_params = dict(method="GET", url=url, headers=headers, params=request_params)
|
||||
with RequestManager(logging_obj=None).request(req_params) as resp:
|
||||
json_resp = resp.json()
|
||||
if not ids_only:
|
||||
return json_resp
|
||||
return [res["model_id"] for res in json_resp["resources"]]
|
||||
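A small usage sketch for the new get_available_models helper; credentials are assumed to come from the same api params used elsewhere in this class, and the import path is an assumption based on the file this hunk touches:

```python
from litellm.llms.watsonx import IBMWatsonXAI  # module path assumed

model_ids = IBMWatsonXAI().get_available_models(ids_only=True)
print(model_ids[:5])  # first few foundation-model ids exposed by the watsonx.ai instance
```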
|
||||
class RequestManager:
    """
    Returns a context manager that manages the response from the request.
    Use request() for a regular (sync) context manager and async_request() for an async one.

    Usage:
    ```python
    request_params = dict(method="POST", url="https://api.example.com", headers={"Authorization": "Bearer token"}, json={"key": "value"})
    request_manager = RequestManager(logging_obj=logging_obj)
    with request_manager.request(request_params) as resp:
        ...
    # or
    async with request_manager.async_request(request_params) as resp:
        ...
    ```
    """
|
||||
|
||||
def __init__(self, logging_obj=None):
|
||||
self.logging_obj = logging_obj
|
||||
|
||||
def pre_call(
|
||||
self,
|
||||
request_params: dict,
|
||||
input: Optional[Any] = None,
|
||||
):
|
||||
if self.logging_obj is None:
|
||||
return
|
||||
request_str = (
|
||||
f"response = {request_params['method']}(\n"
|
||||
f"\turl={request_params['url']},\n"
|
||||
f"\tjson={request_params.get('json')},\n"
|
||||
f")"
|
||||
)
|
||||
self.logging_obj.pre_call(
|
||||
input=input,
|
||||
api_key=request_params["headers"].get("Authorization"),
|
||||
additional_args={
|
||||
"complete_input_dict": request_params.get("json"),
|
||||
"request_str": request_str,
|
||||
},
|
||||
)
|
||||
|
||||
def post_call(self, resp, request_params):
|
||||
if self.logging_obj is None:
|
||||
return
|
||||
self.logging_obj.post_call(
|
||||
input=input,
|
||||
api_key=request_params["headers"].get("Authorization"),
|
||||
original_response=json.dumps(resp.json()),
|
||||
additional_args={
|
||||
"status_code": resp.status_code,
|
||||
"complete_input_dict": request_params["json"],
|
||||
"complete_input_dict": request_params.get(
|
||||
"data", request_params.get("json")
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@contextmanager
|
||||
def request(
|
||||
self,
|
||||
request_params: dict,
|
||||
stream: bool = False,
|
||||
input: Optional[Any] = None,
|
||||
timeout=None,
|
||||
) -> Generator[requests.Response, None, None]:
|
||||
"""
|
||||
Returns a context manager that yields the response from the request.
|
||||
"""
|
||||
self.pre_call(request_params, input)
|
||||
if timeout:
|
||||
request_params["timeout"] = timeout
|
||||
if stream:
|
||||
request_params["stream"] = stream
|
||||
try:
|
||||
resp = requests.request(**request_params)
|
||||
if not resp.ok:
|
||||
raise WatsonXAIError(
|
||||
status_code=resp.status_code,
|
||||
message=f"Error {resp.status_code} ({resp.reason}): {resp.text}",
|
||||
)
|
||||
yield resp
|
||||
except Exception as e:
|
||||
raise WatsonXAIError(status_code=500, message=str(e))
|
||||
if not stream:
|
||||
self.post_call(resp, request_params)
|
||||
|
||||
@asynccontextmanager
|
||||
async def async_request(
|
||||
self,
|
||||
request_params: dict,
|
||||
stream: bool = False,
|
||||
input: Optional[Any] = None,
|
||||
timeout=None,
|
||||
) -> AsyncGenerator[httpx.Response, None]:
|
||||
self.pre_call(request_params, input)
|
||||
if timeout:
|
||||
request_params["timeout"] = timeout
|
||||
if stream:
|
||||
request_params["stream"] = stream
|
||||
try:
|
||||
# async with AsyncHTTPHandler(timeout=timeout) as client:
|
||||
self.async_handler = AsyncHTTPHandler(
|
||||
timeout=httpx.Timeout(
|
||||
timeout=request_params.pop("timeout", 600.0), connect=5.0
|
||||
),
|
||||
)
|
||||
# async_handler.client.verify = False
|
||||
if "json" in request_params:
|
||||
request_params["data"] = json.dumps(request_params.pop("json", {}))
|
||||
method = request_params.pop("method")
|
||||
if method.upper() == "POST":
|
||||
resp = await self.async_handler.post(**request_params)
|
||||
else:
|
||||
resp = await self.async_handler.get(**request_params)
|
||||
if resp.status_code not in [200, 201]:
|
||||
raise WatsonXAIError(
|
||||
status_code=resp.status_code,
|
||||
message=f"Error {resp.status_code} ({resp.reason}): {resp.text}",
|
||||
)
|
||||
yield resp
|
||||
# await async_handler.close()
|
||||
except Exception as e:
|
||||
raise WatsonXAIError(status_code=500, message=str(e))
|
||||
if not stream:
|
||||
self.post_call(resp, request_params)
|
|
@ -56,6 +56,7 @@ from .llms import (
|
|||
ollama,
|
||||
ollama_chat,
|
||||
cloudflare,
|
||||
clarifai,
|
||||
cohere,
|
||||
cohere_chat,
|
||||
petals,
|
||||
|
@ -1212,6 +1213,60 @@ def completion(
|
|||
)
|
||||
|
||||
response = model_response
|
||||
elif ("clarifai" in model
|
||||
or custom_llm_provider == "clarifai"
|
||||
or model in litellm.clarifai_models
|
||||
):
|
||||
clarifai_key = None
|
||||
clarifai_key = (
|
||||
api_key
|
||||
or litellm.clarifai_key
|
||||
or litellm.api_key
|
||||
or get_secret("CLARIFAI_API_KEY")
|
||||
or get_secret("CLARIFAI_API_TOKEN")
|
||||
)
|
||||
|
||||
api_base = (
|
||||
api_base
|
||||
or litellm.api_base
|
||||
or get_secret("CLARIFAI_API_BASE")
|
||||
or "https://api.clarifai.com/v2"
|
||||
)
|
||||
|
||||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||||
model_response = clarifai.completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
api_base=api_base,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
acompletion=acompletion,
|
||||
logger_fn=logger_fn,
|
||||
encoding=encoding, # for calculating input/output tokens
|
||||
api_key=clarifai_key,
|
||||
logging_obj=logging,
|
||||
custom_prompt_dict=custom_prompt_dict,
|
||||
)
|
||||
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
# don't try to access stream object,
|
||||
## LOGGING
|
||||
logging.post_call(
|
||||
input=messages,
|
||||
api_key=api_key,
|
||||
original_response=model_response,
|
||||
)
|
||||
|
||||
if optional_params.get("stream", False) or acompletion == True:
|
||||
## LOGGING
|
||||
logging.post_call(
|
||||
input=messages,
|
||||
api_key=clarifai_key,
|
||||
original_response=model_response,
|
||||
)
|
||||
response = model_response
|
||||
|
||||
elif custom_llm_provider == "anthropic":
|
||||
api_key = (
|
||||
|
|
|
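With the routing branch above in place, Clarifai models flow through the regular completion entry point; a hedged sketch (the PAT and model string are placeholders, the env var name comes from the get_secret lookups above, and the clarifai/ prefix is assumed to be stripped by provider detection before the model reaches the handler):

```python
import os
import litellm

os.environ["CLARIFAI_API_KEY"] = "<clarifai-pat>"  # placeholder

response = litellm.completion(
    model="clarifai/mistralai.completion.mistral-large",  # provider prefix assumed
    messages=[{"role": "user", "content": "Write a short poem about history."}],
)
print(response.choices[0].message.content)
```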
@ -1571,6 +1571,135 @@
|
|||
"litellm_provider": "replicate",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/microsoft/wizardlm-2-8x22b:nitro": {
|
||||
"max_tokens": 65536,
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000001,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/google/gemini-pro-1.5": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000025,
|
||||
"output_cost_per_token": 0.0000075,
|
||||
"input_cost_per_image": 0.00265,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"openrouter/mistralai/mixtral-8x22b-instruct": {
|
||||
"max_tokens": 65536,
|
||||
"input_cost_per_token": 0.00000065,
|
||||
"output_cost_per_token": 0.00000065,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/cohere/command-r-plus": {
|
||||
"max_tokens": 128000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/databricks/dbrx-instruct": {
|
||||
"max_tokens": 32768,
|
||||
"input_cost_per_token": 0.0000006,
|
||||
"output_cost_per_token": 0.0000006,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/anthropic/claude-3-haiku": {
|
||||
"max_tokens": 200000,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.00000125,
|
||||
"input_cost_per_image": 0.0004,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"openrouter/anthropic/claude-3-sonnet": {
|
||||
"max_tokens": 200000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"input_cost_per_image": 0.0048,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"openrouter/mistralai/mistral-large": {
|
||||
"max_tokens": 32000,
|
||||
"input_cost_per_token": 0.000008,
|
||||
"output_cost_per_token": 0.000024,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
|
||||
"max_tokens": 32769,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/google/gemini-pro-vision": {
|
||||
"max_tokens": 45875,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000375,
|
||||
"input_cost_per_image": 0.0025,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"openrouter/fireworks/firellava-13b": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000002,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/meta-llama/llama-3-8b-instruct:free": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/meta-llama/llama-3-8b-instruct:extended": {
|
||||
"max_tokens": 16384,
|
||||
"input_cost_per_token": 0.000000225,
|
||||
"output_cost_per_token": 0.00000225,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/meta-llama/llama-3-70b-instruct:nitro": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000009,
|
||||
"output_cost_per_token": 0.0000009,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/meta-llama/llama-3-70b-instruct": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.00000059,
|
||||
"output_cost_per_token": 0.00000079,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/openai/gpt-4-vision-preview": {
|
||||
"max_tokens": 130000,
|
||||
"input_cost_per_token": 0.00001,
|
||||
"output_cost_per_token": 0.00003,
|
||||
"input_cost_per_image": 0.01445,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"openrouter/openai/gpt-3.5-turbo": {
|
||||
"max_tokens": 4095,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
|
@ -1621,14 +1750,14 @@
|
|||
"tool_use_system_prompt_tokens": 395
|
||||
},
|
||||
"openrouter/google/palm-2-chat-bison": {
|
||||
"max_tokens": 8000,
|
||||
"max_tokens": 25804,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/google/palm-2-codechat-bison": {
|
||||
"max_tokens": 8000,
|
||||
"max_tokens": 20070,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "openrouter",
|
||||
|
@ -1711,13 +1840,6 @@
|
|||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"openrouter/meta-llama/llama-3-70b-instruct": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"output_cost_per_token": 0.0000008,
|
||||
"litellm_provider": "openrouter",
|
||||
"mode": "chat"
|
||||
},
|
||||
"j2-ultra": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
|
@ -3226,4 +3348,4 @@
|
|||
"mode": "embedding"
|
||||
}
|
||||
|
||||
}
|
||||
}
|
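These entries land in litellm's model-cost map; a hedged sketch of reading one of the new OpenRouter rows (assumes the public litellm.model_cost dict and the keys shown in this diff):

```python
import litellm

entry = litellm.model_cost.get("openrouter/anthropic/claude-3-haiku", {})

# Cost of a hypothetical call: 1,000 prompt tokens and 500 completion tokens,
# priced with the per-token rates from the entry above.
cost = 1000 * entry.get("input_cost_per_token", 0) + 500 * entry.get("output_cost_per_token", 0)
print(f"~${cost:.6f}")
```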
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
||||
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/f04e46b02318b660.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
|
File diff suppressed because one or more lines are too long
@ -1 +1 @@
(Minified prebuilt Admin UI index.html: the old and new versions differ only in the hashed webpack/CSS asset filenames and the Next.js buildId, which moves from "K8KXTbmuI2ArWjjdMi2iq" to "84BZ5uERcn4DsO4_POsLl"; the page title "LiteLLM Dashboard", description "LiteLLM Proxy Admin UI", and favicon are unchanged.)
|
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
(Matching React Server Components payload for the same page: the buildId, page chunk filenames, and stylesheet hash are updated in lockstep with index.html; the rendered 404 markup and page metadata are unchanged.)
|
|
@ -1,25 +1,13 @@
|
|||
model_list:
|
||||
- litellm_params:
|
||||
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
|
||||
api_key: my-fake-key
|
||||
model: openai/my-fake-model
|
||||
model_name: fake-openai-endpoint
|
||||
- litellm_params:
|
||||
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
|
||||
api_key: my-fake-key-2
|
||||
model: openai/my-fake-model-2
|
||||
model_name: fake-openai-endpoint
|
||||
- litellm_params:
|
||||
api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
|
||||
api_key: my-fake-key-3
|
||||
model: openai/my-fake-model-3
|
||||
model_name: fake-openai-endpoint
|
||||
- model_name: gpt-4
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
- litellm_params:
|
||||
model: together_ai/codellama/CodeLlama-13b-Instruct-hf
|
||||
model_name: CodeLlama-13b-Instruct
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: 2023-07-01-preview
|
||||
model: azure/azure-embedding-model
|
||||
model_info:
|
||||
base_model: text-embedding-ada-002
|
||||
mode: embedding
|
||||
model_name: text-embedding-ada-002
|
||||
|
||||
router_settings:
|
||||
redis_host: redis
|
||||
|
@ -28,6 +16,7 @@ router_settings:
|
|||
|
||||
litellm_settings:
|
||||
set_verbose: True
|
||||
enable_preview_features: true
|
||||
# service_callback: ["prometheus_system"]
|
||||
# success_callback: ["prometheus"]
|
||||
# failure_callback: ["prometheus"]
|
||||
|
|
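The trimmed config above is plain litellm configuration: each model_list entry maps a public model_name to litellm_params, and litellm_settings now opts into enable_preview_features. As a rough illustration (not part of this commit), a config in this shape can also be loaded outside the proxy and handed straight to a Router; the file name below and the use of PyYAML are assumptions.

import yaml      # PyYAML, assumed to be installed
import litellm

# Hypothetical path to a config shaped like the hunk above.
with open("proxy_server_config.yaml") as f:
    config = yaml.safe_load(f)

settings = config.get("litellm_settings", {})
litellm.set_verbose = settings.get("set_verbose", False)
litellm.enable_preview_features = settings.get("enable_preview_features", False)

# model_list entries (model_name + litellm_params) are exactly what Router expects.
router = litellm.Router(model_list=config["model_list"])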
147
litellm/proxy/hooks/azure_content_safety.py
Normal file
147
litellm/proxy/hooks/azure_content_safety.py
Normal file
|
@ -0,0 +1,147 @@
|
|||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
import litellm, traceback, sys, uuid
|
||||
from fastapi import HTTPException
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class _PROXY_AzureContentSafety(
|
||||
CustomLogger
|
||||
): # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
|
||||
# Class variables or attributes
|
||||
|
||||
def __init__(self, endpoint, api_key, thresholds=None):
|
||||
try:
|
||||
from azure.ai.contentsafety.aio import ContentSafetyClient
|
||||
from azure.core.credentials import AzureKeyCredential
|
||||
from azure.ai.contentsafety.models import (
|
||||
TextCategory,
|
||||
AnalyzeTextOptions,
|
||||
AnalyzeTextOutputType,
|
||||
)
|
||||
from azure.core.exceptions import HttpResponseError
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"\033[91mAzure Content-Safety not installed, try running 'pip install azure-ai-contentsafety' to fix this error: {e}\n{traceback.format_exc()}\033[0m"
|
||||
)
|
||||
self.endpoint = endpoint
|
||||
self.api_key = api_key
|
||||
self.text_category = TextCategory
|
||||
self.analyze_text_options = AnalyzeTextOptions
|
||||
self.analyze_text_output_type = AnalyzeTextOutputType
|
||||
self.azure_http_error = HttpResponseError
|
||||
|
||||
self.thresholds = self._configure_thresholds(thresholds)
|
||||
|
||||
self.client = ContentSafetyClient(
|
||||
self.endpoint, AzureKeyCredential(self.api_key)
|
||||
)
|
||||
|
||||
def _configure_thresholds(self, thresholds=None):
|
||||
default_thresholds = {
|
||||
self.text_category.HATE: 4,
|
||||
self.text_category.SELF_HARM: 4,
|
||||
self.text_category.SEXUAL: 4,
|
||||
self.text_category.VIOLENCE: 4,
|
||||
}
|
||||
|
||||
if thresholds is None:
|
||||
return default_thresholds
|
||||
|
||||
for key, default in default_thresholds.items():
|
||||
if key not in thresholds:
|
||||
thresholds[key] = default
|
||||
|
||||
return thresholds
|
||||
|
||||
def _compute_result(self, response):
|
||||
result = {}
|
||||
|
||||
category_severity = {
|
||||
item.category: item.severity for item in response.categories_analysis
|
||||
}
|
||||
for category in self.text_category:
|
||||
severity = category_severity.get(category)
|
||||
if severity is not None:
|
||||
result[category] = {
|
||||
"filtered": severity >= self.thresholds[category],
|
||||
"severity": severity,
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
async def test_violation(self, content: str, source: Optional[str] = None):
|
||||
verbose_proxy_logger.debug("Testing Azure Content-Safety for: %s", content)
|
||||
|
||||
# Construct a request
|
||||
request = self.analyze_text_options(
|
||||
text=content,
|
||||
output_type=self.analyze_text_output_type.EIGHT_SEVERITY_LEVELS,
|
||||
)
|
||||
|
||||
# Analyze text
|
||||
try:
|
||||
response = await self.client.analyze_text(request)
|
||||
except self.azure_http_error as e:
|
||||
verbose_proxy_logger.debug(
|
||||
"Error in Azure Content-Safety: %s", traceback.format_exc()
|
||||
)
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
result = self._compute_result(response)
|
||||
verbose_proxy_logger.debug("Azure Content-Safety Result: %s", result)
|
||||
|
||||
for key, value in result.items():
|
||||
if value["filtered"]:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": "Violated content safety policy",
|
||||
"source": source,
|
||||
"category": key,
|
||||
"severity": value["severity"],
|
||||
},
|
||||
)
|
||||
|
||||
async def async_pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
cache: DualCache,
|
||||
data: dict,
|
||||
call_type: str, # "completion", "embeddings", "image_generation", "moderation"
|
||||
):
|
||||
verbose_proxy_logger.debug("Inside Azure Content-Safety Pre-Call Hook")
|
||||
try:
|
||||
if call_type == "completion" and "messages" in data:
|
||||
for m in data["messages"]:
|
||||
if "content" in m and isinstance(m["content"], str):
|
||||
await self.test_violation(content=m["content"], source="input")
|
||||
|
||||
except HTTPException as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
|
||||
async def async_post_call_success_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
response,
|
||||
):
|
||||
verbose_proxy_logger.debug("Inside Azure Content-Safety Post-Call Hook")
|
||||
if isinstance(response, litellm.ModelResponse) and isinstance(
|
||||
response.choices[0], litellm.utils.Choices
|
||||
):
|
||||
await self.test_violation(
|
||||
content=response.choices[0].message.content, source="output"
|
||||
)
|
||||
|
||||
# async def async_post_call_streaming_hook(
|
||||
# self,
|
||||
# user_api_key_dict: UserAPIKeyAuth,
|
||||
# response: str,
|
||||
# ):
|
||||
# verbose_proxy_logger.debug("Inside Azure Content-Safety Call-Stream Hook")
|
||||
# await self.test_violation(content=response, source="output")
|
|
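The decision the new hook makes is small: Azure Content-Safety returns a severity per harm category, and a request or response is rejected once any severity reaches the configured threshold (4 by default, overridable per category). Below is a self-contained sketch of that comparison; the category names and severities are invented stand-ins for the azure.ai.contentsafety response, not SDK values.

# Standalone sketch of the threshold logic in _compute_result / test_violation.
DEFAULT_THRESHOLDS = {"Hate": 4, "SelfHarm": 4, "Sexual": 4, "Violence": 4}


def compute_result(category_severity, thresholds=DEFAULT_THRESHOLDS):
    # A category is "filtered" when its severity meets or exceeds the threshold.
    return {
        category: {
            "filtered": severity >= thresholds.get(category, 4),
            "severity": severity,
        }
        for category, severity in category_severity.items()
    }


print(compute_result({"Hate": 2, "Violence": 6}))
# -> {'Hate': {'filtered': False, 'severity': 2}, 'Violence': {'filtered': True, 'severity': 6}}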
@ -4,6 +4,12 @@ model_list:
|
|||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
- model_name: llama3
|
||||
litellm_params:
|
||||
model: groq/llama3-8b-8192
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: openai/*
|
||||
|
|
|
@ -2255,6 +2255,23 @@ class ProxyConfig:
|
|||
|
||||
batch_redis_obj = _PROXY_BatchRedisRequests()
|
||||
imported_list.append(batch_redis_obj)
|
||||
elif (
|
||||
isinstance(callback, str)
|
||||
and callback == "azure_content_safety"
|
||||
):
|
||||
from litellm.proxy.hooks.azure_content_safety import (
|
||||
_PROXY_AzureContentSafety,
|
||||
)
|
||||
|
||||
azure_content_safety_params = litellm_settings["azure_content_safety_params"]
|
||||
for k, v in azure_content_safety_params.items():
|
||||
if v is not None and isinstance(v, str) and v.startswith("os.environ/"):
|
||||
azure_content_safety_params[k] = litellm.get_secret(v)
|
||||
|
||||
azure_content_safety_obj = _PROXY_AzureContentSafety(
|
||||
**azure_content_safety_params,
|
||||
)
|
||||
imported_list.append(azure_content_safety_obj)
|
||||
else:
|
||||
imported_list.append(
|
||||
get_instance_fn(
|
||||
|
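azure_content_safety_params follows the proxy's existing secret convention: any string value written as "os.environ/<VAR>" is resolved at startup (via litellm.get_secret in the code above). A minimal sketch of that resolution using plain os.environ; the parameter values are hypothetical.

import os

# Hypothetical litellm_settings.azure_content_safety_params block.
azure_content_safety_params = {
    "endpoint": "os.environ/AZURE_CONTENT_SAFETY_ENDPOINT",
    "api_key": "os.environ/AZURE_CONTENT_SAFETY_API_KEY",
    "thresholds": {"Hate": 4},
}

for k, v in azure_content_safety_params.items():
    # Only string values carrying the "os.environ/" prefix are resolved.
    if isinstance(v, str) and v.startswith("os.environ/"):
        azure_content_safety_params[k] = os.environ.get(v[len("os.environ/"):])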
@ -3639,7 +3656,7 @@ async def chat_completion(
|
|||
### MODEL ALIAS MAPPING ###
|
||||
# check if model name in model alias map
|
||||
# get the actual model name
|
||||
if data["model"] in litellm.model_alias_map:
|
||||
if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
|
||||
data["model"] = litellm.model_alias_map[data["model"]]
|
||||
|
||||
## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
|
||||
|
@ -3673,6 +3690,9 @@ async def chat_completion(
|
|||
# skip router if user passed their key
|
||||
if "api_key" in data:
|
||||
tasks.append(litellm.acompletion(**data))
|
||||
elif isinstance(data["model"], list) and llm_router is not None:
|
||||
_models = data.pop("model")
|
||||
tasks.append(llm_router.abatch_completion(models=_models, **data))
|
||||
elif "user_config" in data:
|
||||
# initialize a new router instance. make request using this Router
|
||||
router_config = data.pop("user_config")
|
||||
|
|
|
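With the dispatch change above, /chat/completions also accepts a list of model names and fans the request out through llm_router.abatch_completion. A hedged client-side sketch; the proxy URL and key are placeholders for a locally running proxy.

import requests

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",          # placeholder proxy address
    headers={"Authorization": "Bearer sk-1234"},      # placeholder key
    json={
        "model": ["gpt-3.5-turbo", "groq-llama"],     # a list triggers batch completion
        "messages": [{"role": "user", "content": "is litellm becoming a better product ?"}],
        "max_tokens": 15,
    },
)
print(resp.json())  # one completion (or error) per requested model group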
@ -48,6 +48,7 @@ from litellm.types.router import (
|
|||
AlertingConfig,
|
||||
)
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.llms.azure import get_azure_ad_token_from_oidc
|
||||
|
||||
|
||||
class Router:
|
||||
|
@ -605,6 +606,33 @@ class Router:
|
|||
self.fail_calls[model_name] += 1
|
||||
raise e
|
||||
|
||||
async def abatch_completion(
|
||||
self, models: List[str], messages: List[Dict[str, str]], **kwargs
|
||||
):
|
||||
|
||||
async def _async_completion_no_exceptions(
|
||||
model: str, messages: List[Dict[str, str]], **kwargs
|
||||
):
|
||||
"""
|
||||
Wrapper around self.async_completion that catches exceptions and returns them as a result
|
||||
"""
|
||||
try:
|
||||
return await self.acompletion(model=model, messages=messages, **kwargs)
|
||||
except Exception as e:
|
||||
return e
|
||||
|
||||
_tasks = []
|
||||
for model in models:
|
||||
# add each task but if the task fails
|
||||
_tasks.append(
|
||||
_async_completion_no_exceptions(
|
||||
model=model, messages=messages, **kwargs
|
||||
)
|
||||
)
|
||||
|
||||
response = await asyncio.gather(*_tasks)
|
||||
return response
|
||||
|
||||
def image_generation(self, prompt: str, model: str, **kwargs):
|
||||
try:
|
||||
kwargs["model"] = model
|
||||
|
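The same fan-out is usable directly from the SDK. A short sketch, assuming credentials for both providers are set in the environment; as the wrapper above shows, failures come back in-place in the result list rather than raising.

import asyncio
import litellm

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "groq-llama", "litellm_params": {"model": "groq/llama3-8b-8192"}},
    ]
)


async def main():
    # One entry per requested model group, gathered concurrently.
    responses = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[{"role": "user", "content": "is litellm becoming a better product ?"}],
        max_tokens=15,
    )
    for r in responses:
        print(r)


asyncio.run(main())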
@ -1480,26 +1508,29 @@ class Router:
|
|||
except Exception as e:
|
||||
original_exception = e
|
||||
"""
|
||||
- Check if available deployments - 'get_healthy_deployments() -> List`
|
||||
- if no, Check if available fallbacks - `is_fallback(model_group: str, exception) -> bool`
|
||||
- if no, back-off and retry up till num_retries - `_router_should_retry -> float`
|
||||
Retry Logic
|
||||
|
||||
"""
|
||||
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
|
||||
if (
|
||||
isinstance(original_exception, litellm.ContextWindowExceededError)
|
||||
and context_window_fallbacks is not None
|
||||
) or (
|
||||
isinstance(original_exception, openai.RateLimitError)
|
||||
and fallbacks is not None
|
||||
):
|
||||
raise original_exception
|
||||
### RETRY
|
||||
_healthy_deployments = await self._async_get_healthy_deployments(
|
||||
model=kwargs.get("model"),
|
||||
)
|
||||
|
||||
_timeout = self._router_should_retry(
|
||||
# raises an exception if this error should not be retried
|
||||
self.should_retry_this_error(
|
||||
error=e,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
context_window_fallbacks=context_window_fallbacks,
|
||||
)
|
||||
|
||||
# decides how long to sleep before retry
|
||||
_timeout = self._time_to_sleep_before_retry(
|
||||
e=original_exception,
|
||||
remaining_retries=num_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
)
|
||||
|
||||
# sleeps for the length of the timeout
|
||||
await asyncio.sleep(_timeout)
|
||||
|
||||
if (
|
||||
|
@ -1533,10 +1564,14 @@ class Router:
|
|||
## LOGGING
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||
remaining_retries = num_retries - current_attempt
|
||||
_timeout = self._router_should_retry(
|
||||
_healthy_deployments = await self._async_get_healthy_deployments(
|
||||
model=kwargs.get("model"),
|
||||
)
|
||||
_timeout = self._time_to_sleep_before_retry(
|
||||
e=original_exception,
|
||||
remaining_retries=remaining_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
)
|
||||
await asyncio.sleep(_timeout)
|
||||
try:
|
||||
|
@ -1545,6 +1580,40 @@ class Router:
|
|||
pass
|
||||
raise original_exception
|
||||
|
||||
def should_retry_this_error(
|
||||
self,
|
||||
error: Exception,
|
||||
healthy_deployments: Optional[List] = None,
|
||||
context_window_fallbacks: Optional[List] = None,
|
||||
):
|
||||
"""
|
||||
1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
|
||||
|
||||
2. raise an exception for RateLimitError if
|
||||
- there are no fallbacks
|
||||
- there are no healthy deployments in the same model group
|
||||
"""
|
||||
|
||||
_num_healthy_deployments = 0
|
||||
if healthy_deployments is not None and isinstance(healthy_deployments, list):
|
||||
_num_healthy_deployments = len(healthy_deployments)
|
||||
|
||||
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
|
||||
if (
|
||||
isinstance(error, litellm.ContextWindowExceededError)
|
||||
and context_window_fallbacks is None
|
||||
):
|
||||
raise error
|
||||
|
||||
# Error we should only retry if there are other deployments
|
||||
if isinstance(error, openai.RateLimitError) or isinstance(
|
||||
error, openai.AuthenticationError
|
||||
):
|
||||
if _num_healthy_deployments <= 0:
|
||||
raise error
|
||||
|
||||
return True
|
||||
|
||||
def function_with_fallbacks(self, *args, **kwargs):
|
||||
"""
|
||||
Try calling the function_with_retries
|
||||
|
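Condensed, should_retry_this_error raises straight away when retrying cannot help and returns True otherwise. A standalone sketch of the rule; the exception classes are local stand-ins for the litellm/openai types (the real check also covers openai.AuthenticationError).

class ContextWindowExceededError(Exception):
    pass


class RateLimitError(Exception):
    pass


def should_retry_this_error(error, healthy_deployments=None, context_window_fallbacks=None):
    # Context-window errors only deserve a retry when a fallback model group exists.
    if isinstance(error, ContextWindowExceededError) and context_window_fallbacks is None:
        raise error
    # Rate-limit (and auth) errors only deserve a retry when another healthy deployment exists.
    if isinstance(error, RateLimitError) and not (healthy_deployments or []):
        raise error
    return True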
@ -1633,12 +1702,27 @@ class Router:
|
|||
raise e
|
||||
raise original_exception
|
||||
|
||||
def _router_should_retry(
|
||||
self, e: Exception, remaining_retries: int, num_retries: int
|
||||
def _time_to_sleep_before_retry(
|
||||
self,
|
||||
e: Exception,
|
||||
remaining_retries: int,
|
||||
num_retries: int,
|
||||
healthy_deployments: Optional[List] = None,
|
||||
) -> Union[int, float]:
|
||||
"""
|
||||
Calculate back-off, then retry
|
||||
|
||||
It should instantly retry only when:
|
||||
1. there are healthy deployments in the same model group
|
||||
2. there are fallbacks for the completion call
|
||||
"""
|
||||
if (
|
||||
healthy_deployments is not None
|
||||
and isinstance(healthy_deployments, list)
|
||||
and len(healthy_deployments) > 0
|
||||
):
|
||||
return 0
|
||||
|
||||
if hasattr(e, "response") and hasattr(e.response, "headers"):
|
||||
timeout = litellm._calculate_retry_after(
|
||||
remaining_retries=remaining_retries,
|
||||
|
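The renamed _time_to_sleep_before_retry only decides how long to wait; the new behaviour is the fast path that returns 0 whenever another healthy deployment is available. A simplified sketch that drops the exception argument and substitutes a plain exponential back-off where the real code consults Retry-After headers via litellm._calculate_retry_after.

def time_to_sleep_before_retry(remaining_retries, num_retries, healthy_deployments=None):
    # Retry immediately when the model group still has a healthy deployment.
    if healthy_deployments:
        return 0
    # Otherwise back off; illustration only, capped at 60 seconds.
    attempt = num_retries - remaining_retries
    return min(2 ** attempt, 60)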
@ -1675,23 +1759,29 @@ class Router:
|
|||
except Exception as e:
|
||||
original_exception = e
|
||||
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
|
||||
if (
|
||||
isinstance(original_exception, litellm.ContextWindowExceededError)
|
||||
and context_window_fallbacks is not None
|
||||
) or (
|
||||
isinstance(original_exception, openai.RateLimitError)
|
||||
and fallbacks is not None
|
||||
):
|
||||
raise original_exception
|
||||
## LOGGING
|
||||
if num_retries > 0:
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
||||
### RETRY
|
||||
_timeout = self._router_should_retry(
|
||||
_healthy_deployments = self._get_healthy_deployments(
|
||||
model=kwargs.get("model"),
|
||||
)
|
||||
|
||||
# raises an exception if this error should not be retried
|
||||
self.should_retry_this_error(
|
||||
error=e,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
context_window_fallbacks=context_window_fallbacks,
|
||||
)
|
||||
|
||||
# decides how long to sleep before retry
|
||||
_timeout = self._time_to_sleep_before_retry(
|
||||
e=original_exception,
|
||||
remaining_retries=num_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
if num_retries > 0:
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
||||
|
||||
time.sleep(_timeout)
|
||||
for current_attempt in range(num_retries):
|
||||
verbose_router_logger.debug(
|
||||
|
@ -1705,11 +1795,15 @@ class Router:
|
|||
except Exception as e:
|
||||
## LOGGING
|
||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||
_healthy_deployments = self._get_healthy_deployments(
|
||||
model=kwargs.get("model"),
|
||||
)
|
||||
remaining_retries = num_retries - current_attempt
|
||||
_timeout = self._router_should_retry(
|
||||
_timeout = self._time_to_sleep_before_retry(
|
||||
e=e,
|
||||
remaining_retries=remaining_retries,
|
||||
num_retries=num_retries,
|
||||
healthy_deployments=_healthy_deployments,
|
||||
)
|
||||
time.sleep(_timeout)
|
||||
raise original_exception
|
||||
|
@ -1912,6 +2006,47 @@ class Router:
|
|||
verbose_router_logger.debug(f"retrieve cooldown models: {cooldown_models}")
|
||||
return cooldown_models
|
||||
|
||||
def _get_healthy_deployments(self, model: str):
|
||||
_all_deployments: list = []
|
||||
try:
|
||||
_, _all_deployments = self._common_checks_available_deployment( # type: ignore
|
||||
model=model,
|
||||
)
|
||||
if type(_all_deployments) == dict:
|
||||
return []
|
||||
except:
|
||||
pass
|
||||
|
||||
unhealthy_deployments = self._get_cooldown_deployments()
|
||||
healthy_deployments: list = []
|
||||
for deployment in _all_deployments:
|
||||
if deployment["model_info"]["id"] in unhealthy_deployments:
|
||||
continue
|
||||
else:
|
||||
healthy_deployments.append(deployment)
|
||||
|
||||
return healthy_deployments
|
||||
|
||||
async def _async_get_healthy_deployments(self, model: str):
|
||||
_all_deployments: list = []
|
||||
try:
|
||||
_, _all_deployments = self._common_checks_available_deployment( # type: ignore
|
||||
model=model,
|
||||
)
|
||||
if type(_all_deployments) == dict:
|
||||
return []
|
||||
except:
|
||||
pass
|
||||
|
||||
unhealthy_deployments = await self._async_get_cooldown_deployments()
|
||||
healthy_deployments: list = []
|
||||
for deployment in _all_deployments:
|
||||
if deployment["model_info"]["id"] in unhealthy_deployments:
|
||||
continue
|
||||
else:
|
||||
healthy_deployments.append(deployment)
|
||||
return healthy_deployments
|
||||
|
||||
def routing_strategy_pre_call_checks(self, deployment: dict):
|
||||
"""
|
||||
Mimics 'async_routing_strategy_pre_call_checks'
|
||||
|
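Both helpers reduce to the same set difference: every deployment returned for the model group minus anything currently in cooldown. A compact sketch of that filter with hypothetical deployment dicts.

def healthy_deployments(all_deployments, cooldown_ids):
    # Keep deployments whose id is not currently cooling down.
    return [d for d in all_deployments if d["model_info"]["id"] not in cooldown_ids]


print(healthy_deployments(
    [{"model_info": {"id": "1"}}, {"model_info": {"id": "2"}}],
    cooldown_ids={"2"},
))
# -> [{'model_info': {'id': '1'}}]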
@ -2120,6 +2255,10 @@ class Router:
|
|||
raise ValueError(
|
||||
f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}"
|
||||
)
|
||||
azure_ad_token = litellm_params.get("azure_ad_token")
|
||||
if azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
if api_version is None:
|
||||
api_version = "2023-07-01-preview"
|
||||
|
||||
|
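azure_ad_token values prefixed with "oidc/" are now exchanged for an Azure AD token via get_azure_ad_token_from_oidc before the Azure clients are created. A hedged example of a deployment entry using that prefix; the endpoint is a placeholder, "oidc/circleci_v2/" is one provider path used elsewhere in this diff, and the exchange only succeeds when the matching OIDC token is present in the environment.

import litellm

router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_base": "https://my-endpoint.openai.azure.com",  # placeholder
                # The "oidc/" prefix triggers the OIDC -> Azure AD token exchange.
                "azure_ad_token": "oidc/circleci_v2/",
            },
        }
    ]
)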
@ -2131,6 +2270,7 @@ class Router:
|
|||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncAzureOpenAI(
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=timeout,
|
||||
|
@ -2155,6 +2295,7 @@ class Router:
|
|||
cache_key = f"{model_id}_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=timeout,
|
||||
|
@ -2179,6 +2320,7 @@ class Router:
|
|||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=stream_timeout,
|
||||
|
@ -2203,6 +2345,7 @@ class Router:
|
|||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=stream_timeout,
|
||||
|
@ -2235,6 +2378,7 @@ class Router:
|
|||
"api_key": api_key,
|
||||
"azure_endpoint": api_base,
|
||||
"api_version": api_version,
|
||||
"azure_ad_token": azure_ad_token,
|
||||
}
|
||||
from litellm.llms.azure import select_azure_base_url_or_endpoint
|
||||
|
||||
|
@ -2334,7 +2478,7 @@ class Router:
|
|||
) # cache for 1 hr
|
||||
|
||||
else:
|
||||
_api_key = api_key
|
||||
_api_key = api_key # type: ignore
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
|
@ -2562,23 +2706,25 @@ class Router:
|
|||
# init OpenAI, Azure clients
|
||||
self.set_client(model=deployment.to_json(exclude_none=True))
|
||||
|
||||
# set region (if azure model)
|
||||
_auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
|
||||
if _auto_infer_region == True or _auto_infer_region == "True":
|
||||
# set region (if azure model) ## PREVIEW FEATURE ##
|
||||
if litellm.enable_preview_features == True:
|
||||
print("Auto inferring region") # noqa
|
||||
"""
|
||||
Hiding behind a feature flag
|
||||
When there is a large amount of LLM deployments this makes startup times blow up
|
||||
"""
|
||||
try:
|
||||
if "azure" in deployment.litellm_params.model:
|
||||
if (
|
||||
"azure" in deployment.litellm_params.model
|
||||
and deployment.litellm_params.region_name is None
|
||||
):
|
||||
region = litellm.utils.get_model_region(
|
||||
litellm_params=deployment.litellm_params, mode=None
|
||||
)
|
||||
|
||||
deployment.litellm_params.region_name = region
|
||||
except Exception as e:
|
||||
verbose_router_logger.error(
|
||||
verbose_router_logger.debug(
|
||||
"Unable to get the region for azure model - {}, {}".format(
|
||||
deployment.litellm_params.model, str(e)
|
||||
)
|
||||
|
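Region auto-inference for Azure deployments now sits behind litellm.enable_preview_features instead of the AUTO_INFER_REGION environment variable, because probing every deployment can make startup slow. Opting in is a single flag set before the Router is constructed.

import litellm

# Preview flag: lets the Router infer region_name for Azure deployments
# that do not set it explicitly (can slow startup with many deployments).
litellm.enable_preview_features = True

# router = litellm.Router(model_list=[...])  # build the router as usual afterwards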
@ -2956,7 +3102,7 @@ class Router:
|
|||
):
|
||||
# check if in allowed_model_region
|
||||
if (
|
||||
_is_region_eu(model_region=_litellm_params["region_name"])
|
||||
_is_region_eu(litellm_params=LiteLLM_Params(**_litellm_params))
|
||||
== False
|
||||
):
|
||||
invalid_model_indices.append(idx)
|
||||
|
|
|
@ -312,7 +312,7 @@ async def test_langfuse_logging_metadata(langfuse_client):
|
|||
metadata["existing_trace_id"] = trace_id
|
||||
|
||||
langfuse_client.flush()
|
||||
await asyncio.sleep(2)
|
||||
await asyncio.sleep(10)
|
||||
|
||||
# Tests the metadata filtering and the override of the output to be the last generation
|
||||
for trace_id, generation_ids in trace_identifiers.items():
|
||||
|
@ -339,6 +339,13 @@ async def test_langfuse_logging_metadata(langfuse_client):
|
|||
for generation_id, generation in zip(generation_ids, generations):
|
||||
assert generation.id == generation_id
|
||||
assert generation.trace_id == trace_id
|
||||
print(
|
||||
"common keys in trace",
|
||||
set(generation.metadata.keys()).intersection(
|
||||
expected_filtered_metadata_keys
|
||||
),
|
||||
)
|
||||
|
||||
assert set(generation.metadata.keys()).isdisjoint(
|
||||
expected_filtered_metadata_keys
|
||||
)
|
||||
|
|
290
litellm/tests/test_azure_content_safety.py
Normal file
290
litellm/tests/test_azure_content_safety.py
Normal file
|
@ -0,0 +1,290 @@
|
|||
# What is this?
|
||||
## Unit test for azure content safety
|
||||
import sys, os, asyncio, time, random
|
||||
from datetime import datetime
|
||||
import traceback
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import HTTPException
|
||||
|
||||
load_dotenv()
|
||||
import os
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
import litellm
|
||||
from litellm import Router, mock_completion
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.caching import DualCache
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_strict_input_filtering_01():
|
||||
"""
|
||||
- have a response with a filtered input
|
||||
- call the pre call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 2},
|
||||
)
|
||||
|
||||
data = {
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an helpfull assistant"},
|
||||
{"role": "user", "content": "Fuck yourself you stupid bitch"},
|
||||
]
|
||||
}
|
||||
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
await azure_content_safety.async_pre_call_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(),
|
||||
cache=DualCache(),
|
||||
data=data,
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
assert exc_info.value.detail["source"] == "input"
|
||||
assert exc_info.value.detail["category"] == "Hate"
|
||||
assert exc_info.value.detail["severity"] == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_strict_input_filtering_02():
|
||||
"""
|
||||
- have a response with a filtered input
|
||||
- call the pre call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 2},
|
||||
)
|
||||
|
||||
data = {
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an helpfull assistant"},
|
||||
{"role": "user", "content": "Hello how are you ?"},
|
||||
]
|
||||
}
|
||||
|
||||
await azure_content_safety.async_pre_call_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(),
|
||||
cache=DualCache(),
|
||||
data=data,
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_loose_input_filtering_01():
|
||||
"""
|
||||
- have a response with a filtered input
|
||||
- call the pre call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 8},
|
||||
)
|
||||
|
||||
data = {
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an helpfull assistant"},
|
||||
{"role": "user", "content": "Fuck yourself you stupid bitch"},
|
||||
]
|
||||
}
|
||||
|
||||
await azure_content_safety.async_pre_call_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(),
|
||||
cache=DualCache(),
|
||||
data=data,
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_loose_input_filtering_02():
|
||||
"""
|
||||
- have a response with a filtered input
|
||||
- call the pre call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 8},
|
||||
)
|
||||
|
||||
data = {
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are an helpfull assistant"},
|
||||
{"role": "user", "content": "Hello how are you ?"},
|
||||
]
|
||||
}
|
||||
|
||||
await azure_content_safety.async_pre_call_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(),
|
||||
cache=DualCache(),
|
||||
data=data,
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_strict_output_filtering_01():
|
||||
"""
|
||||
- have a response with a filtered output
|
||||
- call the post call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 2},
|
||||
)
|
||||
|
||||
response = mock_completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a song writer expert. You help users to write songs about any topic in any genre.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Help me write a rap text song. Add some insults to make it more credible.",
|
||||
},
|
||||
],
|
||||
mock_response="I'm the king of the mic, you're just a fucking dick. Don't fuck with me your stupid bitch.",
|
||||
)
|
||||
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
await azure_content_safety.async_post_call_success_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(), response=response
|
||||
)
|
||||
|
||||
assert exc_info.value.detail["source"] == "output"
|
||||
assert exc_info.value.detail["category"] == "Hate"
|
||||
assert exc_info.value.detail["severity"] == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_strict_output_filtering_02():
|
||||
"""
|
||||
- have a response with a filtered output
|
||||
- call the post call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 2},
|
||||
)
|
||||
|
||||
response = mock_completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a song writer expert. You help users to write songs about any topic in any genre.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Help me write a rap text song. Add some insults to make it more credible.",
|
||||
},
|
||||
],
|
||||
mock_response="I'm unable to help with you with hate speech",
|
||||
)
|
||||
|
||||
await azure_content_safety.async_post_call_success_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(), response=response
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_loose_output_filtering_01():
|
||||
"""
|
||||
- have a response with a filtered output
|
||||
- call the post call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 8},
|
||||
)
|
||||
|
||||
response = mock_completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a song writer expert. You help users to write songs about any topic in any genre.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Help me write a rap text song. Add some insults to make it more credible.",
|
||||
},
|
||||
],
|
||||
mock_response="I'm the king of the mic, you're just a fucking dick. Don't fuck with me your stupid bitch.",
|
||||
)
|
||||
|
||||
await azure_content_safety.async_post_call_success_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(), response=response
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason="beta feature - local testing is failing")
|
||||
async def test_loose_output_filtering_02():
|
||||
"""
|
||||
- have a response with a filtered output
|
||||
- call the post call hook
|
||||
"""
|
||||
from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
|
||||
|
||||
azure_content_safety = _PROXY_AzureContentSafety(
|
||||
endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
|
||||
api_key=os.getenv("AZURE_CONTENT_SAFETY_API_KEY"),
|
||||
thresholds={"Hate": 8},
|
||||
)
|
||||
|
||||
response = mock_completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a song writer expert. You help users to write songs about any topic in any genre.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Help me write a rap text song. Add some insults to make it more credible.",
|
||||
},
|
||||
],
|
||||
mock_response="I'm unable to help with you with hate speech",
|
||||
)
|
||||
|
||||
await azure_content_safety.async_post_call_success_hook(
|
||||
user_api_key_dict=UserAPIKeyAuth(), response=response
|
||||
)
|
|
@ -206,6 +206,35 @@ def test_completion_bedrock_claude_sts_client_auth():
|
|||
|
||||
# test_completion_bedrock_claude_sts_client_auth()
|
||||
|
||||
@pytest.mark.skip(reason="We don't have Circle CI OIDC credentials as yet")
|
||||
def test_completion_bedrock_claude_sts_oidc_auth():
|
||||
print("\ncalling bedrock claude with oidc auth")
|
||||
import os
|
||||
|
||||
aws_web_identity_token = "oidc/circleci_v2/"
|
||||
aws_region_name = os.environ["AWS_REGION_NAME"]
|
||||
aws_role_name = os.environ["AWS_TEMP_ROLE_NAME"]
|
||||
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
aws_region_name=aws_region_name,
|
||||
aws_web_identity_token=aws_web_identity_token,
|
||||
aws_role_name=aws_role_name,
|
||||
aws_session_name="my-test-session",
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_bedrock_extra_headers():
|
||||
try:
|
||||
|
|
103
litellm/tests/test_clarifai_completion.py
Normal file
103
litellm/tests/test_clarifai_completion.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
import sys, os
|
||||
import traceback
|
||||
from dotenv import load_dotenv
|
||||
import asyncio, logging
|
||||
|
||||
load_dotenv()
|
||||
import os, io
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
import litellm
|
||||
from litellm import (
|
||||
embedding,
|
||||
completion,
|
||||
acompletion,
|
||||
acreate,
|
||||
completion_cost,
|
||||
Timeout,
|
||||
ModelResponse,
|
||||
)
|
||||
from litellm import RateLimitError
|
||||
|
||||
# litellm.num_retries = 3
|
||||
litellm.cache = None
|
||||
litellm.success_callback = []
|
||||
user_message = "Write a short poem about the sky"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_callbacks():
|
||||
print("\npytest fixture - resetting callbacks")
|
||||
litellm.success_callback = []
|
||||
litellm._async_success_callback = []
|
||||
litellm.failure_callback = []
|
||||
litellm.callbacks = []
|
||||
|
||||
|
||||
def test_completion_clarifai_claude_2_1():
|
||||
print("calling clarifai claude completion")
|
||||
import os
|
||||
|
||||
clarifai_pat = os.environ["CLARIFAI_API_KEY"]
|
||||
|
||||
try:
|
||||
response = completion(
|
||||
model="clarifai/anthropic.completion.claude-2_1",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.1,
|
||||
)
|
||||
print(response)
|
||||
|
||||
except RateLimitError:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occured: {e}")
|
||||
|
||||
|
||||
def test_completion_clarifai_mistral_large():
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
response: ModelResponse = completion(
|
||||
model="clarifai/mistralai.completion.mistral-small",
|
||||
messages=messages,
|
||||
max_tokens=10,
|
||||
temperature=0.78,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
assert len(response.choices) > 0
|
||||
assert len(response.choices[0].message.content) > 0
|
||||
except RateLimitError:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
def test_async_completion_clarifai():
|
||||
import asyncio
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
async def test_get_response():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
try:
|
||||
response = await acompletion(
|
||||
model="clarifai/openai.chat-completion.GPT-4",
|
||||
messages=messages,
|
||||
timeout=10,
|
||||
api_key=os.getenv("CLARIFAI_API_KEY"),
|
||||
)
|
||||
print(f"response: {response}")
|
||||
except litellm.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
|
||||
asyncio.run(test_get_response())
|
|
@ -1305,7 +1305,7 @@ def test_hf_classifier_task():
|
|||
|
||||
########################### End of Hugging Face Tests ##############################################
|
||||
# def test_completion_hf_api():
|
||||
# # failing on circle ci commenting out
|
||||
# # failing on circle-ci commenting out
|
||||
# try:
|
||||
# user_message = "write some code to find the sum of two numbers"
|
||||
# messages = [{ "content": user_message,"role": "user"}]
|
||||
|
@ -3300,6 +3300,25 @@ def test_completion_watsonx():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_completion_stream_watsonx():
|
||||
litellm.set_verbose = True
|
||||
model_name = "watsonx/ibm/granite-13b-chat-v2"
|
||||
try:
|
||||
response = completion(
|
||||
model=model_name,
|
||||
messages=messages,
|
||||
stop=["stop"],
|
||||
max_tokens=20,
|
||||
stream=True,
|
||||
)
|
||||
for chunk in response:
|
||||
print(chunk)
|
||||
except litellm.APIError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"provider, model, project, region_name, token",
|
||||
[
|
||||
|
@ -3364,6 +3383,26 @@ async def test_acompletion_watsonx():
|
|||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_acompletion_stream_watsonx():
|
||||
litellm.set_verbose = True
|
||||
model_name = "watsonx/ibm/granite-13b-chat-v2"
|
||||
print("testing watsonx")
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model=model_name,
|
||||
messages=messages,
|
||||
temperature=0.2,
|
||||
max_tokens=80,
|
||||
stream=True,
|
||||
)
|
||||
# Add any assertions here to check the response
|
||||
async for chunk in response:
|
||||
print(chunk)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
# test_completion_palm_stream()
|
||||
|
||||
# test_completion_deep_infra()
|
||||
|
|
|
@ -11,7 +11,6 @@ litellm.failure_callback = ["lunary"]
|
|||
litellm.success_callback = ["lunary"]
|
||||
litellm.set_verbose = True
|
||||
|
||||
|
||||
def test_lunary_logging():
|
||||
try:
|
||||
response = completion(
|
||||
|
@ -59,9 +58,46 @@ def test_lunary_logging_with_metadata():
|
|||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
#test_lunary_logging_with_metadata()
|
||||
|
||||
# test_lunary_logging_with_metadata()
|
||||
def test_lunary_with_tools():
|
||||
|
||||
import litellm
|
||||
|
||||
messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}]
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
response = litellm.completion(
|
||||
model="gpt-3.5-turbo-1106",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
tool_choice="auto", # auto is default, but we'll be explicit
|
||||
)
|
||||
|
||||
response_message = response.choices[0].message
|
||||
print("\nLLM Response:\n", response.choices[0].message)
|
||||
|
||||
|
||||
#test_lunary_with_tools()
|
||||
|
||||
def test_lunary_logging_with_streaming_and_metadata():
|
||||
try:
|
||||
|
|
|
@ -109,7 +109,18 @@ def mock_patch_aimage_generation():
|
|||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client_no_auth():
|
||||
def fake_env_vars(monkeypatch):
|
||||
# Set some fake environment variables
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "fake_openai_api_key")
|
||||
monkeypatch.setenv("OPENAI_API_BASE", "http://fake-openai-api-base")
|
||||
monkeypatch.setenv("AZURE_API_BASE", "http://fake-azure-api-base")
|
||||
monkeypatch.setenv("AZURE_OPENAI_API_KEY", "fake_azure_openai_api_key")
|
||||
monkeypatch.setenv("AZURE_SWEDEN_API_BASE", "http://fake-azure-sweden-api-base")
|
||||
monkeypatch.setenv("REDIS_HOST", "localhost")
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client_no_auth(fake_env_vars):
|
||||
# Assuming litellm.proxy.proxy_server is an object
|
||||
from litellm.proxy.proxy_server import cleanup_router_config_variables
|
||||
|
||||
|
@ -495,7 +506,18 @@ def test_chat_completion_optional_params(mock_acompletion, client_no_auth):
|
|||
from litellm.proxy.proxy_server import ProxyConfig
|
||||
|
||||
|
||||
def test_load_router_config():
|
||||
@mock.patch("litellm.proxy.proxy_server.litellm.Cache")
|
||||
def test_load_router_config(mock_cache, fake_env_vars):
|
||||
mock_cache.return_value.cache.__dict__ = {"redis_client": None}
|
||||
mock_cache.return_value.supported_call_types = [
|
||||
"completion",
|
||||
"acompletion",
|
||||
"embedding",
|
||||
"aembedding",
|
||||
"atranscription",
|
||||
"transcription",
|
||||
]
|
||||
|
||||
try:
|
||||
import asyncio
|
||||
|
||||
|
@ -557,6 +579,10 @@ def test_load_router_config():
|
|||
litellm.disable_cache()
|
||||
|
||||
print("testing reading proxy config for cache with params")
|
||||
mock_cache.return_value.supported_call_types = [
|
||||
"embedding",
|
||||
"aembedding",
|
||||
]
|
||||
asyncio.run(
|
||||
proxy_config.load_config(
|
||||
router=None,
|
||||
|
|
|
@ -687,6 +687,55 @@ def test_router_context_window_check_pre_call_check_out_group():
|
|||
pytest.fail(f"Got unexpected exception on router! - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("allowed_model_region", ["eu", None])
|
||||
def test_router_region_pre_call_check(allowed_model_region):
|
||||
"""
|
||||
If region based routing set
|
||||
- check if only model in allowed region is allowed by '_pre_call_checks'
|
||||
"""
|
||||
model_list = [
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"base_model": "azure/gpt-35-turbo",
|
||||
"region_name": "eu",
|
||||
},
|
||||
"model_info": {"id": "1"},
|
||||
},
|
||||
{
|
||||
"model_name": "gpt-3.5-turbo-large", # openai model name
|
||||
"litellm_params": { # params for litellm completion/embedding call
|
||||
"model": "gpt-3.5-turbo-1106",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
},
|
||||
"model_info": {"id": "2"},
|
||||
},
|
||||
]
|
||||
|
||||
router = Router(model_list=model_list, enable_pre_call_checks=True)
|
||||
|
||||
_healthy_deployments = router._pre_call_checks(
|
||||
model="gpt-3.5-turbo",
|
||||
healthy_deployments=model_list,
|
||||
messages=[{"role": "user", "content": "Hey!"}],
|
||||
allowed_model_region=allowed_model_region,
|
||||
)
|
||||
|
||||
if allowed_model_region is None:
|
||||
assert len(_healthy_deployments) == 2
|
||||
else:
|
||||
assert len(_healthy_deployments) == 1, "No models selected as healthy"
|
||||
assert (
|
||||
_healthy_deployments[0]["model_info"]["id"] == "1"
|
||||
), "Incorrect model id picked. Got id={}, expected id=1".format(
|
||||
_healthy_deployments[0]["model_info"]["id"]
|
||||
)
|
||||
|
||||
|
||||
### FUNCTION CALLING
|
||||
|
||||
|
||||
|
|
60
litellm/tests/test_router_batch_completion.py
Normal file
60
litellm/tests/test_router_batch_completion.py
Normal file
|
@@ -0,0 +1,60 @@
#### What this tests ####
# This tests litellm router with batch completion

import sys, os, time, openai
import traceback, asyncio
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.router import Deployment, LiteLLM_Params, ModelInfo
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
import os, httpx

load_dotenv()


@pytest.mark.asyncio
async def test_batch_completion_multiple_models():
    litellm.set_verbose = True

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
            {
                "model_name": "groq-llama",
                "litellm_params": {
                    "model": "groq/llama3-8b-8192",
                },
            },
        ]
    )

    response = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[
            {"role": "user", "content": "is litellm becoming a better product ?"}
        ],
        max_tokens=15,
    )

    print(response)
    assert len(response) == 2

    models_in_responses = []
    for individual_response in response:
        _model = individual_response["model"]
        models_in_responses.append(_model)

    # assert both models are different
    assert models_in_responses[0] != models_in_responses[1]
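For reference, a minimal sketch of driving the new Router.abatch_completion API outside pytest; the Router config mirrors the test above, while the asyncio driver and model names are illustrative, not part of this diff:

import asyncio
import litellm

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "groq-llama", "litellm_params": {"model": "groq/llama3-8b-8192"}},
    ]
)

async def main():
    # one prompt fanned out to both model groups; one response per requested model
    responses = await router.abatch_completion(
        models=["gpt-3.5-turbo", "groq-llama"],
        messages=[{"role": "user", "content": "hello"}],
        max_tokens=15,
    )
    for r in responses:
        print(r["model"])

asyncio.run(main())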
@@ -83,7 +83,6 @@ def test_async_fallbacks(caplog):
    # - error request, falling back notice, success notice
    expected_logs = [
        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}} \nModel: gpt-3.5-turbo\nAPI Base: https://api.openai.com\nMessages: [{'content': 'Hello, how are you?', 'role': 'user'}]\nmodel_group: gpt-3.5-turbo\n\ndeployment: gpt-3.5-turbo\n\x1b[0m",
        "litellm.acompletion(model=None)\x1b[31m Exception No deployments available for selected model, passed model=gpt-3.5-turbo\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
    ]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
        response = router.embedding(**kwargs)
        print(f"customHandler.previous_models: {customHandler.previous_models}")
        time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        assert customHandler.previous_models == 1  # 1 init call, 2 retries, 1 fallback
        router.reset()
    except litellm.Timeout as e:
        pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
        await asyncio.sleep(
            0.05
        )  # allow a delay as success_callbacks are on a separate thread
        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
        assert customHandler.previous_models == 1  # 1 init call with a bad key
        router.reset()
    except litellm.Timeout as e:
        pass
@@ -12,6 +12,7 @@ sys.path.insert(
import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger
import openai, httpx


class MyCustomHandler(CustomLogger):
@@ -191,8 +192,8 @@ async def test_dynamic_router_retry_policy(model_group):
    from litellm.router import RetryPolicy

    model_group_retry_policy = {
        "gpt-3.5-turbo": RetryPolicy(ContentPolicyViolationErrorRetries=0),
        "bad-model": RetryPolicy(AuthenticationErrorRetries=4),
        "gpt-3.5-turbo": RetryPolicy(ContentPolicyViolationErrorRetries=2),
        "bad-model": RetryPolicy(AuthenticationErrorRetries=0),
    }

    router = litellm.Router(
@@ -205,6 +206,33 @@ async def test_dynamic_router_retry_policy(model_group):
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "id": "model-0",
            },
        },
        {
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "id": "model-1",
            },
        },
        {
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "id": "model-2",
            },
        },
        {
            "model_name": "bad-model",  # openai model name
@@ -240,6 +268,264 @@ async def test_dynamic_router_retry_policy(model_group):
    print("customHandler.previous_models: ", customHandler.previous_models)

    if model_group == "bad-model":
        assert customHandler.previous_models == 4
    elif model_group == "gpt-3.5-turbo":
        assert customHandler.previous_models == 0
    elif model_group == "gpt-3.5-turbo":
        assert customHandler.previous_models == 2


"""
Unit Tests for Router Retry Logic

Test 1. Retry Rate Limit Errors when there are other healthy deployments

Test 2. Do not retry rate limit errors when - there are no fallbacks and no healthy deployments

"""

rate_limit_error = openai.RateLimitError(
    message="Rate limit exceeded",
    response=httpx.Response(
        status_code=429,
        request=httpx.Request(method="POST", url="https://api.openai.com/v1"),
    ),
    body={
        "error": {
            "type": "rate_limit_exceeded",
            "param": None,
            "code": "rate_limit_exceeded",
        }
    },
)


def test_retry_rate_limit_error_with_healthy_deployments():
    """
    Test 1. It SHOULD retry when there is a rate limit error and len(healthy_deployments) > 0
    """
    healthy_deployments = [
        "deployment1",
        "deployment2",
    ]  # multiple healthy deployments mocked up

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    # Act & Assert
    try:
        response = router.should_retry_this_error(
            error=rate_limit_error, healthy_deployments=healthy_deployments
        )
        print("response from should_retry_this_error: ", response)
    except Exception as e:
        pytest.fail(
            "Should not have raised an error, since there are healthy deployments. Raises",
            e,
        )


def test_do_not_retry_rate_limit_error_with_no_fallbacks_and_no_healthy_deployments():
    """
    Test 2. It SHOULD NOT Retry, when healthy_deployments is [] and fallbacks is None
    """
    healthy_deployments = []

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    # Act & Assert
    try:
        response = router.should_retry_this_error(
            error=rate_limit_error, healthy_deployments=healthy_deployments
        )
        assert response != True, "Should have raised RateLimitError"
    except openai.RateLimitError:
        pass


def test_raise_context_window_exceeded_error():
    """
    Retry Context Window Exceeded Error, when context_window_fallbacks is not None
    """
    context_window_error = litellm.ContextWindowExceededError(
        message="Context window exceeded",
        response=httpx.Response(
            status_code=400,
            request=httpx.Request(method="POST", url="https://api.openai.com/v1"),
        ),
        llm_provider="azure",
        model="gpt-3.5-turbo",
    )
    context_window_fallbacks = [{"gpt-3.5-turbo": ["azure/chatgpt-v-2"]}]

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    response = router.should_retry_this_error(
        error=context_window_error,
        healthy_deployments=None,
        context_window_fallbacks=context_window_fallbacks,
    )
    assert (
        response == True
    ), "Should not have raised exception since we have context window fallbacks"


def test_raise_context_window_exceeded_error_no_retry():
    """
    Do not Retry Context Window Exceeded Error, when context_window_fallbacks is None
    """
    context_window_error = litellm.ContextWindowExceededError(
        message="Context window exceeded",
        response=httpx.Response(
            status_code=400,
            request=httpx.Request(method="POST", url="https://api.openai.com/v1"),
        ),
        llm_provider="azure",
        model="gpt-3.5-turbo",
    )
    context_window_fallbacks = None

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    try:
        response = router.should_retry_this_error(
            error=context_window_error,
            healthy_deployments=None,
            context_window_fallbacks=context_window_fallbacks,
        )
        assert (
            response != True
        ), "Should have raised exception since we do not have context window fallbacks"
    except litellm.ContextWindowExceededError:
        pass


## Unit test time to back off for router retries

"""
1. Timeout is 0.0 when RateLimit Error and healthy deployments are > 0
2. Timeout is 0.0 when RateLimit Error and fallbacks are > 0
3. Timeout is > 0.0 when RateLimit Error and healthy deployments == 0 and fallbacks == None
"""


def test_timeout_for_rate_limit_error_with_healthy_deployments():
    """
    Test 1. Timeout is 0.0 when RateLimit Error and healthy deployments are > 0
    """
    healthy_deployments = [
        "deployment1",
        "deployment2",
    ]  # multiple healthy deployments mocked up
    fallbacks = None

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    _timeout = router._time_to_sleep_before_retry(
        e=rate_limit_error,
        remaining_retries=4,
        num_retries=4,
        healthy_deployments=healthy_deployments,
    )

    print(
        "timeout=",
        _timeout,
        "error is rate_limit_error and there are healthy deployments=",
        healthy_deployments,
    )

    assert _timeout == 0.0


def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
    """
    Test 2. Timeout is > 0.0 when RateLimit Error and healthy deployments == 0
    """
    healthy_deployments = []

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )

    _timeout = router._time_to_sleep_before_retry(
        e=rate_limit_error,
        remaining_retries=4,
        num_retries=4,
        healthy_deployments=healthy_deployments,
    )

    print(
        "timeout=",
        _timeout,
        "error is rate_limit_error and there are no healthy deployments",
    )

    assert _timeout > 0.0
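A minimal sketch of wiring a per-model-group retry policy like the one tested above into a Router; the model_group_retry_policy keyword is an assumption inferred from the test, not confirmed elsewhere in this diff, and the single deployment entry is a placeholder:

import litellm
from litellm.router import RetryPolicy

retry_policy = {
    "gpt-3.5-turbo": RetryPolicy(ContentPolicyViolationErrorRetries=2),
    "bad-model": RetryPolicy(AuthenticationErrorRetries=0),
}

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
    ],
    model_group_retry_policy=retry_policy,  # assumed keyword; see test_dynamic_router_retry_policy above
)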
@@ -23,3 +23,50 @@ def test_aws_secret_manager():
    print(f"secret_val: {secret_val}")

    assert secret_val == "sk-1234"


def redact_oidc_signature(secret_val):
    # remove the last part of `.` and replace it with "SIGNATURE_REMOVED"
    return secret_val.split(".")[:-1] + ["SIGNATURE_REMOVED"]


@pytest.mark.skipif(
    os.environ.get("K_SERVICE") is None,
    reason="Cannot run without being in GCP Cloud Run",
)
def test_oidc_google():
    secret_val = get_secret(
        "oidc/google/https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"
    )

    print(f"secret_val: {redact_oidc_signature(secret_val)}")


@pytest.mark.skipif(
    os.environ.get("ACTIONS_ID_TOKEN_REQUEST_TOKEN") is None,
    reason="Cannot run without being in GitHub Actions",
)
def test_oidc_github():
    secret_val = get_secret(
        "oidc/github/https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"
    )

    print(f"secret_val: {redact_oidc_signature(secret_val)}")


@pytest.mark.skip(reason="Cannot run without being in a CircleCI Runner")
def test_oidc_circleci():
    secret_val = get_secret(
        "oidc/circleci/https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"
    )

    print(f"secret_val: {redact_oidc_signature(secret_val)}")


@pytest.mark.skip(reason="Cannot run without being in a CircleCI Runner")
def test_oidc_circleci_v2():
    secret_val = get_secret(
        "oidc/circleci_v2/https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"
    )

    print(f"secret_val: {redact_oidc_signature(secret_val)}")
@@ -456,8 +456,7 @@ def test_completion_claude_stream():
        print(f"completion_response: {complete_response}")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")




# test_completion_claude_stream()
def test_completion_claude_2_stream():
    litellm.set_verbose = True
@@ -132,6 +132,8 @@ class GenericLiteLLMParams(BaseModel):
    aws_access_key_id: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    aws_region_name: Optional[str] = None
    ## IBM WATSONX ##
    watsonx_region_name: Optional[str] = None
    ## CUSTOM PRICING ##
    input_cost_per_token: Optional[float] = None
    output_cost_per_token: Optional[float] = None
@@ -161,6 +163,8 @@ class GenericLiteLLMParams(BaseModel):
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region_name: Optional[str] = None,
        ## IBM WATSONX ##
        watsonx_region_name: Optional[str] = None,
        input_cost_per_token: Optional[float] = None,
        output_cost_per_token: Optional[float] = None,
        input_cost_per_second: Optional[float] = None,
283
litellm/utils.py
@@ -33,6 +33,10 @@ from dataclasses import (
)

import litellm._service_logger  # for storing API inputs, outputs, and metadata
from litellm.llms.custom_httpx.http_handler import HTTPHandler
from litellm.caching import DualCache

oidc_cache = DualCache()

try:
    # this works in python 3.8
@@ -107,7 +111,18 @@ try:
except Exception as e:
    verbose_logger.debug(f"Exception import enterprise features {str(e)}")

from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO, Iterable
from typing import (
    cast,
    List,
    Dict,
    Union,
    Optional,
    Literal,
    Any,
    BinaryIO,
    Iterable,
    Tuple,
)
from .caching import Cache
from concurrent.futures import ThreadPoolExecutor

@@ -2942,6 +2957,7 @@ def client(original_function):
                    )
                else:
                    return result

            return result

        # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print
@@ -3045,6 +3061,7 @@ def client(original_function):
                        model_response_object=ModelResponse(),
                        stream=kwargs.get("stream", False),
                    )

                    if kwargs.get("stream", False) == True:
                        cached_result = CustomStreamWrapper(
                            completion_stream=cached_result,
@@ -5879,10 +5896,70 @@ def calculate_max_parallel_requests(
    return None


def _is_region_eu(model_region: str) -> bool:
    EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
    for region in EU_Regions:
        if "europe" in model_region.lower():
def _get_model_region(
    custom_llm_provider: str, litellm_params: LiteLLM_Params
) -> Optional[str]:
    """
    Return the region for a model, for a given provider
    """
    if custom_llm_provider == "vertex_ai":
        # check 'vertex_location'
        vertex_ai_location = (
            litellm_params.vertex_location
            or litellm.vertex_location
            or get_secret("VERTEXAI_LOCATION")
            or get_secret("VERTEX_LOCATION")
        )
        if vertex_ai_location is not None and isinstance(vertex_ai_location, str):
            return vertex_ai_location
    elif custom_llm_provider == "bedrock":
        aws_region_name = litellm_params.aws_region_name
        if aws_region_name is not None:
            return aws_region_name
    elif custom_llm_provider == "watsonx":
        watsonx_region_name = litellm_params.watsonx_region_name
        if watsonx_region_name is not None:
            return watsonx_region_name
    return litellm_params.region_name


def _is_region_eu(litellm_params: LiteLLM_Params) -> bool:
    """
    Return true/false if a deployment is in the EU
    """
    if litellm_params.region_name == "eu":
        return True

    ## ELSE ##
    """
    - get provider
    - get provider regions
    - return true if given region (get_provider_region) in eu region (config.get_eu_regions())
    """
    model, custom_llm_provider, _, _ = litellm.get_llm_provider(
        model=litellm_params.model, litellm_params=litellm_params
    )

    model_region = _get_model_region(
        custom_llm_provider=custom_llm_provider, litellm_params=litellm_params
    )

    if model_region is None:
        return False

    if custom_llm_provider == "azure":
        eu_regions = litellm.AzureOpenAIConfig().get_eu_regions()
    elif custom_llm_provider == "vertex_ai":
        eu_regions = litellm.VertexAIConfig().get_eu_regions()
    elif custom_llm_provider == "bedrock":
        eu_regions = litellm.AmazonBedrockGlobalConfig().get_eu_regions()
    elif custom_llm_provider == "watsonx":
        eu_regions = litellm.IBMWatsonXAIConfig().get_eu_regions()
    else:
        return False

    for region in eu_regions:
        if region in model_region.lower():
            return True
    return False

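A small sketch of how the EU-region helpers above compose (hypothetical driver code: only _is_region_eu, LiteLLM_Params, and the region_name field come from this diff; the credential and endpoint values are placeholders):

from litellm.router import LiteLLM_Params
from litellm.utils import _is_region_eu

params = LiteLLM_Params(
    model="azure/chatgpt-v-2",
    api_key="placeholder-key",
    api_base="https://example-eu.openai.azure.com",  # placeholder endpoint
    region_name="eu",
)

# region_name == "eu" short-circuits to True; otherwise the provider's
# get_eu_regions() list is matched against _get_model_region(...)
print(_is_region_eu(litellm_params=params))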
@@ -6308,8 +6385,23 @@ def get_llm_provider(
    custom_llm_provider: Optional[str] = None,
    api_base: Optional[str] = None,
    api_key: Optional[str] = None,
):
    litellm_params: Optional[LiteLLM_Params] = None,
) -> Tuple[str, str, Optional[str], Optional[str]]:
    """
    Returns the provider for a given model name - e.g. 'azure/chatgpt-v-2' -> 'azure'

    For router -> Can also give the whole litellm param dict -> this function will extract the relevant details
    """
    try:
        ## IF LITELLM PARAMS GIVEN ##
        if litellm_params is not None:
            assert (
                custom_llm_provider is None and api_base is None and api_key is None
            ), "Either pass in litellm_params or the custom_llm_provider/api_base/api_key. Otherwise, these values will be overriden."
            custom_llm_provider = litellm_params.custom_llm_provider
            api_base = litellm_params.api_base
            api_key = litellm_params.api_key

        dynamic_api_key = None
        # check if llm provider provided
        # AZURE AI-Studio Logic - Azure AI Studio supports AZURE/Cohere
@@ -6370,7 +6462,8 @@ def get_llm_provider(
                api_base
                or get_secret("MISTRAL_AZURE_API_BASE")  # for Azure AI Mistral
                or "https://api.mistral.ai/v1"
            )
            )  # type: ignore

            # if api_base does not end with /v1 we add it
            if api_base is not None and not api_base.endswith(
                "/v1"
@@ -6393,10 +6486,30 @@ def get_llm_provider(
                or get_secret("TOGETHERAI_API_KEY")
                or get_secret("TOGETHER_AI_TOKEN")
            )
            if api_base is not None and not isinstance(api_base, str):
                raise Exception(
                    "api base needs to be a string. api_base={}".format(api_base)
                )
            if dynamic_api_key is not None and not isinstance(dynamic_api_key, str):
                raise Exception(
                    "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
                        dynamic_api_key
                    )
                )
            return model, custom_llm_provider, dynamic_api_key, api_base
        elif model.split("/", 1)[0] in litellm.provider_list:
            custom_llm_provider = model.split("/", 1)[0]
            model = model.split("/", 1)[1]
            if api_base is not None and not isinstance(api_base, str):
                raise Exception(
                    "api base needs to be a string. api_base={}".format(api_base)
                )
            if dynamic_api_key is not None and not isinstance(dynamic_api_key, str):
                raise Exception(
                    "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
                        dynamic_api_key
                    )
                )
            return model, custom_llm_provider, dynamic_api_key, api_base
        # check if api base is a known openai compatible endpoint
        if api_base:
@@ -6420,7 +6533,22 @@ def get_llm_provider(
                elif endpoint == "api.deepseek.com/v1":
                    custom_llm_provider = "deepseek"
                    dynamic_api_key = get_secret("DEEPSEEK_API_KEY")
                return model, custom_llm_provider, dynamic_api_key, api_base

                if api_base is not None and not isinstance(api_base, str):
                    raise Exception(
                        "api base needs to be a string. api_base={}".format(
                            api_base
                        )
                    )
                if dynamic_api_key is not None and not isinstance(
                    dynamic_api_key, str
                ):
                    raise Exception(
                        "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
                            dynamic_api_key
                        )
                    )
                return model, custom_llm_provider, dynamic_api_key, api_base  # type: ignore

        # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.)
        ## openai - chatcompletion + text completion
@@ -6511,6 +6639,16 @@ def get_llm_provider(
                ),
                llm_provider="",
            )
        if api_base is not None and not isinstance(api_base, str):
            raise Exception(
                "api base needs to be a string. api_base={}".format(api_base)
            )
        if dynamic_api_key is not None and not isinstance(dynamic_api_key, str):
            raise Exception(
                "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
                    dynamic_api_key
                )
            )
        return model, custom_llm_provider, dynamic_api_key, api_base
    except Exception as e:
        if isinstance(e, litellm.exceptions.BadRequestError):
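A minimal usage sketch of the updated get_llm_provider return shape (a 4-tuple), based on the docstring added above; the model name is one already used throughout this diff:

import litellm

model, provider, dynamic_api_key, api_base = litellm.get_llm_provider(
    model="azure/chatgpt-v-2"
)
print(provider)  # expected: "azure", per the docstring above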
@@ -8081,10 +8219,7 @@ def exception_type(
                    + "Exception"
                )

                if (
                    "This model's maximum context length is" in error_str
                    or "Request too large" in error_str
                ):
                if "This model's maximum context length is" in error_str:
                    exception_mapping_worked = True
                    raise ContextWindowExceededError(
                        message=f"{exception_provider} - {message} {extra_information}",
@@ -8125,6 +8260,13 @@
                        model=model,
                        response=original_exception.response,
                    )
                elif "Request too large" in error_str:
                    raise RateLimitError(
                        message=f"{exception_provider} - {message} {extra_information}",
                        model=model,
                        llm_provider=custom_llm_provider,
                        response=original_exception.response,
                    )
                elif (
                    "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
                    in error_str
@@ -9410,6 +9552,75 @@ def get_secret(
    if secret_name.startswith("os.environ/"):
        secret_name = secret_name.replace("os.environ/", "")

    # Example: oidc/google/https://bedrock-runtime.us-east-1.amazonaws.com/model/stability.stable-diffusion-xl-v1/invoke
    if secret_name.startswith("oidc/"):
        secret_name_split = secret_name.replace("oidc/", "")
        oidc_provider, oidc_aud = secret_name_split.split("/", 1)
        # TODO: Add caching for HTTP requests
        if oidc_provider == "google":
            oidc_token = oidc_cache.get_cache(key=secret_name)
            if oidc_token is not None:
                return oidc_token

            oidc_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
            # https://cloud.google.com/compute/docs/instances/verifying-instance-identity#request_signature
            response = oidc_client.get(
                "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/identity",
                params={"audience": oidc_aud},
                headers={"Metadata-Flavor": "Google"},
            )
            if response.status_code == 200:
                oidc_token = response.text
                oidc_cache.set_cache(key=secret_name, value=oidc_token, ttl=3600 - 60)
                return oidc_token
            else:
                raise ValueError("Google OIDC provider failed")
        elif oidc_provider == "circleci":
            # https://circleci.com/docs/openid-connect-tokens/
            env_secret = os.getenv("CIRCLE_OIDC_TOKEN")
            if env_secret is None:
                raise ValueError("CIRCLE_OIDC_TOKEN not found in environment")
            return env_secret
        elif oidc_provider == "circleci_v2":
            # https://circleci.com/docs/openid-connect-tokens/
            env_secret = os.getenv("CIRCLE_OIDC_TOKEN_V2")
            if env_secret is None:
                raise ValueError("CIRCLE_OIDC_TOKEN_V2 not found in environment")
            return env_secret
        elif oidc_provider == "github":
            # https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-cloud-providers#using-custom-actions
            actions_id_token_request_url = os.getenv("ACTIONS_ID_TOKEN_REQUEST_URL")
            actions_id_token_request_token = os.getenv("ACTIONS_ID_TOKEN_REQUEST_TOKEN")
            if (
                actions_id_token_request_url is None
                or actions_id_token_request_token is None
            ):
                raise ValueError(
                    "ACTIONS_ID_TOKEN_REQUEST_URL or ACTIONS_ID_TOKEN_REQUEST_TOKEN not found in environment"
                )

            oidc_token = oidc_cache.get_cache(key=secret_name)
            if oidc_token is not None:
                return oidc_token

            oidc_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
            response = oidc_client.get(
                actions_id_token_request_url,
                params={"audience": oidc_aud},
                headers={
                    "Authorization": f"Bearer {actions_id_token_request_token}",
                    "Accept": "application/json; api-version=2.0",
                },
            )
            if response.status_code == 200:
                oidc_token = response.text["value"]
                oidc_cache.set_cache(key=secret_name, value=oidc_token, ttl=300 - 5)
                return oidc_token
            else:
                raise ValueError("Github OIDC provider failed")
        else:
            raise ValueError("Unsupported OIDC provider")

    try:
        if litellm.secret_manager_client is not None:
            try:
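The oidc/ secret names handled above follow an oidc/<provider>/<audience> convention; a hedged sketch of resolving one (the audience URL mirrors the tests earlier in this diff, and the call only succeeds inside a runner that exposes the provider's token endpoint or env vars):

from litellm.utils import get_secret

provider = "github"  # one of: google, circleci, circleci_v2, github
audience = "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"

# returns a short-lived OIDC token, or raises ValueError when the required
# environment (metadata server / env vars) is not available
token = get_secret(f"oidc/{provider}/{audience}")
print(len(token))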
@@ -10364,7 +10575,12 @@ class CustomStreamWrapper:
                response = chunk.replace("data: ", "").strip()
                parsed_response = json.loads(response)
            else:
                return {"text": "", "is_finished": False}
                return {
                    "text": "",
                    "is_finished": False,
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                }
        else:
            print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
            raise ValueError(
@@ -10379,13 +10595,47 @@ class CustomStreamWrapper:
                "text": text,
                "is_finished": is_finished,
                "finish_reason": finish_reason,
                "prompt_tokens": results[0].get("input_token_count", None),
                "completion_tokens": results[0].get("generated_token_count", None),
                "prompt_tokens": results[0].get("input_token_count", 0),
                "completion_tokens": results[0].get("generated_token_count", 0),
            }
            return {"text": "", "is_finished": False}
        except Exception as e:
            raise e

    def handle_clarifai_completion_chunk(self, chunk):
        try:
            if isinstance(chunk, dict):
                parsed_response = chunk
            if isinstance(chunk, (str, bytes)):
                if isinstance(chunk, bytes):
                    parsed_response = chunk.decode("utf-8")
                else:
                    parsed_response = chunk
            data_json = json.loads(parsed_response)
            text = (
                data_json.get("outputs", "")[0]
                .get("data", "")
                .get("text", "")
                .get("raw", "")
            )
            prompt_tokens = len(
                encoding.encode(
                    data_json.get("outputs", "")[0]
                    .get("input", "")
                    .get("data", "")
                    .get("text", "")
                    .get("raw", "")
                )
            )
            completion_tokens = len(encoding.encode(text))
            return {
                "text": text,
                "is_finished": True,
            }
        except:
            traceback.print_exc()
            return ""

    def model_response_creator(self):
        model_response = ModelResponse(
            stream=True, model=self.model, stream_options=self.stream_options
@@ -10431,6 +10681,9 @@ class CustomStreamWrapper:
                completion_obj["content"] = response_obj["text"]
                if response_obj["is_finished"]:
                    self.received_finish_reason = response_obj["finish_reason"]
            elif self.custom_llm_provider and self.custom_llm_provider == "clarifai":
                response_obj = self.handle_clarifai_completion_chunk(chunk)
                completion_obj["content"] = response_obj["text"]
            elif self.model == "replicate" or self.custom_llm_provider == "replicate":
                response_obj = self.handle_replicate_chunk(chunk)
                completion_obj["content"] = response_obj["text"]
@@ -1571,6 +1571,135 @@
        "litellm_provider": "replicate",
        "mode": "chat"
    },
    "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
        "max_tokens": 65536,
        "input_cost_per_token": 0.000001,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/google/gemini-pro-1.5": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.0000025,
        "output_cost_per_token": 0.0000075,
        "input_cost_per_image": 0.00265,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "openrouter/mistralai/mixtral-8x22b-instruct": {
        "max_tokens": 65536,
        "input_cost_per_token": 0.00000065,
        "output_cost_per_token": 0.00000065,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/cohere/command-r-plus": {
        "max_tokens": 128000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/databricks/dbrx-instruct": {
        "max_tokens": 32768,
        "input_cost_per_token": 0.0000006,
        "output_cost_per_token": 0.0000006,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/anthropic/claude-3-haiku": {
        "max_tokens": 200000,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.00000125,
        "input_cost_per_image": 0.0004,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "openrouter/anthropic/claude-3-sonnet": {
        "max_tokens": 200000,
        "input_cost_per_token": 0.000003,
        "output_cost_per_token": 0.000015,
        "input_cost_per_image": 0.0048,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "openrouter/mistralai/mistral-large": {
        "max_tokens": 32000,
        "input_cost_per_token": 0.000008,
        "output_cost_per_token": 0.000024,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": {
        "max_tokens": 32769,
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/google/gemini-pro-vision": {
        "max_tokens": 45875,
        "input_cost_per_token": 0.000000125,
        "output_cost_per_token": 0.000000375,
        "input_cost_per_image": 0.0025,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "openrouter/fireworks/firellava-13b": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.0000002,
        "output_cost_per_token": 0.0000002,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/meta-llama/llama-3-8b-instruct:free": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/meta-llama/llama-3-8b-instruct:extended": {
        "max_tokens": 16384,
        "input_cost_per_token": 0.000000225,
        "output_cost_per_token": 0.00000225,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/meta-llama/llama-3-70b-instruct:nitro": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.0000009,
        "output_cost_per_token": 0.0000009,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/meta-llama/llama-3-70b-instruct": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.00000059,
        "output_cost_per_token": 0.00000079,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/openai/gpt-4-vision-preview": {
        "max_tokens": 130000,
        "input_cost_per_token": 0.00001,
        "output_cost_per_token": 0.00003,
        "input_cost_per_image": 0.01445,
        "litellm_provider": "openrouter",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "openrouter/openai/gpt-3.5-turbo": {
        "max_tokens": 4095,
        "input_cost_per_token": 0.0000015,
@@ -1621,14 +1750,14 @@
        "tool_use_system_prompt_tokens": 395
    },
    "openrouter/google/palm-2-chat-bison": {
        "max_tokens": 8000,
        "max_tokens": 25804,
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/google/palm-2-codechat-bison": {
        "max_tokens": 8000,
        "max_tokens": 20070,
        "input_cost_per_token": 0.0000005,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "openrouter",
@@ -1711,13 +1840,6 @@
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "openrouter/meta-llama/llama-3-70b-instruct": {
        "max_tokens": 8192,
        "input_cost_per_token": 0.0000008,
        "output_cost_per_token": 0.0000008,
        "litellm_provider": "openrouter",
        "mode": "chat"
    },
    "j2-ultra": {
        "max_tokens": 8192,
        "max_input_tokens": 8192,
@@ -3226,4 +3348,4 @@
        "mode": "embedding"
    }

}
}
@ -83,6 +83,7 @@ model_list:
|
|||
model: text-completion-openai/gpt-3.5-turbo-instruct
|
||||
litellm_settings:
|
||||
drop_params: True
|
||||
enable_preview_features: True
|
||||
# max_budget: 100
|
||||
# budget_duration: 30d
|
||||
num_retries: 5
|
||||
|
|
|
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.37.4"
version = "1.37.5"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.37.4"
version = "1.37.5"
version_files = [
    "pyproject.toml:^version"
]
@@ -26,6 +26,8 @@ fastapi-sso==0.10.0 # admin UI, SSO
pyjwt[crypto]==2.8.0
python-multipart==0.0.9 # admin UI
Pillow==10.3.0
azure-ai-contentsafety==1.0.0 # for azure content safety
azure-identity==1.15.0 # for azure content safety

### LITELLM PACKAGE DEPENDENCIES
python-dotenv==1.0.0 # for env
@@ -4,6 +4,7 @@ import pytest
import asyncio
import aiohttp, openai
from openai import OpenAI, AsyncOpenAI
from typing import Optional, List, Union


def response_header_check(response):
@@ -71,7 +72,7 @@ async def new_user(session):
    return await response.json()


async def chat_completion(session, key, model="gpt-4"):
async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
@@ -409,3 +410,27 @@ async def test_openai_wildcard_chat_completion():

        # call chat/completions with a model that the key was not created for + the model is not on the config.yaml
        await chat_completion(session=session, key=key, model="gpt-3.5-turbo-0125")


@pytest.mark.asyncio
async def test_batch_chat_completions():
    """
    - Make chat completion call using

    """
    async with aiohttp.ClientSession() as session:

        # call chat/completions with a model that the key was not created for + the model is not on the config.yaml
        response = await chat_completion(
            session=session,
            key="sk-1234",
            model=[
                "gpt-3.5-turbo",
                "fake-openai-endpoint",
            ],
        )

        print(f"response: {response}")

        assert len(response) == 2
        assert isinstance(response, list)
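A sketch of the raw proxy request behind the new batch test: a list of model names in the standard payload's model field (assumption: the chat_completion helper posts the model value through unchanged; the endpoint and key are the local test defaults used above):

import asyncio
import aiohttp

async def batch_chat():
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "model": ["gpt-3.5-turbo", "fake-openai-endpoint"],  # fan out to each model
        "messages": [{"role": "user", "content": "Hello!"}],
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as resp:
            return await resp.json()

print(asyncio.run(batch_chat()))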
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/f04e46b02318b660.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a1602eb39f799143.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[25539,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"566\",\"static/chunks/566-ccd699ab19124658.js\",\"931\",\"static/chunks/app/page-c804e862b63be987.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a1602eb39f799143.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"K8KXTbmuI2ArWjjdMi2iq\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[7926,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-e6190351ac8da62a.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"84BZ5uERcn4DsO4_POsLl\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[25539,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","566","static/chunks/566-ccd699ab19124658.js","931","static/chunks/app/page-c804e862b63be987.js"],""]
3:I[7926,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-e6190351ac8da62a.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["K8KXTbmuI2ArWjjdMi2iq",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a1602eb39f799143.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["84BZ5uERcn4DsO4_POsLl",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -152,7 +152,7 @@ const handleSubmit = async (formValues: Record<string, any>, accessToken: string
       litellmExtraParams = JSON.parse(value);
     }
     catch (error) {
-      message.error("Failed to parse LiteLLM Extra Params: " + error, 20);
+      message.error("Failed to parse LiteLLM Extra Params: " + error, 10);
       throw new Error("Failed to parse litellm_extra_params: " + error);
     }
     for (const [key, value] of Object.entries(litellmExtraParams)) {
@@ -188,7 +188,7 @@ const handleSubmit = async (formValues: Record<string, any>, accessToken: string

   } catch (error) {
-    message.error("Failed to create model: " + error, 20);
+    message.error("Failed to create model: " + error, 10);
   }
 }

@@ -43,7 +43,7 @@ export const modelCreateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -78,7 +78,7 @@ export const modelDeleteCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -120,7 +120,7 @@ export const keyCreateCall = async (
     try {
       formValues.metadata = JSON.parse(formValues.metadata);
     } catch (error) {
-      message.error("Failed to parse metadata: " + error, 20);
+      message.error("Failed to parse metadata: " + error, 10);
       throw new Error("Failed to parse metadata: " + error);
     }
   }
@@ -141,7 +141,7 @@ export const keyCreateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -183,7 +183,7 @@ export const userCreateCall = async (
     try {
       formValues.metadata = JSON.parse(formValues.metadata);
     } catch (error) {
-      message.error("Failed to parse metadata: " + error, 20);
+      message.error("Failed to parse metadata: " + error, 10);
       throw new Error("Failed to parse metadata: " + error);
     }
   }
@@ -204,7 +204,7 @@ export const userCreateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -237,7 +237,7 @@ export const keyDeleteCall = async (accessToken: String, user_key: String) => {

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to delete key: " + errorData, 20);
+      message.error("Failed to delete key: " + errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -269,7 +269,7 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => {

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to delete team: " + errorData, 20);
+      message.error("Failed to delete team: " + errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -314,7 +314,7 @@ export const userInfoCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -350,7 +350,7 @@ export const teamInfoCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -385,7 +385,7 @@ export const getTotalSpendCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -420,7 +420,7 @@ export const modelInfoCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -463,7 +463,7 @@ export const modelMetricsCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -506,7 +506,7 @@ export const modelMetricsSlowResponsesCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -547,7 +547,7 @@ export const modelExceptionsCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -583,7 +583,7 @@ export const modelAvailableCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -612,7 +612,7 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -641,7 +641,7 @@ export const teamSpendLogsCall = async (accessToken: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -670,7 +670,7 @@ export const tagsSpendLogsCall = async (accessToken: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -710,7 +710,7 @@ export const userSpendLogsCall = async (
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -740,7 +740,7 @@ export const adminSpendLogsCall = async (accessToken: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -770,7 +770,7 @@ export const adminTopKeysCall = async (accessToken: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -825,7 +825,7 @@ export const adminTopEndUsersCall = async (
     const response = await fetch(url, requestOptions);
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -855,7 +855,7 @@ export const adminTopModelsCall = async (accessToken: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -886,7 +886,7 @@ export const keyInfoCall = async (accessToken: String, keys: String[]) => {

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -912,7 +912,7 @@ export const spendUsersCall = async (accessToken: String, userID: String) => {
     });
     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -950,7 +950,7 @@ export const userRequestModelCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to delete key: " + errorData, 20);
+      message.error("Failed to delete key: " + errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -980,7 +980,7 @@ export const userGetRequesedtModelsCall = async (accessToken: String) => {

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to delete key: " + errorData, 20);
+      message.error("Failed to delete key: " + errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -1020,7 +1020,7 @@ export const userGetAllUsersCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to delete key: " + errorData, 20);
+      message.error("Failed to delete key: " + errorData, 10);
       throw new Error("Network response was not ok");
     }
     const data = await response.json();
@@ -1055,7 +1055,7 @@ export const teamCreateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1092,7 +1092,7 @@ export const keyUpdateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to update key: " + errorData, 20);
+      message.error("Failed to update key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1127,7 +1127,7 @@ export const teamUpdateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to update team: " + errorData, 20);
+      message.error("Failed to update team: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1162,7 +1162,7 @@ export const modelUpdateCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to update model: " + errorData, 20);
+      message.error("Failed to update model: " + errorData, 10);
       console.error("Error update from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1207,7 +1207,7 @@ export const teamMemberAddCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1247,7 +1247,7 @@ export const userUpdateUserCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error("Failed to create key: " + errorData, 20);
+      message.error("Failed to create key: " + errorData, 10);
       console.error("Error response from the server:", errorData);
       throw new Error("Network response was not ok");
     }
@@ -1287,7 +1287,7 @@ export const PredictedSpendLogsCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -1398,7 +1398,7 @@ export const getCallbacksCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

@@ -1441,7 +1441,7 @@ export const setCallbacksCall = async (

     if (!response.ok) {
       const errorData = await response.text();
-      message.error(errorData, 20);
+      message.error(errorData, 10);
       throw new Error("Network response was not ok");
     }

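Taken together, the networking.tsx hunks above all make the same change: the antd error toast shown on a failed proxy call now stays up for 10 seconds instead of 20. A minimal sketch of the shared pattern, assuming a hypothetical helper name (handleNotOk and DEFAULT_ERROR_DURATION_S are illustrative and not part of this PR):

import { message } from "antd";

// Illustrative constant: the PR hard-codes 10 at each call site rather than centralizing it.
const DEFAULT_ERROR_DURATION_S = 10; // previously 20

export async function handleNotOk(response: Response, prefix = ""): Promise<void> {
  if (!response.ok) {
    const errorData = await response.text();
    // antd's message.error(content, duration) takes the duration in seconds.
    message.error(prefix + errorData, DEFAULT_ERROR_DURATION_S);
    console.error("Error response from the server:", errorData);
    throw new Error("Network response was not ok");
  }
}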
@@ -1,141 +0,0 @@
-"use client";
-
-import React, { useState, useEffect } from "react";
-import {
-  Button as Button2,
-  Modal,
-  Form,
-  Input,
-  InputNumber,
-  Select,
-  message,
-} from "antd";
-import {
-  Button,
-  Text,
-  Card,
-  Table,
-  BarChart,
-  Title,
-  Subtitle,
-  BarList,
-  Metric,
-} from "@tremor/react";
-import { keySpendLogsCall, PredictedSpendLogsCall } from "./networking";
-
-interface ViewKeySpendReportProps {
-  token: string;
-  accessToken: string;
-  keySpend: number;
-  keyBudget: number;
-  keyName: string;
-}
-
-type ResponseValueType = {
-  startTime: string; // Assuming startTime is a string, adjust it if it's of a different type
-  spend: number; // Assuming spend is a number, adjust it if it's of a different type
-  user: string; // Assuming user is a string, adjust it if it's of a different type
-};
-
-const ViewKeySpendReport: React.FC<ViewKeySpendReportProps> = ({
-  token,
-  accessToken,
-  keySpend,
-  keyBudget,
-  keyName,
-}) => {
-  const [isModalVisible, setIsModalVisible] = useState(false);
-  const [data, setData] = useState<{ day: string; spend: number }[] | null>(
-    null
-  );
-  const [predictedSpendString, setPredictedSpendString] = useState("");
-  const [userData, setUserData] = useState<
-    { name: string; value: number }[] | null
-  >(null);
-
-  const showModal = () => {
-    console.log("Show Modal triggered");
-    setIsModalVisible(true);
-    fetchData();
-  };
-
-  const handleOk = () => {
-    setIsModalVisible(false);
-  };
-
-  const handleCancel = () => {
-    setIsModalVisible(false);
-  };
-
-  // call keySpendLogsCall and set the data
-  const fetchData = async () => {
-    try {
-      if (accessToken == null || token == null) {
-        return;
-      }
-      console.log(`accessToken: ${accessToken}; token: ${token}`);
-      const response = await keySpendLogsCall(
-        (accessToken = accessToken),
-        (token = token)
-      );
-      console.log("Response:", response);
-      setData(response);
-
-      // predict spend based on response
-      const predictedSpend = await PredictedSpendLogsCall(accessToken, response);
-      console.log("Response2:", predictedSpend);
-
-      // append predictedSpend to data
-      const combinedData = [...response, ...predictedSpend.response];
-      setData(combinedData);
-      setPredictedSpendString(predictedSpend.predicted_spend)
-
-      console.log("Combined Data:", combinedData);
-      // setPredictedSpend(predictedSpend);
-
-    } catch (error) {
-      console.error("There was an error fetching the data", error);
-    }
-  };
-
-
-  if (!token) {
-    return null;
-  }
-
-  return (
-    <div>
-      <Button size = "xs" onClick={showModal} variant="secondary">
-        Spend Report
-      </Button>
-      <Modal
-        visible={isModalVisible}
-        width={1400}
-        onOk={handleOk}
-        onCancel={handleCancel}
-        footer={null}
-      >
-        <Title style={{ textAlign: "left" }}>Key Name: {keyName}</Title>
-
-        <Metric>Monthly Spend ${keySpend}</Metric>
-        <Title>{predictedSpendString}</Title>
-
-        <Card className="mt-6 mb-6">
-          {data && (
-            <BarChart
-              className="mt-6"
-              data={data}
-              colors={["blue", "amber"]}
-              index="date"
-              categories={["spend", "predicted_spend"]}
-              yAxisWidth={80}
-            />
-          )}
-        </Card>
-
-      </Modal>
-    </div>
-  );
-};
-
-export default ViewKeySpendReport;
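The removed view_key_spend_report.tsx fetched a key's spend logs, asked PredictedSpendLogsCall for a forecast, and appended the predicted rows to the actual rows before charting them. A rough sketch of that data shaping, assuming the same response shapes as the deleted code (the SpendRow type and combineWithPrediction name are illustrative):

type SpendRow = { date: string; spend?: number; predicted_spend?: number };

// Mirrors the deleted fetchData: actual rows first, predicted rows appended,
// plus the human-readable predicted_spend summary string used as a title.
function combineWithPrediction(
  actual: SpendRow[],
  predicted: { response: SpendRow[]; predicted_spend: string }
): { rows: SpendRow[]; label: string } {
  return { rows: [...actual, ...predicted.response], label: predicted.predicted_spend };
}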
@@ -17,6 +17,7 @@ import {
  DialogPanel,
  Text,
  Title,
  Subtitle,
  Icon,
  BarChart,
} from "@tremor/react";
@@ -32,8 +33,6 @@ import {
  Select,
} from "antd";

import ViewKeySpendReport from "./view_key_spend_report";

const { Option } = Select;


@@ -84,7 +83,6 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
  const [isButtonClicked, setIsButtonClicked] = useState(false);
  const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false);
  const [keyToDelete, setKeyToDelete] = useState<string | null>(null);
  const [openDialogId, setOpenDialogId] = React.useState<null | number>(null);
  const [selectedItem, setSelectedItem] = useState<ItemData | null>(null);
  const [spendData, setSpendData] = useState<{ day: string; spend: number }[] | null>(
    null
@@ -92,6 +90,7 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
  const [predictedSpendString, setPredictedSpendString] = useState("");

  const [editModalVisible, setEditModalVisible] = useState(false);
  const [infoDialogVisible, setInfoDialogVisible] = useState(false);
  const [selectedToken, setSelectedToken] = useState<ItemData | null>(null);
  const [userModels, setUserModels] = useState([]);
  const initialKnownTeamIDs: Set<string> = new Set();
@@ -328,47 +327,6 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
  };



  // call keySpendLogsCall and set the data
  const fetchData = async (item: ItemData | null) => {
    try {
      if (accessToken == null || item == null) {
        return;
      }
      console.log(`accessToken: ${accessToken}; token: ${item.token}`);
      const response = await keySpendLogsCall(accessToken, item.token);

      console.log("Response:", response);
      setSpendData(response);

      // predict spend based on response
      try {
        const predictedSpend = await PredictedSpendLogsCall(accessToken, response);
        console.log("Response2:", predictedSpend);

        // append predictedSpend to data
        const combinedData = [...response, ...predictedSpend.response];
        setSpendData(combinedData);
        setPredictedSpendString(predictedSpend.predicted_spend)

        console.log("Combined Data:", combinedData);
      } catch (error) {
        console.error("There was an error fetching the predicted data", error);
      }

      // setPredictedSpend(predictedSpend);

    } catch (error) {
      console.error("There was an error fetching the data", error);
    }
  };

  useEffect(() => {
    fetchData(selectedItem);
  }, [selectedItem]);



  const handleDelete = async (token: any) => {
    console.log("handleDelete:", token);
    if (token.token == null) {
@@ -415,13 +373,6 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
  if (data == null) {
    return;
  }

  // useEffect(() => {
  //   if (openDialogId !== null && selectedItem !== null) {
  //     fetchData(selectedItem);
  //   }
  // }, [openDialogId, selectedItem]);

  console.log("RERENDER TRIGGERED");
  return (
    <div>
@@ -564,25 +515,27 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
          <TableCell>
            <Icon
              onClick={() => {
                setSelectedItem(item);
                setOpenDialogId(item.id);
                setSelectedToken(item);
                setInfoDialogVisible(true);
              }}
              icon={InformationCircleIcon}
              size="sm"
            />

            <Dialog
              open={openDialogId !== null}
              onClose={() => {
                setOpenDialogId(null);
                setSelectedItem(null);
              }}
            <Modal
              open={infoDialogVisible}
              onCancel={() => {
                setInfoDialogVisible(false);
                setSelectedToken(null);
              }}
              footer={null}
              width={800}
            >

            >
              <DialogPanel>
                {selectedItem && (
                {selectedToken && (
                  <>
                    <div className="grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3">
                    <div className="grid grid-cols-1 gap-6 sm:grid-cols-2 lg:grid-cols-3 mt-8">
                      <Card>
                        <p className="text-tremor-default font-medium text-tremor-content dark:text-dark-tremor-content">
                          Spend
@@ -591,9 +544,9 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
                        <p className="text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong">
                          {(() => {
                            try {
                              return parseFloat(selectedItem.spend).toFixed(4);
                              return parseFloat(selectedToken.spend).toFixed(4);
                            } catch (error) {
                              return selectedItem.spend;
                              return selectedToken.spend;
                            }
                          })()}

@@ -606,8 +559,8 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
                        </p>
                        <div className="mt-2 flex items-baseline space-x-2.5">
                          <p className="text-tremor font-semibold text-tremor-content-strong dark:text-dark-tremor-content-strong">
                            {selectedItem.max_budget != null ? (
                              <>{selectedItem.max_budget}</>
                            {selectedToken.max_budget != null ? (
                              <>{selectedToken.max_budget}</>
                            ) : (
                              <>Unlimited</>
                            )}
@@ -620,9 +573,9 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
                        </p>
                        <div className="mt-2 flex items-baseline space-x-2.5">
                          <p className="text-tremor-default font-small text-tremor-content-strong dark:text-dark-tremor-content-strong">
                            {selectedItem.expires != null ? (
                            {selectedToken.expires != null ? (
                              <>
                                {new Date(selectedItem.expires).toLocaleString(undefined, {
                                {new Date(selectedToken.expires).toLocaleString(undefined, {
                                  day: 'numeric',
                                  month: 'long',
                                  year: 'numeric',
@@ -639,38 +592,28 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
                      </Card>
                    </div>

                    <Card className="mt-6 mb-6">
                      {spendData && (
                        <BarChart
                          className="mt-6"
                          data={spendData}
                          colors={["blue", "amber"]}
                          index="date"
                          categories={["spend", "predicted_spend"]}
                          yAxisWidth={80}
                        />
                      )}
                    </Card>

                    <Card className="my-4">
                      <Title>Token Name</Title>
                      <Text className="my-1">{selectedToken.key_alias ? selectedToken.key_alias : selectedToken.key_name}</Text>
                      <Title>Token ID</Title>
                      <Text className="my-1 text-[12px]">{selectedToken.token}</Text>
                      <Title>Metadata</Title>

                      <Text>{JSON.stringify(selectedItem.metadata)}</Text>

                      <Text className="my-1"><pre>{JSON.stringify(selectedToken.metadata)} </pre></Text>
                    </Card>

                    <Button
                      variant="light"
                      className="mx-auto flex items-center"
                      onClick={() => {
                        setOpenDialogId(null);
                        setSelectedItem(null);
                        setInfoDialogVisible(false);
                        setSelectedToken(null);
                      }}
                    >
                      Close
                    </Button>
                  </>
                )}
              </DialogPanel>
            </Dialog>

            </Modal>
            <Icon
              icon={PencilAltIcon}
              size="sm"
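The view_key_table.tsx hunks swap tremor's <Dialog>/<DialogPanel> key-info view for an antd <Modal> driven by the new infoDialogVisible and selectedToken state. A stripped-down sketch of the resulting shape, under illustrative names (KeyInfoModal and its props are not a component in this PR):

import React from "react";
import { Modal } from "antd";

interface KeyInfoModalProps {
  visible: boolean;
  token: { spend: number; max_budget: number | null } | null;
  onClose: () => void;
}

// footer={null} hides antd's default OK/Cancel buttons; width mirrors the 800px used in the diff.
const KeyInfoModal: React.FC<KeyInfoModalProps> = ({ visible, token, onClose }) => (
  <Modal open={visible} onCancel={onClose} footer={null} width={800}>
    {token && (
      <div>
        <p>Spend: {token.spend.toFixed(4)}</p>
        <p>Max Budget: {token.max_budget != null ? token.max_budget : "Unlimited"}</p>
      </div>
    )}
  </Modal>
);

export default KeyInfoModal;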