Merge pull request #4896 from BerriAI/docs_add_example_usage_with_mistral_python
Docs Proxy - add example usage of the Mistral Python SDK with the Proxy

Commit: 87cebdefd3
4 changed files with 204 additions and 160 deletions
@@ -255,6 +255,12 @@ litellm --config your_config.yaml

## Using LiteLLM Proxy - Curl Request, OpenAI Package, Langchain

:::info

LiteLLM is compatible with several SDKs - including the OpenAI SDK, Anthropic SDK, Mistral SDK, LlamaIndex, and Langchain (JS, Python)

[More examples here](user_keys)

:::

<Tabs>
<TabItem value="Curl" label="Curl Request">

@@ -396,165 +402,6 @@ print(response)

- POST `/key/generate` - generate a key to access the proxy
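
For context, a minimal sketch of calling that endpoint (hypothetical values: the proxy listening on `http://0.0.0.0:4000` with master key `sk-1234`; adjust both for your deployment):

```python
import requests  # assumes the `requests` package is installed

# Generate a proxy key scoped to one model, valid for 20 minutes
# (URL and master key below are placeholders for your own setup)
resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},
    json={"models": ["gpt-3.5-turbo"], "duration": "20m"},
)
print(resp.json())  # the generated key is returned in the response body
```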

## Using with OpenAI compatible projects

Set `base_url` to the LiteLLM Proxy server

<Tabs>
<TabItem value="openai" label="OpenAI v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```

</TabItem>
<TabItem value="librechat" label="LibreChat">

#### Start the LiteLLM proxy
```shell
litellm --model gpt-3.5-turbo

#INFO: Proxy running on http://0.0.0.0:4000
```

#### 1. Clone the repo

```shell
git clone https://github.com/danny-avila/LibreChat.git
```

#### 2. Modify Librechat's `docker-compose.yml`
LiteLLM Proxy runs on port `4000`, so point the reverse proxy below at port `4000`
```yaml
OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions
```

#### 3. Save fake OpenAI key in Librechat's `.env`

Copy Librechat's `.env.example` to `.env` and overwrite the default OPENAI_API_KEY (by default it requires the user to pass a key).
```env
OPENAI_API_KEY=sk-1234
```

#### 4. Run LibreChat:
```shell
docker compose up
```

</TabItem>

<TabItem value="continue-dev" label="ContinueDev">

Continue-Dev brings ChatGPT to VSCode. See how to [install it here](https://continue.dev/docs/quickstart).

In the [config.py](https://continue.dev/docs/reference/Models/openai), set this as your default model.
```python
default=OpenAI(
    api_key="IGNORED",
    model="fake-model-name",
    context_length=2048, # customize if needed for your model
    api_base="http://localhost:4000" # your proxy server url
),
```

Credits [@vividfog](https://github.com/ollama/ollama/issues/305#issuecomment-1751848077) for this tutorial.

</TabItem>

<TabItem value="aider" label="Aider">

```shell
$ pip install aider

$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key
```

</TabItem>
<TabItem value="autogen" label="AutoGen">

```shell
pip install pyautogen
```

```python
from autogen import AssistantAgent, UserProxyAgent, oai
config_list=[
    {
        "model": "my-fake-model",
        "api_base": "http://localhost:4000",  # litellm compatible endpoint
        "api_type": "open_ai",
        "api_key": "NULL",  # just a placeholder
    }
]

response = oai.Completion.create(config_list=config_list, prompt="Hi")
print(response)  # works fine

llm_config={
    "config_list": config_list,
}

assistant = AssistantAgent("assistant", llm_config=llm_config)
user_proxy = UserProxyAgent("user_proxy")
user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stock price change YTD.", config_list=config_list)
```

Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial.

</TabItem>

<TabItem value="guidance" label="guidance">

A guidance language for controlling large language models.
https://github.com/guidance-ai/guidance

**NOTE:** Guidance sends additional params like `stop_sequences` which can cause some models to fail if they don't support it.

**Fix**: Start your proxy using the `--drop_params` flag

```shell
litellm --model ollama/codellama --temperature 0.3 --max_tokens 2048 --drop_params
```

```python
import guidance

# set api_base to your proxy
# set api_key to anything
gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything")

experts = guidance('''
{{#system~}}
You are a helpful and terse assistant.
{{~/system}}

{{#user~}}
I want a response to the following question:
{{query}}
Name 3 world-class experts (past or present) who would be great at answering this?
Don't answer the question yet.
{{~/user}}

{{#assistant~}}
{{gen 'expert_names' temperature=0 max_tokens=300}}
{{~/assistant}}
''', llm=gpt4)

result = experts(query='How can I be more productive?')
print(result)
```

</TabItem>

</Tabs>

## Debugging Proxy

Events that occur during normal operation
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

-# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl
+# 💡 Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl

:::info

@@ -234,6 +234,26 @@ main();
```

</TabItem>

<TabItem value="mistral-py" label="Mistral Python SDK">

```python
import os

from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

client = MistralClient(api_key="sk-1234", endpoint="http://0.0.0.0:4000")
chat_response = client.chat(
    model="mistral-small-latest",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
)
print(chat_response.choices[0].message.content)
```

</TabItem>

<TabItem value="instructor" label="Instructor">

```python

@@ -566,6 +586,166 @@ curl --location 'http://0.0.0.0:4000/moderations' \
```

## Using with OpenAI compatible projects

Set `base_url` to the LiteLLM Proxy server

<Tabs>
<TabItem value="openai" label="OpenAI v1.0.0+">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```

</TabItem>
<TabItem value="librechat" label="LibreChat">

#### Start the LiteLLM proxy
```shell
litellm --model gpt-3.5-turbo

#INFO: Proxy running on http://0.0.0.0:4000
```

#### 1. Clone the repo

```shell
git clone https://github.com/danny-avila/LibreChat.git
```

#### 2. Modify Librechat's `docker-compose.yml`
LiteLLM Proxy runs on port `4000`, so point the reverse proxy below at port `4000`
```yaml
OPENAI_REVERSE_PROXY=http://host.docker.internal:4000/v1/chat/completions
```

#### 3. Save fake OpenAI key in Librechat's `.env`

Copy Librechat's `.env.example` to `.env` and overwrite the default OPENAI_API_KEY (by default it requires the user to pass a key).
```env
OPENAI_API_KEY=sk-1234
```

#### 4. Run LibreChat:
```shell
docker compose up
```

</TabItem>

<TabItem value="continue-dev" label="ContinueDev">

Continue-Dev brings ChatGPT to VSCode. See how to [install it here](https://continue.dev/docs/quickstart).

In the [config.py](https://continue.dev/docs/reference/Models/openai), set this as your default model.
```python
default=OpenAI(
    api_key="IGNORED",
    model="fake-model-name",
    context_length=2048, # customize if needed for your model
    api_base="http://localhost:4000" # your proxy server url
),
```

Credits [@vividfog](https://github.com/ollama/ollama/issues/305#issuecomment-1751848077) for this tutorial.

</TabItem>

<TabItem value="aider" label="Aider">

```shell
$ pip install aider

$ aider --openai-api-base http://0.0.0.0:4000 --openai-api-key fake-key
```

</TabItem>
<TabItem value="autogen" label="AutoGen">

```shell
pip install pyautogen
```

```python
from autogen import AssistantAgent, UserProxyAgent, oai
config_list=[
    {
        "model": "my-fake-model",
        "api_base": "http://localhost:4000",  # litellm compatible endpoint
        "api_type": "open_ai",
        "api_key": "NULL",  # just a placeholder
    }
]

response = oai.Completion.create(config_list=config_list, prompt="Hi")
print(response)  # works fine

llm_config={
    "config_list": config_list,
}

assistant = AssistantAgent("assistant", llm_config=llm_config)
user_proxy = UserProxyAgent("user_proxy")
user_proxy.initiate_chat(assistant, message="Plot a chart of META and TESLA stock price change YTD.", config_list=config_list)
```

Credits [@victordibia](https://github.com/microsoft/autogen/issues/45#issuecomment-1749921972) for this tutorial.

</TabItem>

<TabItem value="guidance" label="guidance">

A guidance language for controlling large language models.
https://github.com/guidance-ai/guidance

**NOTE:** Guidance sends additional params like `stop_sequences` which can cause some models to fail if they don't support it.

**Fix**: Start your proxy using the `--drop_params` flag

```shell
litellm --model ollama/codellama --temperature 0.3 --max_tokens 2048 --drop_params
```

```python
import guidance

# set api_base to your proxy
# set api_key to anything
gpt4 = guidance.llms.OpenAI("gpt-4", api_base="http://0.0.0.0:4000", api_key="anything")

experts = guidance('''
{{#system~}}
You are a helpful and terse assistant.
{{~/system}}

{{#user~}}
I want a response to the following question:
{{query}}
Name 3 world-class experts (past or present) who would be great at answering this?
Don't answer the question yet.
{{~/user}}

{{#assistant~}}
{{gen 'expert_names' temperature=0 max_tokens=300}}
{{~/assistant}}
''', llm=gpt4)

result = experts(query='How can I be more productive?')
print(result)
```

</TabItem>

</Tabs>

## Advanced

### (BETA) Batch Completions - pass multiple models
@@ -8,6 +8,10 @@ model_list:
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
      api_key: "os.environ/FIREWORKS"
  - model_name: mistral-small-latest
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: "os.environ/MISTRAL_API_KEY"
  - model_name: tts
    litellm_params:
      model: openai/tts-1
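Once the proxy is restarted with this config, the new `mistral-small-latest` entry is reachable like any other model. A minimal sketch using the OpenAI Python SDK against the proxy (assuming it listens on `http://0.0.0.0:4000` and accepts the key `sk-1234`, as in the test below):

```python
import openai

# Point the OpenAI client at the LiteLLM proxy (placeholder URL/key)
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="mistral-small-latest",  # model_name from the config above
    messages=[{"role": "user", "content": "write a one-line haiku"}],
)
print(response.choices[0].message.content)
```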

litellm/proxy/tests/test_mistral_sdk.py (new file, 13 lines)
@@ -0,0 +1,13 @@
import os

from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

client = MistralClient(api_key="sk-1234", endpoint="http://0.0.0.0:4000")
chat_response = client.chat(
    model="mistral-small-latest",
    messages=[
        {"role": "user", "content": "this is a test request, write a short poem"}
    ],
)
print(chat_response.choices[0].message.content)
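As a possible follow-up (not part of this commit), the same client can also exercise streaming through the proxy; a rough sketch, assuming the `mistralai` v0.x client's `chat_stream` method and the same placeholder endpoint/key:

```python
from mistralai.client import MistralClient

client = MistralClient(api_key="sk-1234", endpoint="http://0.0.0.0:4000")

# Stream the completion chunk by chunk instead of waiting for the full response
for chunk in client.chat_stream(
    model="mistral-small-latest",
    messages=[{"role": "user", "content": "write a short poem"}],
):
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
```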