mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
fix(converse_transformation.py): fix encoding model
This commit is contained in:
parent
814d8ba54c
commit
8e7363acf5
4 changed files with 441 additions and 324 deletions
|
@ -1262,308 +1262,6 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Boto3 - Authentication
|
|
||||||
|
|
||||||
### Passing credentials as parameters - Completion()
|
|
||||||
Pass AWS credentials as parameters to litellm.completion
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
aws_access_key_id="",
|
|
||||||
aws_secret_access_key="",
|
|
||||||
aws_region_name="",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Passing extra headers + Custom API Endpoints
|
|
||||||
|
|
||||||
This can be used to override existing headers (e.g. `Authorization`) when calling custom api endpoints
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
import litellm
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
litellm.set_verbose = True # 👈 SEE RAW REQUEST
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
aws_access_key_id="",
|
|
||||||
aws_secret_access_key="",
|
|
||||||
aws_region_name="",
|
|
||||||
aws_bedrock_runtime_endpoint="https://my-fake-endpoint.com",
|
|
||||||
extra_headers={"key": "value"}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: bedrock-model
|
|
||||||
litellm_params:
|
|
||||||
model: bedrock/anthropic.claude-instant-v1
|
|
||||||
aws_access_key_id: "",
|
|
||||||
aws_secret_access_key: "",
|
|
||||||
aws_region_name: "",
|
|
||||||
aws_bedrock_runtime_endpoint: "https://my-fake-endpoint.com",
|
|
||||||
extra_headers: {"key": "value"}
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start proxy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
litellm --config /path/to/config.yaml --detailed_debug
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-d '{
|
|
||||||
"model": "bedrock-model",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful math tutor. Guide the user through the solution step by step."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "how can I solve 8x + 7 = -23"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
### SSO Login (AWS Profile)
|
|
||||||
- Set `AWS_PROFILE` environment variable
|
|
||||||
- Make bedrock completion call
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}]
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
or pass `aws_profile_name`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
aws_profile_name="dev-profile",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### STS (Role-based Auth)
|
|
||||||
|
|
||||||
- Set `aws_role_name` and `aws_session_name`
|
|
||||||
|
|
||||||
|
|
||||||
| LiteLLM Parameter | Boto3 Parameter | Description | Boto3 Documentation |
|
|
||||||
|------------------|-----------------|-------------|-------------------|
|
|
||||||
| `aws_access_key_id` | `aws_access_key_id` | AWS access key associated with an IAM user or role | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) |
|
|
||||||
| `aws_secret_access_key` | `aws_secret_access_key` | AWS secret key associated with the access key | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) |
|
|
||||||
| `aws_role_name` | `RoleArn` | The Amazon Resource Name (ARN) of the role to assume | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) |
|
|
||||||
| `aws_session_name` | `RoleSessionName` | An identifier for the assumed role session | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) |
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Make the bedrock completion call
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=messages,
|
|
||||||
max_tokens=10,
|
|
||||||
temperature=0.1,
|
|
||||||
aws_role_name=aws_role_name,
|
|
||||||
aws_session_name="my-test-session",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
If you also need to dynamically set the aws user accessing the role, add the additional args in the completion()/embedding() function
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=messages,
|
|
||||||
max_tokens=10,
|
|
||||||
temperature=0.1,
|
|
||||||
aws_region_name=aws_region_name,
|
|
||||||
aws_access_key_id=aws_access_key_id,
|
|
||||||
aws_secret_access_key=aws_secret_access_key,
|
|
||||||
aws_role_name=aws_role_name,
|
|
||||||
aws_session_name="my-test-session",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: bedrock/*
|
|
||||||
litellm_params:
|
|
||||||
model: bedrock/*
|
|
||||||
aws_role_name: arn:aws:iam::888602223428:role/iam_local_role # AWS RoleArn
|
|
||||||
aws_session_name: "bedrock-session" # AWS RoleSessionName
|
|
||||||
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # [OPTIONAL - not required if using role]
|
|
||||||
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # [OPTIONAL - not required if using role]
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
|
||||||
|
|
||||||
:::warning
|
|
||||||
|
|
||||||
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
|
||||||
|
|
||||||
|
|
||||||
Experimental - 2024-Jun-23:
|
|
||||||
`aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token` will be extracted from boto3.client and be passed into the httpx client
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.
|
|
||||||
|
|
||||||
Create a client from session credentials:
|
|
||||||
```python
|
|
||||||
import boto3
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
bedrock = boto3.client(
|
|
||||||
service_name="bedrock-runtime",
|
|
||||||
region_name="us-east-1",
|
|
||||||
aws_access_key_id="",
|
|
||||||
aws_secret_access_key="",
|
|
||||||
aws_session_token="",
|
|
||||||
)
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
aws_bedrock_client=bedrock,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Create a client from AWS profile in `~/.aws/config`:
|
|
||||||
```python
|
|
||||||
import boto3
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
dev_session = boto3.Session(profile_name="dev-profile")
|
|
||||||
bedrock = dev_session.client(
|
|
||||||
service_name="bedrock-runtime",
|
|
||||||
region_name="us-east-1",
|
|
||||||
)
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/anthropic.claude-instant-v1",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
aws_bedrock_client=bedrock,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
## Calling via Internal Proxy
|
|
||||||
|
|
||||||
Use the `bedrock/converse_like/model` endpoint to call bedrock converse model via your internal proxy.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="bedrock/converse_like/some-model",
|
|
||||||
messages=[{"role": "user", "content": "What's AWS?"}],
|
|
||||||
api_key="sk-1234",
|
|
||||||
api_base="https://some-api-url/models",
|
|
||||||
extra_headers={"test": "hello world"},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="LiteLLM Proxy">
|
|
||||||
|
|
||||||
1. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: anthropic-claude
|
|
||||||
litellm_params:
|
|
||||||
model: bedrock/converse_like/some-model
|
|
||||||
api_base: https://some-api-url/models
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start proxy server
|
|
||||||
|
|
||||||
```bash
|
|
||||||
litellm --config config.yaml
|
|
||||||
|
|
||||||
# RUNNING on http://0.0.0.0:4000
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-d '{
|
|
||||||
"model": "anthropic-claude",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful math tutor. Guide the user through the solution step by step."
|
|
||||||
},
|
|
||||||
{ "content": "Hello, how are you?", "role": "user" }
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
**Expected Output URL**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
https://some-api-url/models
|
|
||||||
```
|
|
||||||
|
|
||||||
## Bedrock Imported Models (Deepseek, Deepseek R1)
|
## Bedrock Imported Models (Deepseek, Deepseek R1)
|
||||||
|
|
||||||
### Deepseek R1
|
### Deepseek R1
|
||||||
|
@ -1958,3 +1656,377 @@ curl http://0.0.0.0:4000/rerank \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Bedrock Application Inference Profile
|
||||||
|
|
||||||
|
Use Bedrock Application Inference Profile to track costs for projects on AWS.
|
||||||
|
|
||||||
|
You can either pass it in the model name - `model="bedrock/arn:..."` - or as a separate `model_id="arn:..."` param.
|
||||||
|
|
||||||
|
### Set via `model_id`
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem label="SDK" value="sdk">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["AWS_ACCESS_KEY_ID"] = ""
|
||||||
|
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
|
||||||
|
os.environ["AWS_REGION_NAME"] = ""
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||||
|
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||||
|
model_id="arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0",
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem label="PROXY" value="proxy">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: anthropic-claude-3-5-sonnet
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||||
|
# Set the ARN of the application inference profile in the model_id parameter
|
||||||
|
model_id: arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H "Authorization: Bearer $LITELLM_API_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic-claude-3-5-sonnet",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "List 5 important events in the XIX century"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Boto3 - Authentication
|
||||||
|
|
||||||
|
### Passing credentials as parameters - Completion()
|
||||||
|
Pass AWS credentials as parameters to litellm.completion
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
aws_access_key_id="",
|
||||||
|
aws_secret_access_key="",
|
||||||
|
aws_region_name="",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Passing extra headers + Custom API Endpoints
|
||||||
|
|
||||||
|
This can be used to override existing headers (e.g. `Authorization`) when calling custom api endpoints
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
import litellm
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
litellm.set_verbose = True # 👈 SEE RAW REQUEST
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
aws_access_key_id="",
|
||||||
|
aws_secret_access_key="",
|
||||||
|
aws_region_name="",
|
||||||
|
aws_bedrock_runtime_endpoint="https://my-fake-endpoint.com",
|
||||||
|
extra_headers={"key": "value"}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock-model
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/anthropic.claude-instant-v1
|
||||||
|
aws_access_key_id: ""
|
||||||
|
aws_secret_access_key: ""
|
||||||
|
aws_region_name: ""
|
||||||
|
aws_bedrock_runtime_endpoint: "https://my-fake-endpoint.com"
|
||||||
|
extra_headers: {"key": "value"}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml --detailed_debug
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "bedrock-model",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful math tutor. Guide the user through the solution step by step."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "how can I solve 8x + 7 = -23"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### SSO Login (AWS Profile)
|
||||||
|
- Set `AWS_PROFILE` environment variable
|
||||||
|
- Make bedrock completion call
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}]
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
or pass `aws_profile_name`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
aws_profile_name="dev-profile",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### STS (Role-based Auth)
|
||||||
|
|
||||||
|
- Set `aws_role_name` and `aws_session_name`
|
||||||
|
|
||||||
|
|
||||||
|
| LiteLLM Parameter | Boto3 Parameter | Description | Boto3 Documentation |
|
||||||
|
|------------------|-----------------|-------------|-------------------|
|
||||||
|
| `aws_access_key_id` | `aws_access_key_id` | AWS access key associated with an IAM user or role | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) |
|
||||||
|
| `aws_secret_access_key` | `aws_secret_access_key` | AWS secret key associated with the access key | [Credentials](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) |
|
||||||
|
| `aws_role_name` | `RoleArn` | The Amazon Resource Name (ARN) of the role to assume | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) |
|
||||||
|
| `aws_session_name` | `RoleSessionName` | An identifier for the assumed role session | [AssumeRole API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts.html#STS.Client.assume_role) |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Make the bedrock completion call
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=messages,
|
||||||
|
max_tokens=10,
|
||||||
|
temperature=0.1,
|
||||||
|
aws_role_name=aws_role_name,
|
||||||
|
aws_session_name="my-test-session",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
If you also need to dynamically set the AWS user that assumes the role, pass the additional credential args (`aws_region_name`, `aws_access_key_id`, `aws_secret_access_key`) to the completion()/embedding() call:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=messages,
|
||||||
|
max_tokens=10,
|
||||||
|
temperature=0.1,
|
||||||
|
aws_region_name=aws_region_name,
|
||||||
|
aws_access_key_id=aws_access_key_id,
|
||||||
|
aws_secret_access_key=aws_secret_access_key,
|
||||||
|
aws_role_name=aws_role_name,
|
||||||
|
aws_session_name="my-test-session",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock/*
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/*
|
||||||
|
aws_role_name: arn:aws:iam::888602223428:role/iam_local_role # AWS RoleArn
|
||||||
|
aws_session_name: "bedrock-session" # AWS RoleSessionName
|
||||||
|
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # [OPTIONAL - not required if using role]
|
||||||
|
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # [OPTIONAL - not required if using role]
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
||||||
|
|
||||||
|
:::warning
|
||||||
|
|
||||||
|
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
|
|
||||||
|
Experimental - 2024-Jun-23:
|
||||||
|
`aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token` will be extracted from boto3.client and be passed into the httpx client
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
Pass an external BedrockRuntime.Client object as a parameter to litellm.completion. Useful when using an AWS credentials profile, SSO session, assumed role session, or if environment variables are not available for auth.
|
||||||
|
|
||||||
|
Create a client from session credentials:
|
||||||
|
```python
|
||||||
|
import boto3
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
bedrock = boto3.client(
|
||||||
|
service_name="bedrock-runtime",
|
||||||
|
region_name="us-east-1",
|
||||||
|
aws_access_key_id="",
|
||||||
|
aws_secret_access_key="",
|
||||||
|
aws_session_token="",
|
||||||
|
)
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
aws_bedrock_client=bedrock,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a client from AWS profile in `~/.aws/config`:
|
||||||
|
```python
|
||||||
|
import boto3
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
dev_session = boto3.Session(profile_name="dev-profile")
|
||||||
|
bedrock = dev_session.client(
|
||||||
|
service_name="bedrock-runtime",
|
||||||
|
region_name="us-east-1",
|
||||||
|
)
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-instant-v1",
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
aws_bedrock_client=bedrock,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
## Calling via Internal Proxy (not bedrock url compatible)
|
||||||
|
|
||||||
|
Use the `bedrock/converse_like/model` endpoint to call a Bedrock Converse model via your internal proxy.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/converse_like/some-model",
|
||||||
|
messages=[{"role": "user", "content": "What's AWS?"}],
|
||||||
|
api_key="sk-1234",
|
||||||
|
api_base="https://some-api-url/models",
|
||||||
|
extra_headers={"test": "hello world"},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: anthropic-claude
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/converse_like/some-model
|
||||||
|
api_base: https://some-api-url/models
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic-claude",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful math tutor. Guide the user through the solution step by step."
|
||||||
|
},
|
||||||
|
{ "content": "Hello, how are you?", "role": "user" }
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
**Expected Output URL**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
https://some-api-url/models
|
||||||
|
```
|
||||||
|
|
|
@ -279,16 +279,30 @@ class BaseAWSLLM:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_aws_region_name(
|
def _get_aws_region_name(
|
||||||
self, optional_params: dict, model: Optional[str] = None
|
self,
|
||||||
|
optional_params: dict,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
model_id: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Get the AWS region name from the environment variables
|
Get the AWS region name from the environment variables.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
optional_params (dict): Optional parameters for the model call
|
||||||
|
model (str): The model name
|
||||||
|
model_id (str): The model ID. This is the ARN of the model, if passed in as a separate param.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The AWS region name
|
||||||
"""
|
"""
|
||||||
aws_region_name = optional_params.get("aws_region_name", None)
|
aws_region_name = optional_params.get("aws_region_name", None)
|
||||||
### SET REGION NAME ###
|
### SET REGION NAME ###
|
||||||
if aws_region_name is None:
|
if aws_region_name is None:
|
||||||
# check model arn #
|
# check model arn #
|
||||||
aws_region_name = self._get_aws_region_from_model_arn(model)
|
if model_id is not None:
|
||||||
|
aws_region_name = self._get_aws_region_from_model_arn(model_id)
|
||||||
|
else:
|
||||||
|
aws_region_name = self._get_aws_region_from_model_arn(model)
|
||||||
# check env #
|
# check env #
|
||||||
litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
|
litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
|
||||||
|
|
||||||
|
|
|
@ -268,23 +268,29 @@ class BedrockConverseLLM(BaseAWSLLM):
|
||||||
|
|
||||||
## SETUP ##
|
## SETUP ##
|
||||||
stream = optional_params.pop("stream", None)
|
stream = optional_params.pop("stream", None)
|
||||||
modelId = optional_params.pop("model_id", None)
|
unencoded_model_id = optional_params.pop("model_id", None)
|
||||||
fake_stream = optional_params.pop("fake_stream", False)
|
fake_stream = optional_params.pop("fake_stream", False)
|
||||||
json_mode = optional_params.get("json_mode", False)
|
json_mode = optional_params.get("json_mode", False)
|
||||||
if modelId is not None:
|
if unencoded_model_id is not None:
|
||||||
modelId = self.encode_model_id(model_id=modelId)
|
modelId = self.encode_model_id(model_id=unencoded_model_id)
|
||||||
else:
|
else:
|
||||||
modelId = self.encode_model_id(model_id=model)
|
modelId = self.encode_model_id(model_id=model)
|
||||||
|
|
||||||
if stream is True and "ai21" in modelId:
|
if stream is True and "ai21" in modelId:
|
||||||
fake_stream = True
|
fake_stream = True
|
||||||
|
|
||||||
|
### SET REGION NAME ###
|
||||||
|
aws_region_name = self._get_aws_region_name(
|
||||||
|
optional_params=optional_params,
|
||||||
|
model=model,
|
||||||
|
model_id=unencoded_model_id,
|
||||||
|
)
|
||||||
|
|
||||||
## CREDENTIALS ##
|
## CREDENTIALS ##
|
||||||
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
||||||
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
||||||
aws_access_key_id = optional_params.pop("aws_access_key_id", None)
|
aws_access_key_id = optional_params.pop("aws_access_key_id", None)
|
||||||
aws_session_token = optional_params.pop("aws_session_token", None)
|
aws_session_token = optional_params.pop("aws_session_token", None)
|
||||||
aws_region_name = optional_params.pop("aws_region_name", None)
|
|
||||||
aws_role_name = optional_params.pop("aws_role_name", None)
|
aws_role_name = optional_params.pop("aws_role_name", None)
|
||||||
aws_session_name = optional_params.pop("aws_session_name", None)
|
aws_session_name = optional_params.pop("aws_session_name", None)
|
||||||
aws_profile_name = optional_params.pop("aws_profile_name", None)
|
aws_profile_name = optional_params.pop("aws_profile_name", None)
|
||||||
|
@ -293,25 +299,25 @@ class BedrockConverseLLM(BaseAWSLLM):
|
||||||
) # https://bedrock-runtime.{region_name}.amazonaws.com
|
) # https://bedrock-runtime.{region_name}.amazonaws.com
|
||||||
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
|
aws_web_identity_token = optional_params.pop("aws_web_identity_token", None)
|
||||||
aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)
|
aws_sts_endpoint = optional_params.pop("aws_sts_endpoint", None)
|
||||||
|
optional_params.pop("aws_region_name", None)
|
||||||
|
|
||||||
### SET REGION NAME ###
|
# if aws_region_name is None:
|
||||||
if aws_region_name is None:
|
# # check env #
|
||||||
# check env #
|
# litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
|
||||||
litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
|
|
||||||
|
|
||||||
if litellm_aws_region_name is not None and isinstance(
|
# if litellm_aws_region_name is not None and isinstance(
|
||||||
litellm_aws_region_name, str
|
# litellm_aws_region_name, str
|
||||||
):
|
# ):
|
||||||
aws_region_name = litellm_aws_region_name
|
# aws_region_name = litellm_aws_region_name
|
||||||
|
|
||||||
standard_aws_region_name = get_secret("AWS_REGION", None)
|
# standard_aws_region_name = get_secret("AWS_REGION", None)
|
||||||
if standard_aws_region_name is not None and isinstance(
|
# if standard_aws_region_name is not None and isinstance(
|
||||||
standard_aws_region_name, str
|
# standard_aws_region_name, str
|
||||||
):
|
# ):
|
||||||
aws_region_name = standard_aws_region_name
|
# aws_region_name = standard_aws_region_name
|
||||||
|
|
||||||
if aws_region_name is None:
|
# if aws_region_name is None:
|
||||||
aws_region_name = "us-west-2"
|
# aws_region_name = "us-west-2"
|
||||||
|
|
||||||
litellm_params["aws_region_name"] = (
|
litellm_params["aws_region_name"] = (
|
||||||
aws_region_name # [DO NOT DELETE] important for async calls
|
aws_region_name # [DO NOT DELETE] important for async calls
|
||||||
|
|
|
@ -2948,3 +2948,28 @@ async def test_bedrock_stream_thinking_content_openwebui():
|
||||||
assert (
|
assert (
|
||||||
len(response_content) > 0
|
len(response_content) > 0
|
||||||
), "There should be non-empty content after thinking tags"
|
), "There should be non-empty content after thinking tags"
|
||||||
|
|
||||||
|
|
||||||
|
def test_bedrock_application_inference_profile():
|
||||||
|
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
|
||||||
|
|
||||||
|
client = HTTPHandler()
|
||||||
|
|
||||||
|
with patch.object(client, "post") as mock_post:
|
||||||
|
try:
|
||||||
|
resp = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||||
|
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||||
|
model_id="arn:aws:bedrock:eu-central-1:000000000000:application-inference-profile/a0a0a0a0a0a0",
|
||||||
|
client=client,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
mock_post.assert_called_once()
|
||||||
|
|
||||||
|
print(mock_post.call_args.kwargs)
|
||||||
|
json_data = mock_post.call_args.kwargs["data"]
|
||||||
|
assert mock_post.call_args.kwargs["url"].startswith(
|
||||||
|
"https://bedrock-runtime.eu-central-1.amazonaws.com/"
|
||||||
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue