Merge pull request #1315 from spdustin/feature_allow_claude_prefill

Adds "pre-fill" support for Claude
Krish Dholakia 2024-01-08 10:48:15 +05:30 committed by GitHub
commit 4ea3e778f7
5 changed files with 270 additions and 31 deletions


@@ -1,42 +1,102 @@
# Anthropic
LiteLLM supports
- `claude-2`
- `claude-2.1`
- `claude-instant-1`
- `claude-instant-1.2`
## API Keys
```python
import os
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
```
## Usage
```python
import os
from litellm import completion
# set env - [OPTIONAL] replace with your anthropic key
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{"role": "user", "content": "Hey! how's it going?"}]
response = completion(model="claude-instant-1", messages=messages)
print(response)
```
## Usage - "Assistant Pre-fill"
You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
> [!IMPORTANT]
> The returned completion will _not_ include your "pre-fill" text, since it is part of the prompt itself. Make sure to prefix Claude's completion with your pre-fill.
```python
import os
from litellm import completion
# set env - [OPTIONAL] replace with your anthropic key
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [
    {"role": "user", "content": "How do you say 'Hello' in German? Return your answer as a JSON object, like this:\n\n{ \"Hello\": \"Hallo\" }"},
    {"role": "assistant", "content": "{"},
]
response = completion(model="claude-2.1", messages=messages)
print(response)
```
### Example prompt sent to Claude
```
Human: How do you say 'Hello' in German? Return your answer as a JSON object, like this:
{ "Hello": "Hallo" }
Assistant: {
```
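Since the pre-fill is not echoed back, re-attach it before using the result. A minimal sketch continuing from the example above (the OpenAI-style `response.choices[0].message.content` access is LiteLLM's standard response shape; the `json` parsing is just illustrative):
```python
import json

# Claude's completion continues from the "{" pre-fill rather than repeating it,
# so prepend the pre-fill before parsing.
prefill = "{"
full_text = prefill + response.choices[0].message.content
data = json.loads(full_text)
print(data["Hello"])  # "Hallo"
```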
## Usage - "System" messages
If you're using Anthropic's Claude 2.1, `system` role messages are properly formatted for you.
```python
import os
from litellm import completion
# set env - [OPTIONAL] replace with your anthropic key
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [
    {"role": "system", "content": "You are a snarky assistant."},
    {"role": "user", "content": "How do I boil water?"},
]
response = completion(model="claude-2.1", messages=messages)
```
### Example prompt sent to Claude
```
You are a snarky assistant.
Human: How do I boil water?
Assistant:
```
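Note that this plain-prefix formatting is specific to Claude 2.1. For older Claude models, LiteLLM's `anthropic_pt` template (shown in the factory changes below) instead folds the system message into a `Human` turn wrapped in `<admin>` tags, so the same request against e.g. `claude-2` would produce a prompt like:
```
Human: <admin>You are a snarky assistant.</admin>
Human: How do I boil water?
Assistant:
```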
## Streaming
Just set `stream=True` when calling completion.
```python
import os
from litellm import completion
# set env
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{"role": "user", "content": "Hey! how's it going?"}]
response = completion(model="claude-instant-1", messages=messages, stream=True)
for chunk in response:
    print(chunk)
```


@@ -21,7 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
</a>
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
@@ -29,14 +29,77 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
    model="anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```
## Usage - "Assistant Pre-fill"
If you're using Anthropic's Claude with Bedrock, you can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
> [!IMPORTANT]
> The returned completion will _**not**_ include your "pre-fill" text, since it is part of the prompt itself. Make sure to prefix Claude's completion with your pre-fill.
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
messages = [
    {"role": "user", "content": "How do you say 'Hello' in German? Return your answer as a JSON object, like this:\n\n{ \"Hello\": \"Hallo\" }"},
    {"role": "assistant", "content": "{"},
]
response = completion(model="anthropic.claude-v2", messages=messages)
```
### Example prompt sent to Claude
```
Human: How do you say 'Hello' in German? Return your answer as a JSON object, like this:
{ "Hello": "Hallo" }
Assistant: {
```
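As with the direct Anthropic API above, re-attach the pre-fill before parsing; a short sketch, continuing from this example:
```python
import json

# The completion continues from the "{" pre-fill, so prepend it before parsing.
data = json.loads("{" + response.choices[0].message.content)
print(data)  # e.g. {"Hello": "Hallo"}
```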
## Usage - "System" messages
If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
messages = [
    {"role": "system", "content": "You are a snarky assistant."},
    {"role": "user", "content": "How do I boil water?"},
]
response = completion(model="anthropic.claude-v2:1", messages=messages)
```
### Example prompt sent to Claude
```
You are a snarky assistant.
Human: How do I boil water?
Assistant:
```
## Usage - Streaming
```python
import os
from litellm import completion
os.environ["AWS_ACCESS_KEY_ID"] = ""
@@ -44,7 +107,7 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
response = completion(
    model="anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    stream=True
)
@@ -79,11 +142,11 @@ for chunk in response:
### Passing credentials as parameters - Completion()
Pass AWS credentials as parameters to litellm.completion
```python
import os
from litellm import completion
response = completion(
    model="anthropic.claude-instant-v1",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    aws_access_key_id="",
    aws_secret_access_key="",
@@ -133,10 +196,11 @@ response = completion(
```
## Supported AWS Bedrock Models
Here's an example of using a Bedrock model with LiteLLM
| Model Name | Command | Required OS Variables |
|-----------------------------|------------------------------------------------------------------------|---------------------------------------------------------------------------|
| Anthropic Claude-V2.1 | `completion(model='anthropic.claude-v2:1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V2 | `completion(model='anthropic.claude-v2', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-Instant V1 | `completion(model='anthropic.claude-instant-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V1 | `completion(model='anthropic.claude-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |


@@ -295,6 +295,9 @@ def claude_2_1_pt(
    if system message is passed in, you can only do system, human, assistant or system, human
    if a system message is passed in and followed by an assistant message, insert a blank human message between them.
    Additionally, you can "put words in Claude's mouth" by ending with an assistant message.
    See: https://docs.anthropic.com/claude/docs/put-words-in-claudes-mouth
    """

    class AnthropicConstants(Enum):
@@ -311,7 +314,8 @@ def claude_2_1_pt(
            if idx > 0 and messages[idx - 1]["role"] == "system":
                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}"  # Insert a blank human message
            prompt += f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
    if messages[-1]["role"] != "assistant":
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"  # prompt must end with \"\n\nAssistant: \" turn
    return prompt
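A quick sketch of the guard's effect, with the expected outputs taken from the tests added in this PR:
```python
from litellm.llms.prompt_templates.factory import claude_2_1_pt

# No trailing assistant message: the "\n\nAssistant: " turn is still appended.
claude_2_1_pt([{"role": "user", "content": "Hello"}])
# -> "\n\nHuman: Hello\n\nAssistant: "

# Trailing assistant message: the pre-fill itself ends the prompt,
# so Claude continues directly from it.
claude_2_1_pt(
    [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi, "},
    ]
)
# -> "\n\nHuman: Hello\n\nAssistant: Hi, "
```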
@@ -364,6 +368,10 @@ def format_prompt_togetherai(messages, prompt_format, chat_template):
def anthropic_pt(
    messages: list,
):  # format - https://docs.anthropic.com/claude/reference/complete_post
    """
    You can "put words in Claude's mouth" by ending with an assistant message.
    See: https://docs.anthropic.com/claude/docs/put-words-in-claudes-mouth
    """

    class AnthropicConstants(Enum):
        HUMAN_PROMPT = "\n\nHuman: "
        AI_PROMPT = "\n\nAssistant: "
@@ -382,7 +390,8 @@ def anthropic_pt(
            idx == 0 and message["role"] == "assistant"
        ):  # ensure the prompt always starts with `\n\nHuman: `
            prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
    if messages[-1]["role"] != "assistant":
        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
    return prompt
@@ -580,7 +589,7 @@ def prompt_factory(
    if custom_llm_provider == "ollama":
        return ollama_pt(model=model, messages=messages)
    elif custom_llm_provider == "anthropic":
        if any(_ in model for _ in ["claude-2.1", "claude-v2:1"]):
            return claude_2_1_pt(messages=messages)
        else:
            return anthropic_pt(messages=messages)
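And a small sketch of the routing change (`prompt_factory` lives in the same factory module, as the commented-out import in the tests below notes; the keyword-argument call style is assumed):
```python
from litellm.llms.prompt_templates.factory import prompt_factory

messages = [{"role": "user", "content": "Hello"}]

# Both the Anthropic id and the Bedrock-style id now get the Claude 2.1 template:
prompt_factory(model="claude-2.1", messages=messages, custom_llm_provider="anthropic")
prompt_factory(model="anthropic.claude-v2:1", messages=messages, custom_llm_provider="anthropic")

# Other Claude models fall back to the legacy anthropic_pt() template:
prompt_factory(model="claude-instant-1", messages=messages, custom_llm_provider="anthropic")
```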


@@ -66,6 +66,43 @@ def test_completion_bedrock_claude_completion_auth():
test_completion_bedrock_claude_completion_auth()
def test_completion_bedrock_claude_2_1_completion_auth():
    print("calling bedrock claude 2.1 completion params auth")
    import os

    aws_access_key_id = os.environ["AWS_ACCESS_KEY_ID"]
    aws_secret_access_key = os.environ["AWS_SECRET_ACCESS_KEY"]
    aws_region_name = os.environ["AWS_REGION_NAME"]
    os.environ.pop("AWS_ACCESS_KEY_ID", None)
    os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
    os.environ.pop("AWS_REGION_NAME", None)
    try:
        response = completion(
            model="bedrock/anthropic.claude-v2:1",
            messages=messages,
            max_tokens=10,
            temperature=0.1,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_region_name=aws_region_name,
        )
        # Add any assertions here to check the response
        print(response)
        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
        os.environ["AWS_REGION_NAME"] = aws_region_name
    except RateLimitError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


test_completion_bedrock_claude_2_1_completion_auth()
def test_completion_bedrock_claude_external_client_auth():
    print("\ncalling bedrock claude external client auth")
    import os


@@ -2,27 +2,96 @@
# This tests if prompts are being correctly formatted
import sys
import os
import io

sys.path.insert(0, os.path.abspath("../.."))

# from litellm.llms.prompt_templates.factory import prompt_factory
from litellm import completion
from litellm.llms.prompt_templates.factory import (
    anthropic_pt,
    claude_2_1_pt,
    llama_2_chat_pt,
)


def test_codellama_prompt_format():
    messages = [
        {"role": "system", "content": "You are a good bot"},
        {"role": "user", "content": "Hey, how's it going?"},
    ]
    expected_prompt = "<s>[INST] <<SYS>>\nYou are a good bot\n<</SYS>>\n [/INST]\n[INST] Hey, how's it going? [/INST]\n"
    assert llama_2_chat_pt(messages) == expected_prompt


def test_claude_2_1_pt_formatting():
    # Test case: User only, should add Assistant
    messages = [{"role": "user", "content": "Hello"}]
    expected_prompt = "\n\nHuman: Hello\n\nAssistant: "
    assert claude_2_1_pt(messages) == expected_prompt

    # Test case: System, User, and Assistant "pre-fill" sequence,
    # Should return pre-fill
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": 'Please return "Hello World" as a JSON object.'},
        {"role": "assistant", "content": "{"},
    ]
    expected_prompt = 'You are a helpful assistant.\n\nHuman: Please return "Hello World" as a JSON object.\n\nAssistant: {'
    assert claude_2_1_pt(messages) == expected_prompt

    # Test case: System, Assistant sequence, should insert blank Human message
    # before Assistant pre-fill
    messages = [
        {"role": "system", "content": "You are a storyteller."},
        {"role": "assistant", "content": "Once upon a time, there "},
    ]
    expected_prompt = (
        "You are a storyteller.\n\nHuman: \n\nAssistant: Once upon a time, there "
    )
    assert claude_2_1_pt(messages) == expected_prompt

    # Test case: System, User sequence
    messages = [
        {"role": "system", "content": "System reboot"},
        {"role": "user", "content": "Is everything okay?"},
    ]
    expected_prompt = "System reboot\n\nHuman: Is everything okay?\n\nAssistant: "
    assert claude_2_1_pt(messages) == expected_prompt


def test_anthropic_pt_formatting():
    # Test case: User only, should add Assistant
    messages = [{"role": "user", "content": "Hello"}]
    expected_prompt = "\n\nHuman: Hello\n\nAssistant: "
    assert anthropic_pt(messages) == expected_prompt

    # Test case: System, User, and Assistant "pre-fill" sequence,
    # Should return pre-fill
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": 'Please return "Hello World" as a JSON object.'},
        {"role": "assistant", "content": "{"},
    ]
    expected_prompt = '\n\nHuman: <admin>You are a helpful assistant.</admin>\n\nHuman: Please return "Hello World" as a JSON object.\n\nAssistant: {'
    assert anthropic_pt(messages) == expected_prompt

    # Test case: System, Assistant sequence, should NOT insert blank Human message
    # before Assistant pre-fill, because "System" messages are Human
    # messages wrapped with <admin></admin>
    messages = [
        {"role": "system", "content": "You are a storyteller."},
        {"role": "assistant", "content": "Once upon a time, there "},
    ]
    expected_prompt = "\n\nHuman: <admin>You are a storyteller.</admin>\n\nAssistant: Once upon a time, there "
    assert anthropic_pt(messages) == expected_prompt

    # Test case: System, User sequence
    messages = [
        {"role": "system", "content": "System reboot"},
        {"role": "user", "content": "Is everything okay?"},
    ]
    expected_prompt = "\n\nHuman: <admin>System reboot</admin>\n\nHuman: Is everything okay?\n\nAssistant: "
    assert anthropic_pt(messages) == expected_prompt


# codellama_prompt_format()