Merge pull request #2315 from BerriAI/litellm_add_claude_3

[FEAT]- add claude 3
This commit is contained in:
Ishaan Jaff 2024-03-04 09:23:13 -08:00 committed by GitHub
commit 14fc8355fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 179 additions and 68 deletions

View file

@ -1,9 +1,9 @@
# Anthropic
LiteLLM supports
- `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`)
- `claude-2`
- `claude-2.1`
- `claude-instant-1`
- `claude-instant-1.2`
## API Keys
@ -24,11 +24,42 @@ from litellm import completion
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{"role": "user", "content": "Hey! how's it going?"}]
response = completion(model="claude-instant-1", messages=messages)
response = completion(model="claude-3-opus-20240229", messages=messages)
print(response)
```
## Usage - "Assistant Pre-fill"
## Usage - Streaming
Just set `stream=True` when calling completion.
```python
import os
from litellm import completion
# set env
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{"role": "user", "content": "Hey! how's it going?"}]
response = completion(model="claude-3-opus-20240229", messages=messages, stream=True)
for chunk in response:
print(chunk["choices"][0]["delta"]["content"]) # same as openai format
```
## Supported Models
| Model Name       | Function Call                                        | Required OS Variables                 |
|------------------|------------------------------------------------------|---------------------------------------|
| claude-3-opus | `completion('claude-3-opus-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-3-sonnet | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
## Advanced
### Usage - "Assistant Pre-fill"
You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
@ -50,7 +81,7 @@ response = completion(model="claude-2.1", messages=messages)
print(response)
```
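Putting the pieces together, a complete pre-fill call might look like the sketch below; it is assembled from the example prompt shown next, and the exact strings are illustrative rather than part of this change:

```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

messages = [
    {
        "role": "user",
        "content": "How do you say 'Hello' in German? Return your answer as a JSON object, like this:\n\n{\"Hello\": \"Hallo\"}",
    },
    # the trailing assistant message "pre-fills" the start of Claude's reply
    {"role": "assistant", "content": "{"},
]

response = completion(model="claude-2.1", messages=messages)
print(response)
```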
### Example prompt sent to Claude
#### Example prompt sent to Claude
```
@ -61,7 +92,7 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
Assistant: {
```
## Usage - "System" messages
### Usage - "System" messages
If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.
```python
@ -78,7 +109,7 @@ messages = [
response = completion(model="claude-2.1", messages=messages)
```
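A complete call with a `system` message might look like this sketch (the content mirrors the example prompt below; the values are illustrative):

```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

messages = [
    {"role": "system", "content": "You are a snarky assistant."},
    {"role": "user", "content": "How do I boil water?"},
]

response = completion(model="claude-2.1", messages=messages)
print(response)
```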
### Example prompt sent to Claude
#### Example prompt sent to Claude
```
You are a snarky assistant.
@ -88,28 +119,3 @@ Human: How do I boil water?
Assistant:
```
## Streaming
Just set `stream=True` when calling completion.
```python
import os
from litellm import completion
# set env
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
messages = [{"role": "user", "content": "Hey! how's it going?"}]
response = completion(model="claude-instant-1", messages=messages, stream=True)
for chunk in response:
print(chunk["choices"][0]["delta"]["content"]) # same as openai format
```
### Model Details
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |

View file

@ -20,7 +20,7 @@ class AnthropicError(Exception):
self.status_code = status_code
self.message = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/complete"
method="POST", url="https://api.anthropic.com/v1/messages"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
@ -35,9 +35,7 @@ class AnthropicConfig:
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens_to_sample: Optional[
int
] = litellm.max_tokens # anthropic requires a default
max_tokens: Optional[int] = litellm.max_tokens # anthropic requires a default
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
@ -46,7 +44,7 @@ class AnthropicConfig:
def __init__(
self,
max_tokens_to_sample: Optional[int] = 256, # anthropic requires a default
max_tokens: Optional[int] = 256, # anthropic requires a default
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
@ -123,6 +121,35 @@ def completion(
prompt = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
"""
format messages for anthropic
1. Anthropic supports roles like "user" and "assistant" (here litellm translates "system" -> "assistant")
2. The first message always needs to be of role "user"
3. Each message must alternate between "user" and "assistant" (this is not currently handled by litellm)
4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise)
"""
# 1. Anthropic only supports roles like "user" and "assistant"
for idx, message in enumerate(messages):
if message["role"] == "system":
message["role"] = "assistant"
# strip trailing whitespace from assistant content
# TODO: only do this for the final assistant message (anthropic rejects trailing whitespace there)
if message["role"] == "assistant":
message["content"] = message["content"].strip()
# 2. The first message always needs to be of role "user"
if len(messages) > 0:
if messages[0]["role"] != "user":
# find the index of the first user message
for i, message in enumerate(messages):
if message["role"] == "user":
break
# remove the user message at existing position and add it to the front
messages.pop(i)
# move the first user message to the front
messages = [message] + messages
## Load Config
config = litellm.AnthropicConfig.get_config()
@ -134,7 +161,7 @@ def completion(
data = {
"model": model,
"prompt": prompt,
"messages": messages,
**optional_params,
}
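For orientation, with the defaults above the body sent to the new `/v1/messages` endpoint would look roughly like the sketch below; the concrete values are illustrative assumptions, not taken from this diff:

```python
# illustrative request body for the Anthropic messages API (example values)
data = {
    "model": "claude-3-opus-20240229",
    "messages": [{"role": "user", "content": "Hey! how's it going?"}],
    "max_tokens": 256,  # AnthropicConfig default; the messages API requires max_tokens
}
```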
@ -173,7 +200,7 @@ def completion(
## LOGGING
logging_obj.post_call(
input=prompt,
input=messages,
api_key=api_key,
original_response=response.text,
additional_args={"complete_input_dict": data},
@ -191,20 +218,20 @@ def completion(
message=str(completion_response["error"]),
status_code=response.status_code,
)
elif len(completion_response["content"]) == 0:
raise AnthropicError(
message="No content in response",
status_code=response.status_code,
)
else:
if len(completion_response["completion"]) > 0:
model_response["choices"][0]["message"][
"content"
] = completion_response["completion"]
text_content = completion_response["content"][0].get("text", None)
model_response.choices[0].message.content = text_content # type: ignore
model_response.choices[0].finish_reason = completion_response["stop_reason"]
## CALCULATING USAGE
prompt_tokens = len(
encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
) ##[TODO] use the anthropic tokenizer here
prompt_tokens = completion_response["usage"]["input_tokens"]
completion_tokens = completion_response["usage"]["output_tokens"]
total_tokens = prompt_tokens + completion_tokens
model_response["created"] = int(time.time())
model_response["model"] = model

View file

@ -1023,7 +1023,7 @@ def completion(
api_base
or litellm.api_base
or get_secret("ANTHROPIC_API_BASE")
or "https://api.anthropic.com/v1/complete"
or "https://api.anthropic.com/v1/messages"
)
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
response = anthropic.completion(

View file

@ -643,6 +643,22 @@
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-3-opus-20240229": {
"max_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-3-sonnet-20240229": {
"max_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "anthropic",
"mode": "chat"
},
"text-bison": {
"max_tokens": 8192,
"input_cost_per_token": 0.000000125,

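For a sense of scale, the per-token prices added above translate into request cost roughly as in this sketch (token counts are made-up example numbers):

```python
# claude-3-opus-20240229 prices from the entry above
input_cost_per_token = 0.000015
output_cost_per_token = 0.000075

prompt_tokens = 1_000      # example input size
completion_tokens = 500    # example output size

cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # $0.0525
```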
View file

@ -47,8 +47,9 @@ test_function_call_non_openai_model()
## case 2: add_function_to_prompt set
def test_function_call_non_openai_model_litellm_mod_set():
litellm.add_function_to_prompt = True
litellm.set_verbose = True
try:
model = "claude-instant-1"
model = "claude-instant-1.2"
messages = [{"role": "user", "content": "what's the weather in sf?"}]
functions = [
{

View file

@ -56,7 +56,7 @@ def test_completion_custom_provider_model_name():
def test_completion_claude():
litellm.set_verbose = True
litellm.cache = None
litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"})
messages = [
{
"role": "system",
@ -67,7 +67,7 @@ def test_completion_claude():
try:
# test without max tokens
response = completion(
model="claude-instant-1",
model="claude-instant-1.2",
messages=messages,
request_timeout=10,
)
@ -84,6 +84,40 @@ def test_completion_claude():
# test_completion_claude()
def test_completion_claude_3():
litellm.set_verbose = True
messages = [{"role": "user", "content": "Hello, world"}]
try:
# test without max tokens
response = completion(
model="anthropic/claude-3-opus-20240229",
messages=messages,
)
# Add any assertions here to check response args
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_claude_3_stream():
litellm.set_verbose = False
messages = [{"role": "user", "content": "Hello, world"}]
try:
# test without max tokens
response = completion(
model="anthropic/claude-3-opus-20240229",
messages=messages,
max_tokens=10,
stream=True,
)
# Add any assertions here to check response args
print(response)
for chunk in response:
print(chunk)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_mistral_api():
try:
litellm.set_verbose = True
@ -163,19 +197,17 @@ def test_completion_mistral_api_modified_input():
def test_completion_claude2_1():
try:
litellm.set_verbose = True
print("claude2.1 test request")
messages = [
{
"role": "system",
"content": "Your goal is generate a joke on the topic user gives",
"content": "Your goal is generate a joke on the topic user gives.",
},
{"role": "assistant", "content": "Hi, how can i assist you today?"},
{"role": "user", "content": "Generate a 3 liner joke for me"},
]
# test without max tokens
response = completion(
model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10
)
response = completion(model="claude-2.1", messages=messages)
# Add any assertions here to check the response
print(response)
print(response.usage)

View file

@ -70,7 +70,7 @@ models = ["command-nightly"]
@pytest.mark.parametrize("model", models)
def test_context_window_with_fallbacks(model):
ctx_window_fallback_dict = {
"command-nightly": "claude-2",
"command-nightly": "claude-2.1",
"gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
"azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
}

View file

@ -53,7 +53,7 @@ def claude_test_completion():
try:
# OVERRIDE WITH DYNAMIC MAX TOKENS
response_1 = litellm.completion(
model="claude-instant-1",
model="claude-instant-1.2",
messages=[{"content": "Hello, how are you?", "role": "user"}],
max_tokens=10,
)
@ -63,7 +63,7 @@ def claude_test_completion():
# USE CONFIG TOKENS
response_2 = litellm.completion(
model="claude-instant-1",
model="claude-instant-1.2",
messages=[{"content": "Hello, how are you?", "role": "user"}],
)
# Add any assertions here to check the response
@ -74,7 +74,7 @@ def claude_test_completion():
try:
response_3 = litellm.completion(
model="claude-instant-1",
model="claude-instant-1.2",
messages=[{"content": "Hello, how are you?", "role": "user"}],
n=2,
)

View file

@ -933,7 +933,7 @@ def test_router_anthropic_key_dynamic():
{
"model_name": "anthropic-claude",
"litellm_params": {
"model": "claude-instant-1",
"model": "claude-instant-1.2",
"api_key": anthropic_api_key,
},
}

View file

@ -35,7 +35,7 @@ def test_router_timeouts():
{
"model_name": "anthropic-claude-instant-1.2",
"litellm_params": {
"model": "claude-instant-1",
"model": "claude-instant-1.2",
"api_key": "os.environ/ANTHROPIC_API_KEY",
},
"tpm": 20000,

View file

@ -348,7 +348,7 @@ def test_completion_claude_stream():
},
]
response = completion(
model="claude-instant-1", messages=messages, stream=True, max_tokens=50
model="claude-instant-1.2", messages=messages, stream=True, max_tokens=50
)
complete_response = ""
# Add any assertions here to check the response

View file

@ -2836,6 +2836,8 @@ def test_completion_hf_prompt_array():
print(str(e))
if "is currently loading" in str(e):
return
if "Service Unavailable" in str(e):
return
pytest.fail(f"Error occurred: {e}")

View file

@ -4200,7 +4200,7 @@ def get_optional_params(
if top_p is not None:
optional_params["top_p"] = top_p
if max_tokens is not None:
optional_params["max_tokens_to_sample"] = max_tokens
optional_params["max_tokens"] = max_tokens
elif custom_llm_provider == "cohere":
## check if unsupported param passed in
supported_params = [
@ -8032,10 +8032,21 @@ class CustomStreamWrapper:
finish_reason = None
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
text = data_json.get("completion", "")
if data_json.get("stop_reason", None):
type_chunk = data_json.get("type", None)
if type_chunk == "content_block_delta":
"""
Anthropic content chunk
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
"""
text = data_json.get("delta", {}).get("text", "")
elif type_chunk == "message_delta":
"""
Anthropic
chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
"""
# TODO - get usage from this chunk, set in response
finish_reason = data_json.get("delta", {}).get("stop_reason", None)
is_finished = True
finish_reason = data_json["stop_reason"]
return {
"text": text,
"is_finished": is_finished,

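As a rough illustration of how the two Anthropic event types map to the returned dict, here is a minimal re-implementation of the branch above; the event payloads are the ones quoted in the comments, and this is a sketch rather than the library code path:

```python
import json

# assumed raw SSE payloads (shapes taken from the chunk comments above)
content_delta = '{"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}'
message_delta = '{"type": "message_delta", "delta": {"stop_reason": "max_tokens", "stop_sequence": null}, "usage": {"output_tokens": 10}}'

def parse_anthropic_chunk(raw: str) -> dict:
    """Minimal sketch of the parsing branch above, for illustration only."""
    data_json = json.loads(raw)
    text, is_finished, finish_reason = "", False, None
    type_chunk = data_json.get("type", None)
    if type_chunk == "content_block_delta":
        text = data_json.get("delta", {}).get("text", "")
    elif type_chunk == "message_delta":
        finish_reason = data_json.get("delta", {}).get("stop_reason", None)
        is_finished = True
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

print(parse_anthropic_chunk(content_delta))  # {'text': 'Hello', 'is_finished': False, 'finish_reason': None}
print(parse_anthropic_chunk(message_delta))  # {'text': '', 'is_finished': True, 'finish_reason': 'max_tokens'}
```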
View file

@ -643,6 +643,22 @@
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-3-opus-20240229": {
"max_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-3-sonnet-20240229": {
"max_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "anthropic",
"mode": "chat"
},
"text-bison": {
"max_tokens": 8192,
"input_cost_per_token": 0.000000125,