Merge pull request #2315 from BerriAI/litellm_add_claude_3

[FEAT] - add claude 3

Commit: 14fc8355fb

14 changed files with 179 additions and 68 deletions.
@@ -1,9 +1,9 @@
 # Anthropic
 LiteLLM supports

+- `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`)
 - `claude-2`
 - `claude-2.1`
-- `claude-instant-1`
 - `claude-instant-1.2`

 ## API Keys
@@ -24,11 +24,42 @@ from litellm import completion
 os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

 messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages)
+response = completion(model="claude-3-opus-20240229", messages=messages)
 print(response)
 ```

-## Usage - "Assistant Pre-fill"
+## Usage - Streaming
+Just set `stream=True` when calling completion.
+
+```python
+import os
+from litellm import completion
+
+# set env
+os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
+
+messages = [{"role": "user", "content": "Hey! how's it going?"}]
+response = completion(model="claude-3-opus-20240229", messages=messages, stream=True)
+for chunk in response:
+    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
+```
+
+## Supported Models
+
+| Model Name         | Function Call                                       | Required OS Variables             |
+|--------------------|-----------------------------------------------------|-----------------------------------|
+| claude-3-opus      | `completion('claude-3-opus-20240229', messages)`    | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-3-sonnet    | `completion('claude-3-sonnet-20240229', messages)`  | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-2.1         | `completion('claude-2.1', messages)`                | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-2           | `completion('claude-2', messages)`                  | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-instant-1.2 | `completion('claude-instant-1.2', messages)`        | `os.environ['ANTHROPIC_API_KEY']` |
+
+## Advanced
+
+### Usage - "Assistant Pre-fill"
+
 You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.

@@ -50,7 +81,7 @@ response = completion(model="claude-2.1", messages=messages)
 print(response)
 ```

-### Example prompt sent to Claude
+#### Example prompt sent to Claude

 ```
@@ -61,7 +92,7 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
 Assistant: {
 ```

-## Usage - "System" messages
+### Usage - "System" messages
 If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.

 ```python
@@ -78,7 +109,7 @@ messages = [
 response = completion(model="claude-2.1", messages=messages)
 ```

-### Example prompt sent to Claude
+#### Example prompt sent to Claude

 ```
 You are a snarky assistant.
@@ -88,28 +119,3 @@ Human: How do I boil water?
 Assistant:
 ```

-## Streaming
-Just set `stream=True` when calling completion.
-
-```python
-import os
-from litellm import completion
-
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
-
-messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages, stream=True)
-for chunk in response:
-    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
-```
-
-
-### Model Details
-
-| Model Name | Function Call | Required OS Variables |
-|------------------|--------------------------------------------|--------------------------------------|
-| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
@@ -20,7 +20,7 @@ class AnthropicError(Exception):
         self.status_code = status_code
         self.message = message
         self.request = httpx.Request(
-            method="POST", url="https://api.anthropic.com/v1/complete"
+            method="POST", url="https://api.anthropic.com/v1/messages"
         )
         self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
@@ -35,9 +35,7 @@ class AnthropicConfig:
     to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
     """

-    max_tokens_to_sample: Optional[
-        int
-    ] = litellm.max_tokens  # anthropic requires a default
+    max_tokens: Optional[int] = litellm.max_tokens  # anthropic requires a default
     stop_sequences: Optional[list] = None
     temperature: Optional[int] = None
     top_p: Optional[int] = None
@@ -46,7 +44,7 @@ class AnthropicConfig:

     def __init__(
         self,
-        max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
+        max_tokens: Optional[int] = 256,  # anthropic requires a default
         stop_sequences: Optional[list] = None,
         temperature: Optional[int] = None,
         top_p: Optional[int] = None,
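With the field renamed from `max_tokens_to_sample` to `max_tokens`, a provider-wide default can be registered once and picked up by later calls. A minimal sketch, assuming `litellm` is installed and a valid key is set; the 200-token value is illustrative and mirrors the updated test further down.

```python
import os
import litellm
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

# register a default max_tokens (and metadata) for Anthropic calls
litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"})

response = completion(
    model="claude-3-opus-20240229",
    messages=[{"role": "user", "content": "Hey! how's it going?"}],
)
print(response)
```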
@@ -123,6 +121,35 @@ def completion(
         prompt = prompt_factory(
             model=model, messages=messages, custom_llm_provider="anthropic"
         )
+    """
+    format messages for anthropic
+    1. Anthropic supports roles like "user" and "assistant" (here litellm translates system -> assistant)
+    2. The first message always needs to be of role "user"
+    3. Each message must alternate between "user" and "assistant" (not addressed by litellm as of now)
+    4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise)
+    """
+    # 1. Anthropic only supports roles like "user" and "assistant"
+    for idx, message in enumerate(messages):
+        if message["role"] == "system":
+            message["role"] = "assistant"
+
+        # if this is the final assistant message, remove trailing whitespace
+        # TODO: only do this if it's the final assistant message
+        if message["role"] == "assistant":
+            message["content"] = message["content"].strip()
+
+    # 2. The first message always needs to be of role "user"
+    if len(messages) > 0:
+        if messages[0]["role"] != "user":
+            # find the index of the first user message
+            for i, message in enumerate(messages):
+                if message["role"] == "user":
+                    break
+
+            # remove the user message from its existing position and add it to the front
+            messages.pop(i)
+            # move the first user message to the front
+            messages = [message] + messages
+
     ## Load Config
     config = litellm.AnthropicConfig.get_config()
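The block above normalizes the message list before it is sent to Anthropic. A standalone sketch of the same rules applied to a sample input; the helper name `format_messages_for_anthropic` is hypothetical and only illustrates what the in-place loop does.

```python
def format_messages_for_anthropic(messages: list) -> list:
    # 1. Anthropic only accepts "user"/"assistant" roles; strip trailing whitespace from assistant content
    for message in messages:
        if message["role"] == "system":
            message["role"] = "assistant"
        if message["role"] == "assistant":
            message["content"] = message["content"].strip()

    # 2. the first message must come from the "user"; move the first user message to the front
    if messages and messages[0]["role"] != "user":
        for i, message in enumerate(messages):
            if message["role"] == "user":
                messages.pop(i)
                messages = [message] + messages
                break
    return messages


print(format_messages_for_anthropic([
    {"role": "system", "content": "You are a snarky assistant. "},
    {"role": "user", "content": "How do I boil water?"},
]))
# -> [{'role': 'user', ...}, {'role': 'assistant', 'content': 'You are a snarky assistant.'}]
```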
@@ -134,7 +161,7 @@ def completion(

     data = {
         "model": model,
-        "prompt": prompt,
+        "messages": messages,
         **optional_params,
     }

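Together with the endpoint change above, the request body now carries a `messages` array instead of a flattened `prompt`. A hedged sketch of what an equivalent raw call would look like; the header names follow Anthropic's public API documentation and are assembled elsewhere in litellm, and all values here are illustrative.

```python
import os

import httpx

data = {
    "model": "claude-3-opus-20240229",
    "messages": [{"role": "user", "content": "Hey! how's it going?"}],
    "max_tokens": 256,  # Anthropic requires this; litellm supplies a default via AnthropicConfig
}
response = httpx.post(
    "https://api.anthropic.com/v1/messages",
    headers={
        "x-api-key": os.environ["ANTHROPIC_API_KEY"],
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    },
    json=data,
)
print(response.json())
```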
@@ -173,7 +200,7 @@ def completion(

         ## LOGGING
         logging_obj.post_call(
-            input=prompt,
+            input=messages,
             api_key=api_key,
             original_response=response.text,
             additional_args={"complete_input_dict": data},
@@ -191,20 +218,20 @@ def completion(
                 message=str(completion_response["error"]),
                 status_code=response.status_code,
             )
+        elif len(completion_response["content"]) == 0:
+            raise AnthropicError(
+                message="No content in response",
+                status_code=response.status_code,
+            )
         else:
-            if len(completion_response["completion"]) > 0:
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["completion"]
+            text_content = completion_response["content"][0].get("text", None)
+            model_response.choices[0].message.content = text_content  # type: ignore
             model_response.choices[0].finish_reason = completion_response["stop_reason"]

         ## CALCULATING USAGE
-        prompt_tokens = len(
-            encoding.encode(prompt)
-        )  ##[TODO] use the anthropic tokenizer here
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-        )  ##[TODO] use the anthropic tokenizer here
+        prompt_tokens = completion_response["usage"]["input_tokens"]
+        completion_tokens = completion_response["usage"]["output_tokens"]
+        total_tokens = prompt_tokens + completion_tokens

         model_response["created"] = int(time.time())
         model_response["model"] = model
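The parsing above targets the Messages API response shape: generated text lives in `content[0]["text"]` and token counts come back in `usage`, so litellm no longer re-tokenizes the prompt and completion locally. A small illustration with a made-up response body:

```python
completion_response = {
    "content": [{"type": "text", "text": "Hello! I'm doing well."}],
    "stop_reason": "end_turn",
    "usage": {"input_tokens": 14, "output_tokens": 9},
}

text_content = completion_response["content"][0].get("text", None)
prompt_tokens = completion_response["usage"]["input_tokens"]
completion_tokens = completion_response["usage"]["output_tokens"]
total_tokens = prompt_tokens + completion_tokens
print(text_content, prompt_tokens, completion_tokens, total_tokens)
# Hello! I'm doing well. 14 9 23
```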
@@ -1023,7 +1023,7 @@ def completion(
             api_base
             or litellm.api_base
             or get_secret("ANTHROPIC_API_BASE")
-            or "https://api.anthropic.com/v1/complete"
+            or "https://api.anthropic.com/v1/messages"
         )
         custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
         response = anthropic.completion(
@@ -643,6 +643,22 @@
         "litellm_provider": "anthropic",
         "mode": "chat"
     },
+    "claude-3-opus-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
     "text-bison": {
         "max_tokens": 8192,
         "input_cost_per_token": 0.000000125,
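Given the per-token prices added above, a quick back-of-the-envelope cost check (the token counts are arbitrary):

```python
# claude-3-opus-20240229, from the pricing entry above
input_cost_per_token = 0.000015
output_cost_per_token = 0.000075

prompt_tokens, completion_tokens = 1_000, 500
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # $0.0525
```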
@@ -47,8 +47,9 @@ test_function_call_non_openai_model()
 ## case 2: add_function_to_prompt set
 def test_function_call_non_openai_model_litellm_mod_set():
     litellm.add_function_to_prompt = True
+    litellm.set_verbose = True
     try:
-        model = "claude-instant-1"
+        model = "claude-instant-1.2"
         messages = [{"role": "user", "content": "what's the weather in sf?"}]
         functions = [
             {
@@ -56,7 +56,7 @@ def test_completion_custom_provider_model_name():
 def test_completion_claude():
     litellm.set_verbose = True
     litellm.cache = None
-    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
+    litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"})
     messages = [
         {
             "role": "system",
|
||||||
try:
|
try:
|
||||||
# test without max tokens
|
# test without max tokens
|
||||||
response = completion(
|
response = completion(
|
||||||
model="claude-instant-1",
|
model="claude-instant-1.2",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
request_timeout=10,
|
request_timeout=10,
|
||||||
)
|
)
|
||||||
|
@@ -84,6 +84,40 @@ def test_completion_claude():
 # test_completion_claude()


+def test_completion_claude_3():
+    litellm.set_verbose = True
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+        )
+        # Add any assertions here to check response args
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
@@ -163,19 +197,17 @@ def test_completion_mistral_api_modified_input():

 def test_completion_claude2_1():
     try:
+        litellm.set_verbose = True
         print("claude2.1 test request")
         messages = [
             {
                 "role": "system",
-                "content": "Your goal is generate a joke on the topic user gives",
+                "content": "Your goal is generate a joke on the topic user gives.",
             },
-            {"role": "assistant", "content": "Hi, how can i assist you today?"},
             {"role": "user", "content": "Generate a 3 liner joke for me"},
         ]
         # test without max tokens
-        response = completion(
-            model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10
-        )
+        response = completion(model="claude-2.1", messages=messages)
         # Add any assertions here to check the response
         print(response)
         print(response.usage)
@@ -70,7 +70,7 @@ models = ["command-nightly"]
 @pytest.mark.parametrize("model", models)
 def test_context_window_with_fallbacks(model):
     ctx_window_fallback_dict = {
-        "command-nightly": "claude-2",
+        "command-nightly": "claude-2.1",
         "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
         "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
     }
@@ -53,7 +53,7 @@ def claude_test_completion():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             max_tokens=10,
         )
@@ -63,7 +63,7 @@ def claude_test_completion():

         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
         )
         # Add any assertions here to check the response
@@ -74,7 +74,7 @@ def claude_test_completion():

     try:
         response_3 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             n=2,
         )
@@ -933,7 +933,7 @@ def test_router_anthropic_key_dynamic():
         {
             "model_name": "anthropic-claude",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": anthropic_api_key,
             },
         }
@@ -35,7 +35,7 @@ def test_router_timeouts():
         {
             "model_name": "anthropic-claude-instant-1.2",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": "os.environ/ANTHROPIC_API_KEY",
             },
             "tpm": 20000,
@@ -348,7 +348,7 @@ def test_completion_claude_stream():
             },
         ]
         response = completion(
-            model="claude-instant-1", messages=messages, stream=True, max_tokens=50
+            model="claude-instant-1.2", messages=messages, stream=True, max_tokens=50
         )
         complete_response = ""
         # Add any assertions here to check the response
@@ -2836,6 +2836,8 @@ def test_completion_hf_prompt_array():
         print(str(e))
         if "is currently loading" in str(e):
             return
+        if "Service Unavailable" in str(e):
+            return
         pytest.fail(f"Error occurred: {e}")

@@ -4200,7 +4200,7 @@ def get_optional_params(
         if top_p is not None:
             optional_params["top_p"] = top_p
         if max_tokens is not None:
-            optional_params["max_tokens_to_sample"] = max_tokens
+            optional_params["max_tokens"] = max_tokens
     elif custom_llm_provider == "cohere":
         ## check if unsupported param passed in
         supported_params = [
@@ -8032,10 +8032,21 @@ class CustomStreamWrapper:
             finish_reason = None
             if str_line.startswith("data:"):
                 data_json = json.loads(str_line[5:])
-                text = data_json.get("completion", "")
-                if data_json.get("stop_reason", None):
+                type_chunk = data_json.get("type", None)
+                if type_chunk == "content_block_delta":
+                    """
+                    Anthropic content chunk
+                    chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                    """
+                    text = data_json.get("delta", {}).get("text", "")
+                elif type_chunk == "message_delta":
+                    """
+                    Anthropic message-delta chunk
+                    chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                    """
+                    # TODO - get usage from this chunk, set in response
+                    finish_reason = data_json.get("delta", {}).get("stop_reason", None)
                     is_finished = True
-                    finish_reason = data_json["stop_reason"]
                 return {
                     "text": text,
                     "is_finished": is_finished,
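The two chunk types documented above map onto the wrapper's return dict as follows. A self-contained sketch; `parse_anthropic_sse_line` is a hypothetical stand-in for the `CustomStreamWrapper` logic in the hunk above, and the sample data lines are illustrative.

```python
import json


def parse_anthropic_sse_line(str_line: str) -> dict:
    text, is_finished, finish_reason = "", False, None
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        type_chunk = data_json.get("type", None)
        if type_chunk == "content_block_delta":
            # incremental text for the current content block
            text = data_json.get("delta", {}).get("text", "")
        elif type_chunk == "message_delta":
            # end-of-message metadata, including the stop reason
            finish_reason = data_json.get("delta", {}).get("stop_reason", None)
            is_finished = True
    return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}


print(parse_anthropic_sse_line('data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}'))
# {'text': 'Hello', 'is_finished': False, 'finish_reason': None}
print(parse_anthropic_sse_line('data: {"type": "message_delta", "delta": {"stop_reason": "max_tokens"}, "usage": {"output_tokens": 10}}'))
# {'text': '', 'is_finished': True, 'finish_reason': 'max_tokens'}
```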
|
@ -643,6 +643,22 @@
|
||||||
"litellm_provider": "anthropic",
|
"litellm_provider": "anthropic",
|
||||||
"mode": "chat"
|
"mode": "chat"
|
||||||
},
|
},
|
||||||
|
"claude-3-opus-20240229": {
|
||||||
|
"max_tokens": 200000,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.000015,
|
||||||
|
"output_cost_per_token": 0.000075,
|
||||||
|
"litellm_provider": "anthropic",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
|
"claude-3-sonnet-20240229": {
|
||||||
|
"max_tokens": 200000,
|
||||||
|
"max_output_tokens": 4096,
|
||||||
|
"input_cost_per_token": 0.000003,
|
||||||
|
"output_cost_per_token": 0.000015,
|
||||||
|
"litellm_provider": "anthropic",
|
||||||
|
"mode": "chat"
|
||||||
|
},
|
||||||
"text-bison": {
|
"text-bison": {
|
||||||
"max_tokens": 8192,
|
"max_tokens": 8192,
|
||||||
"input_cost_per_token": 0.000000125,
|
"input_cost_per_token": 0.000000125,
|
||||||
|
|