diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md index cfa14cd325..198a6a03dc 100644 --- a/docs/my-website/docs/providers/anthropic.md +++ b/docs/my-website/docs/providers/anthropic.md @@ -1,9 +1,9 @@ # Anthropic LiteLLM supports +- `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`) - `claude-2` - `claude-2.1` -- `claude-instant-1` - `claude-instant-1.2` ## API Keys @@ -24,11 +24,42 @@ from litellm import completion os.environ["ANTHROPIC_API_KEY"] = "your-api-key" messages = [{"role": "user", "content": "Hey! how's it going?"}] -response = completion(model="claude-instant-1", messages=messages) +response = completion(model="claude-3-opus-20240229", messages=messages) print(response) ``` -## Usage - "Assistant Pre-fill" + +## Usage - Streaming +Just set `stream=True` when calling completion. + +```python +import os +from litellm import completion + +# set env +os.environ["ANTHROPIC_API_KEY"] = "your-api-key" + +messages = [{"role": "user", "content": "Hey! how's it going?"}] +response = completion(model="claude-3-opus-20240229", messages=messages, stream=True) +for chunk in response: + print(chunk["choices"][0]["delta"]["content"]) # same as openai format +``` + + + +## Supported Models + +| Model Name | Function Call | +|------------------|--------------------------------------------| +| claude-3-opus | `completion('claude-3-opus-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-3-sonnet | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | +| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | + +## Advanced + +### Usage - "Assistant Pre-fill" You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array. @@ -50,7 +81,7 @@ response = completion(model="claude-2.1", messages=messages) print(response) ``` -### Example prompt sent to Claude +#### Example prompt sent to Claude ``` @@ -61,7 +92,7 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li Assistant: { ``` -## Usage - "System" messages +### Usage - "System" messages If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you. ```python @@ -78,7 +109,7 @@ messages = [ response = completion(model="claude-2.1", messages=messages) ``` -### Example prompt sent to Claude +#### Example prompt sent to Claude ``` You are a snarky assistant. @@ -88,28 +119,3 @@ Human: How do I boil water? Assistant: ``` -## Streaming -Just set `stream=True` when calling completion. - -```python -import os -from litellm import completion - -# set env -os.environ["ANTHROPIC_API_KEY"] = "your-api-key" - -messages = [{"role": "user", "content": "Hey! how's it going?"}] -response = completion(model="claude-instant-1", messages=messages, stream=True) -for chunk in response: - print(chunk["choices"][0]["delta"]["content"]) # same as openai format -``` - - -### Model Details - -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 150ae0e076..44a1b128a9 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -20,7 +20,7 @@ class AnthropicError(Exception): self.status_code = status_code self.message = message self.request = httpx.Request( - method="POST", url="https://api.anthropic.com/v1/complete" + method="POST", url="https://api.anthropic.com/v1/messages" ) self.response = httpx.Response(status_code=status_code, request=self.request) super().__init__( @@ -35,9 +35,7 @@ class AnthropicConfig: to pass metadata to anthropic, it's {"user_id": "any-relevant-information"} """ - max_tokens_to_sample: Optional[ - int - ] = litellm.max_tokens # anthropic requires a default + max_tokens: Optional[int] = litellm.max_tokens # anthropic requires a default stop_sequences: Optional[list] = None temperature: Optional[int] = None top_p: Optional[int] = None @@ -46,7 +44,7 @@ class AnthropicConfig: def __init__( self, - max_tokens_to_sample: Optional[int] = 256, # anthropic requires a default + max_tokens: Optional[int] = 256, # anthropic requires a default stop_sequences: Optional[list] = None, temperature: Optional[int] = None, top_p: Optional[int] = None, @@ -123,6 +121,35 @@ def completion( prompt = prompt_factory( model=model, messages=messages, custom_llm_provider="anthropic" ) + """ + format messages for anthropic + 1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant) + 2. The first message always needs to be of role "user" + 3. Each message must alternate between "user" and "assistant" (this is not addressed as now by litellm) + 4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise) + """ + # 1. Anthropic only supports roles like "user" and "assistant" + for idx, message in enumerate(messages): + if message["role"] == "system": + message["role"] = "assistant" + + # if this is the final assistant message, remove trailing whitespace + # TODO: only do this if it's the final assistant message + if message["role"] == "assistant": + message["content"] = message["content"].strip() + + # 2. The first message always needs to be of role "user" + if len(messages) > 0: + if messages[0]["role"] != "user": + # find the index of the first user message + for i, message in enumerate(messages): + if message["role"] == "user": + break + + # remove the user message at existing position and add it to the front + messages.pop(i) + # move the first user message to the front + messages = [message] + messages ## Load Config config = litellm.AnthropicConfig.get_config() @@ -134,7 +161,7 @@ def completion( data = { "model": model, - "prompt": prompt, + "messages": messages, **optional_params, } @@ -173,7 +200,7 @@ def completion( ## LOGGING logging_obj.post_call( - input=prompt, + input=messages, api_key=api_key, original_response=response.text, additional_args={"complete_input_dict": data}, @@ -191,20 +218,20 @@ def completion( message=str(completion_response["error"]), status_code=response.status_code, ) + elif len(completion_response["content"]) == 0: + raise AnthropicError( + message="No content in response", + status_code=response.status_code, + ) else: - if len(completion_response["completion"]) > 0: - model_response["choices"][0]["message"][ - "content" - ] = completion_response["completion"] + text_content = completion_response["content"][0].get("text", None) + model_response.choices[0].message.content = text_content # type: ignore model_response.choices[0].finish_reason = completion_response["stop_reason"] ## CALCULATING USAGE - prompt_tokens = len( - encoding.encode(prompt) - ) ##[TODO] use the anthropic tokenizer here - completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"].get("content", "")) - ) ##[TODO] use the anthropic tokenizer here + prompt_tokens = completion_response["usage"]["input_tokens"] + completion_tokens = completion_response["usage"]["output_tokens"] + total_tokens = prompt_tokens + completion_tokens model_response["created"] = int(time.time()) model_response["model"] = model diff --git a/litellm/main.py b/litellm/main.py index 67586603d9..9fbd1b8283 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1023,7 +1023,7 @@ def completion( api_base or litellm.api_base or get_secret("ANTHROPIC_API_BASE") - or "https://api.anthropic.com/v1/complete" + or "https://api.anthropic.com/v1/messages" ) custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict response = anthropic.completion( diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d97c042c5a..b29e152294 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -643,6 +643,22 @@ "litellm_provider": "anthropic", "mode": "chat" }, + "claude-3-opus-20240229": { + "max_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-sonnet-20240229": { + "max_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "anthropic", + "mode": "chat" + }, "text-bison": { "max_tokens": 8192, "input_cost_per_token": 0.000000125, diff --git a/litellm/tests/test_add_function_to_prompt.py b/litellm/tests/test_add_function_to_prompt.py index 932e6edd14..93b09cd8c8 100644 --- a/litellm/tests/test_add_function_to_prompt.py +++ b/litellm/tests/test_add_function_to_prompt.py @@ -47,8 +47,9 @@ test_function_call_non_openai_model() ## case 2: add_function_to_prompt set def test_function_call_non_openai_model_litellm_mod_set(): litellm.add_function_to_prompt = True + litellm.set_verbose = True try: - model = "claude-instant-1" + model = "claude-instant-1.2" messages = [{"role": "user", "content": "what's the weather in sf?"}] functions = [ { diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index f6be6e9528..a9d41be8d1 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -56,7 +56,7 @@ def test_completion_custom_provider_model_name(): def test_completion_claude(): litellm.set_verbose = True litellm.cache = None - litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"}) + litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"}) messages = [ { "role": "system", @@ -67,7 +67,7 @@ def test_completion_claude(): try: # test without max tokens response = completion( - model="claude-instant-1", + model="claude-instant-1.2", messages=messages, request_timeout=10, ) @@ -84,6 +84,40 @@ def test_completion_claude(): # test_completion_claude() +def test_completion_claude_3(): + litellm.set_verbose = True + messages = [{"role": "user", "content": "Hello, world"}] + try: + # test without max tokens + response = completion( + model="anthropic/claude-3-opus-20240229", + messages=messages, + ) + # Add any assertions, here to check response args + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +def test_completion_claude_3_stream(): + litellm.set_verbose = False + messages = [{"role": "user", "content": "Hello, world"}] + try: + # test without max tokens + response = completion( + model="anthropic/claude-3-opus-20240229", + messages=messages, + max_tokens=10, + stream=True, + ) + # Add any assertions, here to check response args + print(response) + for chunk in response: + print(chunk) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_completion_mistral_api(): try: litellm.set_verbose = True @@ -163,19 +197,17 @@ def test_completion_mistral_api_modified_input(): def test_completion_claude2_1(): try: + litellm.set_verbose = True print("claude2.1 test request") messages = [ { "role": "system", - "content": "Your goal is generate a joke on the topic user gives", + "content": "Your goal is generate a joke on the topic user gives.", }, - {"role": "assistant", "content": "Hi, how can i assist you today?"}, {"role": "user", "content": "Generate a 3 liner joke for me"}, ] # test without max tokens - response = completion( - model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10 - ) + response = completion(model="claude-2.1", messages=messages) # Add any assertions here to check the response print(response) print(response.usage) diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index 9c90014c0d..311bbfa571 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -70,7 +70,7 @@ models = ["command-nightly"] @pytest.mark.parametrize("model", models) def test_context_window_with_fallbacks(model): ctx_window_fallback_dict = { - "command-nightly": "claude-2", + "command-nightly": "claude-2.1", "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k", "azure/chatgpt-v-2": "gpt-3.5-turbo-16k", } diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py index 5e5d19c786..dcb4dcb4c7 100644 --- a/litellm/tests/test_provider_specific_config.py +++ b/litellm/tests/test_provider_specific_config.py @@ -53,7 +53,7 @@ def claude_test_completion(): try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( - model="claude-instant-1", + model="claude-instant-1.2", messages=[{"content": "Hello, how are you?", "role": "user"}], max_tokens=10, ) @@ -63,7 +63,7 @@ def claude_test_completion(): # USE CONFIG TOKENS response_2 = litellm.completion( - model="claude-instant-1", + model="claude-instant-1.2", messages=[{"content": "Hello, how are you?", "role": "user"}], ) # Add any assertions here to check the response @@ -74,7 +74,7 @@ def claude_test_completion(): try: response_3 = litellm.completion( - model="claude-instant-1", + model="claude-instant-1.2", messages=[{"content": "Hello, how are you?", "role": "user"}], n=2, ) diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 127caf223b..7c182ee686 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -933,7 +933,7 @@ def test_router_anthropic_key_dynamic(): { "model_name": "anthropic-claude", "litellm_params": { - "model": "claude-instant-1", + "model": "claude-instant-1.2", "api_key": anthropic_api_key, }, } diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py index 139914f6df..3816c649e9 100644 --- a/litellm/tests/test_router_timeout.py +++ b/litellm/tests/test_router_timeout.py @@ -35,7 +35,7 @@ def test_router_timeouts(): { "model_name": "anthropic-claude-instant-1.2", "litellm_params": { - "model": "claude-instant-1", + "model": "claude-instant-1.2", "api_key": "os.environ/ANTHROPIC_API_KEY", }, "tpm": 20000, diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 083953f574..5767a944b2 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -348,7 +348,7 @@ def test_completion_claude_stream(): }, ] response = completion( - model="claude-instant-1", messages=messages, stream=True, max_tokens=50 + model="claude-instant-1.2", messages=messages, stream=True, max_tokens=50 ) complete_response = "" # Add any assertions here to check the response diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py index 0b98111839..a16b864494 100644 --- a/litellm/tests/test_text_completion.py +++ b/litellm/tests/test_text_completion.py @@ -2836,6 +2836,8 @@ def test_completion_hf_prompt_array(): print(str(e)) if "is currently loading" in str(e): return + if "Service Unavailable" in str(e): + return pytest.fail(f"Error occurred: {e}") diff --git a/litellm/utils.py b/litellm/utils.py index b590d9c688..53e6e82450 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4200,7 +4200,7 @@ def get_optional_params( if top_p is not None: optional_params["top_p"] = top_p if max_tokens is not None: - optional_params["max_tokens_to_sample"] = max_tokens + optional_params["max_tokens"] = max_tokens elif custom_llm_provider == "cohere": ## check if unsupported param passed in supported_params = [ @@ -8032,10 +8032,21 @@ class CustomStreamWrapper: finish_reason = None if str_line.startswith("data:"): data_json = json.loads(str_line[5:]) - text = data_json.get("completion", "") - if data_json.get("stop_reason", None): + type_chunk = data_json.get("type", None) + if type_chunk == "content_block_delta": + """ + Anthropic content chunk + chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}} + """ + text = data_json.get("delta", {}).get("text", "") + elif type_chunk == "message_delta": + """ + Anthropic + chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}} + """ + # TODO - get usage from this chunk, set in response + finish_reason = data_json.get("delta", {}).get("stop_reason", None) is_finished = True - finish_reason = data_json["stop_reason"] return { "text": text, "is_finished": is_finished, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 1c79f7d2a6..23afaf04d6 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -643,6 +643,22 @@ "litellm_provider": "anthropic", "mode": "chat" }, + "claude-3-opus-20240229": { + "max_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-sonnet-20240229": { + "max_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "anthropic", + "mode": "chat" + }, "text-bison": { "max_tokens": 8192, "input_cost_per_token": 0.000000125,