From 19eb9063fb09226be3178b2b47be3ab4e22822f3 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 07:13:08 -0800
Subject: [PATCH 01/10] (feat) - add claude 3

---
 litellm/llms/anthropic.py        | 31 ++++++++++++++-----------------
 litellm/main.py                  |  2 +-
 litellm/tests/test_completion.py | 16 ++++++++++++++++
 litellm/utils.py                 |  2 +-
 4 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 150ae0e076..829a8becd1 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -20,7 +20,7 @@ class AnthropicError(Exception):
         self.status_code = status_code
         self.message = message
         self.request = httpx.Request(
-            method="POST", url="https://api.anthropic.com/v1/complete"
+            method="POST", url="https://api.anthropic.com/v1/messages"
         )
         self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
@@ -35,9 +35,7 @@ class AnthropicConfig:
     to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
     """

-    max_tokens_to_sample: Optional[
-        int
-    ] = litellm.max_tokens  # anthropic requires a default
+    max_tokens: Optional[int] = litellm.max_tokens  # anthropic requires a default
     stop_sequences: Optional[list] = None
     temperature: Optional[int] = None
     top_p: Optional[int] = None
@@ -46,7 +44,7 @@ class AnthropicConfig:

     def __init__(
         self,
-        max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
+        max_tokens: Optional[int] = 256,  # anthropic requires a default
         stop_sequences: Optional[list] = None,
         temperature: Optional[int] = None,
         top_p: Optional[int] = None,
@@ -124,6 +122,10 @@ def completion(
             model=model, messages=messages, custom_llm_provider="anthropic"
         )

+    for message in messages:
+        if message["role"] == "system":
+            message["role"] = "assistant"
+
     ## Load Config
     config = litellm.AnthropicConfig.get_config()
     for k, v in config.items():
@@ -134,7 +136,7 @@ def completion(

     data = {
         "model": model,
-        "prompt": prompt,
+        "messages": messages,
         **optional_params,
     }
@@ -173,7 +175,7 @@ def completion(

         ## LOGGING
         logging_obj.post_call(
-            input=prompt,
+            input=messages,
             api_key=api_key,
             original_response=response.text,
             additional_args={"complete_input_dict": data},
@@ -192,19 +194,14 @@ def completion(
                 status_code=response.status_code,
             )
         else:
-            if len(completion_response["completion"]) > 0:
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["completion"]
+            text_content = completion_response["content"][0].get("text", None)
+            model_response.choices[0].message.content = text_content  # type: ignore
             model_response.choices[0].finish_reason = completion_response["stop_reason"]

         ## CALCULATING USAGE
-        prompt_tokens = len(
-            encoding.encode(prompt)
-        )  ##[TODO] use the anthropic tokenizer here
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-        )  ##[TODO] use the anthropic tokenizer here
+        prompt_tokens = completion_response["usage"]["input_tokens"]
+        completion_tokens = completion_response["usage"]["output_tokens"]
+        total_tokens = prompt_tokens + completion_tokens

         model_response["created"] = int(time.time())
         model_response["model"] = model

diff --git a/litellm/main.py b/litellm/main.py
index 67586603d9..9fbd1b8283 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1023,7 +1023,7 @@ def completion(
             api_base
             or litellm.api_base
             or get_secret("ANTHROPIC_API_BASE")
-            or "https://api.anthropic.com/v1/complete"
+            or "https://api.anthropic.com/v1/messages"
         )
         custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
         response = anthropic.completion(

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index f6be6e9528..200b0ae58c 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -84,6 +84,22 @@ def test_completion_claude():
 # test_completion_claude()


+def test_completion_claude_3():
+    litellm.set_verbose = True
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+        )
+        # Add any assertions here to check response args
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True

diff --git a/litellm/utils.py b/litellm/utils.py
index b3e197c1a1..173f5e79e5 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4200,7 +4200,7 @@ def get_optional_params(
         if top_p is not None:
             optional_params["top_p"] = top_p
         if max_tokens is not None:
-            optional_params["max_tokens_to_sample"] = max_tokens
+            optional_params["max_tokens"] = max_tokens
     elif custom_llm_provider == "cohere":
         ## check if unsupported param passed in
         supported_params = [

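Reviewer note on patch 01: this moves the integration from Anthropic's legacy Text Completions endpoint (`/v1/complete` with `prompt` and `max_tokens_to_sample`) to the Messages endpoint (`/v1/messages` with `messages` and `max_tokens`). A minimal sketch of the new request shape, assuming a valid `ANTHROPIC_API_KEY`; the body fields mirror the hunks above, the headers follow Anthropic's published API:

```python
import os
import httpx

# Messages-API request shape introduced by this patch (sketch, not part of the diff)
data = {
    "model": "claude-3-opus-20240229",
    "max_tokens": 256,  # replaces the old `max_tokens_to_sample`
    "messages": [{"role": "user", "content": "Hello, world"}],  # replaces `prompt`
}
response = httpx.post(
    "https://api.anthropic.com/v1/messages",
    json=data,
    headers={
        "x-api-key": os.environ["ANTHROPIC_API_KEY"],
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    },
)
body = response.json()
print(body["content"][0]["text"])  # completion text, read the same way as the hunk above
print(body["usage"]["input_tokens"], body["usage"]["output_tokens"])  # usage fields now come from the API
```
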
From fdd8199a255f3eae8f647e2e253d795ae6c86b06 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 07:29:23 -0800
Subject: [PATCH 02/10] (feat) streaming claude-3

---
 litellm/tests/test_completion.py | 19 +++++++++++++++++++
 litellm/utils.py                 | 17 ++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 200b0ae58c..163a66af37 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -100,6 +100,25 @@ def test_completion_claude_3():
         pytest.fail(f"Error occurred: {e}")


+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True

diff --git a/litellm/utils.py b/litellm/utils.py
index 173f5e79e5..c315ca4a44 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8017,10 +8017,21 @@ class CustomStreamWrapper:
             finish_reason = None
             if str_line.startswith("data:"):
                 data_json = json.loads(str_line[5:])
-                text = data_json.get("completion", "")
-                if data_json.get("stop_reason", None):
+                type_chunk = data_json.get("type", None)
+                if type_chunk == "content_block_delta":
+                    """
+                    Anthropic content chunk
+                    chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                    """
+                    text = data_json.get("delta", {}).get("text", "")
+                elif type_chunk == "message_delta":
+                    """
+                    Anthropic
+                    chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                    """
+                    # TODO - get usage from this chunk, set in response
+                    finish_reason = data_json.get("delta", {}).get("stop_reason", None)
                     is_finished = True
-                    finish_reason = data_json["stop_reason"]
             return {
                 "text": text,
                 "is_finished": is_finished,

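Reviewer note on patch 02: the stream handler now distinguishes the two Messages-API event types it cares about. A self-contained sketch of the same parsing, fed the sample chunks quoted in the docstrings above (the `lines` input is made up for illustration):

```python
import json

# sample SSE data lines, shaped like the chunks quoted in the patch
lines = [
    'data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}}',
    'data: {"type": "message_delta", "delta": {"stop_reason": "max_tokens", "stop_sequence": null}, "usage": {"output_tokens": 10}}',
]

for str_line in lines:
    text, is_finished, finish_reason = "", False, None
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        type_chunk = data_json.get("type", None)
        if type_chunk == "content_block_delta":
            # incremental text from the model
            text = data_json.get("delta", {}).get("text", "")
        elif type_chunk == "message_delta":
            # end-of-message marker carrying stop_reason and output usage
            finish_reason = data_json.get("delta", {}).get("stop_reason", None)
            is_finished = True
    print({"text": text, "is_finished": is_finished, "finish_reason": finish_reason})
```
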
From d5c750dcddf5013461813e535a48f51a891c2865 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 07:35:21 -0800
Subject: [PATCH 03/10] (feat) add claude-3 to model_prices

---
 .../model_prices_and_context_window_backup.json | 16 ++++++++++++++++
 model_prices_and_context_window.json            | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d97c042c5a..b29e152294 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -643,6 +643,22 @@
         "litellm_provider": "anthropic",
         "mode": "chat"
     },
+    "claude-3-opus-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
     "text-bison": {
         "max_tokens": 8192,
         "input_cost_per_token": 0.000000125,

diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index d97c042c5a..b29e152294 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -643,6 +643,22 @@
         "litellm_provider": "anthropic",
         "mode": "chat"
     },
+    "claude-3-opus-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
     "text-bison": {
         "max_tokens": 8192,
         "input_cost_per_token": 0.000000125,

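Reviewer note on patch 03: with these entries in place, per-call cost is just token counts times the per-token rates. A quick sanity check using the claude-3-opus rates added above:

```python
# cost check for claude-3-opus-20240229 using the rates from this patch
input_cost_per_token = 0.000015   # $15 per 1M input tokens
output_cost_per_token = 0.000075  # $75 per 1M output tokens

prompt_tokens, completion_tokens = 1_000, 500
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.4f}")  # $0.0525 = $0.0150 input + $0.0375 output
```
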
From 25bbb73ce317436e8153a80d00484a356829069e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 07:40:18 -0800
Subject: [PATCH 04/10] (docs) add claude-3

---
 docs/my-website/docs/providers/anthropic.md | 68 ++++++++++++---------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index cfa14cd325..617476b972 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -1,6 +1,7 @@
 # Anthropic
 LiteLLM supports

+- `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`)
 - `claude-2`
 - `claude-2.1`
 - `claude-instant-1`
@@ -24,11 +25,43 @@ from litellm import completion
 os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

 messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages)
+response = completion(model="claude-3-opus-20240229", messages=messages)
 print(response)
 ```

-## Usage - "Assistant Pre-fill"
+
+## Usage - Streaming
+Just set `stream=True` when calling completion.
+
+```python
+import os
+from litellm import completion
+
+# set env
+os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
+
+messages = [{"role": "user", "content": "Hey! how's it going?"}]
+response = completion(model="claude-3-opus-20240229", messages=messages, stream=True)
+for chunk in response:
+    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
+```
+
+
+## Supported Models
+
+| Model Name | Function Call | Required OS Variables |
+|------------------|--------------------------------------------|--------------------------------------|
+| claude-3-opus | `completion('claude-3-opus-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-3-sonnet | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+
+## Advanced
+
+### Usage - "Assistant Pre-fill"

 You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
@@ -50,7 +83,7 @@ response = completion(model="claude-2.1", messages=messages)
 print(response)
 ```

-### Example prompt sent to Claude
+#### Example prompt sent to Claude

 ```
@@ -61,7 +94,7 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
 Assistant: {
 ```

-## Usage - "System" messages
+### Usage - "System" messages
 If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.

 ```python
@@ -78,7 +111,7 @@ messages = [
 response = completion(model="claude-2.1", messages=messages)
 ```

-### Example prompt sent to Claude
+#### Example prompt sent to Claude

 ```
 You are a snarky assistant.
@@ -88,28 +121,3 @@ Human: How do I boil water?

 Assistant:
 ```
-## Streaming
-Just set `stream=True` when calling completion.
-
-```python
-import os
-from litellm import completion
-
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
-
-messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages, stream=True)
-for chunk in response:
-    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
-```
-
-
-### Model Details
-
-| Model Name | Function Call | Required OS Variables |
-|------------------|--------------------------------------------|--------------------------------------|
-| claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |

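Reviewer note on patch 04: the "Assistant Pre-fill" section it reshuffles is usually paired with response parsing, since Claude's reply continues from the pre-filled text. A usage sketch (the prompt and the JSON shape are illustrative, and it assumes the reply completes valid JSON):

```python
import json
from litellm import completion

prefill = "{"
messages = [
    {"role": "user", "content": 'How do you say "Hello" in German? Answer as a JSON object like {"german": ...}'},
    {"role": "assistant", "content": prefill},  # pre-fill: Claude continues from here
]
response = completion(model="claude-2.1", messages=messages)

# the reply continues the pre-fill, so stitch them back together before parsing
answer = json.loads(prefill + response.choices[0].message.content)
print(answer["german"])
```
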
From d179ae376e4256c2e9fdba4a84c054c3ee8ca971 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 07:53:06 -0800
Subject: [PATCH 05/10] (feat) claude-3 test fixes

---
 docs/my-website/docs/providers/anthropic.md |  2 --
 litellm/llms/anthropic.py                   | 20 +++++++++++++++++++-
 litellm/tests/test_completion.py            |  4 ++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index 617476b972..198a6a03dc 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -4,7 +4,6 @@ LiteLLM supports
 - `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`)
 - `claude-2`
 - `claude-2.1`
-- `claude-instant-1`
 - `claude-instant-1.2`

 ## API Keys
@@ -56,7 +55,6 @@ for chunk in response:
 | claude-3-sonnet | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
 | claude-2.1 | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
 | claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
-| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
 | claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |

 ## Advanced

diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 829a8becd1..79b84c4d29 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -121,11 +121,29 @@ def completion(
         prompt = prompt_factory(
             model=model, messages=messages, custom_llm_provider="anthropic"
         )
-
+    """
+    format messages for anthropic
+    1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
+    2. The first message always needs to be of role "user"
+    """
+    # 1. Anthropic only supports roles like "user" and "assistant"
     for message in messages:
         if message["role"] == "system":
             message["role"] = "assistant"

+    # 2. The first message always needs to be of role "user"
+    if len(messages) > 0:
+        if messages[0]["role"] != "user":
+            # find the index of the first user message
+            for i, message in enumerate(messages):
+                if message["role"] == "user":
+                    break
+
+            # remove the user message at existing position and add it to the front
+            messages.pop(i)
+            # move the first user message to the front
+            messages = [message] + messages
+
     ## Load Config
     config = litellm.AnthropicConfig.get_config()
     for k, v in config.items():

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 163a66af37..bbe1dae3a6 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -56,7 +56,7 @@ def test_completion_custom_provider_model_name():
 def test_completion_claude():
     litellm.set_verbose = True
     litellm.cache = None
-    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
+    litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"})
     messages = [
         {
             "role": "system",
@@ -67,7 +67,7 @@ def test_completion_claude():
     try:
         # test without max tokens
         response = completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=messages,
             request_timeout=10,
         )

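Reviewer note on patch 05: the reordering logic is easier to verify on a concrete input. A standalone trace of the two steps added to `completion()`, on a sample message list:

```python
# trace of the message normalization added in patch 05
messages = [
    {"role": "system", "content": "You are a poet."},  # becomes "assistant"
    {"role": "user", "content": "Write a haiku"},
]

# 1. map system -> assistant
for message in messages:
    if message["role"] == "system":
        message["role"] = "assistant"

# 2. ensure the first message has role "user"
if messages and messages[0]["role"] != "user":
    for i, message in enumerate(messages):
        if message["role"] == "user":
            break
    messages.pop(i)                  # remove the first user message from its position...
    messages = [message] + messages  # ...and put it at the front

print(messages)
# [{'role': 'user', 'content': 'Write a haiku'}, {'role': 'assistant', 'content': 'You are a poet.'}]
```
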
From ba460e68384cb274e9f6767d43fb27a5a6b001fc Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 08:20:26 -0800
Subject: [PATCH 06/10] (test) claude fixes

---
 litellm/llms/anthropic.py                    | 14 +++++++++++++-
 litellm/tests/test_add_function_to_prompt.py |  3 ++-
 litellm/tests/test_completion.py             |  9 +++------
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 79b84c4d29..44a1b128a9 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -125,12 +125,19 @@ def completion(
     format messages for anthropic
     1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
     2. The first message always needs to be of role "user"
+    3. Each message must alternate between "user" and "assistant" (this is not addressed as of now by litellm)
+    4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise)
     """
     # 1. Anthropic only supports roles like "user" and "assistant"
-    for message in messages:
+    for idx, message in enumerate(messages):
         if message["role"] == "system":
             message["role"] = "assistant"

+        # if this is the final assistant message, remove trailing whitespace
+        # TODO: only do this if it's the final assistant message
+        if message["role"] == "assistant":
+            message["content"] = message["content"].strip()
+
     # 2. The first message always needs to be of role "user"
     if len(messages) > 0:
         if messages[0]["role"] != "user":
@@ -211,6 +218,11 @@ def completion(
                 message=str(completion_response["error"]),
                 status_code=response.status_code,
             )
+        elif len(completion_response["content"]) == 0:
+            raise AnthropicError(
+                message="No content in response",
+                status_code=response.status_code,
+            )
         else:
             text_content = completion_response["content"][0].get("text", None)
             model_response.choices[0].message.content = text_content  # type: ignore

diff --git a/litellm/tests/test_add_function_to_prompt.py b/litellm/tests/test_add_function_to_prompt.py
index 932e6edd14..93b09cd8c8 100644
--- a/litellm/tests/test_add_function_to_prompt.py
+++ b/litellm/tests/test_add_function_to_prompt.py
@@ -47,8 +47,9 @@ test_function_call_non_openai_model()
 ## case 2: add_function_to_prompt set
 def test_function_call_non_openai_model_litellm_mod_set():
     litellm.add_function_to_prompt = True
+    litellm.set_verbose = True
     try:
-        model = "claude-instant-1"
+        model = "claude-instant-1.2"
         messages = [{"role": "user", "content": "what's the weather in sf?"}]
         functions = [
             {

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 163a66af37..a9d41be8d1 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -92,7 +92,6 @@ def test_completion_claude_3():
         response = completion(
             model="anthropic/claude-3-opus-20240229",
             messages=messages,
-            max_tokens=10,
         )
         # Add any assertions here to check response args
         print(response)
@@ -198,19 +197,17 @@ def test_completion_mistral_api_modified_input():
 def test_completion_claude2_1():
     try:
+        litellm.set_verbose = True
         print("claude2.1 test request")
         messages = [
             {
                 "role": "system",
-                "content": "Your goal is generate a joke on the topic user gives",
+                "content": "Your goal is generate a joke on the topic user gives.",
             },
-            {"role": "assistant", "content": "Hi, how can i assist you today?"},
             {"role": "user", "content": "Generate a 3 liner joke for me"},
         ]
         # test without max tokens
-        response = completion(
-            model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10
-        )
+        response = completion(model="claude-2.1", messages=messages)
         # Add any assertions here to check the response
         print(response)
         print(response.usage)

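Reviewer note on patch 06: the new `.strip()` runs on every assistant message, while the TODO says only the final one should be touched. A sketch of the narrower version the TODO points at (hypothetical helper, not part of the patch):

```python
# sketch of the TODO: strip trailing whitespace only on the final assistant message
def strip_final_assistant_whitespace(messages: list) -> list:
    if messages and messages[-1]["role"] == "assistant":
        # Anthropic rejects a pre-fill that ends in whitespace, so trim just the last turn
        messages[-1]["content"] = messages[-1]["content"].rstrip()
    return messages

print(strip_final_assistant_whitespace(
    [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "{ "}]
))
# [{'role': 'user', 'content': 'Hi'}, {'role': 'assistant', 'content': '{'}]
```
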
From 3e8cecc03b18d1612e0b5426d11db052e26f7f61 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 08:29:16 -0800
Subject: [PATCH 07/10] (fix) test streaming

---
 litellm/tests/test_streaming.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 083953f574..5767a944b2 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -348,7 +348,7 @@ def test_completion_claude_stream():
         },
     ]
     response = completion(
-        model="claude-instant-1", messages=messages, stream=True, max_tokens=50
+        model="claude-instant-1.2", messages=messages, stream=True, max_tokens=50
     )
     complete_response = ""
     # Add any assertions here to check the response

From 9bac163e4ebd3fc73a4af79490f570997171cd60 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 08:32:13 -0800
Subject: [PATCH 08/10] (test) claude-instant-1

---
 litellm/tests/test_provider_specific_config.py | 6 +++---
 litellm/tests/test_router.py                   | 2 +-
 litellm/tests/test_router_timeout.py           | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py
index 5e5d19c786..dcb4dcb4c7 100644
--- a/litellm/tests/test_provider_specific_config.py
+++ b/litellm/tests/test_provider_specific_config.py
@@ -53,7 +53,7 @@ def claude_test_completion():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             max_tokens=10,
         )
@@ -63,7 +63,7 @@ def claude_test_completion():

         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
         )
         # Add any assertions here to check the response
@@ -74,7 +74,7 @@ def claude_test_completion():

     try:
         response_3 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             n=2,
         )

diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 127caf223b..7c182ee686 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -933,7 +933,7 @@ def test_router_anthropic_key_dynamic():
         {
             "model_name": "anthropic-claude",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": anthropic_api_key,
             },
         }

diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
index 139914f6df..3816c649e9 100644
--- a/litellm/tests/test_router_timeout.py
+++ b/litellm/tests/test_router_timeout.py
@@ -35,7 +35,7 @@ def test_router_timeouts():
         {
             "model_name": "anthropic-claude-instant-1.2",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": "os.environ/ANTHROPIC_API_KEY",
             },
             "tpm": 20000,

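Reviewer note on patches 07 and 08: these are mechanical `claude-instant-1` to `claude-instant-1.2` renames across the tests. For reference, the router configuration they exercise looks roughly like this (a sketch assuming `ANTHROPIC_API_KEY` is set):

```python
from litellm import Router

model_list = [
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {
            "model": "claude-instant-1.2",              # renamed in patch 08
            "api_key": "os.environ/ANTHROPIC_API_KEY",  # resolved from the env by litellm
        },
    }
]
router = Router(model_list=model_list)
response = router.completion(
    model="anthropic-claude-instant-1.2",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```
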
From 1bd92b20dd49370b412da25f9a4549f5f265ce5d Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 08:44:39 -0800
Subject: [PATCH 09/10] (test-fix) hf is unstable

---
 litellm/tests/test_text_completion.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 0b98111839..a16b864494 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -2836,6 +2836,8 @@ def test_completion_hf_prompt_array():
         print(str(e))
         if "is currently loading" in str(e):
             return
+        if "Service Unavailable" in str(e):
+            return
         pytest.fail(f"Error occurred: {e}")

From 2d051addf1d3d4e92240e208849dda7a0df65d61 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 4 Mar 2024 09:06:42 -0800
Subject: [PATCH 10/10] (test) test_exceptions.py

---
 litellm/tests/test_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 9c90014c0d..311bbfa571 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -70,7 +70,7 @@ models = ["command-nightly"]
 @pytest.mark.parametrize("model", models)
 def test_context_window_with_fallbacks(model):
     ctx_window_fallback_dict = {
-        "command-nightly": "claude-2",
+        "command-nightly": "claude-2.1",
         "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
         "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
     }
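Reviewer note on patch 10: the `ctx_window_fallback_dict` updated here is passed to `completion()` as `context_window_fallback_dict`, which retries on the mapped model when the original model's context window is exceeded. A usage sketch (the oversized prompt is contrived, and the exact fallback behavior depends on the provider error raised):

```python
from litellm import completion

# if command-nightly overflows its context window, retry on claude-2.1 (per patch 10)
ctx_window_fallback_dict = {"command-nightly": "claude-2.1"}

response = completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "hello " * 10_000}],  # deliberately oversized
    context_window_fallback_dict=ctx_window_fallback_dict,
)
print(response.model)  # expected to report the fallback model after the retry
```
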