mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Litellm dev 02 13 2025 p2 (#8525)
* fix(azure/chat/gpt_transformation.py): add 'prediction' as a supported azure param. Closes https://github.com/BerriAI/litellm/issues/8500
* build(model_prices_and_context_window.json): add new 'gemini-2.0-pro-exp-02-05' model
* style: clean up invalid trailing comma in json
* feat(utils.py): support passing 'tokenizer_config' to register_prompt_template. Enables passing a model's complete tokenizer config to litellm, which allows calling deepseek on bedrock with the correct prompt template
* fix(utils.py): fix register_prompt_template for custom model names
* test(test_prompt_factory.py): fix test
* test(test_completion.py): add e2e test for bedrock invoke deepseek ft model
* feat(base_invoke_transformation.py): support hf_model_name param for bedrock invoke calls. Enables the proxy admin to set the base model for a fine-tuned bedrock deepseek model
* feat(bedrock/invoke): support deepseek_r1 route for bedrock. Makes it easy to apply the right chat template to that call
* feat(constants.py): store the deepseek r1 chat template, so users get a correct response from deepseek r1 without extra work
* test(test_completion.py): add e2e mock test for bedrock deepseek
* docs(bedrock.md): document the new deepseek_r1 route for bedrock, so the right config is used
* fix(exception_mapping_utils.py): catch read operation timeout
This commit is contained in:
parent be31a5f7b4
commit e33543ae4f

15 changed files with 444 additions and 39 deletions
docs: bedrock.md

```diff
@@ -7,7 +7,7 @@ ALL Bedrock models (Anthropic, Meta, Deepseek, Mistral, Amazon, etc.) are Supported
 | Property | Details |
 |-------|-------|
 | Description | Amazon Bedrock is a fully managed service that offers a choice of high-performing foundation models (FMs). |
-| Provider Route on LiteLLM | `bedrock/`, [`bedrock/converse/`](#set-converse--invoke-route), [`bedrock/invoke/`](#set-invoke-route), [`bedrock/converse_like/`](#calling-via-internal-proxy), [`bedrock/llama/`](#bedrock-imported-models-deepseek) |
+| Provider Route on LiteLLM | `bedrock/`, [`bedrock/converse/`](#set-converse--invoke-route), [`bedrock/invoke/`](#set-invoke-route), [`bedrock/converse_like/`](#calling-via-internal-proxy), [`bedrock/llama/`](#deepseek-not-r1), [`bedrock/deepseek_r1/`](#deepseek-r1) |
 | Provider Doc | [Amazon Bedrock ↗](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html) |
 | Supported OpenAI Endpoints | `/chat/completions`, `/completions`, `/embeddings`, `/images/generations` |
 | Pass-through Endpoint | [Supported](../pass_through/bedrock.md) |
```
docs: bedrock.md, @@ -1277,13 +1277,83 @@: the heading `## Bedrock Imported Models (Deepseek)` becomes `## Bedrock Imported Models (Deepseek, Deepseek R1)`, a new `### Deepseek R1` subsection is added, and the existing imported-models content is regrouped under a new `### Deepseek (not R1)` heading. The resulting section:

### Deepseek R1

This is a separate route, as the chat template is different.

| Property | Details |
|----------|---------|
| Provider Route | `bedrock/deepseek_r1/{model_arn}` |
| Provider Documentation | [Bedrock Imported Models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-import-model.html), [Deepseek Bedrock Imported Model](https://aws.amazon.com/blogs/machine-learning/deploy-deepseek-r1-distilled-llama-models-with-amazon-bedrock-custom-model-import/) |

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

response = completion(
    model="bedrock/deepseek_r1/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n",  # bedrock/deepseek_r1/{your-model-arn}
    messages=[{"role": "user", "content": "Tell me a joke"}],
)
```

</TabItem>
<TabItem value="proxy" label="Proxy">

**1. Add to config**

```yaml
model_list:
  - model_name: DeepSeek-R1-Distill-Llama-70B
    litellm_params:
      model: bedrock/deepseek_r1/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING at http://0.0.0.0:4000
```

**3. Test it!** (use the `model_name` from the config as `model`)

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
  "model": "DeepSeek-R1-Distill-Llama-70B",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ]
}'
```

</TabItem>
</Tabs>

### Deepseek (not R1)

| Property | Details |
|----------|---------|
| Provider Route | `bedrock/llama/{model_arn}` |
| Provider Documentation | [Bedrock Imported Models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-import-model.html), [Deepseek Bedrock Imported Model](https://aws.amazon.com/blogs/machine-learning/deploy-deepseek-r1-distilled-llama-models-with-amazon-bedrock-custom-model-import/) |

Use this route to call Bedrock Imported Models that follow the `llama` Invoke Request / Response spec.
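The renamed `### Deepseek (not R1)` subsection describes the `bedrock/llama/` route but this hunk adds no call example for it. A minimal SDK sketch, reusing the imported-model ARN from the example above purely as a placeholder (the route format and `completion` call shape come from this doc; the ARN is not a callable model):

```python
from litellm import completion

# `bedrock/llama/{model_arn}` applies the llama Invoke request/response spec
# to a Bedrock imported model (e.g. a non-R1 Deepseek distill).
response = completion(
    model="bedrock/llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n",  # placeholder ARN
    messages=[{"role": "user", "content": "Tell me a joke"}],
)
print(response.choices[0].message.content)
```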
litellm/__init__.py

```diff
@@ -52,6 +52,7 @@ from litellm.constants import (
     open_ai_embedding_models,
     cohere_embedding_models,
     bedrock_embedding_models,
+    known_tokenizer_config,
 )
 from litellm.types.guardrails import GuardrailItem
 from litellm.proxy._types import (
@@ -360,7 +361,15 @@ BEDROCK_CONVERSE_MODELS = [
     "meta.llama3-2-90b-instruct-v1:0",
 ]
 BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
-    "cohere", "anthropic", "mistral", "amazon", "meta", "llama", "ai21", "nova"
+    "cohere",
+    "anthropic",
+    "mistral",
+    "amazon",
+    "meta",
+    "llama",
+    "ai21",
+    "nova",
+    "deepseek_r1",
 ]
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
```
litellm/constants.py

```diff
@@ -335,6 +335,63 @@ bedrock_embedding_models: List = [
     "cohere.embed-multilingual-v3",
 ]
 
+known_tokenizer_config = {
+    "mistralai/Mistral-7B-Instruct-v0.1": {
+        "tokenizer": {
+            "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+            "bos_token": "<s>",
+            "eos_token": "</s>",
+        },
+        "status": "success",
+    },
+    "meta-llama/Meta-Llama-3-8B-Instruct": {
+        "tokenizer": {
+            "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
+            "bos_token": "<|begin_of_text|>",
+            "eos_token": "",
+        },
+        "status": "success",
+    },
+    "deepseek-r1/deepseek-r1-7b-instruct": {
+        "tokenizer": {
+            "add_bos_token": True,
+            "add_eos_token": False,
+            "bos_token": {
+                "__type": "AddedToken",
+                "content": "<|begin▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "clean_up_tokenization_spaces": False,
+            "eos_token": {
+                "__type": "AddedToken",
+                "content": "<|end▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "legacy": True,
+            "model_max_length": 16384,
+            "pad_token": {
+                "__type": "AddedToken",
+                "content": "<|end▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "sp_model_kwargs": {},
+            "unk_token": None,
+            "tokenizer_class": "LlamaTokenizerFast",
+            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+        },
+        "status": "success",
+    },
+}
+
+
 OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"]
 HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60  # 1 minute
```
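To see what the stored Deepseek R1 `chat_template` produces, here is a minimal sketch that renders it directly with `jinja2` (this bypasses LiteLLM's own `hf_chat_template` path shown further down, so whitespace handling may differ slightly):

```python
from jinja2 import Template

from litellm.constants import known_tokenizer_config

cfg = known_tokenizer_config["deepseek-r1/deepseek-r1-7b-instruct"]["tokenizer"]

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the weather in Copenhagen?"},
]

# bos_token is stored as an AddedToken dict; the template only needs its content string.
prompt = Template(cfg["chat_template"]).render(
    messages=messages,
    bos_token=cfg["bos_token"]["content"],
    add_generation_prompt=True,
)
print(prompt)
# Expected shape (per the tests below):
# <|begin▁of▁sentence|>You are a helpful assistant.<|User|>What is the weather in Copenhagen?<|Assistant|><think>
```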
exception_mapping_utils.py

```diff
@@ -223,6 +223,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                 "Request Timeout Error" in error_str
                 or "Request timed out" in error_str
                 or "Timed out generating response" in error_str
+                or "The read operation timed out" in error_str
             ):
                 exception_mapping_worked = True
```
litellm/litellm_core_utils/prompt_templates/factory.py

The module-level `known_tokenizer_config` dict (the `mistralai/Mistral-7B-Instruct-v0.1` and `meta-llama/Meta-Llama-3-8B-Instruct` entries, identical to those now defined in constants.py above) is deleted from this file (@@ -325,26 +325,6 @@ def phind_codellama_pt), and `hf_chat_template` switches to the shared `litellm.known_tokenizer_config`. A new `deepseek_r1_pt` helper is also added:

```diff
@@ -378,11 +358,11 @@ def hf_chat_template(  # noqa: PLR0915
         else:
             return {"status": "failure"}
 
-    if model in known_tokenizer_config:
-        tokenizer_config = known_tokenizer_config[model]
+    if model in litellm.known_tokenizer_config:
+        tokenizer_config = litellm.known_tokenizer_config[model]
     else:
         tokenizer_config = _get_tokenizer_config(model)
-        known_tokenizer_config.update({model: tokenizer_config})
+        litellm.known_tokenizer_config.update({model: tokenizer_config})
 
     if (
         tokenizer_config["status"] == "failure"
@@ -475,6 +455,12 @@ def hf_chat_template(  # noqa: PLR0915
     )  # don't use verbose_logger.exception, if exception is raised
 
 
+def deepseek_r1_pt(messages):
+    return hf_chat_template(
+        model="deepseek-r1/deepseek-r1-7b-instruct", messages=messages
+    )
+
+
 # Anthropic template
 def claude_2_1_pt(
     messages: list,
```
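The new `deepseek_r1_pt` helper simply delegates to `hf_chat_template` with the registered config. A small usage sketch (no network lookup is needed, since the model key already sits in `litellm.known_tokenizer_config`):

```python
from litellm.litellm_core_utils.prompt_templates.factory import deepseek_r1_pt

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Tell me a joke"},
]

# Renders the Deepseek R1 chat template stored under
# "deepseek-r1/deepseek-r1-7b-instruct" in litellm.known_tokenizer_config.
prompt = deepseek_r1_pt(messages=messages)
print(prompt)
```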
litellm/llms/azure/chat/gpt_transformation.py

```diff
@@ -98,6 +98,7 @@ class AzureOpenAIConfig(BaseConfig):
             "seed",
             "extra_headers",
             "parallel_tool_calls",
+            "prediction",
         ]
 
     def _is_response_format_supported_model(self, model: str) -> bool:
```
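With `prediction` now in Azure's supported OpenAI params, it can be passed straight through `litellm.completion`. A minimal sketch, assuming a hypothetical Azure deployment name and the usual `AZURE_API_KEY` / `AZURE_API_BASE` / `AZURE_API_VERSION` environment variables; the payload shape mirrors the unit test added below:

```python
import litellm

response = litellm.completion(
    model="azure/my-gpt-4o-deployment",  # placeholder deployment name
    messages=[{"role": "user", "content": "Fix the typo in this sentence."}],
    prediction={
        "type": "content",
        "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
    },
)
print(response.choices[0].message.content)
```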
Legacy Bedrock handler module (docstring only):

```diff
@@ -1,5 +1,5 @@
 """
-Manages calling Bedrock's `/converse` API + `/invoke` API
+TODO: DELETE FILE. Bedrock LLM is no longer used. Goto `litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py`
 """
 
 import copy
```
litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py

```diff
@@ -14,6 +14,7 @@ from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
 from litellm.litellm_core_utils.prompt_templates.factory import (
     cohere_message_pt,
     custom_prompt,
+    deepseek_r1_pt,
     prompt_factory,
 )
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
@@ -178,11 +179,15 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
         ## SETUP ##
         stream = optional_params.pop("stream", None)
         custom_prompt_dict: dict = litellm_params.pop("custom_prompt_dict", None) or {}
+        hf_model_name = litellm_params.get("hf_model_name", None)
 
         provider = self.get_bedrock_invoke_provider(model)
 
         prompt, chat_history = self.convert_messages_to_prompt(
-            model, messages, provider, custom_prompt_dict
+            model=hf_model_name or model,
+            messages=messages,
+            provider=provider,
+            custom_prompt_dict=custom_prompt_dict,
         )
         inference_params = copy.deepcopy(optional_params)
         inference_params = {
@@ -266,7 +271,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
                 "inputText": prompt,
                 "textGenerationConfig": inference_params,
             }
-        elif provider == "meta" or provider == "llama":
+        elif provider == "meta" or provider == "llama" or provider == "deepseek_r1":
             ## LOAD CONFIG
             config = litellm.AmazonLlamaConfig.get_config()
             for k, v in config.items():
@@ -351,7 +356,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
             outputText = (
                 completion_response.get("completions")[0].get("data").get("text")
             )
-        elif provider == "meta" or provider == "llama":
+        elif provider == "meta" or provider == "llama" or provider == "deepseek_r1":
             outputText = completion_response["generation"]
         elif provider == "mistral":
             outputText = completion_response["outputs"][0]["text"]
@@ -664,6 +669,8 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
             )
         elif provider == "cohere":
             prompt, chat_history = cohere_message_pt(messages=messages)
+        elif provider == "deepseek_r1":
+            prompt = deepseek_r1_pt(messages=messages)
         else:
             prompt = ""
             for message in messages:
```
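Per the commit message, `hf_model_name` lets a proxy admin point a fine-tuned Bedrock import at the base model whose chat template should be applied. A hedged sketch of the SDK-side equivalent, assuming `hf_model_name` is accepted as a top-level `completion` kwarg (as it is for SageMaker models) and that AWS credentials are configured; on the proxy, the same key would sit under `litellm_params`. The ARN is a placeholder, and `meta-llama/Meta-Llama-3-8B-Instruct` is used only because its tokenizer config already ships with LiteLLM:

```python
from litellm import completion

response = completion(
    model="bedrock/llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re",  # placeholder ARN
    hf_model_name="meta-llama/Meta-Llama-3-8B-Instruct",  # base model whose chat template is applied
    messages=[{"role": "user", "content": "Hello!"}],
)
```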
litellm/main.py

```diff
@@ -215,7 +215,6 @@ azure_audio_transcriptions = AzureAudioTranscription()
 huggingface = Huggingface()
 predibase_chat_completions = PredibaseChatCompletion()
 codestral_text_completions = CodestralTextCompletion()
-bedrock_chat_completion = BedrockLLM()
 bedrock_converse_chat_completion = BedrockConverseLLM()
 bedrock_embedding = BedrockEmbedding()
 bedrock_image_generation = BedrockImageGeneration()
@@ -3947,7 +3946,7 @@ async def atext_completion(
             ),
             model=model,
             custom_llm_provider=custom_llm_provider,
-            stream_options=kwargs.get('stream_options'),
+            stream_options=kwargs.get("stream_options"),
         )
     else:
         ## OpenAI / Azure Text Completion Returns here
```
model_prices_and_context_window.json

```diff
@@ -3658,6 +3658,42 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
         "supports_tool_choice": true
     },
+    "gemini-2.0-pro-exp-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 2097152,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0,
+        "input_cost_per_token_above_128k_tokens": 0,
+        "input_cost_per_character_above_128k_tokens": 0,
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_audio_input": true,
+        "supports_video_input": true,
+        "supports_pdf_input": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+    },
     "gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
```
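A quick sketch of calling the new entry; `vertex_ai-language-models` entries are addressed through the `vertex_ai/` route, and Vertex AI project/location credentials are assumed to be configured separately:

```python
from litellm import completion

response = completion(
    model="vertex_ai/gemini-2.0-pro-exp-02-05",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```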
litellm/utils.py

```diff
@@ -5194,9 +5194,10 @@ def _calculate_retry_after(
 # custom prompt helper function
 def register_prompt_template(
     model: str,
-    roles: dict,
+    roles: dict = {},
     initial_prompt_value: str = "",
     final_prompt_value: str = "",
+    tokenizer_config: dict = {},
 ):
     """
     Register a prompt template to follow your custom format for a given model
```

In the second hunk (@@ -5233,12 +5234,27 @@), the body after the docstring now registers against both the complete model name and the provider-stripped name, and routes `tokenizer_config` into `litellm.known_tokenizer_config`:

```python
    complete_model = model
    potential_models = [complete_model]
    try:
        model = get_llm_provider(model=model)[0]
        potential_models.append(model)
    except Exception:
        pass
    if tokenizer_config:
        for m in potential_models:
            litellm.known_tokenizer_config[m] = {
                "tokenizer": tokenizer_config,
                "status": "success",
            }
    else:
        for m in potential_models:
            litellm.custom_prompt_dict[m] = {
                "roles": roles,
                "initial_prompt_value": initial_prompt_value,
                "final_prompt_value": final_prompt_value,
            }

    return litellm.custom_prompt_dict
```
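A condensed sketch of the two registration paths after this change. The Bedrock ARN is a placeholder; the tokenizer config reuses the Llama-3 entry that already ships in `litellm.constants`, and the roles-based call matches the pre-existing behavior:

```python
import litellm
from litellm.constants import known_tokenizer_config

# Path 1: tokenizer_config -> stored in litellm.known_tokenizer_config under both
# the full model string and the provider-stripped name.
litellm.register_prompt_template(
    model="bedrock/llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re",  # placeholder ARN
    tokenizer_config=known_tokenizer_config["meta-llama/Meta-Llama-3-8B-Instruct"]["tokenizer"],
)

# Path 2: no tokenizer_config -> the classic roles-based litellm.custom_prompt_dict entry.
litellm.register_prompt_template(
    model="my-custom-model",  # placeholder name
    roles={"user": {"pre_message": "USER: ", "post_message": "\n"}},
    final_prompt_value="ASSISTANT: ",
)
```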
A second copy of the pricing data receives the identical `gemini-2.0-pro-exp-02-05` entry (@@ -3658,6 +3658,42 @@), same content as the block above (this appears to be the backup pricing JSON bundled inside the `litellm` package).
tests: new unit test for the Azure `prediction` param (added next to `test_gemini_frequency_penalty`):

```diff
@@ -1069,6 +1069,21 @@ def test_gemini_frequency_penalty():
     assert optional_params["frequency_penalty"] == 0.5
 
 
+def test_azure_prediction_param():
+    optional_params = get_optional_params(
+        model="chatgpt-v2",
+        custom_llm_provider="azure",
+        prediction={
+            "type": "content",
+            "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
+        },
+    )
+    assert optional_params["prediction"] == {
+        "type": "content",
+        "content": "LiteLLM is a very useful way to connect to a variety of LLMs.",
+    }
+
+
 def test_vertex_ai_ft_llama():
     optional_params = get_optional_params(
         model="1984786713414729728",
```
tests: test_prompt_factory.py (@@ -708,3 +708,60 @@, after `test_convert_generic_image_chunk_to_openai_image_obj`) adds:

```python
def test_hf_chat_template():
    from litellm.litellm_core_utils.prompt_templates.factory import (
        hf_chat_template,
    )

    model = "llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    litellm.register_prompt_template(
        model=model,
        tokenizer_config={
            "add_bos_token": True,
            "add_eos_token": False,
            "bos_token": {
                "__type": "AddedToken",
                "content": "<|begin▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "clean_up_tokenization_spaces": False,
            "eos_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "legacy": True,
            "model_max_length": 16384,
            "pad_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "sp_model_kwargs": {},
            "unk_token": None,
            "tokenizer_class": "LlamaTokenizerFast",
            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
        },
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]
    chat_template = hf_chat_template(model=model, messages=messages)
    print(chat_template)
    assert (
        chat_template.rstrip()
        == """<|begin▁of▁sentence|>You are a helpful assistant.<|User|>What is the weather in Copenhagen?<|Assistant|><think>"""
    )
```
tests: test_completion.py (@@ -3242,6 +3242,121 @@, after `test_replicate_custom_prompt_dict`) adds two e2e mock tests:

```python
def test_bedrock_deepseek_custom_prompt_dict():
    model = "llama/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    litellm.register_prompt_template(
        model=model,
        tokenizer_config={
            "add_bos_token": True,
            "add_eos_token": False,
            "bos_token": {
                "__type": "AddedToken",
                "content": "<|begin▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "clean_up_tokenization_spaces": False,
            "eos_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "legacy": True,
            "model_max_length": 16384,
            "pad_token": {
                "__type": "AddedToken",
                "content": "<|end▁of▁sentence|>",
                "lstrip": False,
                "normalized": True,
                "rstrip": False,
                "single_word": False,
            },
            "sp_model_kwargs": {},
            "unk_token": None,
            "tokenizer_class": "LlamaTokenizerFast",
            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
        },
    )
    assert model in litellm.known_tokenizer_config

    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post") as mock_post:
        try:
            completion(
                model="bedrock/" + model,
                messages=messages,
                client=client,
            )
        except Exception as e:
            pass

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)
        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert (
            json_data["prompt"].rstrip()
            == """<|begin▁of▁sentence|>You are a good assistant<|User|>What is the weather in Copenhagen?<|Assistant|><think>"""
        )


def test_bedrock_deepseek_known_tokenizer_config():
    model = "deepseek_r1/arn:aws:bedrock:us-east-1:1234:imported-model/45d34re"
    from litellm.llms.custom_httpx.http_handler import HTTPHandler
    from unittest.mock import Mock
    import httpx

    mock_response = Mock(spec=httpx.Response)
    mock_response.status_code = 200
    mock_response.headers = {
        "x-amzn-bedrock-input-token-count": "20",
        "x-amzn-bedrock-output-token-count": "30",
    }

    # The response format for deepseek_r1
    response_data = {
        "generation": "The weather in Copenhagen is currently sunny with a temperature of 20°C (68°F). The forecast shows clear skies throughout the day with a gentle breeze from the northwest.",
        "stop_reason": "stop",
        "stop_sequence": None,
    }

    mock_response.json.return_value = response_data
    mock_response.text = json.dumps(response_data)

    client = HTTPHandler()

    messages = [
        {"role": "system", "content": "You are a good assistant"},
        {"role": "user", "content": "What is the weather in Copenhagen?"},
    ]

    with patch.object(client, "post", return_value=mock_response) as mock_post:
        completion(
            model="bedrock/" + model,
            messages=messages,
            client=client,
        )

        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)
        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert (
            json_data["prompt"].rstrip()
            == """<|begin▁of▁sentence|>You are a good assistant<|User|>What is the weather in Copenhagen?<|Assistant|><think>"""
        )
```