forked from phoenix/litellm-mirror

docs(input.md): add hf_model_name to docs

parent 88845dddb1
commit ff949490de

4 changed files with 39 additions and 8 deletions
@@ -40,7 +40,7 @@ This list is constantly being updated.
 |AI21| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | |
 |VertexAI| ✅ | ✅ | | ✅ | | | | | | |
 |Bedrock| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
-|Sagemaker| ✅ | ✅ (only `jumpstart llama2`) | | ✅ | | | | | | |
+|Sagemaker| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | |
 |TogetherAI| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |
 |AlephAlpha| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | |
 |Palm| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | |
@@ -185,6 +185,25 @@ def completion(
 - `metadata`: *dict (optional)* - Any additional data you want to be logged when the call is made (sent to logging integrations, e.g. promptlayer, and accessible via a custom callback function)
 
+**CUSTOM MODEL COST**
+
+- `input_cost_per_token`: *float (optional)* - The cost per input token for the completion call
+
+- `output_cost_per_token`: *float (optional)* - The cost per output token for the completion call
+
+**CUSTOM PROMPT TEMPLATE** (See [prompt formatting for more info](./prompt_formatting.md#format-prompt-yourself))
+
+- `initial_prompt_value`: *string (optional)* - Initial string applied at the start of the input messages
+
+- `roles`: *dict (optional)* - Dictionary specifying how to format the prompt based on the role + message passed in via `messages`.
+
+- `final_prompt_value`: *string (optional)* - Final string applied at the end of the input messages
+
+- `bos_token`: *string (optional)* - String applied at the start of a sequence
+
+- `eos_token`: *string (optional)* - String applied at the end of a sequence
+
+- `hf_model_name`: *string (optional)* - [Sagemaker Only] The corresponding Hugging Face name of the model, used to pull the right chat template for the model.
+
 ## Provider-specific Params
 Providers might offer params not supported by OpenAI (e.g. top_k). You can pass those in 2 ways:
 - via completion(): We'll pass the non-OpenAI param straight to the provider as part of the request body.
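A minimal sketch of how the params added in this hunk could be used together, loosely following the Sagemaker test later in this commit; the endpoint name and per-token costs below are illustrative, not real values:

```python
import litellm

# hf_model_name tells litellm which Hugging Face chat template to apply to the
# Sagemaker endpoint; input/output costs feed cost tracking (values made up).
response = litellm.completion(
    model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b-chat",  # example endpoint
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    hf_model_name="meta-llama/Llama-2-7b-chat-hf",  # [Sagemaker Only] HF name for the chat template
    input_cost_per_token=0.0000008,   # illustrative custom cost
    output_cost_per_token=0.0000008,  # illustrative custom cost
)
print(response)
```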
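And a sketch of the first way to pass a provider-specific param such as `top_k` via completion(); whether `top_k` is accepted depends on the provider and model, and the model string here is illustrative:

```python
import litellm

# The non-OpenAI param (top_k) is forwarded to the provider as part of the request body.
response = litellm.completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",  # illustrative provider/model
    messages=[{"role": "user", "content": "Hello"}],
    top_k=40,  # provider-specific param, passed straight through
)
```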
@@ -149,12 +149,15 @@ def completion(
         additional_args={"complete_input_dict": data, "request_str": request_str},
     )
     ## COMPLETION CALL
-    response = client.invoke_endpoint(
-        EndpointName=model,
-        ContentType="application/json",
-        Body=data,
-        CustomAttributes="accept_eula=true",
-    )
+    try:
+        response = client.invoke_endpoint(
+            EndpointName=model,
+            ContentType="application/json",
+            Body=data,
+            CustomAttributes="accept_eula=true",
+        )
+    except Exception as e:
+        raise SagemakerError(status_code=500, message=f"{str(e)}")
     response = response["Body"].read().decode("utf8")
     ## LOGGING
     logging_obj.post_call(
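For context, a self-contained sketch of the pattern this hunk introduces: the boto3 `invoke_endpoint` call is wrapped in try/except so that any boto3 or endpoint failure is re-raised as a single provider error instead of bubbling up raw. The `SagemakerError` class, region, endpoint name, and payload shape below are stand-ins, not the module's actual definitions:

```python
import json
import boto3


class SagemakerError(Exception):
    """Stand-in for the provider error class used in the hunk above."""
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


client = boto3.client("sagemaker-runtime", region_name="us-west-2")  # region is illustrative
data = json.dumps({"inputs": "Hey, how's it going?"})  # payload shape depends on the endpoint

try:
    response = client.invoke_endpoint(
        EndpointName="jumpstart-dft-meta-textgeneration-llama-2-7b-chat",  # example endpoint
        ContentType="application/json",
        Body=data,
        CustomAttributes="accept_eula=true",
    )
except Exception as e:
    # Surface boto3 / endpoint failures as one provider-specific error type
    raise SagemakerError(status_code=500, message=str(e))

raw = response["Body"].read().decode("utf8")
print(raw)
```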
@@ -1053,10 +1053,11 @@ def test_completion_chat_sagemaker():
     print("testing sagemaker")
     litellm.set_verbose=True
     response = completion(
-        model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b-f",
+        model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b-chat",
         messages=messages,
         max_tokens=100,
         stream=True,
+        n=2,
         hf_model_name="meta-llama/Llama-2-7b-chat-hf",
     )
     # Add any assertions here to check the response
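One possible assertion block for the streaming test above (a sketch only; it assumes the chunks are dict-like and follow the OpenAI-style streaming format that litellm mirrors):

```python
# Iterate the stream returned by completion(..., stream=True, n=2) and
# accumulate any text deltas; at minimum the stream should be iterable.
chunk_count = 0
full_text = ""
for chunk in response:
    chunk_count += 1
    delta = chunk["choices"][0]["delta"]
    full_text += delta.get("content") or ""
assert chunk_count > 0
print(full_text)
```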
@@ -4065,6 +4065,14 @@ def exception_type(
                     llm_provider="sagemaker",
                     response=original_exception.response
                 )
+            elif "Input validation error: `best_of` must be > 0 and <= 2" in error_str:
+                exception_mapping_worked = True
+                raise BadRequestError(
+                    message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints",
+                    model=model,
+                    llm_provider="sagemaker",
+                    response=original_exception.response
+                )
             elif custom_llm_provider == "vertex_ai":
                 if "Vertex AI API has not been used in project" in error_str or "Unable to find your project" in error_str:
                     exception_mapping_worked = True
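Roughly how the new mapping surfaces to a caller: passing `n > 2` to a Sagemaker endpoint now raises litellm's BadRequestError with a readable message instead of the raw `best_of` validation error. A sketch, assuming `BadRequestError` is importable from `litellm.exceptions` as the utils module does, and reusing the model string from the test above:

```python
from litellm import completion
from litellm.exceptions import BadRequestError  # assumed import path

try:
    completion(
        model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b-chat",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        n=3,  # > 2, rejected by the endpoint's `best_of` validation
    )
except BadRequestError as e:
    # Expected: "SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints"
    print(e)
```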