# litellm/tests/local_testing/test_model_max_token_adjust.py

# What this tests
## Verifies that max_tokens is adjusted down when the request exceeds the model's limit
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
from litellm import completion


@pytest.mark.skip(reason="AWS Suspended Account")
def test_completion_sagemaker():
    litellm.set_verbose = True
    # drop_params=True lets litellm adjust or drop unsupported params
    # (like an over-limit max_tokens) instead of raising an error
    litellm.drop_params = True
    response = completion(
        model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        temperature=0.2,
        max_tokens=80000,  # deliberately above the model's limit; should get adjusted
        hf_model_name="meta-llama/Llama-2-70b-chat-hf",
    )
    print(f"response: {response}")

# test_completion_sagemaker()
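

# A minimal, network-free sketch of the same check, assuming the model's
# metadata is present in litellm.model_cost (litellm's model-metadata map);
# the model name below is illustrative, not a claim about what that map
# actually contains. It only verifies that 80000 exceeds the model's own
# limit, which is the condition the SageMaker test above relies on.
def test_max_tokens_limit_lookup_sketch():
    model_info = litellm.model_cost.get("meta-llama/Llama-2-70b-chat-hf", {})
    model_max = model_info.get("max_tokens")
    requested = 80000
    if model_max is not None:
        # With litellm.drop_params=True, an over-limit request is expected
        # to be adjusted down rather than rejected by the provider.
        assert requested > model_max


# test_max_tokens_limit_lookup_sketch()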