refactor(sagemaker/): separate chat + completion routes + make them both use base llm config (#7151)

* refactor(sagemaker/): separate chat + completion routes + make them both use base llm config

Addresses https://github.com/andrewyng/aisuite/issues/113#issuecomment-2512369132

* fix(main.py): pass hf model name + custom prompt dict to litellm params
This commit is contained in:
Krish Dholakia 2024-12-10 19:40:05 -08:00 committed by GitHub
parent df12f87a64
commit 61afdab228
14 changed files with 799 additions and 534 deletions

View file

@@ -246,23 +246,6 @@ async def test_hf_completion_tgi():
# test_get_cloudflare_response_streaming()
@pytest.mark.skip(reason="AWS Suspended Account")
@pytest.mark.asyncio
async def test_completion_sagemaker():
    """Smoke-test a non-streaming SageMaker chat completion via acompletion.

    A slow/unavailable endpoint (litellm.Timeout) is tolerated; any other
    exception fails the test.
    """
    # litellm.set_verbose=True
    try:
        resp = await acompletion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
        )
        # Add any assertions here to check the response
        print(resp)
    except litellm.Timeout:
        # Timeouts are expected when the endpoint is cold or suspended.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_get_response_streaming():
import asyncio