diff --git a/litellm/main.py b/litellm/main.py
index 820033e50..8dbdb820c 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -165,6 +165,10 @@ async def acompletion(*args, **kwargs):
         if (custom_llm_provider == "openai"
             or custom_llm_provider == "azure"
             or custom_llm_provider == "custom_openai"
+            or custom_llm_provider == "anyscale"
+            or custom_llm_provider == "openrouter"
+            or custom_llm_provider == "deepinfra"
+            or custom_llm_provider == "perplexity"
             or custom_llm_provider == "text-completion-openai"
             or custom_llm_provider == "huggingface"): # currently implemented aiohttp calls for just azure and openai, soon all.
             if kwargs.get("stream", False):
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 470868031..8543a535e 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -582,6 +582,11 @@ async def chat_completion(request: Request, model: Optional[str] = None):
             detail=error_msg
         )
 
+@router.post("/v1/embeddings", dependencies=[Depends(user_api_key_auth)])
+@router.post("/embeddings", dependencies=[Depends(user_api_key_auth)])
+async def embeddings(request: Request):
+    pass
+
 @router.post("/key/generate", dependencies=[Depends(user_api_key_auth)])
 async def generate_key_fn(request: Request):
     data = await request.json()
diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 920b7524e..69bd97019 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -56,7 +56,7 @@ def test_async_response_openai():
 
     asyncio.run(test_get_response())
 
-test_async_response_openai()
+# test_async_response_openai()
 
 def test_async_response_azure():
     import asyncio
@@ -130,6 +130,7 @@ def test_get_response_streaming():
 def test_get_response_non_openai_streaming():
     import asyncio
     litellm.set_verbose = True
+    litellm.num_retries = 0
     async def test_async_call():
         user_message = "Hello, how are you?"
         messages = [{"content": user_message, "role": "user"}]
@@ -161,4 +162,4 @@ def test_get_response_non_openai_streaming():
         return response
     asyncio.run(test_async_call())
 
-# test_get_response_non_openai_streaming()
\ No newline at end of file
+test_get_response_non_openai_streaming()
\ No newline at end of file
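The `main.py` hunk extends `acompletion`'s provider check so that `anyscale`, `openrouter`, `deepinfra`, and `perplexity` are also routed through the native async (aiohttp) path rather than the executor-wrapped sync fallback. A minimal usage sketch, assuming credentials are configured in the environment; the model name is illustrative:

```python
# Sketch: after this change, a "perplexity/..."-prefixed model resolves to
# custom_llm_provider == "perplexity" and takes the native async code path.
import asyncio
import litellm

async def main():
    response = await litellm.acompletion(
        model="perplexity/mistral-7b-instruct",  # illustrative model name
        messages=[{"role": "user", "content": "Hello, how are you?"}],
    )
    print(response)

asyncio.run(main())
```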
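The `proxy_server.py` hunk registers `/embeddings` and `/v1/embeddings` behind the same API-key auth dependency as chat completions, but leaves the handler body as a stub (`pass`). A minimal sketch of what the handler might eventually forward to, assuming an OpenAI-style request body and that `litellm.aembedding` is available; this is not the diff's implementation:

```python
# Sketch only; `router` and `user_api_key_auth` come from the
# surrounding proxy_server.py module.
import litellm
from fastapi import Depends, Request

@router.post("/v1/embeddings", dependencies=[Depends(user_api_key_auth)])
@router.post("/embeddings", dependencies=[Depends(user_api_key_auth)])
async def embeddings(request: Request):
    data = await request.json()  # e.g. {"model": "...", "input": ["..."]}
    return await litellm.aembedding(**data)
```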