forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/07/2024) (#6101)
* fix(utils.py): support dropping temperature param for azure o1 models
* fix(main.py): handle azure o1 streaming requests; o1 doesn't support streaming, fake it to ensure code works as expected (see the sketch after this list)
* feat(utils.py): expose `hosted_vllm/` endpoint, with tool handling for vllm (fixes https://github.com/BerriAI/litellm/issues/6088)
* refactor(internal_user_endpoints.py): cleanup unused params + update docstring (closes https://github.com/BerriAI/litellm/issues/6100)
* fix(main.py): expose custom image generation api support (fixes https://github.com/BerriAI/litellm/issues/6097)
* fix: fix linting errors
* docs(custom_llm_server.md): add docs on custom api for image gen calls
* fix(types/utils.py): handle dict type
* fix(types/utils.py): fix linting errors
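To illustrate the first two fixes from the caller's side, here is a minimal sketch, assuming `drop_params` is enabled and the usual `AZURE_API_KEY` / `AZURE_API_BASE` environment variables are set; the deployment name and prompt are purely illustrative and are not taken from this commit:

```python
import litellm

# Assumption: with drop_params enabled, parameters that o1 models reject
# (e.g. temperature) are dropped from the request instead of raising an error.
litellm.drop_params = True

# Per the commit message, o1 doesn't support streaming, so litellm is expected
# to fake the stream (call the non-streaming API and yield chunks).
response = litellm.completion(
    model="azure/o1-preview",  # illustrative Azure deployment name
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.2,           # expected to be dropped for o1 models
    stream=True,
)
for chunk in response:
    print(chunk)
```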
parent 5de69cb1b2
commit 6729c9ca7f
17 changed files with 643 additions and 76 deletions
@@ -2156,7 +2156,13 @@ def test_openai_chat_completion_complete_response_call():
 # test_openai_chat_completion_complete_response_call()
 @pytest.mark.parametrize(
     "model",
-    ["gpt-3.5-turbo", "azure/chatgpt-v-2", "claude-3-haiku-20240307", "o1-preview"], #
+    [
+        "gpt-3.5-turbo",
+        "azure/chatgpt-v-2",
+        "claude-3-haiku-20240307",
+        "o1-preview",
+        "azure/fake-o1-mini",
+    ],
 )
 @pytest.mark.parametrize(
     "sync",
@@ -2164,6 +2170,7 @@ def test_openai_chat_completion_complete_response_call():
 )
 @pytest.mark.asyncio
 async def test_openai_stream_options_call(model, sync):
+    litellm.enable_preview_features = True
     litellm.set_verbose = True
     usage = None
     chunks = []
@@ -2175,7 +2182,6 @@ async def test_openai_stream_options_call(model, sync):
             ],
             stream=True,
             stream_options={"include_usage": True},
             max_tokens=10,
         )
         for chunk in response:
             print("chunk: ", chunk)
@@ -2186,7 +2192,6 @@ async def test_openai_stream_options_call(model, sync):
         messages=[{"role": "user", "content": "say GM - we're going to make it "}],
         stream=True,
         stream_options={"include_usage": True},
         max_tokens=10,
     )

     async for chunk in response:
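For context on what the test above exercises, here is a minimal sketch of the `stream_options` pattern in isolation, assuming that the final chunk of the stream carries token usage when `include_usage` is set; the model name is illustrative, and the attribute access is guarded because chunk shape can vary by provider:

```python
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative; the test parametrizes several models
    messages=[{"role": "user", "content": "say GM - we're going to make it "}],
    stream=True,
    stream_options={"include_usage": True},
    max_tokens=10,
)

usage = None
for chunk in response:
    # Most chunks carry content deltas; the final chunk is expected to carry usage.
    if getattr(chunk, "usage", None) is not None:
        usage = chunk.usage

print(usage)
```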