LiteLLM Minor Fixes & Improvements (10/07/2024) (#6101)
* fix(utils.py): support dropping temperature param for azure o1 models
* fix(main.py): handle azure o1 streaming requests: o1 doesn't support streaming, so fake it to ensure calling code works as expected
* feat(utils.py): expose `hosted_vllm/` endpoint, with tool handling for vllm. Fixes https://github.com/BerriAI/litellm/issues/6088
* refactor(internal_user_endpoints.py): clean up unused params + update docstring. Closes https://github.com/BerriAI/litellm/issues/6100
* fix(main.py): expose custom image generation api support. Fixes https://github.com/BerriAI/litellm/issues/6097
* fix: fix linting errors
* docs(custom_llm_server.md): add docs on custom api for image gen calls
* fix(types/utils.py): handle dict type
* fix(types/utils.py): fix linting errors
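The `hosted_vllm/` and azure o1 items above both concern request routing and parameter handling in the SDK. Below is a minimal, hypothetical usage sketch (not code from this commit) of how those pieces might be exercised, assuming LiteLLM's public `litellm.completion` call and the `litellm.drop_params` setting; the model name and `api_base` URL are placeholders.

```python
# Hypothetical usage sketch (not code from this commit); the model name and
# api_base URL below are placeholders.
import litellm

# Ask LiteLLM to drop request params a given model/provider doesn't accept
# (e.g. `temperature` for azure o1, per the fix above) instead of erroring.
litellm.drop_params = True

# Chat completion routed to a self-hosted vLLM server via the hosted_vllm/ prefix.
response = litellm.completion(
    model="hosted_vllm/facebook/opt-125m",          # placeholder model name
    api_base="http://localhost:8000/v1",            # placeholder vLLM endpoint
    messages=[{"role": "user", "content": "ping"}],
    temperature=0.2,
)
print(response.choices[0].message.content)
```

With `drop_params` enabled, parameters a target model does not support are stripped from the request rather than raising an error, which is the behavior the o1 temperature fix relies on.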
Parent: 5de69cb1b2
Commit: 6729c9ca7f
17 changed files with 643 additions and 76 deletions
The relevant test change parametrizes `test_completion_vllm` over the provider prefix:

```diff
@@ -4223,7 +4223,8 @@ def mock_post(*args, **kwargs):
     return mock_response
 
 
-def test_completion_vllm():
+@pytest.mark.parametrize("provider", ["openai", "hosted_vllm"])
+def test_completion_vllm(provider):
     """
     Asserts a text completion call for vllm actually goes to the text completion endpoint
     """
@@ -4235,7 +4236,10 @@ def test_completion_vllm():
         client.completions.with_raw_response, "create", side_effect=mock_post
     ) as mock_call:
         response = text_completion(
-            model="openai/gemini-1.5-flash", prompt="ping", client=client, hello="world"
+            model="{provider}/gemini-1.5-flash".format(provider=provider),
+            prompt="ping",
+            client=client,
+            hello="world",
         )
         print("raw response", response)
 
```
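For context on what the parametrized test asserts: a `text_completion` call with a `hosted_vllm/`-prefixed model should be sent to the text-completion (`/completions`) route rather than chat completions. A hypothetical, unmocked counterpart of that call, with placeholder model name and `api_base`:

```python
# Hypothetical, unmocked counterpart of what the test exercises; the model
# name and api_base below are placeholders, not values from this diff.
from litellm import text_completion

response = text_completion(
    model="hosted_vllm/facebook/opt-125m",   # provider prefix decides the route
    prompt="ping",
    api_base="http://localhost:8000/v1",     # placeholder vLLM endpoint
)
print(response.choices[0].text)
```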