Mirror of https://github.com/BerriAI/litellm.git
LiteLLM Minor Fixes & Improvements (2024/16/01) (#7826)
* fix(lm_studio/chat/transformation.py): fix https://github.com/BerriAI/litellm/issues/7811
* fix(router.py): fix mock timeout check
* fix: drop the model name from fallback args, since it conflicts with the model=model that is provided later on (#7806). This error happens if you pass multiple fallback models to the completion function with a model name defined in each one.
* fix(router.py): remove mock_timeout before sending the request, so it is not reused in fallbacks
* test: update test
* test: revert test change (wrong PR)

Co-authored-by: Dudu Lasry <david1542@users.noreply.github.com>
parent a2762fb273
commit 1bea338597
7 changed files with 40 additions and 9 deletions
```diff
@@ -40,7 +40,7 @@ async def async_completion_with_fallbacks(**kwargs):
             # Handle dictionary fallback configurations
             if isinstance(fallback, dict):
-                model = fallback.get("model", original_model)
+                model = fallback.pop("model", original_model)
                 completion_kwargs.update(fallback)
             else:
                 model = fallback
```
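The `.get()` to `.pop()` change matters because `completion_kwargs.update(fallback)` otherwise copies the fallback's `"model"` key into the kwargs, which then collides with the explicit `model=...` argument passed further down. Below is a minimal standalone sketch of that collision; `fake_completion` and `run_fallback` are illustrative stand-ins, not litellm internals.

```python
# Minimal sketch of the keyword conflict fixed in the hunk above.
def fake_completion(model, **kwargs):
    # Stand-in for the downstream completion call that also receives model=...
    return {"model": model, **kwargs}


def run_fallback(fallback, original_model="gpt-4o", **completion_kwargs):
    if isinstance(fallback, dict):
        # With .get() the "model" key stayed inside the fallback dict, so the
        # update() below copied it into completion_kwargs and the later
        # model=model argument raised:
        #   TypeError: got multiple values for keyword argument 'model'
        model = fallback.pop("model", original_model)
        completion_kwargs.update(fallback)
    else:
        model = fallback
    return fake_completion(model=model, **completion_kwargs)


print(run_fallback({"model": "azure/gpt-4o-mini", "temperature": 0.2}))
```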
```diff
@@ -384,6 +384,7 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
         dynamic_api_key: Optional[str]
         api_base: Optional[str]
     """

     custom_llm_provider = model.split("/", 1)[0]
     model = model.split("/", 1)[1]
```
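For reference, the provider/model split shown in the context above works like this; the model string is just an example value.

```python
# Quick illustration of the "provider/model" split used by the helper above.
model = "lm_studio/typhoon2-quen2.5-7b-instruct"
custom_llm_provider = model.split("/", 1)[0]  # -> "lm_studio"
model = model.split("/", 1)[1]                # -> "typhoon2-quen2.5-7b-instruct"
print(custom_llm_provider, model)
```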
```diff
@@ -15,6 +15,6 @@ class LMStudioChatConfig(OpenAIGPTConfig):
     ) -> Tuple[Optional[str], Optional[str]]:
         api_base = api_base or get_secret_str("LM_STUDIO_API_BASE")  # type: ignore
         dynamic_api_key = (
-            api_key or get_secret_str("LM_STUDIO_API_KEY") or ""
+            api_key or get_secret_str("LM_STUDIO_API_KEY") or " "
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
```
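The one-character change above (empty string to a single space) is what addresses issue #7811: an empty `dynamic_api_key` is falsy, so downstream code treated the key as unset and the OpenAI client ended up demanding `OPENAI_API_KEY`, while a single space is truthy and flows through for key-less local servers. A hedged sketch of that resolution logic, with `os.environ.get` standing in for litellm's `get_secret_str` so the snippet runs on its own:

```python
import os
from typing import Optional, Tuple


def lm_studio_provider_info(
    api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
    api_base = api_base or os.environ.get("LM_STUDIO_API_BASE")
    # "" is falsy, so callers treated the key as missing and asked for a real
    # OPENAI_API_KEY; " " is truthy and passes through for local servers
    # (LM Studio, vllm) that do not check the key at all.
    dynamic_api_key = api_key or os.environ.get("LM_STUDIO_API_KEY") or " "
    return api_base, dynamic_api_key


print(lm_studio_provider_info("http://localhost:1234/v1", None))
```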
```diff
@@ -358,6 +358,7 @@ class OpenAIChatCompletion(BaseLLM):
                     organization=organization,
                 )
             else:
                 _new_client = OpenAI(
                     api_key=api_key,
                     base_url=api_base,
```
```diff
@@ -1,5 +1,13 @@
 model_list:
-  - model_name: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+  - model_name: openai-gpt-4o
     litellm_params:
-      model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
+      model: openai/my-fake-openai-endpoint
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: openai-o1
+    litellm_params:
+      model: openai/o1
+      api_key: sk-1234
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+      mock_timeout: true
+      timeout: 3
```
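The rewritten config points at a fake OpenAI-compatible endpoint and adds an `openai-o1` deployment with `mock_timeout: true`, which is what the router changes below are exercised against. A hypothetical client call against a locally running litellm proxy started with this config (the base_url, port, and `sk-1234` key are placeholders, not real credentials):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

try:
    # "openai-o1" is configured with mock_timeout: true and timeout: 3, so the
    # expectation is a timeout error after roughly 3 seconds rather than a
    # real completion.
    client.chat.completions.create(
        model="openai-o1",
        messages=[{"role": "user", "content": "ping"}],
    )
except Exception as e:
    print(type(e).__name__, e)
```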
```diff
@@ -810,6 +810,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["stream"] = stream
         kwargs["original_function"] = self._acompletion

         self._update_kwargs_before_fallbacks(model=model, kwargs=kwargs)
         request_priority = kwargs.get("priority") or self.default_priority
         start_time = time.time()
```
```diff
@@ -891,8 +892,8 @@ class Router:
                 deployment=deployment, parent_otel_span=parent_otel_span
             )
             self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)

             data = deployment["litellm_params"].copy()

             model_name = data["model"]

             model_client = self._get_async_openai_model_client(
```
```diff
@@ -2790,9 +2791,12 @@ class Router:
                 content_policy_fallbacks=content_policy_fallbacks,
             )

-            response = await self.async_function_with_retries(
-                *args, **kwargs, mock_timeout=mock_timeout
-            )
+            if mock_timeout is not None:
+                response = await self.async_function_with_retries(
+                    *args, **kwargs, mock_timeout=mock_timeout
+                )
+            else:
+                response = await self.async_function_with_retries(*args, **kwargs)
             verbose_router_logger.debug(f"Async Response: {response}")
             return response
         except Exception as e:
```
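The branch added above only forwards `mock_timeout` when it was explicitly set; always passing `mock_timeout=mock_timeout` injected the key into every call (and into the kwargs reused by fallbacks) even when it was None. A small standalone illustration of that kwargs-forwarding difference; the function name and signature here are illustrative, not litellm's:

```python
import asyncio


async def function_with_retries(*args, **kwargs):
    # Illustrative stand-in for the retried router call; just report the keys
    # it actually received.
    return sorted(kwargs.keys())


async def main():
    mock_timeout = None
    # Old behaviour: the key is always present downstream, even when unset.
    print(await function_with_retries(model="gpt-4o", mock_timeout=mock_timeout))
    # New behaviour: the key only appears when a mock timeout was requested.
    if mock_timeout is not None:
        print(await function_with_retries(model="gpt-4o", mock_timeout=mock_timeout))
    else:
        print(await function_with_retries(model="gpt-4o"))


asyncio.run(main())
```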
```diff
@@ -1756,6 +1756,23 @@ async def test_openai_compatible_custom_api_base(provider):
     assert "hello" in mock_call.call_args.kwargs["extra_body"]


+def test_lm_studio_completion(monkeypatch):
+    monkeypatch.delenv("LM_STUDIO_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    try:
+        completion(
+            model="lm_studio/typhoon2-quen2.5-7b-instruct",
+            messages=[
+                {"role": "user", "content": "What's the weather like in San Francisco?"}
+            ],
+            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
+        )
+    except litellm.AuthenticationError as e:
+        pytest.fail(f"Error occurred: {e}")
+    except litellm.APIError as e:
+        print(e)
+
+
 @pytest.mark.asyncio
 async def test_litellm_gateway_from_sdk():
     litellm.set_verbose = True
```