Litellm dev 12 25 2024 p1 (#7411)

* test(test_watsonx.py): e2e unit test for watsonx custom header

covers https://github.com/BerriAI/litellm/issues/7408

* fix(common_utils.py): handle auth token already present in headers (watsonx + openai-like base handler)

Fixes https://github.com/BerriAI/litellm/issues/7408

* fix(watsonx/chat): fix chat route

Fixes https://github.com/BerriAI/litellm/issues/7408

* fix(huggingface/chat/handler.py): fix huggingface async completion calls

* Correct handling of max_retries=0 to disable AzureOpenAI retries (#7379)

* test: fix test

---------

Co-authored-by: Minh Duc <phamminhduc0711@gmail.com>
This commit is contained in:
Krish Dholakia 2024-12-25 17:36:30 -08:00 committed by GitHub
parent 157810fcbf
commit 9237357bcc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 299 additions and 14 deletions

View file

@@ -203,7 +203,26 @@ class Huggingface(BaseLLM):
return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model, timeout=timeout, messages=messages) # type: ignore
else:
### ASYNC COMPLETION
return self.acompletion(api_base=completion_url, data=data, headers=headers, model_response=model_response, task=task, encoding=encoding, model=model, optional_params=optional_params, timeout=timeout, litellm_params=litellm_params) # type: ignore
return self.acompletion(
api_base=completion_url,
data=data,
headers=headers,
model_response=model_response,
encoding=encoding,
model=model,
optional_params=optional_params,
timeout=timeout,
litellm_params=litellm_params,
logging_obj=logging_obj,
api_key=api_key,
messages=messages,
client=(
client
if client is not None
and isinstance(client, AsyncHTTPHandler)
else None
),
)
if client is None or not isinstance(client, HTTPHandler):
client = _get_httpx_client()
### SYNC STREAMING
@@ -267,14 +286,16 @@ class Huggingface(BaseLLM):
logging_obj: LiteLLMLoggingObj,
api_key: str,
messages: List[AllMessageValues],
client: Optional[AsyncHTTPHandler] = None,
):
response: Optional[httpx.Response] = None
try:
http_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.HUGGINGFACE
)
if client is None:
client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.HUGGINGFACE
)
### ASYNC COMPLETION
http_response = await http_client.post(
http_response = await client.post(
url=api_base, headers=headers, data=json.dumps(data), timeout=timeout
)