mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
* refactor: initial commit moving watsonx_text to base_llm_http_handler + clarifying new provider directory structure * refactor(watsonx/completion/handler.py): move to using base llm http handler removes 'requests' library usage * fix(watsonx_text/transformation.py): fix result transformation migrates to transformation.py, for usage with base llm http handler * fix(streaming_handler.py): migrate watsonx streaming to transformation.py ensures streaming works with base llm http handler * fix(streaming_handler.py): fix streaming linting errors and remove watsonx conditional logic * fix(watsonx/): fix chat route post completion route refactor * refactor(watsonx/embed): refactor watsonx to use base llm http handler for embedding calls as well * refactor(base.py): remove requests library usage from litellm * build(pyproject.toml): remove requests library usage * fix: fix linting errors * fix: fix linting errors * fix(types/utils.py): fix validation errors for modelresponsestream * fix(replicate/handler.py): fix linting errors * fix(litellm_logging.py): handle modelresponsestream object * fix(streaming_handler.py): fix modelresponsestream args * fix: remove unused imports * test: fix test * fix: fix test * test: fix test * test: fix tests * test: fix test * test: fix patch target * test: fix test
689 lines
22 KiB
Python
689 lines
22 KiB
Python
import json
|
|
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
|
|
|
|
import httpx # type: ignore
|
|
|
|
import litellm
|
|
import litellm.litellm_core_utils
|
|
import litellm.types
|
|
import litellm.types.utils
|
|
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
|
|
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
|
|
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
|
|
from litellm.llms.custom_httpx.http_handler import (
|
|
AsyncHTTPHandler,
|
|
HTTPHandler,
|
|
_get_httpx_client,
|
|
get_async_httpx_client,
|
|
)
|
|
from litellm.types.rerank import OptionalRerankParams, RerankResponse
|
|
from litellm.types.utils import EmbeddingResponse
|
|
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
|
|
|
# The logging class is imported only while a static type checker is running;
# at runtime the alias is plain ``Any``, so this module does not import the
# logging module itself.
if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any
|
|
|
|
|
|
class BaseLLMHTTPHandler:
|
|
async def async_completion(
    self,
    custom_llm_provider: str,
    provider_config: BaseConfig,
    api_base: str,
    headers: dict,
    data: dict,
    timeout: Union[float, httpx.Timeout],
    model: str,
    model_response: ModelResponse,
    logging_obj: LiteLLMLoggingObj,
    messages: list,
    optional_params: dict,
    litellm_params: dict,
    encoding: Any,
    api_key: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
):
    """POST a non-streaming chat request asynchronously and transform the reply.

    ``data`` is serialized with ``json.dumps`` and sent to ``api_base``. When
    ``client`` is ``None`` a client is obtained from ``get_async_httpx_client``
    for ``custom_llm_provider``; otherwise the given client is used as-is.

    Returns:
        The value produced by ``provider_config.transform_response`` for the
        raw HTTP response.

    Raises:
        Whatever ``self._handle_error`` raises when serializing or sending
        the request fails.
    """
    http_client = (
        client
        if client is not None
        else get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider)
        )
    )

    try:
        raw_response = await http_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except Exception as e:
        # Every failure is funnelled through the provider-specific error mapping.
        raise self._handle_error(e=e, provider_config=provider_config)

    transform_kwargs = dict(
        model=model,
        raw_response=raw_response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=data,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        encoding=encoding,
    )
    return provider_config.transform_response(**transform_kwargs)
|
|
|
|
def completion(
    self,
    model: str,
    messages: list,
    api_base: str,
    custom_llm_provider: str,
    model_response: ModelResponse,
    encoding,
    logging_obj: LiteLLMLoggingObj,
    optional_params: dict,
    timeout: Union[float, httpx.Timeout],
    litellm_params: dict,
    acompletion: bool,
    stream: Optional[bool] = False,
    fake_stream: bool = False,
    api_key: Optional[str] = None,
    headers: Optional[dict] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
):
    """Run a chat completion through the provider's chat config.

    In order: look up the chat config for ``custom_llm_provider``, let it
    build the request headers, the complete URL and the request body, record
    the outgoing call via ``logging_obj.pre_call``, then dispatch on
    ``acompletion`` / ``stream`` (both compared with ``is True``).

    Args:
        headers: Caller-supplied request headers. ``None`` (the default)
            means "no extra headers" and is replaced by a fresh dict.
        fake_stream: When true, ``"stream"`` is not added to the request
            body and the flag is forwarded to the streaming helpers.
        client: Optional HTTP client. It is only used on a path whose
            sync/async kind matches its type; otherwise it is ignored.

    Returns:
        * ``acompletion`` true: the un-awaited coroutine of
          ``acompletion_stream_function`` (streaming) or
          ``async_completion`` (non-streaming).
        * sync and ``stream`` true: a ``CustomStreamWrapper``.
        * otherwise: the result of ``provider_config.transform_response``.

    Raises:
        Whatever ``self._handle_error`` raises when the synchronous
        non-streaming POST fails.
    """
    # A literal ``{}`` default is a single dict shared by every call. This
    # dict is handed to ``validate_environment`` below, so anything written
    # into it would leak into later calls; start from a fresh dict instead.
    if headers is None:
        headers = {}

    # get config from model, custom llm provider
    provider_config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=litellm.LlmProviders(custom_llm_provider)
    )
    headers = provider_config.validate_environment(
        api_key=api_key,
        headers=headers,
        model=model,
        messages=messages,
        optional_params=optional_params,
    )

    api_base = provider_config.get_complete_url(
        api_base=api_base,
        model=model,
        optional_params=optional_params,
        stream=stream,
    )

    data = provider_config.transform_request(
        model=model,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        headers=headers,
    )

    ## LOGGING
    logging_obj.pre_call(
        input=messages,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "api_base": api_base,
            "headers": headers,
        },
    )

    if acompletion is True:
        # Only forward a client that can actually be awaited.
        async_client = (
            client
            if client is not None and isinstance(client, AsyncHTTPHandler)
            else None
        )
        if stream is True:
            if fake_stream is not True:
                data["stream"] = stream
            return self.acompletion_stream_function(
                model=model,
                messages=messages,
                api_base=api_base,
                headers=headers,
                custom_llm_provider=custom_llm_provider,
                provider_config=provider_config,
                timeout=timeout,
                logging_obj=logging_obj,
                data=data,
                fake_stream=fake_stream,
                client=async_client,
            )

        return self.async_completion(
            custom_llm_provider=custom_llm_provider,
            provider_config=provider_config,
            api_base=api_base,
            headers=headers,
            data=data,
            timeout=timeout,
            model=model,
            model_response=model_response,
            logging_obj=logging_obj,
            api_key=api_key,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            encoding=encoding,
            client=async_client,
        )

    if stream is True:
        if fake_stream is not True:
            data["stream"] = stream
        # The response headers are not needed here; bind them to a throwaway
        # name instead of overwriting the request ``headers`` local.
        completion_stream, _response_headers = self.make_sync_call(
            provider_config=provider_config,
            api_base=api_base,
            headers=headers,  # type: ignore
            data=json.dumps(data),
            model=model,
            messages=messages,
            logging_obj=logging_obj,
            timeout=timeout,
            fake_stream=fake_stream,
            client=(
                client
                if client is not None and isinstance(client, HTTPHandler)
                else None
            ),
        )
        return CustomStreamWrapper(
            completion_stream=completion_stream,
            model=model,
            custom_llm_provider=custom_llm_provider,
            logging_obj=logging_obj,
        )

    if client is None or not isinstance(client, HTTPHandler):
        sync_httpx_client = _get_httpx_client()
    else:
        sync_httpx_client = client

    try:
        response = sync_httpx_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except Exception as e:
        raise self._handle_error(
            e=e,
            provider_config=provider_config,
        )

    return provider_config.transform_response(
        model=model,
        raw_response=response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=data,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        encoding=encoding,
    )
|
|
|
|
def make_sync_call(
    self,
    provider_config: BaseConfig,
    api_base: str,
    headers: dict,
    data: str,
    model: str,
    messages: list,
    logging_obj,
    timeout: Optional[Union[float, httpx.Timeout]],
    fake_stream: bool = False,
    client: Optional[HTTPHandler] = None,
) -> Tuple[Any, httpx.Headers]:
    """Send a synchronous streaming request and wrap the reply in an iterator.

    ``data`` is the already-serialized request body. With ``fake_stream``
    the POST is made with ``stream=False`` and the parsed JSON body is given
    to the provider's response iterator; otherwise the POST is made with
    ``stream=True`` and ``response.iter_lines()`` is used.

    Returns:
        ``(completion_stream, response.headers)``.

    Raises:
        BaseLLMException: the response status code is not 200.
        Any exception whose type is in ``litellm.LITELLM_EXCEPTION_TYPES``
        is re-raised unchanged; every other failure of the POST goes
        through ``self._handle_error``.
    """
    # ``isinstance`` is False for ``None``, so this also covers "no client".
    if isinstance(client, HTTPHandler):
        http_client = client
    else:
        http_client = _get_httpx_client()

    try:
        wants_stream = fake_stream is not True
        response = http_client.post(
            api_base,
            headers=headers,
            data=data,
            timeout=timeout,
            stream=wants_stream,
        )
    except httpx.HTTPStatusError as e:
        raise self._handle_error(e=e, provider_config=provider_config)
    except Exception as e:
        already_litellm_error = any(
            isinstance(e, known_type)
            for known_type in litellm.LITELLM_EXCEPTION_TYPES
        )
        if already_litellm_error:
            raise e
        raise self._handle_error(e=e, provider_config=provider_config)

    if response.status_code != 200:
        raise BaseLLMException(
            status_code=response.status_code,
            message=str(response.read()),
        )

    if fake_stream is True:
        stream_source = response.json()
    else:
        stream_source = response.iter_lines()
    completion_stream = provider_config.get_model_response_iterator(
        streaming_response=stream_source, sync_stream=True
    )

    # LOGGING
    logging_obj.post_call(
        input=messages,
        api_key="",
        original_response="first stream response received",
        additional_args={"complete_input_dict": data},
    )

    return completion_stream, response.headers
|
|
|
|
async def acompletion_stream_function(
    self,
    model: str,
    messages: list,
    api_base: str,
    custom_llm_provider: str,
    headers: dict,
    provider_config: BaseConfig,
    timeout: Union[float, httpx.Timeout],
    logging_obj: LiteLLMLoggingObj,
    data: dict,
    fake_stream: bool = False,
    client: Optional[AsyncHTTPHandler] = None,
):
    """Start an async streaming request and return it as a ``CustomStreamWrapper``.

    The request body ``data`` is serialized with ``json.dumps`` and sent via
    ``self.make_async_call``; the response headers that call returns are
    discarded.
    """
    serialized_body = json.dumps(data)
    stream_and_headers = await self.make_async_call(
        custom_llm_provider=custom_llm_provider,
        provider_config=provider_config,
        api_base=api_base,
        headers=headers,
        data=serialized_body,
        messages=messages,
        logging_obj=logging_obj,
        timeout=timeout,
        fake_stream=fake_stream,
        client=client,
    )
    # Two-element unpacking, exactly as the tuple is produced by make_async_call.
    completion_stream, _unused_headers = stream_and_headers

    return CustomStreamWrapper(
        completion_stream=completion_stream,
        model=model,
        custom_llm_provider=custom_llm_provider,
        logging_obj=logging_obj,
    )
|
|
|
|
async def make_async_call(
    self,
    custom_llm_provider: str,
    provider_config: BaseConfig,
    api_base: str,
    headers: dict,
    data: str,
    messages: list,
    logging_obj: LiteLLMLoggingObj,
    timeout: Optional[Union[float, httpx.Timeout]],
    fake_stream: bool = False,
    client: Optional[AsyncHTTPHandler] = None,
) -> Tuple[Any, httpx.Headers]:
    """Send an async streaming request and wrap the reply in an iterator.

    ``data`` is the already-serialized request body. With ``fake_stream``
    the POST is made with ``stream=False`` and the parsed JSON body is given
    to the provider's response iterator; otherwise the POST is made with
    ``stream=True`` and ``response.aiter_lines()`` is used. When ``client``
    is ``None`` one is obtained from ``get_async_httpx_client``.

    Returns:
        ``(completion_stream, response.headers)``.

    Raises:
        BaseLLMException: the response status code is not 200.
        Any exception whose type is in ``litellm.LITELLM_EXCEPTION_TYPES``
        is re-raised unchanged; every other failure of the POST goes
        through ``self._handle_error``.
    """
    if client is None:
        async_httpx_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider)
        )
    else:
        async_httpx_client = client

    # "Fake" streaming is a normal request whose body is replayed as a stream.
    stream = fake_stream is not True

    try:
        response = await async_httpx_client.post(
            api_base, headers=headers, data=data, stream=stream, timeout=timeout
        )
    except httpx.HTTPStatusError as e:
        raise self._handle_error(
            e=e,
            provider_config=provider_config,
        )
    except Exception as e:
        for exception in litellm.LITELLM_EXCEPTION_TYPES:
            if isinstance(e, exception):
                raise e
        raise self._handle_error(
            e=e,
            provider_config=provider_config,
        )

    if response.status_code != 200:
        # The body must be read with the async reader: this is a coroutine
        # and, when ``stream`` is true, the response is an async stream on
        # which a synchronous ``read()`` is not valid. ``aread()`` also
        # works for a response whose body has already been loaded.
        error_body = await response.aread()
        # NOTE(review): str() of bytes yields the ``b'...'`` repr; kept so
        # the message format stays what callers already see.
        raise BaseLLMException(
            status_code=response.status_code,
            message=str(error_body),
        )

    if fake_stream is True:
        completion_stream = provider_config.get_model_response_iterator(
            streaming_response=response.json(), sync_stream=False
        )
    else:
        completion_stream = provider_config.get_model_response_iterator(
            streaming_response=response.aiter_lines(), sync_stream=False
        )

    # LOGGING
    logging_obj.post_call(
        input=messages,
        api_key="",
        original_response="first stream response received",
        additional_args={"complete_input_dict": data},
    )

    return completion_stream, response.headers
|
|
|
|
def embedding(
    self,
    model: str,
    input: list,
    timeout: float,
    custom_llm_provider: str,
    logging_obj: LiteLLMLoggingObj,
    api_base: Optional[str],
    optional_params: dict,
    litellm_params: dict,
    model_response: EmbeddingResponse,
    api_key: Optional[str] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    aembedding: bool = False,
    headers: Optional[dict] = None,
) -> EmbeddingResponse:
    """Run an embedding request through the provider's embedding config.

    In order: look up the embedding config for ``custom_llm_provider``, let
    it build the request headers, the complete URL and the request body,
    record the outgoing call via ``logging_obj.pre_call``, then either
    delegate to ``self.aembedding`` or POST synchronously.

    Args:
        headers: Caller-supplied request headers. ``None`` (the default)
            means "no extra headers" and is replaced by a fresh dict.
        aembedding: When ``True`` the un-awaited coroutine of
            ``self.aembedding`` is returned instead of a response.
        client: Optional HTTP client; on the sync path it is used only if
            it is an ``HTTPHandler``.

    Returns:
        The result of ``provider_config.transform_embedding_response`` on
        the sync path, or a coroutine when ``aembedding`` is ``True``.

    Raises:
        Whatever ``self._handle_error`` raises when the synchronous POST fails.
    """
    # A literal ``{}`` default is a single dict shared by every call. This
    # dict is handed to ``validate_environment`` below, so anything written
    # into it would leak into later calls; start from a fresh dict instead.
    if headers is None:
        headers = {}

    # get config from model, custom llm provider
    provider_config = ProviderConfigManager.get_provider_embedding_config(
        model=model, provider=litellm.LlmProviders(custom_llm_provider)
    )
    headers = provider_config.validate_environment(
        api_key=api_key,
        headers=headers,
        model=model,
        messages=[],
        optional_params=optional_params,
    )

    api_base = provider_config.get_complete_url(
        api_base=api_base,
        model=model,
        optional_params=optional_params,
    )

    data = provider_config.transform_embedding_request(
        model=model,
        input=input,
        optional_params=optional_params,
        headers=headers,
    )

    ## LOGGING
    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "api_base": api_base,
            "headers": headers,
        },
    )

    if aembedding is True:
        return self.aembedding(  # type: ignore
            request_data=data,
            api_base=api_base,
            headers=headers,
            model=model,
            custom_llm_provider=custom_llm_provider,
            provider_config=provider_config,
            model_response=model_response,
            logging_obj=logging_obj,
            api_key=api_key,
            timeout=timeout,
            client=client,
            optional_params=optional_params,
            litellm_params=litellm_params,
        )

    if client is None or not isinstance(client, HTTPHandler):
        sync_httpx_client = _get_httpx_client()
    else:
        sync_httpx_client = client

    try:
        response = sync_httpx_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except Exception as e:
        raise self._handle_error(
            e=e,
            provider_config=provider_config,
        )

    return provider_config.transform_embedding_response(
        model=model,
        raw_response=response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=data,
        optional_params=optional_params,
        litellm_params=litellm_params,
    )
|
|
|
|
async def aembedding(
    self,
    request_data: dict,
    api_base: str,
    headers: dict,
    model: str,
    custom_llm_provider: str,
    provider_config: BaseEmbeddingConfig,
    model_response: EmbeddingResponse,
    logging_obj: LiteLLMLoggingObj,
    optional_params: dict,
    litellm_params: dict,
    api_key: Optional[str] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> EmbeddingResponse:
    """POST an embedding request asynchronously and transform the reply.

    ``request_data`` is serialized with ``json.dumps`` and sent to
    ``api_base``. ``client`` is used only when it is an ``AsyncHTTPHandler``;
    in every other case a client is obtained from ``get_async_httpx_client``.

    Returns:
        The result of ``provider_config.transform_embedding_response``.

    Raises:
        Whatever ``self._handle_error`` raises when serializing or sending
        the request fails.
    """
    # ``isinstance`` is False for ``None``, so this also covers "no client".
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider)
        )

    try:
        raw_response = await http_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(request_data),
            timeout=timeout,
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=provider_config)

    transform_kwargs = dict(
        model=model,
        raw_response=raw_response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=request_data,
        optional_params=optional_params,
        litellm_params=litellm_params,
    )
    return provider_config.transform_embedding_response(**transform_kwargs)
|
|
|
|
def rerank(
    self,
    model: str,
    custom_llm_provider: str,
    logging_obj: LiteLLMLoggingObj,
    optional_rerank_params: OptionalRerankParams,
    timeout: Optional[Union[float, httpx.Timeout]],
    model_response: RerankResponse,
    _is_async: bool = False,
    headers: Optional[dict] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> RerankResponse:
    """Run a rerank request through the provider's rerank config.

    In order: look up the rerank config for ``custom_llm_provider``, let it
    build the request headers, the complete URL and the request body, record
    the outgoing call via ``logging_obj.pre_call`` (using the ``"query"``
    entry of ``optional_rerank_params`` as the logged input), then either
    delegate to ``self.arerank`` or POST synchronously.

    Args:
        headers: Caller-supplied request headers. ``None`` (the default)
            means "no extra headers" and is replaced by a fresh dict.
        _is_async: When ``True`` the un-awaited coroutine of
            ``self.arerank`` is returned instead of a response.
        client: Optional HTTP client; on the sync path it is used only if
            it is an ``HTTPHandler``.

    Returns:
        The result of ``provider_config.transform_rerank_response`` on the
        sync path, or a coroutine when ``_is_async`` is ``True``.

    Raises:
        Whatever ``self._handle_error`` raises when the synchronous POST fails.
    """
    # A literal ``{}`` default is a single dict shared by every call. This
    # dict is handed to ``validate_environment`` below, so anything written
    # into it would leak into later calls; start from a fresh dict instead.
    if headers is None:
        headers = {}

    # get config from model, custom llm provider
    provider_config = ProviderConfigManager.get_provider_rerank_config(
        model=model, provider=litellm.LlmProviders(custom_llm_provider)
    )
    headers = provider_config.validate_environment(
        api_key=api_key,
        headers=headers,
        model=model,
    )

    api_base = provider_config.get_complete_url(
        api_base=api_base,
        model=model,
    )

    data = provider_config.transform_rerank_request(
        model=model,
        optional_rerank_params=optional_rerank_params,
        headers=headers,
    )

    ## LOGGING
    logging_obj.pre_call(
        input=optional_rerank_params.get("query", ""),
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "api_base": api_base,
            "headers": headers,
        },
    )

    if _is_async is True:
        return self.arerank(  # type: ignore
            model=model,
            request_data=data,
            custom_llm_provider=custom_llm_provider,
            provider_config=provider_config,
            logging_obj=logging_obj,
            model_response=model_response,
            api_base=api_base,
            headers=headers,
            api_key=api_key,
            timeout=timeout,
            client=client,
        )

    if client is None or not isinstance(client, HTTPHandler):
        sync_httpx_client = _get_httpx_client()
    else:
        sync_httpx_client = client

    try:
        response = sync_httpx_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
    except Exception as e:
        raise self._handle_error(
            e=e,
            provider_config=provider_config,
        )

    return provider_config.transform_rerank_response(
        model=model,
        raw_response=response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=data,
    )
|
|
|
|
async def arerank(
    self,
    model: str,
    request_data: dict,
    custom_llm_provider: str,
    provider_config: BaseRerankConfig,
    logging_obj: LiteLLMLoggingObj,
    model_response: RerankResponse,
    api_base: str,
    headers: dict,
    api_key: Optional[str] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> RerankResponse:
    """POST a rerank request asynchronously and transform the reply.

    ``request_data`` is serialized with ``json.dumps`` and sent to
    ``api_base``. ``client`` is used only when it is an ``AsyncHTTPHandler``;
    in every other case a client is obtained from ``get_async_httpx_client``.

    Returns:
        The result of ``provider_config.transform_rerank_response``.

    Raises:
        Whatever ``self._handle_error`` raises when serializing or sending
        the request fails.
    """
    # ``isinstance`` is False for ``None``, so this also covers "no client".
    if isinstance(client, AsyncHTTPHandler):
        http_client = client
    else:
        http_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders(custom_llm_provider)
        )

    try:
        raw_response = await http_client.post(
            url=api_base,
            headers=headers,
            data=json.dumps(request_data),
            timeout=timeout,
        )
    except Exception as e:
        raise self._handle_error(e=e, provider_config=provider_config)

    transform_kwargs = dict(
        model=model,
        raw_response=raw_response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=request_data,
    )
    return provider_config.transform_rerank_response(**transform_kwargs)
|
|
|
|
def _handle_error(
    self,
    e: Exception,
    provider_config: Union[BaseConfig, BaseEmbeddingConfig, BaseRerankConfig],
):
    """Translate ``e`` into the provider's error class and raise it.

    This method never returns. The pieces of the provider error are
    resolved as follows:

    * status code: ``e.status_code``, else ``e.response.status_code``,
      else ``500``.
    * headers: ``e.headers``, else ``e.response.headers``; always passed on
      as a plain ``dict`` (empty when nothing was found).
    * message: ``e.response.text`` when the response has a ``text``
      attribute, else ``e.text``, else ``str(e)``.

    Raises:
        The exception built by ``provider_config.get_error_class``, chained
        to ``e`` so the original failure stays visible in tracebacks.
    """
    error_response = getattr(e, "response", None)
    # Compare against None rather than relying on truthiness: a response
    # object may define ``__bool__`` and be falsy precisely when it
    # carries an error, which would hide its headers and body.
    has_response = error_response is not None

    status_code = getattr(e, "status_code", None)
    if status_code is None and has_response:
        # Exceptions that only carry a response still know the real status.
        status_code = getattr(error_response, "status_code", None)
    if status_code is None:
        status_code = 500

    error_headers = getattr(e, "headers", None)
    if error_headers is None and has_response:
        error_headers = getattr(error_response, "headers", None)

    error_text = getattr(e, "text", str(e))
    if has_response and hasattr(error_response, "text"):
        error_text = getattr(error_response, "text", error_text)

    raise provider_config.get_error_class(
        error_message=error_text,
        status_code=status_code,
        headers=dict(error_headers) if error_headers else {},
    ) from e
|