LITELLM: Remove requests library usage (#7235)
* fix(generic_api_callback.py): remove requests lib usage
* fix(budget_manager.py): remove requests lib usage
* fix(main.py): cleanup requests lib usage
* fix(utils.py): remove requests lib usage
* fix(argilla.py): fix argilla test
* fix(athina.py): replace 'requests' lib usage with litellm module
* fix(greenscale.py): replace 'requests' lib usage with httpx
* fix: remove unused 'requests' lib import + replace usage in some places
* fix(prompt_layer.py): remove 'requests' lib usage from prompt layer
* fix(ollama_chat.py): remove 'requests' lib usage
* fix(baseten.py): replace 'requests' lib usage
* fix(codestral/): replace 'requests' lib usage
* fix(predibase/): replace 'requests' lib usage
* refactor: cleanup unused 'requests' lib imports
* fix(oobabooga.py): cleanup 'requests' lib usage
* fix(invoke_handler.py): remove unused 'requests' lib usage
* refactor: cleanup unused 'requests' lib import
* fix: fix linting errors
* refactor(ollama/): move ollama to using base llm http handler; removes 'requests' lib dep for ollama integration
* fix(ollama_chat.py): fix linting errors
* fix(ollama/completion/transformation.py): convert non-jpeg/png image to jpeg/png before passing to ollama
Parent: f628290ce7
Commit: 03e711e3e4
46 changed files with 523 additions and 612 deletions
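The theme across every touched file is the same: blocking `requests` calls are replaced with httpx-backed calls, either directly or through litellm's base HTTP handler. As a rough illustration of that pattern (a sketch, not code from this commit; the helper names are made up):

# Illustrative sketch of the requests -> httpx swap; not taken from the commit.
# `post_generate` / `apost_generate` are hypothetical helper names.
import httpx

# Before: import requests; resp = requests.post(url, json=payload, timeout=600)

def post_generate(url: str, payload: dict) -> dict:
    # synchronous call, same shape as the old requests.post usage
    with httpx.Client(timeout=600.0) as client:
        resp = client.post(url, json=payload)
        resp.raise_for_status()
        return resp.json()

async def apost_generate(url: str, payload: dict) -> dict:
    # async variant -- one practical benefit of standardizing on httpx
    async with httpx.AsyncClient(timeout=600.0) as client:
        resp = await client.post(url, json=payload)
        resp.raise_for_status()
        return resp.json()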
@@ -1,20 +1,34 @@
import json
import time
import types
from typing import TYPE_CHECKING, Any, List, Optional, Union
import uuid
from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, Union

from httpx._models import Headers, Response

import litellm
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_to_ollama_image,
    custom_prompt,
    ollama_pt,
)
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionToolCallChunk,
    ChatCompletionUsageBlock,
)
from litellm.types.utils import (
    GenericStreamingChunk,
    ModelInfo,
    ModelResponse,
    ProviderField,
    StreamingChoices,
)

from ..common_utils import OllamaError
from ..common_utils import OllamaError, _convert_image

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
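The new `_convert_image` import corresponds to the last bullet of the commit message (converting non-JPEG/PNG images before sending them to Ollama). Its implementation lives in `common_utils` and is not part of this hunk; the standalone sketch below (assuming Pillow, not litellm's actual code) shows what that kind of normalization looks like.

# Standalone sketch of base64-image normalization; NOT litellm's _convert_image.
import base64
import io

from PIL import Image  # assumes Pillow is available

def normalize_image_b64(image_b64: str) -> str:
    # decode the base64 payload, re-encode anything other than JPEG/PNG as JPEG,
    # and return base64 again so it can go into the request's "images" list
    raw = base64.b64decode(image_b64)
    img = Image.open(io.BytesIO(raw))
    if img.format in ("JPEG", "PNG"):
        return image_b64
    buf = io.BytesIO()
    img.convert("RGB").save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")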
@@ -247,7 +261,47 @@ class OllamaConfig(BaseConfig):
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        raise NotImplementedError("transformation currently done in handler.py")
        response_json = raw_response.json()
        ## RESPONSE OBJECT
        model_response.choices[0].finish_reason = "stop"
        if request_data.get("format", "") == "json":
            function_call = json.loads(response_json["response"])
            message = litellm.Message(
                content=None,
                tool_calls=[
                    {
                        "id": f"call_{str(uuid.uuid4())}",
                        "function": {
                            "name": function_call["name"],
                            "arguments": json.dumps(function_call["arguments"]),
                        },
                        "type": "function",
                    }
                ],
            )
            model_response.choices[0].message = message  # type: ignore
            model_response.choices[0].finish_reason = "tool_calls"
        else:
            model_response.choices[0].message.content = response_json["response"]  # type: ignore
        model_response.created = int(time.time())
        model_response.model = "ollama/" + model
        _prompt = request_data.get("prompt", "")
        prompt_tokens = response_json.get(
            "prompt_eval_count", len(encoding.encode(_prompt, disallowed_special=()))  # type: ignore
        )
        completion_tokens = response_json.get(
            "eval_count", len(response_json.get("message", dict()).get("content", ""))
        )
        setattr(
            model_response,
            "usage",
            litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )
        return model_response

    def transform_request(
        self,
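For context on the `transform_response` body in the hunk above: a non-streaming reply from Ollama's `/api/generate` endpoint carries the completion in `response` and the token counts in `prompt_eval_count` / `eval_count`, which is exactly what the code reads. An illustrative payload (values made up), showing the `format: json` case that gets re-parsed into a tool call:

# Illustrative /api/generate reply; values are made up, keys match what
# transform_response reads above.
example_json_mode_reply = {
    "model": "llama3",  # assumed model name
    "response": '{"name": "get_weather", "arguments": {"city": "Paris"}}',
    "done": True,
    "prompt_eval_count": 26,
    "eval_count": 12,
}
# With request_data["format"] == "json", the "response" string is json.loads'ed
# and surfaced as a single tool call; otherwise it becomes message.content.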
@@ -257,7 +311,46 @@ class OllamaConfig(BaseConfig):
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        raise NotImplementedError("transformation currently done in handler.py")
        custom_prompt_dict = (
            litellm_params.get("custom_prompt_dict") or litellm.custom_prompt_dict
        )
        if model in custom_prompt_dict:
            # check if the model has a registered custom prompt
            model_prompt_details = custom_prompt_dict[model]
            ollama_prompt = custom_prompt(
                role_dict=model_prompt_details["roles"],
                initial_prompt_value=model_prompt_details["initial_prompt_value"],
                final_prompt_value=model_prompt_details["final_prompt_value"],
                messages=messages,
            )
        else:
            modified_prompt = ollama_pt(model=model, messages=messages)
            if isinstance(modified_prompt, dict):
                ollama_prompt, images = (
                    modified_prompt["prompt"],
                    modified_prompt["images"],
                )
                optional_params["images"] = images
            else:
                ollama_prompt = modified_prompt
        stream = optional_params.pop("stream", False)
        format = optional_params.pop("format", None)
        images = optional_params.pop("images", None)
        data = {
            "model": model,
            "prompt": ollama_prompt,
            "options": optional_params,
            "stream": stream,
        }

        if format is not None:
            data["format"] = format
        if images is not None:
            data["images"] = [
                _convert_image(convert_to_ollama_image(image)) for image in images
            ]

        return data

    def validate_environment(
        self,
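To make the `transform_request` hunk above concrete, this is the kind of dict it assembles for a model without a registered custom prompt (a sketch: the prompt text depends on `ollama_pt`'s template, and the model name is made up):

# Sketch of the request body built above; prompt text and model name are
# illustrative, not the exact output of ollama_pt.
example_request_data = {
    "model": "llama3",
    "prompt": "### User:\nWhat is 2+2?\n\n### Assistant:\n",
    "options": {"temperature": 0.1},  # whatever is left in optional_params
    "stream": False,
    "format": "json",                 # only present if the caller asked for it
}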
@@ -267,4 +360,77 @@ class OllamaConfig(BaseConfig):
        optional_params: dict,
        api_key: Optional[str] = None,
    ) -> dict:
        raise NotImplementedError("validation currently done in handler.py")
        return headers

    def get_complete_url(self, api_base: str, model: str) -> str:
        """
        OPTIONAL

        Get the complete url for the request

        Some providers need `model` in `api_base`
        """
        if api_base.endswith("/api/generate"):
            url = api_base
        else:
            url = f"{api_base}/api/generate"

        return url

    def get_model_response_iterator(
        self,
        streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
        sync_stream: bool,
        json_mode: Optional[bool] = False,
    ):
        return OllamaTextCompletionResponseIterator(
            streaming_response=streaming_response,
            sync_stream=sync_stream,
            json_mode=json_mode,
        )


class OllamaTextCompletionResponseIterator(BaseModelResponseIterator):
    def _handle_string_chunk(self, str_line: str) -> GenericStreamingChunk:
        return self.chunk_parser(json.loads(str_line))

    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
        try:
            if "error" in chunk:
                raise Exception(f"Ollama Error - {chunk}")

            text = ""
            is_finished = False
            finish_reason = None
            if chunk["done"] is True:
                text = ""
                is_finished = True
                finish_reason = "stop"
                prompt_eval_count: Optional[int] = chunk.get("prompt_eval_count", None)
                eval_count: Optional[int] = chunk.get("eval_count", None)

                usage: Optional[ChatCompletionUsageBlock] = None
                if prompt_eval_count is not None and eval_count is not None:
                    usage = ChatCompletionUsageBlock(
                        prompt_tokens=prompt_eval_count,
                        completion_tokens=eval_count,
                        total_tokens=prompt_eval_count + eval_count,
                    )
                return GenericStreamingChunk(
                    text=text,
                    is_finished=is_finished,
                    finish_reason=finish_reason,
                    usage=usage,
                )
            elif chunk["response"]:
                text = chunk["response"]
                return GenericStreamingChunk(
                    text=text,
                    is_finished=is_finished,
                    finish_reason="stop",
                    usage=None,
                )
            else:
                raise Exception(f"Unable to parse ollama chunk - {chunk}")
        except Exception as e:
            raise e
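The streaming path above consumes newline-delimited JSON from `/api/generate`. A minimal standalone sketch (made-up values; it mirrors the `chunk_parser` logic above rather than calling the class) of what the parser sees and produces:

# Standalone illustration of the NDJSON chunks chunk_parser handles above.
import json

mid_stream_line = '{"response": "Hel", "done": false}'
final_line = '{"response": "", "done": true, "prompt_eval_count": 5, "eval_count": 3}'

mid_chunk = json.loads(mid_stream_line)
# -> chunk_parser returns text="Hel", is_finished=False

final_chunk = json.loads(final_line)
# -> chunk_parser returns text="", is_finished=True, finish_reason="stop",
#    usage with prompt_tokens=5, completion_tokens=3, total_tokens=8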