Litellm merge pr (#7161)

* build: merge branch

* test: fix openai naming

* fix(main.py): fix openai renaming

* style: ignore function length for config factory

* fix(sagemaker/): fix routing logic

* fix: fix imports

* fix: fix override
This commit is contained in:
Krish Dholakia 2024-12-10 22:49:26 -08:00 committed by GitHub
parent d5aae81c6d
commit 350cfc36f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
88 changed files with 3617 additions and 4421 deletions

View file

@ -0,0 +1,140 @@
import json
import os
import time
import types
from enum import Enum
from typing import Any, Callable, List, Optional, Union
import httpx
import litellm
from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import ModelResponse, Usage
from ..common_utils import NLPCloudError
from .transformation import NLPCloudConfig
# Module-level NLPCloudConfig instance; completion() uses it to validate the
# environment/headers, build the request body and transform the raw response.
nlp_config = NLPCloudConfig()
def completion(
    model: str,
    messages: list,
    api_base: str,
    model_response: ModelResponse,
    print_verbose: Callable,
    encoding,
    api_key,
    logging_obj,
    optional_params: dict,
    litellm_params: dict,
    logger_fn=None,
    default_max_tokens_to_sample=None,
    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
    headers: Optional[dict] = None,
):
    """Send a synchronous text-generation request to NLP Cloud.

    The request URL is ``api_base + model + "/generation"``; no separator is
    inserted between ``api_base`` and ``model``.

    Args:
        model: Model name; becomes part of the request URL.
        messages: Chat messages, forwarded to the config's request/response
            transformations.
        api_base: URL prefix the model name is appended to.
        model_response: Response object handed to ``transform_response``.
        print_verbose: Unused by this function.
        encoding: Forwarded to ``transform_response``.
        api_key: Forwarded to ``validate_environment``, the pre-call logging
            hook and ``transform_response``.
        logging_obj: Object whose ``pre_call`` is invoked before the request;
            also forwarded to ``transform_response``.
        optional_params: Request parameters. Mutated in place: every key from
            ``litellm.NLPCloudConfig.get_config()`` that is missing is added.
            The ``"stream"`` key selects streaming.
        litellm_params: Forwarded to the request/response transformations.
        logger_fn: Unused by this function.
        default_max_tokens_to_sample: Unused by this function.
        client: Used only when it is an ``HTTPHandler``; any other value
            (including ``None``) is replaced by ``_get_httpx_client()``.
        headers: Initial headers passed to ``validate_environment``; the
            value it returns is what is actually sent. Defaults to a new
            empty dict on every call.

    Returns:
        The generator from ``clean_and_iterate_chunks`` when
        ``optional_params["stream"] is True``; otherwise whatever
        ``nlp_config.transform_response`` returns.
    """
    # Build a fresh dict per call: a ``{}`` default would be a single object
    # shared by (and mutable across) every invocation.
    if headers is None:
        headers = {}

    headers = nlp_config.validate_environment(
        api_key=api_key,
        headers=headers,
        model=model,
        messages=messages,
        optional_params=optional_params,
    )

    ## Load Config
    config = litellm.NLPCloudConfig.get_config()
    for k, v in config.items():
        # completion(top_k=3) > nlp_cloud_config(top_k=3): values supplied by
        # the caller win, config values only fill the gaps.
        optional_params.setdefault(k, v)

    completion_url = api_base + model + "/generation"

    data = nlp_config.transform_request(
        model=model,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        headers=headers,
    )

    ## LOGGING
    logging_obj.pre_call(
        input=None,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": completion_url,
        },
    )

    ## COMPLETION CALL
    # isinstance(None, HTTPHandler) is False, so this also covers client=None.
    if not isinstance(client, HTTPHandler):
        client = _get_httpx_client()

    response = client.post(
        completion_url,
        headers=headers,
        data=json.dumps(data),
        stream=optional_params.get("stream", False),
    )

    # Only the literal True takes the streaming path; any other value is
    # handled as a regular (non-streaming) response.
    if optional_params.get("stream") is True:
        return clean_and_iterate_chunks(response)

    return nlp_config.transform_response(
        model=model,
        raw_response=response,
        model_response=model_response,
        logging_obj=logging_obj,
        api_key=api_key,
        request_data=data,
        messages=messages,
        optional_params=optional_params,
        litellm_params=litellm_params,
        encoding=encoding,
    )
# def clean_and_iterate_chunks(response):
# def process_chunk(chunk):
# print(f"received chunk: {chunk}")
# cleaned_chunk = chunk.decode("utf-8")
# # Perform further processing based on your needs
# return cleaned_chunk
# for line in response.iter_lines():
# if line:
# yield process_chunk(line)
def clean_and_iterate_chunks(response):
    """Yield decoded text from a streamed response, dropping NUL bytes.

    Incoming bytes are accumulated until the pending data contains at least
    one NUL byte; all NUL bytes are then removed and the resulting text is
    yielded. Whatever is still pending when the stream ends (or when an empty
    chunk is received) is yielded last.

    Args:
        response: Streaming response exposing either ``iter_content``
            (requests-style) or ``iter_bytes`` (httpx-style), each accepting
            a ``chunk_size`` keyword.

    Yields:
        str: UTF-8 decoded text with NUL bytes removed.

    Raises:
        UnicodeDecodeError: If the stream is not valid UTF-8 or ends in the
            middle of a multi-byte character.
    """
    import codecs  # local import: not among this module's top-level imports

    # httpx responses have no ``iter_content``; fall back to ``iter_bytes`` so
    # both response flavours can be streamed.
    read_chunks = getattr(response, "iter_content", None)
    if read_chunks is None:
        read_chunks = response.iter_bytes

    # An incremental decoder keeps the trailing bytes of a multi-byte
    # character that was split across two flushes instead of raising.
    decoder = codecs.getincrementaldecoder("utf-8")()
    buffer = b""

    for chunk in read_chunks(chunk_size=1024):
        if not chunk:
            break

        buffer += chunk
        if b"\x00" in buffer:
            text = decoder.decode(buffer.replace(b"\x00", b""))
            buffer = b""
            yield text

    # No more data expected, yield any remaining data in the buffer
    tail = decoder.decode(buffer, final=True)
    if tail:
        yield tail
def embedding():
    """Placeholder for model embedding calls (parse in, call, parse out).

    Not implemented: takes no arguments and returns ``None``.
    """
    return None