feat - add afile_content, file_content

Ishaan Jaff 2024-05-28 20:58:22 -07:00
parent 18830e58e9
commit cd4a3627e8
4 changed files with 222 additions and 2 deletions

@@ -30,6 +30,8 @@ from ..types.llms.openai import (
     FileTypes,
     FileObject,
     Batch,
+    FileContentRequest,
+    HttpxBinaryResponseContent,
 )

 ####### ENVIRONMENT VARIABLES ###################
@@ -170,6 +172,134 @@ def create_file(
         raise e
+
+
+async def afile_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
+    """
+    Async: Get file contents
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["afile_content"] = True
+
+        # Use a partial function to pass your keyword arguments
+        func = partial(
+            file_content,
+            file_id,
+            custom_llm_provider,
+            extra_headers,
+            extra_body,
+            **kwargs,
+        )
+
+        # Add the context to the function
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response  # type: ignore
+
+        return response
+    except Exception as e:
+        raise e
+
+
+def file_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> Union[
+    HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+]:
+    """
+    Returns the contents of the specified file.
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        optional_params = GenericLiteLLMParams(**kwargs)
+        if custom_llm_provider == "openai":
+            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
+            api_base = (
+                optional_params.api_base
+                or litellm.api_base
+                or os.getenv("OPENAI_API_BASE")
+                or "https://api.openai.com/v1"
+            )
+            organization = (
+                optional_params.organization
+                or litellm.organization
+                or os.getenv("OPENAI_ORGANIZATION", None)
+                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+            )
+            # set API KEY
+            api_key = (
+                optional_params.api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+                or litellm.openai_key
+                or os.getenv("OPENAI_API_KEY")
+            )
+            ### TIMEOUT LOGIC ###
+            timeout = (
+                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
+            )
+            # set timeout for 10 minutes by default
+            if (
+                timeout is not None
+                and isinstance(timeout, httpx.Timeout)
+                and supports_httpx_timeout(custom_llm_provider) == False
+            ):
+                read_timeout = timeout.read or 600
+                timeout = read_timeout  # default 10 min timeout
+            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
+                timeout = float(timeout)  # type: ignore
+            elif timeout is None:
+                timeout = 600.0
+
+            _file_content_request = FileContentRequest(
+                file_id=file_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+            )
+
+            _is_async = kwargs.pop("afile_content", False) is True
+
+            response = openai_files_instance.file_content(
+                _is_async=_is_async,
+                file_content_request=_file_content_request,
+                api_base=api_base,
+                api_key=api_key,
+                timeout=timeout,
+                max_retries=optional_params.max_retries,
+                organization=organization,
+            )
+        else:
+            raise litellm.exceptions.BadRequestError(
+                message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' is supported.".format(
+                    custom_llm_provider
+                ),
+                model="n/a",
+                llm_provider=custom_llm_provider,
+                response=httpx.Response(
+                    status_code=400,
+                    content="Unsupported provider",
+                    request=httpx.Request(method="file_content", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                ),
+            )
+        return response
+    except Exception as e:
+        raise e
+
+
 async def acreate_batch(
     completion_window: Literal["24h"],
     endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
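
For context, here is a minimal usage sketch of the two new entry points. It is an illustration rather than part of the commit; it assumes OPENAI_API_KEY is set and uses a placeholder file id.

import asyncio
import litellm

# Synchronous: returns openai's HttpxBinaryResponseContent wrapper
content = litellm.file_content(
    file_id="file-abc123",  # placeholder id
    custom_llm_provider="openai",
)
with open("batch_input_copy.jsonl", "wb") as f:
    f.write(content.content)

# Asynchronous: afile_content runs the same call off the event loop
async def main() -> None:
    acontent = await litellm.afile_content(
        file_id="file-abc123",  # placeholder id
        custom_llm_provider="openai",
    )
    print(len(acontent.content), "bytes")

asyncio.run(main())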

@@ -1585,6 +1585,54 @@ class OpenAIFilesAPI(BaseLLM):
         response = openai_client.files.create(**create_file_data)
         return response
+
+    async def afile_content(
+        self,
+        file_content_request: FileContentRequest,
+        openai_client: AsyncOpenAI,
+    ) -> HttpxBinaryResponseContent:
+        response = await openai_client.files.content(**file_content_request)
+        return response
+
+    def file_content(
+        self,
+        _is_async: bool,
+        file_content_request: FileContentRequest,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ) -> Union[
+        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+    ]:
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.afile_content(  # type: ignore
+                file_content_request=file_content_request,
+                openai_client=openai_client,
+            )
+
+        response = openai_client.files.content(**file_content_request)
+        return response
+
+
 class OpenAIBatchesAPI(BaseLLM):
     """

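Worth noting in the hunk above: the synchronous file_content method doubles as the async dispatcher, returning the un-awaited afile_content coroutine when _is_async is set, so both paths share one entry point. A stripped-down sketch of that pattern, using hypothetical names that are not part of the LiteLLM API:

import asyncio
from typing import Any, Coroutine, Union

class FilesAPI:
    async def _aget(self, key: str) -> bytes:
        # stand-in for an async client call
        return key.encode()

    def get(
        self, key: str, _is_async: bool = False
    ) -> Union[bytes, Coroutine[Any, Any, bytes]]:
        if _is_async:
            # hand back the coroutine un-awaited; the async caller awaits it
            return self._aget(key)
        # stand-in for the blocking client call
        return key.encode()

api = FilesAPI()
print(api.get("sync-key"))                                # b'sync-key'
print(asyncio.run(api.get("async-key", _is_async=True)))  # b'async-key'
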
@@ -60,8 +60,6 @@ def test_create_batch():
         create_batch_response.input_file_id == batch_input_file_id
     ), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"

-    time.sleep(30)
-
     retrieved_batch = litellm.retrieve_batch(
         batch_id=create_batch_response.id, custom_llm_provider="openai"
     )
@@ -70,6 +68,17 @@ def test_create_batch():
     assert retrieved_batch.id == create_batch_response.id

+    file_content = litellm.file_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    result = file_content.content
+
+    result_file_name = "batch_job_results_furniture.jsonl"
+
+    with open(result_file_name, "wb") as file:
+        file.write(result)
+
     pass
@@ -127,6 +136,18 @@ async def test_async_create_batch():
     assert retrieved_batch.id == create_batch_response.id

+    # try to get file content for our original file
+    file_content = await litellm.afile_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    print("file content = ", file_content)
+
+    # # write this file content to a file
+    # with open("file_content.json", "w") as f:
+    #     json.dump(file_content, f)
+
+
 def test_retrieve_batch():
     pass
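
The awaited litellm.afile_content call in this test works by pushing the blocking helper onto the default thread pool while preserving context variables. A self-contained sketch of that offloading pattern, with a hypothetical blocking_fetch standing in for file_content:

import asyncio
import contextvars
from functools import partial

def blocking_fetch(file_id: str) -> bytes:
    # hypothetical blocking worker standing in for file_content()
    return f"contents of {file_id}".encode()

async def async_fetch(file_id: str) -> bytes:
    loop = asyncio.get_event_loop()
    func = partial(blocking_fetch, file_id)
    # copy the current contextvars context so the worker thread sees it
    ctx = contextvars.copy_context()
    result = await loop.run_in_executor(None, partial(ctx.run, func))
    if asyncio.iscoroutine(result):
        # mirror afile_content: await a coroutine if the worker returned one
        result = await result
    return result

print(asyncio.run(async_fetch("file-abc123")))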

@@ -20,6 +20,7 @@ from openai.types.beta.assistant import Assistant
 from openai.pagination import SyncCursorPage
 from os import PathLike
 from openai.types import FileObject, Batch
+from openai._legacy_response import HttpxBinaryResponseContent
 from typing import TypedDict, List, Optional, Tuple, Mapping, IO
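
HttpxBinaryResponseContent is openai-python's legacy wrapper around a binary HTTP response; the tests above read its .content bytes. A small sketch of consuming it directly with the OpenAI client (placeholder file id; write_to_file is, as I understand it, the wrapper's convenience method for dumping the payload to disk):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
content = client.files.content("file-abc123")  # placeholder file id

raw: bytes = content.content             # raw payload, as used in the tests
content.write_to_file("download.jsonl")  # convenience: dump bytes to disk
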
@@ -186,6 +187,26 @@ class CreateFileRequest(TypedDict, total=False):
     timeout: Optional[float]


+class FileContentRequest(TypedDict, total=False):
+    """
+    FileContentRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file_id: str
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
 # OpenAI Batches Types
 class CreateBatchRequest(TypedDict, total=False):
     """
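
Because FileContentRequest is a total=False TypedDict whose keys mirror the keyword arguments of the OpenAI client's files.content method, file_content() can build it sparsely and splat it straight into the call, as the hunks above do. A small sketch (placeholder file id and header; the absolute import path is inferred from this commit's relative import):

from openai import OpenAI
from litellm.types.llms.openai import FileContentRequest

request: FileContentRequest = {
    "file_id": "file-abc123",                # placeholder id
    "extra_headers": {"x-demo-trace": "1"},  # optional keys may be omitted
}

client = OpenAI()
# TypedDict keys line up with files.content()'s keyword arguments,
# so the request dict can be unpacked directly:
response = client.files.content(**request)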