diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index 917ad45e10..5d9a3a1411 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -30,6 +30,8 @@ from ..types.llms.openai import (
     FileTypes,
     FileObject,
     Batch,
+    FileContentRequest,
+    HttpxBinaryResponseContent,
 )
 
 ####### ENVIRONMENT VARIABLES ###################
@@ -170,6 +172,134 @@ def create_file(
         raise e
 
 
+async def afile_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> HttpxBinaryResponseContent:
+    """
+    Async: Get file contents
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["afile_content"] = True
+
+        # Use a partial function to pass your keyword arguments
+        func = partial(
+            file_content,
+            file_id,
+            custom_llm_provider,
+            extra_headers,
+            extra_body,
+            **kwargs,
+        )
+
+        # Add the context to the function
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response  # type: ignore
+
+        return response
+    except Exception as e:
+        raise e
+
+
+def file_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> Union[
+    HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+]:
+    """
+    Returns the contents of the specified file.
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        optional_params = GenericLiteLLMParams(**kwargs)
+        if custom_llm_provider == "openai":
+            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
+            api_base = (
+                optional_params.api_base
+                or litellm.api_base
+                or os.getenv("OPENAI_API_BASE")
+                or "https://api.openai.com/v1"
+            )
+            organization = (
+                optional_params.organization
+                or litellm.organization
+                or os.getenv("OPENAI_ORGANIZATION", None)
+                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+            )
+            # set API KEY
+            api_key = (
+                optional_params.api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+                or litellm.openai_key
+                or os.getenv("OPENAI_API_KEY")
+            )
+            ### TIMEOUT LOGIC ###
+            timeout = (
+                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
+            )
+            # set timeout for 10 minutes by default
+
+            if (
+                timeout is not None
+                and isinstance(timeout, httpx.Timeout)
+                and not supports_httpx_timeout(custom_llm_provider)
+            ):
+                read_timeout = timeout.read or 600
+                timeout = read_timeout  # default 10 min timeout
+            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
+                timeout = float(timeout)  # type: ignore
+            elif timeout is None:
+                timeout = 600.0
+
+            _file_content_request = FileContentRequest(
+                file_id=file_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+            )
+
+            _is_async = kwargs.pop("afile_content", False) is True
+
+            response = openai_files_instance.file_content(
+                _is_async=_is_async,
+                file_content_request=_file_content_request,
+                api_base=api_base,
+                api_key=api_key,
+                timeout=timeout,
+                max_retries=optional_params.max_retries,
+                organization=organization,
+            )
+        else:
+            raise litellm.exceptions.BadRequestError(
+                message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' is supported.".format(
+                    custom_llm_provider
+                ),
+                model="n/a",
+                llm_provider=custom_llm_provider,
+                response=httpx.Response(
+                    status_code=400,
+                    content="Unsupported provider",
+                    request=httpx.Request(method="file_content", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                ),
+            )
+        return response
+    except Exception as e:
+        raise e
+
+
 async def acreate_batch(
     completion_window: Literal["24h"],
     endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 43d088f0db..1a1dc4e6dd 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -1585,6 +1585,54 @@ class OpenAIFilesAPI(BaseLLM):
         response = openai_client.files.create(**create_file_data)
         return response
 
+    async def afile_content(
+        self,
+        file_content_request: FileContentRequest,
+        openai_client: AsyncOpenAI,
+    ) -> HttpxBinaryResponseContent:
+        response = await openai_client.files.content(**file_content_request)
+        return response
+
+    def file_content(
+        self,
+        _is_async: bool,
+        file_content_request: FileContentRequest,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ) -> Union[
+        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+    ]:
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.afile_content(  # type: ignore
+                file_content_request=file_content_request,
+                openai_client=openai_client,
+            )
+        response = openai_client.files.content(**file_content_request)
+
+        return response
+
 
 class OpenAIBatchesAPI(BaseLLM):
     """
diff --git a/litellm/tests/test_openai_batches.py b/litellm/tests/test_openai_batches.py
index fc29331a5a..d7e3e18098 100644
--- a/litellm/tests/test_openai_batches.py
+++ b/litellm/tests/test_openai_batches.py
@@ -60,8 +60,6 @@ def test_create_batch():
         create_batch_response.input_file_id == batch_input_file_id
     ), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"
 
-    time.sleep(30)
-
     retrieved_batch = litellm.retrieve_batch(
         batch_id=create_batch_response.id, custom_llm_provider="openai"
     )
@@ -70,6 +68,17 @@
 
     assert retrieved_batch.id == create_batch_response.id
 
+    file_content = litellm.file_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    result = file_content.content
+
+    result_file_name = "batch_job_results_furniture.jsonl"
+
+    with open(result_file_name, "wb") as file:
+        file.write(result)
+
     pass
 
 
@@ -127,6 +136,18 @@ async def test_async_create_batch():
 
     assert retrieved_batch.id == create_batch_response.id
 
+    # try to get file content for our original file
+
+    file_content = await litellm.afile_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    print("file content = ", file_content)
+
+    # # write this binary file content to a file
+    # with open("file_content.jsonl", "wb") as f:
+    #     f.write(file_content.content)
+
 
 def test_retrieve_batch():
     pass
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 50ac1335ec..77791b8ece 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -20,6 +20,7 @@ from openai.types.beta.assistant import Assistant
 from openai.pagination import SyncCursorPage
 from os import PathLike
 from openai.types import FileObject, Batch
+from openai._legacy_response import HttpxBinaryResponseContent
 from typing import TypedDict, List, Optional, Tuple, Mapping, IO
 
 
@@ -186,6 +187,26 @@ class CreateFileRequest(TypedDict, total=False):
     timeout: Optional[float]
 
 
+class FileContentRequest(TypedDict, total=False):
+    """
+    FileContentRequest
+    Used by Assistants API, Batches API, and Fine-Tuning API
+
+    Required Params:
+        file_id: str
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
 # OpenAI Batches Types
 class CreateBatchRequest(TypedDict, total=False):
     """
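A minimal usage sketch for the two helpers this patch adds (assumptions: OPENAI_API_KEY is set in the environment, and "file-abc123" is a hypothetical file ID returned by an earlier litellm.create_file call):

    import asyncio

    import litellm

    # Sync variant: file_content returns an HttpxBinaryResponseContent,
    # so .content holds the raw bytes of the stored file.
    response = litellm.file_content(
        file_id="file-abc123", custom_llm_provider="openai"
    )
    with open("batch_results.jsonl", "wb") as f:
        f.write(response.content)


    # Async variant: afile_content issues the same request via an AsyncOpenAI client.
    async def main() -> None:
        aresponse = await litellm.afile_content(
            file_id="file-abc123", custom_llm_provider="openai"
        )
        with open("batch_results_async.jsonl", "wb") as f:
            f.write(aresponse.content)


    asyncio.run(main())

Both variants surface the OpenAI SDK's binary response object directly, so the bytes can be written straight to disk, as the updated tests in test_openai_batches.py do.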