feat - add afile_content, file_content

Ishaan Jaff 2024-05-28 20:58:22 -07:00
parent 18830e58e9
commit cd4a3627e8
4 changed files with 222 additions and 2 deletions

@@ -30,6 +30,8 @@ from ..types.llms.openai import (
     FileTypes,
     FileObject,
     Batch,
+    FileContentRequest,
+    HttpxBinaryResponseContent,
 )

 ####### ENVIRONMENT VARIABLES ###################
@@ -170,6 +172,134 @@ def create_file(
         raise e
+
+
+async def afile_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
+    """
+    Async: Get file contents
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        loop = asyncio.get_event_loop()
+        kwargs["afile_content"] = True
+
+        # Use a partial function to pass your keyword arguments
+        func = partial(
+            file_content,
+            file_id,
+            custom_llm_provider,
+            extra_headers,
+            extra_body,
+            **kwargs,
+        )
+
+        # Add the context to the function
+        ctx = contextvars.copy_context()
+        func_with_context = partial(ctx.run, func)
+        init_response = await loop.run_in_executor(None, func_with_context)
+        if asyncio.iscoroutine(init_response):
+            response = await init_response
+        else:
+            response = init_response  # type: ignore
+
+        return response
+    except Exception as e:
+        raise e
+
+
+def file_content(
+    file_id: str,
+    custom_llm_provider: Literal["openai"] = "openai",
+    extra_headers: Optional[Dict[str, str]] = None,
+    extra_body: Optional[Dict[str, str]] = None,
+    **kwargs,
+) -> Union[
+    HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+]:
+    """
+    Returns the contents of the specified file.
+
+    LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
+    """
+    try:
+        optional_params = GenericLiteLLMParams(**kwargs)
+        if custom_llm_provider == "openai":
+            # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
+            api_base = (
+                optional_params.api_base
+                or litellm.api_base
+                or os.getenv("OPENAI_API_BASE")
+                or "https://api.openai.com/v1"
+            )
+            organization = (
+                optional_params.organization
+                or litellm.organization
+                or os.getenv("OPENAI_ORGANIZATION", None)
+                or None  # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
+            )
+            # set API KEY
+            api_key = (
+                optional_params.api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
+                or litellm.openai_key
+                or os.getenv("OPENAI_API_KEY")
+            )
+            ### TIMEOUT LOGIC ###
+            timeout = (
+                optional_params.timeout or kwargs.get("request_timeout", 600) or 600
+            )
+            # set timeout for 10 minutes by default
+            if (
+                timeout is not None
+                and isinstance(timeout, httpx.Timeout)
+                and supports_httpx_timeout(custom_llm_provider) == False
+            ):
+                read_timeout = timeout.read or 600
+                timeout = read_timeout  # default 10 min timeout
+            elif timeout is not None and not isinstance(timeout, httpx.Timeout):
+                timeout = float(timeout)  # type: ignore
+            elif timeout is None:
+                timeout = 600.0
+
+            _file_content_request = FileContentRequest(
+                file_id=file_id,
+                extra_headers=extra_headers,
+                extra_body=extra_body,
+            )
+
+            _is_async = kwargs.pop("afile_content", False) is True
+
+            response = openai_files_instance.file_content(
+                _is_async=_is_async,
+                file_content_request=_file_content_request,
+                api_base=api_base,
+                api_key=api_key,
+                timeout=timeout,
+                max_retries=optional_params.max_retries,
+                organization=organization,
+            )
+        else:
+            raise litellm.exceptions.BadRequestError(
+                message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' is supported.".format(
+                    custom_llm_provider
+                ),
+                model="n/a",
+                llm_provider=custom_llm_provider,
+                response=httpx.Response(
+                    status_code=400,
+                    content="Unsupported provider",
+                    request=httpx.Request(method="file_content", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                ),
+            )
+        return response
+    except Exception as e:
+        raise e
+
+
 async def acreate_batch(
     completion_window: Literal["24h"],
     endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
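
For context, here is a minimal usage sketch of the two new entry points. It is an illustration rather than part of the commit; it assumes OPENAI_API_KEY is set and uses a placeholder file id.

import asyncio
import litellm

# Synchronous: returns openai's HttpxBinaryResponseContent wrapper
content = litellm.file_content(
    file_id="file-abc123",  # placeholder id
    custom_llm_provider="openai",
)
with open("batch_input_copy.jsonl", "wb") as f:
    f.write(content.content)

# Asynchronous: afile_content runs the same call off the event loop
async def main() -> None:
    acontent = await litellm.afile_content(
        file_id="file-abc123",  # placeholder id
        custom_llm_provider="openai",
    )
    print(len(acontent.content), "bytes")

asyncio.run(main())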

@@ -1585,6 +1585,54 @@ class OpenAIFilesAPI(BaseLLM):
         response = openai_client.files.create(**create_file_data)
         return response
+
+    async def afile_content(
+        self,
+        file_content_request: FileContentRequest,
+        openai_client: AsyncOpenAI,
+    ) -> HttpxBinaryResponseContent:
+        response = await openai_client.files.content(**file_content_request)
+        return response
+
+    def file_content(
+        self,
+        _is_async: bool,
+        file_content_request: FileContentRequest,
+        api_base: str,
+        api_key: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        max_retries: Optional[int],
+        organization: Optional[str],
+        client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+    ) -> Union[
+        HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+    ]:
+        openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+            api_key=api_key,
+            api_base=api_base,
+            timeout=timeout,
+            max_retries=max_retries,
+            organization=organization,
+            client=client,
+            _is_async=_is_async,
+        )
+        if openai_client is None:
+            raise ValueError(
+                "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+            )
+
+        if _is_async is True:
+            if not isinstance(openai_client, AsyncOpenAI):
+                raise ValueError(
+                    "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+                )
+            return self.afile_content(  # type: ignore
+                file_content_request=file_content_request,
+                openai_client=openai_client,
+            )
+
+        response = openai_client.files.content(**file_content_request)
+        return response
+
+
 class OpenAIBatchesAPI(BaseLLM):
     """

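Worth noting in the hunk above: the synchronous file_content method doubles as the async dispatcher, returning the un-awaited afile_content coroutine when _is_async is set, so both paths share one entry point. A stripped-down sketch of that pattern, using hypothetical names that are not part of the LiteLLM API:

import asyncio
from typing import Any, Coroutine, Union

class FilesAPI:
    async def _aget(self, key: str) -> bytes:
        # stand-in for an async client call
        return key.encode()

    def get(
        self, key: str, _is_async: bool = False
    ) -> Union[bytes, Coroutine[Any, Any, bytes]]:
        if _is_async:
            # hand back the coroutine un-awaited; the async caller awaits it
            return self._aget(key)
        # stand-in for the blocking client call
        return key.encode()

api = FilesAPI()
print(api.get("sync-key"))                                # b'sync-key'
print(asyncio.run(api.get("async-key", _is_async=True)))  # b'async-key'
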
@@ -60,8 +60,6 @@ def test_create_batch():
         create_batch_response.input_file_id == batch_input_file_id
     ), f"Failed to create batch, expected input_file_id to be {batch_input_file_id} but got {create_batch_response.input_file_id}"

-    time.sleep(30)
-
     retrieved_batch = litellm.retrieve_batch(
         batch_id=create_batch_response.id, custom_llm_provider="openai"
     )
@@ -70,6 +68,17 @@ def test_create_batch():
     assert retrieved_batch.id == create_batch_response.id

+    file_content = litellm.file_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    result = file_content.content
+
+    result_file_name = "batch_job_results_furniture.jsonl"
+
+    with open(result_file_name, "wb") as file:
+        file.write(result)
+
     pass
@@ -127,6 +136,18 @@ async def test_async_create_batch():
     assert retrieved_batch.id == create_batch_response.id

+    # try to get file content for our original file
+    file_content = await litellm.afile_content(
+        file_id=batch_input_file_id, custom_llm_provider="openai"
+    )
+
+    print("file content = ", file_content)
+
+    # # write this file content to a file
+    # with open("file_content.json", "w") as f:
+    #     json.dump(file_content, f)
+
+
 def test_retrieve_batch():
     pass
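
The awaited litellm.afile_content call in this test works by pushing the blocking helper onto the default thread pool while preserving context variables. A self-contained sketch of that offloading pattern, with a hypothetical blocking_fetch standing in for file_content:

import asyncio
import contextvars
from functools import partial

def blocking_fetch(file_id: str) -> bytes:
    # hypothetical blocking worker standing in for file_content()
    return f"contents of {file_id}".encode()

async def async_fetch(file_id: str) -> bytes:
    loop = asyncio.get_event_loop()
    func = partial(blocking_fetch, file_id)
    # copy the current contextvars context so the worker thread sees it
    ctx = contextvars.copy_context()
    result = await loop.run_in_executor(None, partial(ctx.run, func))
    if asyncio.iscoroutine(result):
        # mirror afile_content: await a coroutine if the worker returned one
        result = await result
    return result

print(asyncio.run(async_fetch("file-abc123")))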

@@ -20,6 +20,7 @@ from openai.types.beta.assistant import Assistant
 from openai.pagination import SyncCursorPage
 from os import PathLike
 from openai.types import FileObject, Batch
+from openai._legacy_response import HttpxBinaryResponseContent
 from typing import TypedDict, List, Optional, Tuple, Mapping, IO
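
HttpxBinaryResponseContent is openai-python's legacy wrapper around a binary HTTP response; the tests above read its .content bytes. A small sketch of consuming it directly with the OpenAI client (placeholder file id; write_to_file is, as I understand it, the wrapper's convenience method for dumping the payload to disk):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
content = client.files.content("file-abc123")  # placeholder file id

raw: bytes = content.content             # raw payload, as used in the tests
content.write_to_file("download.jsonl")  # convenience: dump bytes to disk
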
@@ -186,6 +187,26 @@ class CreateFileRequest(TypedDict, total=False):
     timeout: Optional[float]


+class FileContentRequest(TypedDict, total=False):
+    """
+    FileContentRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file_id: str
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
 # OpenAI Batches Types
 class CreateBatchRequest(TypedDict, total=False):
     """
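
Because FileContentRequest is a total=False TypedDict whose keys mirror the keyword arguments of the OpenAI client's files.content method, file_content() can build it sparsely and splat it straight into the call, as the hunks above do. A small sketch (placeholder file id and header; the absolute import path is inferred from this commit's relative import):

from openai import OpenAI
from litellm.types.llms.openai import FileContentRequest

request: FileContentRequest = {
    "file_id": "file-abc123",                # placeholder id
    "extra_headers": {"x-demo-trace": "1"},  # optional keys may be omitted
}

client = OpenAI()
# TypedDict keys line up with files.content()'s keyword arguments,
# so the request dict can be unpacked directly:
response = client.files.content(**request)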