diff --git a/.env.example b/.env.example index 82b09ca25e..24e6d1a014 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,6 @@ # OpenAI OPENAI_API_KEY = "" -OPENAI_API_BASE = "" +OPENAI_BASE_URL = "" # Cohere COHERE_API_KEY = "" # OpenRouter diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md index 9ab9061aaa..c60e521431 100644 --- a/docs/my-website/docs/providers/openai.md +++ b/docs/my-website/docs/providers/openai.md @@ -156,7 +156,7 @@ print(response) ```python import os os.environ["OPENAI_ORGANIZATION"] = "your-org-id" # OPTIONAL -os.environ["OPENAI_API_BASE"] = "openaiai-api-base" # OPTIONAL +os.environ["OPENAI_BASE_URL"] = "https://your_host/v1" # OPTIONAL ``` ### OpenAI Chat Completion Models @@ -188,7 +188,7 @@ os.environ["OPENAI_API_BASE"] = "openaiai-api-base" # OPTIONAL | gpt-4-32k-0613 | `response = completion(model="gpt-4-32k-0613", messages=messages)` | -These also support the `OPENAI_API_BASE` environment variable, which can be used to specify a custom API endpoint. +These also support the `OPENAI_BASE_URL` environment variable, which can be used to specify a custom API endpoint. 
## OpenAI Vision Models | Model Name | Function Call | @@ -614,8 +614,8 @@ os.environ["OPENAI_API_KEY"] = "" # set custom api base to your proxy # either set .env or litellm.api_base -# os.environ["OPENAI_API_BASE"] = "" -litellm.api_base = "your-openai-proxy-url" +# os.environ["OPENAI_BASE_URL"] = "https://your_host/v1" +litellm.api_base = "https://your_host/v1" messages = [{ "content": "Hello, how are you?","role": "user"}] diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 863349a8fe..f6b2df7505 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -453,7 +453,7 @@ router_settings: | NO_DOCS | Flag to disable documentation generation | NO_PROXY | List of addresses to bypass proxy | OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval -| OPENAI_API_BASE | Base URL for OpenAI API +| OPENAI_BASE_URL | Base URL for OpenAI API | OPENAI_API_KEY | API key for OpenAI services | OPENAI_ORGANIZATION | Organization identifier for OpenAI | OPENID_BASE_URL | Base URL for OpenID Connect services diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md index 0d08db7444..e23d64e443 100644 --- a/docs/my-website/docs/proxy_server.md +++ b/docs/my-website/docs/proxy_server.md @@ -337,7 +337,7 @@ export OPENAI_API_KEY="sk-1234" ``` ```shell -export OPENAI_API_BASE="http://0.0.0.0:8000" +export OPENAI_BASE_URL="http://0.0.0.0:8000" ``` ```shell python3 run.py --task "a script that says hello world" --name "hello world" @@ -572,7 +572,7 @@ export OPENAI_API_KEY="sk-1234" ``` ```shell -export OPENAI_API_BASE="http://0.0.0.0:8000" +export OPENAI_BASE_URL="http://0.0.0.0:8000" ``` ```shell python3 run.py --task "a script that says hello world" --name "hello world" diff --git a/docs/my-website/docs/set_keys.md b/docs/my-website/docs/set_keys.md index 693cf5f7f4..295d9ec550 100644 --- a/docs/my-website/docs/set_keys.md +++ 
b/docs/my-website/docs/set_keys.md @@ -44,7 +44,7 @@ os.environ['AZURE_API_VERSION'] = "2023-05-15" # [OPTIONAL] os.environ['AZURE_API_TYPE'] = "azure" # [OPTIONAL] # for openai -os.environ['OPENAI_API_BASE'] = "https://openai-gpt-4-test2-v-12.openai.azure.com/" +os.environ['OPENAI_BASE_URL'] = "https://your_host/v1" ``` ### Setting Project, Location, Token diff --git a/docs/my-website/docs/tutorials/lm_evaluation_harness.md b/docs/my-website/docs/tutorials/lm_evaluation_harness.md index c28f2dac77..01fdb4b304 100644 --- a/docs/my-website/docs/tutorials/lm_evaluation_harness.md +++ b/docs/my-website/docs/tutorials/lm_evaluation_harness.md @@ -39,7 +39,7 @@ pip install openai==0.28.01 **Step 3: Set OpenAI API Base & Key** ```shell -$ export OPENAI_API_BASE=http://0.0.0.0:8000 +$ export OPENAI_BASE_URL=http://0.0.0.0:8000 OPENAI_API_BASE=http://0.0.0.0:8000 # openai==0.28 (pinned above) only reads OPENAI_API_BASE ``` LM Harness requires you to set an OpenAI API key `OPENAI_API_SECRET_KEY` for running benchmarks @@ -74,7 +74,7 @@ $ litellm --model huggingface/bigcode/starcoder **Step 2: Set OpenAI API Base & Key** ```shell -$ export OPENAI_API_BASE=http://0.0.0.0:8000 +$ export OPENAI_BASE_URL=http://0.0.0.0:8000 ``` Set this to anything since the proxy has the credentials @@ -93,12 +93,12 @@ cd FastEval **Set API Base on FastEval** -On FastEval make the following **2 line code change** to set `OPENAI_API_BASE` +On FastEval make the following **2 line code change** to set `OPENAI_BASE_URL` https://github.com/FastEval/FastEval/pull/90/files ```python try: - api_base = os.environ["OPENAI_API_BASE"] #changed: read api base from .env + api_base = os.environ["OPENAI_BASE_URL"] #changed: read api base from .env if api_base == None: api_base = "https://api.openai.com/v1" response = await self.reply_two_attempts_with_different_max_new_tokens( @@ -130,7 +130,7 @@ $ litellm --model huggingface/bigcode/starcoder **Step 2: Set OpenAI API Base & Key** ```shell -$ export OPENAI_API_BASE=http://0.0.0.0:8000 +$ export OPENAI_BASE_URL=http://0.0.0.0:8000 ``` **Step 3 Run with 
FLASK** diff --git a/litellm/assistants/main.py b/litellm/assistants/main.py index 28f4518f15..b93541dc06 100644 --- a/litellm/assistants/main.py +++ b/litellm/assistants/main.py @@ -110,6 +110,7 @@ def get_assistants( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -309,6 +310,7 @@ def create_assistants( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -485,6 +487,7 @@ def delete_assistant( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -673,6 +676,7 @@ def create_thread( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -828,6 +832,7 @@ def get_thread( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -1016,6 +1021,7 @@ def add_message( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -1177,6 +1183,7 @@ def get_messages( api_base = ( optional_params.api_base # for 
deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -1375,6 +1382,7 @@ def run_thread( api_base = ( optional_params.api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/batches/main.py b/litellm/batches/main.py index f4f74c72fb..0be9667790 100644 --- a/litellm/batches/main.py +++ b/litellm/batches/main.py @@ -157,6 +157,7 @@ def create_batch( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -361,6 +362,7 @@ def retrieve_batch( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -556,6 +558,7 @@ def list_batches( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -713,6 +716,7 @@ def cancel_batch( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/files/main.py b/litellm/files/main.py index ebe79c1079..ded74cc653 100644 --- a/litellm/files/main.py +++ b/litellm/files/main.py @@ -164,6 +164,7 @@ def create_file( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -343,6 +344,7 @@ def file_retrieve( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" 
) @@ -496,6 +498,7 @@ def file_delete( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -649,6 +652,7 @@ def file_list( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -809,6 +813,7 @@ def file_content( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py index 09c070fffb..b7efcb40d4 100644 --- a/litellm/fine_tuning/main.py +++ b/litellm/fine_tuning/main.py @@ -142,6 +142,7 @@ def create_fine_tuning_job( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -363,6 +364,7 @@ def cancel_fine_tuning_job( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -524,6 +526,7 @@ def list_fine_tuning_jobs( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -678,6 +681,7 @@ def retrieve_fine_tuning_job( api_base = ( optional_params.api_base or litellm.api_base + or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py index 03257e50f0..a49ae6e102 100644 --- a/litellm/llms/openai/chat/gpt_transformation.py +++ b/litellm/llms/openai/chat/gpt_transformation.py @@ -384,6 +384,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig): return ( api_base or litellm.api_base + or get_secret_str("OPENAI_BASE_URL") or 
get_secret_str("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py index e062c0c9fa..8fd9448e19 100644 --- a/litellm/llms/openai/responses/transformation.py +++ b/litellm/llms/openai/responses/transformation.py @@ -119,6 +119,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig): api_base = ( api_base or litellm.api_base + or get_secret_str("OPENAI_BASE_URL") or get_secret_str("OPENAI_API_BASE") or "https://api.openai.com/v1" ) diff --git a/litellm/main.py b/litellm/main.py index 3f1d9a1e76..928ccc7aae 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1515,6 +1515,7 @@ def completion( # type: ignore # noqa: PLR0915 api_base = ( api_base or litellm.api_base + or get_secret("OPENAI_BASE_URL") or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -1642,6 +1643,7 @@ def completion( # type: ignore # noqa: PLR0915 api_base = ( api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there or litellm.api_base + or get_secret("OPENAI_BASE_URL") or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -1695,6 +1697,7 @@ def completion( # type: ignore # noqa: PLR0915 api_base = ( api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there or litellm.api_base + or get_secret("OPENAI_BASE_URL") or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -3481,6 +3484,7 @@ def embedding( # noqa: PLR0915 api_base = ( api_base or litellm.api_base + or get_secret_str("OPENAI_BASE_URL") or get_secret_str("OPENAI_API_BASE") or "https://api.openai.com/v1" ) @@ -5150,6 +5154,7 @@ def transcription( api_base = ( api_base or litellm.api_base + or get_secret("OPENAI_BASE_URL") or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" ) # type: ignore @@ -5320,6 +5325,7 @@ def speech( # noqa: PLR0915 
api_base = ( api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there or litellm.api_base + or get_secret("OPENAI_BASE_URL") or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" ) # type: ignore diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py index 13eaeb09ab..88f40fe427 100644 --- a/tests/local_testing/test_router.py +++ b/tests/local_testing/test_router.py @@ -9,6 +9,8 @@ import traceback import openai import pytest +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer import litellm.types import litellm.types.router @@ -704,7 +706,7 @@ async def test_async_router_context_window_fallback(sync_mode): "litellm_params": { # params for litellm completion/embedding call "model": "gpt-4", "api_key": os.getenv("OPENAI_API_KEY"), - "api_base": os.getenv("OPENAI_API_BASE"), + "api_base": os.getenv("OPENAI_BASE_URL") or os.getenv("OPENAI_API_BASE"), }, }, {
@@ -1373,6 +1375,109 @@ async def test_mistral_on_router():
 
 # asyncio.run(test_mistral_on_router())
 
 
+@pytest.mark.parametrize("env_base", ["OPENAI_BASE_URL", "OPENAI_API_BASE"])
+@pytest.mark.asyncio
+async def test_openai_env_on_router(monkeypatch, env_base):
+    """Check that the OpenAI base-URL env variables are honored by the router.
+
+    Runs once per variable (the current ``OPENAI_BASE_URL`` and the legacy
+    ``OPENAI_API_BASE``): a local mock server is started, the variable under
+    test is pointed at it, and a chat completion is sent without an explicit
+    ``api_base``. The call must be answered by the mock server.
+    """
+
+    # See https://github.com/openai/openai-openapi/blob/master/openapi.yaml
+    class MockOpenAIRequestHandler(BaseHTTPRequestHandler):
+        def do_POST(self):
+            # Consume the request body before replying so the socket is not
+            # closed with unread data still pending.
+            self.rfile.read(int(self.headers.get("Content-Length") or 0))
+
+            if not self.path.startswith("/v1/chat/completions"):
+                self.send_response(404)
+                self.end_headers()
+                self.wfile.write(b"Not Found")
+                return
+
+            # Define a proper OpenAI chat completion response
+            response_body = b"""{
+                "id": "chatcmpl-AyPNinnUqUDYo9SAdA52NobMflmj2",
+                "object": "chat.completion",
+                "created": 1738960610,
+                "model": "gpt-4o",
+                "usage": {
+                    "prompt_tokens": 13,
+                    "completion_tokens": 18,
+                    "total_tokens": 31
+                },
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": "hello back at ya!"
+                        },
+                        "finish_reason": "stop"
+                    }
+                ]
+            }"""
+
+            self.send_response(200)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(response_body)))
+            self.end_headers()
+            self.wfile.write(response_body)
+
+    # Set up the server
+    server_address = ("localhost", 0)  # Ephemeral port
+    httpd = HTTPServer(server_address, MockOpenAIRequestHandler)
+    port = httpd.server_port
+
+    # Run the server in a separate thread
+    server_thread = threading.Thread(target=httpd.serve_forever)
+    server_thread.daemon = True
+    server_thread.start()
+
+    try:
+        # Configure environment variables: expose only the variable under
+        # test, because litellm.api_base and OPENAI_BASE_URL are consulted
+        # before OPENAI_API_BASE and an ambient value would win otherwise.
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_BASE", raising=False)
+        monkeypatch.setattr(litellm, "api_base", None)
+        monkeypatch.setenv(env_base, f"http://localhost:{port}/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "fake_openai_api_key")
+
+        # Set up the router
+        model_list = [
+            {
+                "model_name": "gpt-4o",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                },
+            },
+        ]
+        router = Router(model_list=model_list)
+
+        # Make the async call
+        response = await router.acompletion(
+            model="gpt-4o",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "hello from litellm test",
+                }
+            ],
+        )
+        print(response)
+        # Match the mock's exact text to prove the request reached it.
+        assert response.choices[0].message.content == "hello back at ya!"
+    finally:
+        # Stop the mock server and release its socket and thread.
+        httpd.shutdown()
+        httpd.server_close()
+        server_thread.join(timeout=5)
+
 def test_openai_completion_on_router():
     # [PROD Use Case] - Makes an acompletion call + async acompletion call, and sync acompletion call, sync completion + stream
diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py index 68f4ff8ec4..6d9d0550f1 100644 --- a/tests/proxy_unit_tests/test_proxy_server.py +++ b/tests/proxy_unit_tests/test_proxy_server.py @@ -118,7 +118,7 @@ def mock_patch_aimage_generation(): def fake_env_vars(monkeypatch): # Set some fake environment variables monkeypatch.setenv("OPENAI_API_KEY", "fake_openai_api_key") - monkeypatch.setenv("OPENAI_API_BASE", "http://fake-openai-api-base") + monkeypatch.setenv("OPENAI_BASE_URL", "http://fake-openai-api-base/v1") monkeypatch.setenv("AZURE_API_BASE", "http://fake-azure-api-base") 
monkeypatch.setenv("AZURE_OPENAI_API_KEY", "fake_azure_openai_api_key") monkeypatch.setenv("AZURE_SWEDEN_API_BASE", "http://fake-azure-sweden-api-base")