use api_base instead of custom_api_base

ishaan-jaff 2023-09-02 17:11:30 -07:00
parent e6836985c8
commit 09ae510a58
9 changed files with 39 additions and 39 deletions

View file

@@ -88,7 +88,7 @@
}
],
"source": [
"response = completion(model=\"llama2\", messages=messages, custom_api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
"response = completion(model=\"llama2\", messages=messages, api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
"print(response)"
]
},

View file

@@ -178,12 +178,12 @@ Ollama supported models: https://github.com/jmorganca/ollama
| Model Name | Function Call | Required OS Variables |
|----------------------|-----------------------------------------------------------------------------------|--------------------------------|
-| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 7B | `completion(model='llama2', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 13B | `completion(model='llama2:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 70B | `completion(model='llama2:70b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Orca Mini | `completion(model='orca-mini', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Vicuna | `completion(model='vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes | `completion(model='nous-hermes', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
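Every row above follows the same pattern; only the model tag changes. A minimal runnable sketch of such a call after the rename, assuming a local Ollama server on the default port (the model tags and the loop below are illustrative, not part of this commit):

```python
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# Swap in any model tag from the table; the call signature is identical.
for model in ["llama2", "orca-mini"]:
    response = completion(
        model=model,
        messages=messages,
        api_base="http://localhost:11434",  # local Ollama server, no API key required
        custom_llm_provider="ollama",
        stream=True,
    )
    for chunk in response:  # stream=True returns an iterable of chunks
        print(chunk)
```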

View file

@@ -41,14 +41,14 @@ from litellm import completion
model = "meta-llama/Llama-2-7b-hf"
messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
```
What's happening?
-- custom_api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
+- api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
### Case 3: Call Llama2 private Huggingface endpoint
@@ -72,10 +72,10 @@ os.environ["HF_TOKEN"] = "..."
model = "meta-llama/Llama-2-7b-hf"
messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
```
**Setting it as package variable**
@@ -93,10 +93,10 @@ litellm.huggingface_key = "..."
model = "meta-llama/Llama-2-7b-hf"
messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
```
**Passed in during completion call**
@@ -111,8 +111,8 @@ from litellm import completion
model = "meta-llama/Llama-2-7b-hf"
messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base, api_key="...")
```
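As an aside, the handler changed below (huggingface_restapi.py) also falls back to an HF_API_BASE environment variable when no api_base is passed, so the endpoint can be set once per process. A sketch of that variant, assuming the same deployed endpoint URL as in the examples above:

```python
import os
from litellm import completion

os.environ["HF_TOKEN"] = "..."  # auth token, as in the env-variable example above

# Set the endpoint once instead of passing api_base on every call;
# huggingface_restapi.py (changed below) falls back to HF_API_BASE when no api_base is given.
os.environ["HF_API_BASE"] = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"

model = "meta-llama/Llama-2-7b-hf"
messages = [{"role": "user", "content": "Hey, how's it going?"}]

completion(model=model, messages=messages, custom_llm_provider="huggingface")
```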

View file

@@ -38,7 +38,7 @@ class HuggingfaceRestAPILLM:
self,
model: str,
messages: list,
-custom_api_base: str,
+api_base: str,
model_response: ModelResponse,
print_verbose: Callable,
optional_params=None,
@@ -48,8 +48,8 @@ class HuggingfaceRestAPILLM:
completion_url: str = ""
if "https" in model:
completion_url = model
-elif custom_api_base:
-completion_url = custom_api_base
+elif api_base:
+completion_url = api_base
elif "HF_API_BASE" in os.environ:
completion_url = os.getenv("HF_API_BASE", "")
else:
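For reference, the endpoint resolution order after this change, restated as a standalone sketch (the final default is assumed from the docs' link to the default Hugging Face inference endpoint; it is not visible in this hunk):

```python
import os

def resolve_hf_completion_url(model: str, api_base: str = "") -> str:
    """Mirror of the precedence in HuggingfaceRestAPILLM.completion above (sketch only)."""
    if "https" in model:   # the model string is already a full endpoint URL
        return model
    if api_base:           # per-call override (renamed from custom_api_base)
        return api_base
    if "HF_API_BASE" in os.environ:
        return os.getenv("HF_API_BASE", "")
    # assumed default: the public Hugging Face inference endpoint referenced in the docs
    return f"https://api-inference.huggingface.co/models/{model}"
```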

View file

@@ -92,7 +92,7 @@ def completion(
verbose=False,
azure=False,
custom_llm_provider=None,
-custom_api_base=None,
+api_base=None,
litellm_call_id=None,
litellm_logging_obj=None,
use_client=False,
@@ -153,7 +153,7 @@ def completion(
logger_fn=logger_fn,
verbose=verbose,
custom_llm_provider=custom_llm_provider,
-custom_api_base=custom_api_base,
+api_base=api_base,
litellm_call_id=litellm_call_id,
model_alias_map=litellm.model_alias_map,
completion_call_id=id
@@ -223,7 +223,7 @@ def completion(
# note: if a user sets a custom base - we should ensure this works
# allow for the setting of dynamic and stateful api-bases
api_base = (
-custom_api_base
+api_base
or litellm.api_base
or get_secret("OPENAI_API_BASE")
or "https://api.openai.com/v1"
@@ -567,7 +567,7 @@ def completion(
model_response = huggingface_client.completion(
model=model,
messages=messages,
-custom_api_base=custom_api_base,
+api_base=api_base,
model_response=model_response,
print_verbose=print_verbose,
optional_params=optional_params,
@@ -692,7 +692,7 @@ def completion(
response = model_response
elif custom_llm_provider == "ollama":
endpoint = (
-litellm.api_base if litellm.api_base is not None else custom_api_base
+litellm.api_base if litellm.api_base is not None else api_base
)
prompt = " ".join([message["content"] for message in messages])
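Note the different precedence in the ollama branch: the package-level litellm.api_base wins over the per-call api_base, and the chat messages are flattened into a single prompt string. Restated as a sketch (not the library's exact code):

```python
import litellm

def resolve_ollama_call(api_base, messages):
    # package-level setting takes priority over the per-call kwarg in this branch
    endpoint = litellm.api_base if litellm.api_base is not None else api_base
    # ollama receives one flat prompt rather than a structured chat history
    prompt = " ".join(message["content"] for message in messages)
    return endpoint, prompt
```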

View file

@@ -31,9 +31,9 @@ def testing_batch_completion(*args, **kwargs):
if isinstance(model, dict) and "custom_llm_provider" in model
else None
)
kwargs_modified["custom_api_base"] = (
model["custom_api_base"]
if isinstance(model, dict) and "custom_api_base" in model
kwargs_modified["api_base"] = (
model["api_base"]
if isinstance(model, dict) and "api_base" in model
else None
)
for message_list in batch_messages:

View file

@@ -20,14 +20,14 @@ models = ["gorilla-7b-hf-v1", "gpt-4"]
custom_llm_provider = None
messages = [{"role": "user", "content": "Hey, how's it going?"}]
for model in models: # iterate through list
-custom_api_base = None
+api_base = None
if model == "gorilla-7b-hf-v1":
custom_llm_provider = "custom_openai"
-custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
+api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
completion(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
-custom_api_base=custom_api_base,
+api_base=api_base,
logger_fn=logging_fn,
)

View file

@@ -24,7 +24,7 @@
# def test_completion_ollama():
# try:
-# response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama")
+# response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama")
# print(response)
# string_response = asyncio.run(get_response(response))
# print(string_response)
@@ -36,7 +36,7 @@
# def test_completion_ollama_stream():
# try:
-# response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
+# response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
# print(response)
# string_response = asyncio.run(get_response(response))
# print(string_response)

View file

@@ -658,7 +658,7 @@ def get_litellm_params(
replicate=False,
together_ai=False,
custom_llm_provider=None,
-custom_api_base=None,
+api_base=None,
litellm_call_id=None,
model_alias_map=None,
completion_call_id=None
@@ -670,7 +670,7 @@ def get_litellm_params(
"logger_fn": logger_fn,
"verbose": verbose,
"custom_llm_provider": custom_llm_provider,
"custom_api_base": custom_api_base,
"api_base": api_base,
"litellm_call_id": litellm_call_id,
"model_alias_map": model_alias_map,
"completion_call_id": completion_call_id,
@@ -834,7 +834,7 @@ def get_optional_params( # use the openai defaults
def load_test_model(
model: str,
custom_llm_provider: str = "",
custom_api_base: str = "",
api_base: str = "",
prompt: str = "",
num_calls: int = 0,
force_timeout: int = 0,
@@ -852,7 +852,7 @@ def load_test_model(
model=model,
messages=messages,
custom_llm_provider=custom_llm_provider,
-custom_api_base=custom_api_base,
+api_base=api_base,
force_timeout=force_timeout,
)
end_time = time.time()