diff --git a/cookbook/liteLLM_Ollama.ipynb b/cookbook/liteLLM_Ollama.ipynb
index 9abfadb17..c5770b02f 100644
--- a/cookbook/liteLLM_Ollama.ipynb
+++ b/cookbook/liteLLM_Ollama.ipynb
@@ -88,7 +88,7 @@
     }
    ],
    "source": [
-    "response = completion(model=\"llama2\", messages=messages, custom_api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
+    "response = completion(model=\"llama2\", messages=messages, api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
     "print(response)"
    ]
   },
diff --git a/docs/my-website/docs/completion/supported.md b/docs/my-website/docs/completion/supported.md
index 57670a86e..ea8978066 100644
--- a/docs/my-website/docs/completion/supported.md
+++ b/docs/my-website/docs/completion/supported.md
@@ -178,12 +178,12 @@ Ollama supported models: https://github.com/jmorganca/ollama
 
 | Model Name | Function Call | Required OS Variables |
 |----------------------|-----------------------------------------------------------------------------------|--------------------------------|
-| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 7B | `completion(model='llama2', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 13B | `completion(model='llama2:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 70B | `completion(model='llama2:70b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Orca Mini | `completion(model='orca-mini', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Vicuna | `completion(model='vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes | `completion(model='nous-hermes', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
diff --git a/docs/my-website/docs/tutorials/huggingface_tutorial.md b/docs/my-website/docs/tutorials/huggingface_tutorial.md
index caae28ffa..8f37bb4b9 100644
--- a/docs/my-website/docs/tutorials/huggingface_tutorial.md
+++ b/docs/my-website/docs/tutorials/huggingface_tutorial.md
@@ -41,14 +41,14 @@ from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 What's happening?
-- custom_api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
+- api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
 
 ### Case 3: Call Llama2 private Huggingface endpoint
 
@@ -72,10 +72,10 @@ os.environ["HF_TOKEN] = "..."
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 **Setting it as package variable**
@@ -93,10 +93,10 @@ litellm.huggingface_key = "..."
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 **Passed in during completion call**
@@ -111,8 +111,8 @@ from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base, api_key="...")
 ```
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index a07f07fbe..55614549c 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -38,7 +38,7 @@ class HuggingfaceRestAPILLM:
         self,
         model: str,
         messages: list,
-        custom_api_base: str,
+        api_base: str,
         model_response: ModelResponse,
         print_verbose: Callable,
         optional_params=None,
@@ -48,8 +48,8 @@ class HuggingfaceRestAPILLM:
         completion_url: str = ""
         if "https" in model:
             completion_url = model
-        elif custom_api_base:
-            completion_url = custom_api_base
+        elif api_base:
+            completion_url = api_base
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:
diff --git a/litellm/main.py b/litellm/main.py
index 7fd789957..174077260 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -92,7 +92,7 @@ def completion(
     verbose=False,
     azure=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     litellm_logging_obj=None,
     use_client=False,
@@ -153,7 +153,7 @@ def completion(
            logger_fn=logger_fn,
            verbose=verbose,
            custom_llm_provider=custom_llm_provider,
-           custom_api_base=custom_api_base,
+           api_base=api_base,
            litellm_call_id=litellm_call_id,
            model_alias_map=litellm.model_alias_map,
            completion_call_id=id
@@ -223,7 +223,7 @@ def completion(
            # note: if a user sets a custom base - we should ensure this works
            # allow for the setting of dynamic and stateful api-bases
            api_base = (
-               custom_api_base
+               api_base
                or litellm.api_base
                or get_secret("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
@@ -567,7 +567,7 @@ def completion(
            model_response = huggingface_client.completion(
                model=model,
                messages=messages,
-               custom_api_base=custom_api_base,
+               api_base=api_base,
                model_response=model_response,
                print_verbose=print_verbose,
                optional_params=optional_params,
@@ -692,7 +692,7 @@ def completion(
            response = model_response
        elif custom_llm_provider == "ollama":
            endpoint = (
-               litellm.api_base if litellm.api_base is not None else custom_api_base
+               litellm.api_base if litellm.api_base is not None else api_base
            )
 
            prompt = " ".join([message["content"] for message in messages])
diff --git a/litellm/testing.py b/litellm/testing.py
index 5db01d182..7cab6115c 100644
--- a/litellm/testing.py
+++ b/litellm/testing.py
@@ -31,9 +31,9 @@ def testing_batch_completion(*args, **kwargs):
                if isinstance(model, dict) and "custom_llm_provider" in model
                else None
            )
-           kwargs_modified["custom_api_base"] = (
-               model["custom_api_base"]
-               if isinstance(model, dict) and "custom_api_base" in model
+           kwargs_modified["api_base"] = (
+               model["api_base"]
+               if isinstance(model, dict) and "api_base" in model
                else None
            )
            for message_list in batch_messages:
diff --git a/litellm/tests/test_custom_api_base.py b/litellm/tests/test_custom_api_base.py
index 70a477eab..40506b57c 100644
--- a/litellm/tests/test_custom_api_base.py
+++ b/litellm/tests/test_custom_api_base.py
@@ -20,14 +20,14 @@ models = ["gorilla-7b-hf-v1", "gpt-4"]
 custom_llm_provider = None
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
 for model in models:  # iterate through list
-    custom_api_base = None
+    api_base = None
     if model == "gorilla-7b-hf-v1":
         custom_llm_provider = "custom_openai"
-        custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
+        api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
     completion(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         logger_fn=logging_fn,
     )
diff --git a/litellm/tests/test_ollama_local.py b/litellm/tests/test_ollama_local.py
index a9431a932..cc15274bc 100644
--- a/litellm/tests/test_ollama_local.py
+++ b/litellm/tests/test_ollama_local.py
@@ -24,7 +24,7 @@
 
 # def test_completion_ollama():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama")
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama")
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)
@@ -36,7 +36,7 @@
 
 # def test_completion_ollama_stream():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)
diff --git a/litellm/utils.py b/litellm/utils.py
index 68076023c..1ecc130d7 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -658,7 +658,7 @@ def get_litellm_params(
     replicate=False,
     together_ai=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     model_alias_map=None,
     completion_call_id=None
@@ -670,7 +670,7 @@ def get_litellm_params(
         "logger_fn": logger_fn,
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
-        "custom_api_base": custom_api_base,
+        "api_base": api_base,
         "litellm_call_id": litellm_call_id,
         "model_alias_map": model_alias_map,
         "completion_call_id": completion_call_id,
@@ -834,7 +834,7 @@ def get_optional_params(  # use the openai defaults
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
-    custom_api_base: str = "",
+    api_base: str = "",
     prompt: str = "",
     num_calls: int = 0,
     force_timeout: int = 0,
@@ -852,7 +852,7 @@ def load_test_model(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         force_timeout=force_timeout,
     )
     end_time = time.time()
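
Note on caller usage after this rename: the keyword argument is now `api_base` rather than `custom_api_base`; everything else about the `completion()` call is unchanged. Below is a minimal sketch mirroring the Ollama call from the cookbook and docs hunks above (it assumes a local Ollama server on the default port 11434 with the `llama2` model pulled; no API key is required):

from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# `api_base` replaces the old `custom_api_base` keyword
response = completion(
    model="llama2",
    messages=messages,
    api_base="http://localhost:11434",  # local Ollama server
    custom_llm_provider="ollama",
    stream=True,
)
print(response)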