forked from phoenix/litellm-mirror
use api_base instead of custom_api_base
This commit is contained in:
parent e6836985c8
commit 09ae510a58

9 changed files with 39 additions and 39 deletions
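The rename is purely a keyword-argument change on `completion()`; call sites keep everything else the same. A minimal before/after sketch, using the Ollama call from the cookbook notebook below (a local Ollama server on port 11434 is assumed to be running):

```python
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# Old call (before this commit):
#   completion(model="llama2", messages=messages,
#              custom_api_base="http://localhost:11434",
#              custom_llm_provider="ollama", stream=True)
# New call (after this commit): same value, now passed as api_base.
response = completion(
    model="llama2",
    messages=messages,
    api_base="http://localhost:11434",
    custom_llm_provider="ollama",
    stream=True,
)
print(response)
```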
cookbook/liteLLM_Ollama.ipynb (vendored): 2 changes
@@ -88,7 +88,7 @@
     }
    ],
    "source": [
-    "response = completion(model=\"llama2\", messages=messages, custom_api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
+    "response = completion(model=\"llama2\", messages=messages, api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
     "print(response)"
    ]
   },

@@ -178,12 +178,12 @@ Ollama supported models: https://github.com/jmorganca/ollama

 | Model Name | Function Call | Required OS Variables |
 |----------------------|-----------------------------------------------------------------------------------|--------------------------------|
-| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 7B | `completion(model='llama2', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 13B | `completion(model='llama2:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 70B | `completion(model='llama2:70b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Orca Mini | `completion(model='orca-mini', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Vicuna | `completion(model='vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes | `completion(model='nous-hermes', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |

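Any row in the table above is consumed the same way once `stream=True` is set. A short sketch of reading the streamed response, assuming litellm's chunks mirror the OpenAI-style `choices[0]["delta"]` shape:

```python
from litellm import completion

messages = [{"role": "user", "content": "Why is the sky blue?"}]

response = completion(
    model="llama2",
    messages=messages,
    api_base="http://localhost:11434",
    custom_llm_provider="ollama",
    stream=True,
)

# Assumption: each streamed chunk follows the OpenAI delta format.
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
```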
@@ -41,14 +41,14 @@ from litellm import completion

 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"

 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```

 What's happening?
-- custom_api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
+- api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.

 ### Case 3: Call Llama2 private Huggingface endpoint

@@ -72,10 +72,10 @@ os.environ["HF_TOKEN] = "..."

 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"

 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```

 **Setting it as package variable**

@@ -93,10 +93,10 @@ litellm.huggingface_key = "..."

 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"

 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```

 **Passed in during completion call**

@@ -111,8 +111,8 @@ from litellm import completion

 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"

 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base, api_key="...")
 ```

@@ -38,7 +38,7 @@ class HuggingfaceRestAPILLM:
         self,
         model: str,
         messages: list,
-        custom_api_base: str,
+        api_base: str,
         model_response: ModelResponse,
         print_verbose: Callable,
         optional_params=None,
@@ -48,8 +48,8 @@ class HuggingfaceRestAPILLM:
         completion_url: str = ""
         if "https" in model:
             completion_url = model
-        elif custom_api_base:
-            completion_url = custom_api_base
+        elif api_base:
+            completion_url = api_base
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:

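For context, the URL selection in the second hunk reduces to the following sketch after the rename; the final `else` branch (the default huggingface inference endpoint the docs link to) is stubbed here as an assumption:

```python
import os

# Assumption: the default endpoint prefix used when nothing else is supplied.
DEFAULT_HF_INFERENCE_PREFIX = "https://api-inference.huggingface.co/models/"

def select_completion_url(model: str, api_base: str = "") -> str:
    # Mirrors the branch order in the @@ -48,8 +48,8 @@ hunk above.
    if "https" in model:            # a full endpoint URL was passed as the model
        return model
    elif api_base:                  # renamed from custom_api_base in this commit
        return api_base
    elif "HF_API_BASE" in os.environ:
        return os.getenv("HF_API_BASE", "")
    else:
        return DEFAULT_HF_INFERENCE_PREFIX + model

print(select_completion_url(
    "meta-llama/Llama-2-7b-hf",
    api_base="https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud",
))
```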
@@ -92,7 +92,7 @@ def completion(
     verbose=False,
     azure=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     litellm_logging_obj=None,
     use_client=False,
@@ -153,7 +153,7 @@ def completion(
         logger_fn=logger_fn,
         verbose=verbose,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         litellm_call_id=litellm_call_id,
         model_alias_map=litellm.model_alias_map,
         completion_call_id=id
@@ -223,7 +223,7 @@ def completion(
             # note: if a user sets a custom base - we should ensure this works
             # allow for the setting of dynamic and stateful api-bases
             api_base = (
-                custom_api_base
+                api_base
                 or litellm.api_base
                 or get_secret("OPENAI_API_BASE")
                 or "https://api.openai.com/v1"
@@ -567,7 +567,7 @@ def completion(
             model_response = huggingface_client.completion(
                 model=model,
                 messages=messages,
-                custom_api_base=custom_api_base,
+                api_base=api_base,
                 model_response=model_response,
                 print_verbose=print_verbose,
                 optional_params=optional_params,
@@ -692,7 +692,7 @@ def completion(
             response = model_response
         elif custom_llm_provider == "ollama":
             endpoint = (
-                litellm.api_base if litellm.api_base is not None else custom_api_base
+                litellm.api_base if litellm.api_base is not None else api_base
            )
             prompt = " ".join([message["content"] for message in messages])

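The @@ -223,7 +223,7 @@ hunk defines how the OpenAI base URL is resolved once the kwarg is named `api_base`. A self-contained sketch of that precedence (the proxy URL is purely illustrative):

```python
import os

def resolve_openai_api_base(api_base=None, package_api_base=None):
    # Same precedence as the hunk above: per-call kwarg, then the package-level
    # litellm.api_base, then the OPENAI_API_BASE secret/env var, then the default.
    return (
        api_base
        or package_api_base
        or os.environ.get("OPENAI_API_BASE")
        or "https://api.openai.com/v1"
    )

os.environ.pop("OPENAI_API_BASE", None)  # keep the checks below deterministic

# The per-call value wins over the package-level one.
assert resolve_openai_api_base(
    api_base="http://localhost:8000/v1",
    package_api_base="https://proxy.example.com/v1",
) == "http://localhost:8000/v1"

# With nothing set anywhere, the public default is returned.
assert resolve_openai_api_base() == "https://api.openai.com/v1"
```

Note that the ollama branch in the @@ -692,7 +692,7 @@ hunk keeps the opposite order: `litellm.api_base` takes precedence over the per-call `api_base`.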
@@ -31,9 +31,9 @@ def testing_batch_completion(*args, **kwargs):
                 if isinstance(model, dict) and "custom_llm_provider" in model
                 else None
             )
-            kwargs_modified["custom_api_base"] = (
-                model["custom_api_base"]
-                if isinstance(model, dict) and "custom_api_base" in model
+            kwargs_modified["api_base"] = (
+                model["api_base"]
+                if isinstance(model, dict) and "api_base" in model
                 else None
             )
             for message_list in batch_messages:

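For reference, the per-model override dict this helper reads now carries an `api_base` key instead of `custom_api_base`. A sketch of the shape; only the two override keys appear in the hunk, so the `model` key name is an assumption:

```python
# Entries handed to testing_batch_completion: plain strings get no overrides,
# dicts can override the provider and the api base per model.
models = [
    "gpt-3.5-turbo",
    {
        "model": "llama2",                      # assumption: how the model name is carried
        "custom_llm_provider": "ollama",
        "api_base": "http://localhost:11434",   # was: "custom_api_base"
    },
]
```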
@@ -20,14 +20,14 @@ models = ["gorilla-7b-hf-v1", "gpt-4"]
 custom_llm_provider = None
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
 for model in models: # iterate through list
-    custom_api_base = None
+    api_base = None
     if model == "gorilla-7b-hf-v1":
         custom_llm_provider = "custom_openai"
-        custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
+        api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
     completion(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         logger_fn=logging_fn,
     )

@@ -24,7 +24,7 @@

 # def test_completion_ollama():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama")
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama")
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)
@@ -36,7 +36,7 @@

 # def test_completion_ollama_stream():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)

@@ -658,7 +658,7 @@ def get_litellm_params(
     replicate=False,
     together_ai=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     model_alias_map=None,
     completion_call_id=None
@@ -670,7 +670,7 @@ def get_litellm_params(
         "logger_fn": logger_fn,
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
-        "custom_api_base": custom_api_base,
+        "api_base": api_base,
         "litellm_call_id": litellm_call_id,
         "model_alias_map": model_alias_map,
         "completion_call_id": completion_call_id,
@@ -834,7 +834,7 @@ def get_optional_params( # use the openai defaults
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
-    custom_api_base: str = "",
+    api_base: str = "",
     prompt: str = "",
     num_calls: int = 0,
     force_timeout: int = 0,
@@ -852,7 +852,7 @@ def load_test_model(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         force_timeout=force_timeout,
     )
     end_time = time.time()

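The utils.py hunks rename the same field in the litellm-params dict. A minimal sketch limited to the keys visible in the @@ -670,7 +670,7 @@ hunk (the real function accepts more parameters):

```python
def get_litellm_params_sketch(
    logger_fn=None,
    verbose=False,
    custom_llm_provider=None,
    api_base=None,            # renamed from custom_api_base
    litellm_call_id=None,
    model_alias_map=None,
    completion_call_id=None,
):
    # Only the keys shown in the hunk above; downstream callers now read
    # params["api_base"] instead of params["custom_api_base"].
    return {
        "logger_fn": logger_fn,
        "verbose": verbose,
        "custom_llm_provider": custom_llm_provider,
        "api_base": api_base,
        "litellm_call_id": litellm_call_id,
        "model_alias_map": model_alias_map,
        "completion_call_id": completion_call_id,
    }
```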