Mirror of https://github.com/BerriAI/litellm.git
fix: allow api base to be set for all providers
enables proxy use cases
commit 00993f3575 (parent 72f55a4e6c)
7 changed files with 76 additions and 11 deletions
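With this commit, every one of these providers resolves its endpoint the same way: the per-call `api_base` argument, then the global `litellm.api_base`, then a provider-specific variable such as `TOGETHERAI_API_BASE`, and finally the old hard-coded URL, so existing callers are unaffected. A minimal usage sketch of the proxy use case (the proxy URL and model name below are illustrative, not taken from this commit):

import litellm

# Route a TogetherAI call through a local proxy instead of the default
# https://api.together.xyz/inference (hypothetical proxy URL).
response = litellm.completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=[{"role": "user", "content": "Hello, world"}],
    api_base="http://localhost:8000/inference",
)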
litellm/llms/ai21.py

@@ -92,6 +92,7 @@ def validate_environment(api_key):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -137,7 +138,7 @@ def completion(
     )
     ## COMPLETION CALL
     response = requests.post(
-        "https://api.ai21.com/studio/v1/" + model + "/complete", headers=headers, data=json.dumps(data)
+        api_base + model + "/complete", headers=headers, data=json.dumps(data)
     )
     if "stream" in optional_params and optional_params["stream"] == True:
         return response.iter_lines()

litellm/llms/aleph_alpha.py

@@ -160,6 +160,7 @@ def validate_environment(api_key):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -178,7 +179,7 @@ def completion(
         if k not in optional_params: # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
             optional_params[k] = v

-    completion_url = "https://api.aleph-alpha.com/complete"
+    completion_url = api_base
     model = model
     prompt = ""
     if "control" in model: # follow the ###Instruction / ###Response format

litellm/llms/cohere.py

@@ -100,6 +100,7 @@ def validate_environment(api_key):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -110,7 +111,7 @@ def completion(
     logger_fn=None,
 ):
     headers = validate_environment(api_key)
-    completion_url = "https://api.cohere.ai/v1/generate"
+    completion_url = api_base
     model = model
     prompt = " ".join(message["content"] for message in messages)

litellm/llms/nlp_cloud.py

@@ -96,6 +96,7 @@ def validate_environment(api_key):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -114,7 +115,7 @@ def completion(
         if k not in optional_params: # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
             optional_params[k] = v

-    completion_url_fragment_1 = "https://api.nlpcloud.io/v1/gpu/"
+    completion_url_fragment_1 = api_base
     completion_url_fragment_2 = "/generation"
     model = model
     text = " ".join(message["content"] for message in messages)
@@ -125,6 +126,7 @@ def completion(
     }

     completion_url = completion_url_fragment_1 + model + completion_url_fragment_2

     ## LOGGING
     logging_obj.pre_call(
         input=text,

litellm/llms/replicate.py

@@ -74,8 +74,8 @@ class ReplicateConfig():


 # Function to start a prediction and get the prediction URL
-def start_prediction(version_id, input_data, api_token, logging_obj):
-    base_url = "https://api.replicate.com/v1"
+def start_prediction(version_id, input_data, api_token, api_base, logging_obj):
+    base_url = api_base
     headers = {
         "Authorization": f"Token {api_token}",
         "Content-Type": "application/json"
@@ -159,6 +159,7 @@ def model_to_version_id(model):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     logging_obj,
@@ -208,7 +209,7 @@ def completion(
     ## Step2: Poll prediction url for response
     ## Step2: is handled with and without streaming
     model_response["created"] = time.time() # for pricing this must remain right before calling api
-    prediction_url = start_prediction(version_id, input_data, api_key, logging_obj=logging_obj)
+    prediction_url = start_prediction(version_id, input_data, api_key, api_base, logging_obj=logging_obj)
     print_verbose(prediction_url)

     # Handle the prediction response (streaming or non-streaming)

litellm/llms/together_ai.py

@@ -80,6 +80,7 @@ def validate_environment(api_key):
 def completion(
     model: str,
     messages: list,
+    api_base: str,
     model_response: ModelResponse,
     print_verbose: Callable,
     encoding,
@@ -129,7 +130,7 @@ def completion(
         and optional_params["stream_tokens"] == True
     ):
         response = requests.post(
-            "https://api.together.xyz/inference",
+            api_base,
             headers=headers,
             data=json.dumps(data),
             stream=optional_params["stream_tokens"],
@@ -137,7 +138,7 @@ def completion(
         return response.iter_lines()
     else:
         response = requests.post(
-            "https://api.together.xyz/inference",
+            api_base,
             headers=headers,
             data=json.dumps(data)
         )

litellm/main.py

@@ -560,9 +560,17 @@ def completion(
             or get_secret("REPLICATE_API_TOKEN")
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("REPLICATE_API_BASE")
+            or "https://api.replicate.com/v1"
+        )
+
         model_response = replicate.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
@@ -611,9 +619,17 @@ def completion(
             api_key or litellm.nlp_cloud_key or get_secret("NLP_CLOUD_API_KEY") or litellm.api_key
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("NLP_CLOUD_API_BASE")
+            or "https://api.nlpcloud.io/v1/gpu/"
+        )
+
         model_response = nlp_cloud.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
@@ -634,9 +650,17 @@ def completion(
             api_key or litellm.aleph_alpha_key or get_secret("ALEPH_ALPHA_API_KEY") or get_secret("ALEPHALPHA_API_KEY") or litellm.api_key
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("ALEPH_ALPHA_API_BASE")
+            or "https://api.aleph-alpha.com/complete"
+        )
+
         model_response = aleph_alpha.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
@@ -661,9 +685,18 @@ def completion(
             or get_secret("CO_API_KEY")
             or litellm.api_key
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("COHERE_API_BASE")
+            or "https://api.cohere.ai/v1/generate"
+        )
+
         model_response = cohere.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
@@ -687,6 +720,14 @@ def completion(
             litellm.openai_key or
             get_secret("DEEPINFRA_API_KEY")
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("DEEPINFRA_API_BASE")
+            or "https://api.deepinfra.com/v1/openai"
+        )
+
         ## LOGGING
         logging.pre_call(
             input=messages,
@@ -698,7 +739,7 @@ def completion(
         response = openai.ChatCompletion.create(
             model=model,
             messages=messages,
-            api_base="https://api.deepinfra.com/v1/openai", # use the deepinfra api base
+            api_base=api_base, # use the deepinfra api base
             api_type="openai",
             api_version=api_version, # default None
             **optional_params,
@@ -840,10 +881,18 @@ def completion(
             or get_secret("TOGETHERAI_API_KEY")
             or litellm.api_key
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("TOGETHERAI_API_BASE")
+            or "https://api.together.xyz/inference"
+        )
+
         model_response = together_ai.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
@@ -923,10 +972,19 @@ def completion(
             or litellm.ai21_key
             or os.environ.get("AI21_API_KEY")
             or litellm.api_key
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret("AI21_API_BASE")
+            or "https://api.ai21.com/studio/v1/"
+        )
+
         model_response = ai21.completion(
             model=model,
             messages=messages,
+            api_base=api_base,
             model_response=model_response,
             print_verbose=print_verbose,
             optional_params=optional_params,
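The same four-step fallback (argument, `litellm.api_base`, `get_secret("<PROVIDER>_API_BASE")`, hard-coded default) is repeated for each provider above, which also allows configuration purely through the environment. A sketch of that style of override, assuming litellm's usual model-to-provider routing (the proxy URL is hypothetical; the variable name comes from the cohere hunk above):

import os
import litellm

# Send Cohere traffic to a proxy without changing any call sites.
os.environ["COHERE_API_BASE"] = "http://localhost:4000/v1/generate"

response = litellm.completion(
    model="command-nightly",  # a Cohere-routed model in litellm
    messages=[{"role": "user", "content": "Hi"}],
)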