diff --git a/docs/my-website/docs/completion/reliable_completions.md b/docs/my-website/docs/completion/reliable_completions.md
index a58975c1b..7aecbed0c 100644
--- a/docs/my-website/docs/completion/reliable_completions.md
+++ b/docs/my-website/docs/completion/reliable_completions.md
@@ -1,12 +1,9 @@
 # Reliability
 
-
-
-
 ## Helper utils
 LiteLLM supports the following functions for reliability:
 * `litellm.longer_context_model_fallback_dict`: Dictionary which has a mapping for those models which have larger equivalents
 * `completion_with_retries`: use tenacity retries
-* `completion()` with fallback models: set `fallback_models=['gpt-3.5-turbo', 'command-nightly', 'llama2`]. If primary model fails try fallback models
+* `completion()` with fallbacks: switch between models/keys/API bases in case of errors.
 
 ## Context Window Errors
@@ -55,7 +52,8 @@ def test_completion_custom_provider_model_name():
     print(f"Error occurred: {e}")
 ```
 
-## Specify fallback models
+## Switch Models/API Keys/API Bases
+
 LLM APIs can be unstable; `completion()` with fallbacks ensures you'll always get a response from your calls.
 
 ## Usage
@@ -63,8 +61,19 @@ To use fallback models with `completion()`, specify a list of models in the `fal
 The `fallbacks` list should include the primary model you want to use, followed by additional models that can be used as backups in case the primary model fails to provide a response.
 
+### Switch models
 ```python
-response = completion(model="bad-model", fallbacks=["gpt-3.5-turbo" "command-nightly"], messages=messages)
+response = completion(model="bad-model", messages=messages,
+                      fallbacks=["gpt-3.5-turbo", "command-nightly"])
+```
+
+### Switch API keys/bases (e.g. Azure deployment)
+Switch between different keys for the same Azure deployment, or fall back to another deployment entirely.
+
+```python
+api_key = "bad-key"
+response = completion(model="azure/gpt-4", messages=messages, api_key=api_key,
+                      fallbacks=[{"api_key": "good-key-1"}, {"api_key": "good-key-2", "api_base": "good-api-base-2"}])
 ```
 
 ### Output from calls
 
diff --git a/litellm/main.py b/litellm/main.py
index 20e554c76..761080b35 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -672,22 +672,23 @@ def completion(
             litellm.openai_key or
             get_secret("DEEPINFRA_API_KEY")
         )
+        openai.api_base = "https://api.deepinfra.com/v1/openai" # point the shared openai client at the deepinfra api base
         ## LOGGING
         logging.pre_call(
             input=messages,
             api_key=api_key,
         )
         ## COMPLETION CALL
         openai.api_key = api_key # set key for deep infra
         try:
             response = openai.ChatCompletion.create(
                 model=model,
                 messages=messages,
                 api_base="https://api.deepinfra.com/v1/openai", # use the deepinfra api base
                 api_type="openai",
                 api_version=api_version, # default None
                 **optional_params,
             )
         except Exception as e:
             ## LOGGING - log the original exception returned
             logging.post_call(
diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py
index 4296d997b..576cc18f9 100644
--- a/litellm/tests/test_bad_params.py
+++ b/litellm/tests/test_bad_params.py
@@ -12,7 +12,7 @@ import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+# litellm.set_verbose = True
 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]
 model_val = None
 
@@ -41,7 +41,7 @@ def test_completion_with_no_provider():
         print(f"error occurred: {e}")
         pass
 
-test_completion_with_no_provider()
+# test_completion_with_no_provider()
 # # bad key
 # temp_key = os.environ.get("OPENAI_API_KEY")
 # os.environ["OPENAI_API_KEY"] = "bad-key"
@@ -53,3 +53,10 @@ test_completion_with_no_provider()
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
 # os.environ["OPENAI_API_KEY"] = str(temp_key) # this passes linting#5
+
+def logger_fn(model_details):
+    print(model_details)
+os.environ["OPENAI_API_KEY"] = os.environ.get("DEEPINFRA_API_KEY", "") # read the deepinfra key from the environment instead of hardcoding it
+messages = [{"role": "user", "content": "Request boss to grant me 1 day leave"}]
+litellm.api_base = "https://api.deepinfra.com/v1/"
+response = litellm.completion(model="meta-llama/Llama-2-70b-chat-hf", messages=messages, custom_llm_provider="openai", logger_fn=logger_fn)
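
The docs change above also names two helpers that have no example in the patch: `completion_with_retries` and `litellm.longer_context_model_fallback_dict`. For reference, here is a minimal sketch of how they can be used together. It assumes `completion_with_retries` mirrors `completion()`'s keyword arguments, that `longer_context_model_fallback_dict` maps a model name to a larger-context equivalent, and that `OPENAI_API_KEY` is set; treat these as assumptions, not confirmed API.

```python
# Minimal sketch of the reliability helpers, under the assumptions stated above.
import litellm
from litellm import completion_with_retries

messages = [{"role": "user", "content": "Hello, how are you?"}]

# completion_with_retries wraps completion() with tenacity-based retries
# (assumption: it accepts the same keyword arguments as completion()).
response = completion_with_retries(model="gpt-3.5-turbo", messages=messages)

# On a context window error, look up a larger-context equivalent and retry
# (assumption: .get() returns None when no larger equivalent exists).
bigger_model = litellm.longer_context_model_fallback_dict.get("gpt-3.5-turbo")
if bigger_model:
    response = litellm.completion(model=bigger_model, messages=messages)
```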