From ba6df5fbb90e0e86060c27c7b3a426578524b924 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 17 Aug 2023 16:06:32 -0700
Subject: [PATCH] updates TG_AI

---
 docs/my-website/docs/completion/supported.md | 54 ++++++++++++++------
 docs/my-website/docusaurus.config.js | 16 ++----
 docs/my-website/sidebars.js | 7 ++-
 litellm/__init__.py | 1 +
 litellm/main.py | 4 +-
 litellm/tests/test_completion.py | 2 +-
 6 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/docs/my-website/docs/completion/supported.md b/docs/my-website/docs/completion/supported.md
index 7ae64024a..2719929b4 100644
--- a/docs/my-website/docs/completion/supported.md
+++ b/docs/my-website/docs/completion/supported.md
@@ -1,4 +1,10 @@
-# Generation/Completion/Chat Completion Models
+# Supported Chat, Completion Models
+
+## API Keys
+liteLLM reads API keys set in environment variables or in your key manager.
+liteLLM standardizes key naming in the following format:
+`PROVIDER_API_KEY`, for example `OPENAI_API_KEY`, `TOGETHERAI_API_KEY`, or `HUGGINGFACE_API_KEY`. In addition, liteLLM also accepts each provider's own naming convention for keys.
+For example, both `HF_TOKEN` and `HUGGINGFACE_API_KEY` will work for Hugging Face models.
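+
+A minimal sketch of setting a key and calling `completion` (the key value is a placeholder, and `gpt-3.5-turbo` is just an illustrative model):
+
+```python
+import os
+from litellm import completion
+
+# liteLLM picks this up from the environment at call time
+os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder, use your real key
+
+messages = [{"role": "user", "content": "Hey, how's it going?"}]
+response = completion(model="gpt-3.5-turbo", messages=messages)
+print(response)
+```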

### OpenAI Chat Completion Models

@@ -49,6 +55,7 @@ VertexAI requires you to set `application_default_credentials.json`, this can be

| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |

### Hugging Face Inference API

@@ -64,10 +71,10 @@ Here are some examples of supported models:

| Model Name | Function Call | Required OS Variables |
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
-| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
+| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
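+
+A sketch of one of these calls, assuming a valid Hugging Face token is set (the response follows the OpenAI format):
+
+```python
+import os
+from litellm import completion
+
+os.environ["HUGGINGFACE_API_KEY"] = "hf_..."  # placeholder token
+
+messages = [{"role": "user", "content": "Write a one-line poem"}]
+# custom_llm_provider routes the request to the Hugging Face Inference API
+response = completion(
+    model="google/flan-t5-xxl",
+    messages=messages,
+    custom_llm_provider="huggingface",
+)
+print(response)
+```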

### AI21 Models
| Model Name | Function Call | Required OS Variables |
@@ -82,10 +89,23 @@ Here are some examples of supported models:
|------------------|--------------------------------------------|--------------------------------------|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |

-### BaseTen Models
+### Together AI Models
+liteLLM supports `non-streaming` and `streaming` requests to all models on https://api.together.xyz/ (see the streaming sketch after the table below).
+
+Example TogetherAI usage - note: liteLLM supports all models deployed on Together AI
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-13b-chat | `completion('togethercomputer/LLaMA-2-13b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/code-and-talk-v1 | `completion('togethercomputer/code-and-talk-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/creative-v1 | `completion('togethercomputer/creative-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/yourmodel | `completion('togethercomputer/yourmodel', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
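+
+A minimal sketch of a streaming request, assuming `TOGETHERAI_API_KEY` is set (chunks arrive in the OpenAI streaming format):
+
+```python
+import os
+from litellm import completion
+
+os.environ["TOGETHERAI_API_KEY"] = "..."  # placeholder key
+
+messages = [{"role": "user", "content": "Tell me a joke"}]
+# stream=True returns an iterator of incremental chunks instead of one response
+response = completion("togethercomputer/llama-2-70b-chat", messages, stream=True)
+for chunk in response:
+    print(chunk)
+```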
+
+
+### Baseten Models
Baseten provides infrastructure to deploy and serve ML models (https://www.baseten.co/). Use liteLLM to easily call models deployed on Baseten.

-
+Example Baseten usage - note: liteLLM supports all models deployed on Baseten
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|------------------------------------|
| Falcon 7B | `completion(model='', messages=messages, custom_llm_provider="baseten")` | `os.environ['BASETEN_API_KEY']` |
@@ -99,13 +119,13 @@ All the text models from [OpenRouter](https://openrouter.ai/docs) are supported
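+For example, a rough sketch (the site URL and app name are illustrative; OpenRouter uses them to identify the calling app):
+
+```python
+import os
+from litellm import completion
+
+os.environ["OPENROUTER_API_KEY"] = "..."  # placeholder key
+os.environ["OR_SITE_URL"] = "https://example.com"  # illustrative
+os.environ["OR_APP_NAME"] = "my-app"  # illustrative
+
+messages = [{"role": "user", "content": "Hello from liteLLM"}]
+response = completion("openai/gpt-3.5-turbo", messages)
+print(response)
+```
+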
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
\ No newline at end of file
+| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
\ No newline at end of file
diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js
index e1e5cbb01..2a414618f 100644
--- a/docs/my-website/docusaurus.config.js
+++ b/docs/my-website/docusaurus.config.js
@@ -80,35 +80,27 @@ const config = {
       {
         title: 'Community',
         items: [
-          {
-            label: 'Stack Overflow',
-            href: 'https://stackoverflow.com/questions/tagged/docusaurus',
-          },
           {
             label: 'Discord',
-            href: 'https://discordapp.com/invite/docusaurus',
+            href: 'https://discord.com/invite/wuPM9dRgDw',
           },
           {
             label: 'Twitter',
-            href: 'https://twitter.com/docusaurus',
+            href: 'https://twitter.com/LiteLLM',
           },
         ],
       },
       {
         title: 'More',
         items: [
-          {
-            label: 'Blog',
-            to: '/blog',
-          },
           {
             label: 'GitHub',
-            href: 'https://github.com/facebook/docusaurus',
+            href: 'https://github.com/BerriAI/litellm/',
           },
         ],
       },
     ],
-    copyright: `Copyright © ${new Date().getFullYear()} My Project, Inc. Built with Docusaurus.`,
+    copyright: `Copyright © ${new Date().getFullYear()} liteLLM`,
   },
   prism: {
     theme: lightCodeTheme,
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 09978e440..ba82855d1 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -22,13 +22,18 @@ const sidebars = {
     {
       type: 'category',
       label: 'completion_function',
-      items: ['completion/input', 'completion/supported','completion/output'],
+      items: ['completion/input','completion/output'],
     },
     {
       type: 'category',
       label: 'embedding_function',
       items: ['embedding/supported_embedding'],
     },
+    {
+      type: 'category',
+      label: 'Supported Chat, Completion Models',
+      items: ['completion/supported'],
+    },
     {
       type: 'category',
       label: 'Tutorials',
diff --git a/litellm/__init__.py b/litellm/__init__.py
index d0ef3d981..0fda1f351 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -17,6 +17,7 @@ vertex_project = None
 vertex_location = None
 caching = False
 hugging_api_token = None
+togetherai_api_key = None
 model_cost = {
     "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
     "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
diff --git a/litellm/main.py b/litellm/main.py
index 7959a4306..af385b3c0 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -282,9 +282,9 @@ def completion(
             response = CustomStreamWrapper(model_response, model, custom_llm_provider="huggingface")
             return response
         response = model_response
-    elif custom_llm_provider == "together_ai":
+    elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
         import requests
-        TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY")
+        TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY") or api_key or litellm.togetherai_api_key
         headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
         endpoint = 'https://api.together.xyz/inference'
         prompt = " ".join([message["content"] for message in messages])  # TODO: Add chat support for together AI
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 79e18a0f3..863dc7c45 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -232,7 +232,7 @@ def test_completion_replicate_stability():
 def test_completion_together_ai():
     model_name = "togethercomputer/llama-2-70b-chat"
     try:
-        response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
+        response = completion(model=model_name, messages=messages)
         # Add any assertions here to check the response
         print(response)
     except Exception as e: