forked from phoenix/litellm-mirror
updates TG_AI
parent b077110da5
commit ba6df5fbb9
6 changed files with 51 additions and 33 deletions
@@ -1,4 +1,10 @@
-# Generation/Completion/Chat Completion Models
+# Supported Chat, Completion Models
+
+## API Keys
+
+liteLLM reads keys set in environment variables or in your key manager.
+
+liteLLM standardizes key naming in the following format:
+`PROVIDER_API_KEY`, for example `OPENAI_API_KEY`, `TOGETHERAI_API_KEY`, or `HUGGINGFACE_API_KEY`. In addition, liteLLM also accepts each provider's own naming convention for keys.
+Example: both `HF_TOKEN` and `HUGGINGFACE_API_KEY` will work for Hugging Face models.
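
For illustration, a minimal sketch of setting a key and calling `completion` (the model name, key values, and prompt are placeholders):

```python
import os

os.environ["OPENAI_API_KEY"] = "sk-..."  # standardized PROVIDER_API_KEY naming
# os.environ["HF_TOKEN"] = "hf_..."      # provider-specific names also work

from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
print(response)
```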

### OpenAI Chat Completion Models

@@ -49,6 +55,7 @@ VertexAI requires you to set `application_default_credentials.json`, this can be
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-instant-1.2 | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |

### Hugging Face Inference API

@@ -64,10 +71,10 @@ Here are some examples of supported models:
| Model Name | Function Call | Required OS Variables |
|------------------|-------------------------------------------------------------------------------------|--------------------------------------|
-| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
-| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HF_TOKEN']` |
+| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
+| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, custom_llm_provider="huggingface")` | `os.environ['HUGGINGFACE_API_KEY']` |
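
A short sketch of one of the calls above, reading the key from `HUGGINGFACE_API_KEY` (the prompt and key value are placeholders):

```python
import os
from litellm import completion

os.environ["HUGGINGFACE_API_KEY"] = "hf_..."  # HF_TOKEN works as well

messages = [{"role": "user", "content": "def fibonacci(n):"}]
response = completion(
    model="bigcode/starcoder",
    messages=messages,
    custom_llm_provider="huggingface",  # route the call to the Hugging Face Inference API
)
print(response)
```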

### AI21 Models

| Model Name | Function Call | Required OS Variables |

@@ -82,10 +89,23 @@ Here are some examples of supported models:
|------------------|--------------------------------------------|--------------------------------------|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |

-### BaseTen Models
+### Together AI Models
+liteLLM supports `non-streaming` and `streaming` requests to all models on https://api.together.xyz/
+
+# Example TogetherAI Usage - Note: liteLLM supports all models deployed on TogetherAI
+| Model Name | Function Call | Required OS Variables |
+|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
+| togethercomputer/llama-2-70b-chat | `completion('togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/LLaMA-2-13b-chat | `completion('togethercomputer/LLaMA-2-13b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/code-and-talk-v1 | `completion('togethercomputer/code-and-talk-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/creative-v1 | `completion('togethercomputer/creative-v1', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/yourmodel | `completion('togethercomputer/yourmodel', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
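
A sketch of the calls above in both modes; `stream=True` is assumed to follow liteLLM's usual streaming interface, where the response is iterated chunk by chunk:

```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "..."  # placeholder

messages = [{"role": "user", "content": "Write a limerick about llamas."}]

# non-streaming request
response = completion("togethercomputer/llama-2-70b-chat", messages)
print(response)

# streaming request (assumed interface: iterate over returned chunks)
response = completion("togethercomputer/llama-2-70b-chat", messages, stream=True)
for chunk in response:
    print(chunk)
```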

+### Baseten Models
+Baseten provides infrastructure to deploy and serve ML models (https://www.baseten.co/). Use liteLLM to easily call models deployed on Baseten.
+
+# Example Baseten Usage - Note: liteLLM supports all models deployed on Baseten
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|------------------------------------|
| Falcon 7B | `completion(model='<your model version id>', messages=messages, custom_llm_provider="baseten")` | `os.environ['BASETEN_API_KEY']` |
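
A sketch of the Baseten call above; `<your model version id>` stays a placeholder for the version id of your own deployment:

```python
import os
from litellm import completion

os.environ["BASETEN_API_KEY"] = "..."  # placeholder

messages = [{"role": "user", "content": "Hello from Baseten"}]
response = completion(
    model="<your model version id>",  # replace with your deployed model's version id
    messages=messages,
    custom_llm_provider="baseten",
)
print(response)
```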

@@ -99,13 +119,13 @@ All the text models from [OpenRouter](https://openrouter.ai/docs) are supported
| Model Name | Function Call | Required OS Variables |
|------------------|--------------------------------------------|--------------------------------------|
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OR_API_KEY']` |
+| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
+| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,`os.environ['OR_APP_NAME']`,`os.environ['OPENROUTER_API_KEY']` |
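
A sketch of an OpenRouter call with the three environment variables the table requires (all values are placeholders):

```python
import os
from litellm import completion

os.environ["OR_SITE_URL"] = "https://example.com"  # your site URL
os.environ["OR_APP_NAME"] = "my-app"               # your app name
os.environ["OPENROUTER_API_KEY"] = "..."

messages = [{"role": "user", "content": "Hi!"}]
response = completion("openai/gpt-3.5-turbo", messages)
print(response)
```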

@@ -80,35 +80,27 @@ const config = {
          {
            title: 'Community',
            items: [
              {
                label: 'Stack Overflow',
                href: 'https://stackoverflow.com/questions/tagged/docusaurus',
              },
              {
                label: 'Discord',
-               href: 'https://discordapp.com/invite/docusaurus',
+               href: 'https://discord.com/invite/wuPM9dRgDw',
              },
              {
                label: 'Twitter',
-               href: 'https://twitter.com/docusaurus',
+               href: 'https://twitter.com/LiteLLM',
              },
            ],
          },
          {
            title: 'More',
            items: [
              {
                label: 'Blog',
                to: '/blog',
              },
              {
                label: 'GitHub',
-               href: 'https://github.com/facebook/docusaurus',
+               href: 'https://github.com/BerriAI/litellm/',
              },
            ],
          },
        ],
-       copyright: `Copyright © ${new Date().getFullYear()} My Project, Inc. Built with Docusaurus.`,
+       copyright: `Copyright © ${new Date().getFullYear()} liteLLM`,
      },
      prism: {
        theme: lightCodeTheme,

@@ -22,13 +22,18 @@ const sidebars = {
    {
      type: 'category',
      label: 'completion_function',
-     items: ['completion/input', 'completion/supported','completion/output'],
+     items: ['completion/input','completion/output'],
    },
    {
      type: 'category',
      label: 'embedding_function',
      items: ['embedding/supported_embedding'],
    },
+   {
+     type: 'category',
+     label: 'Supported Chat, Completion Models',
+     items: ['completion/supported'],
+   },
    {
      type: 'category',
      label: 'Tutorials',

@@ -17,6 +17,7 @@ vertex_project = None
vertex_location = None
caching = False
hugging_api_token = None
+togetherai_api_key = None
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name

@@ -282,9 +282,9 @@ def completion(
                response = CustomStreamWrapper(model_response, model, custom_llm_provider="huggingface")
                return response
            response = model_response
-        elif custom_llm_provider == "together_ai":
+        elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
            import requests
-            TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY")
+            TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY") or api_key or litellm.togetherai_api_key
            headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
            endpoint = 'https://api.together.xyz/inference'
            prompt = " ".join([message["content"] for message in messages])  # TODO: Add chat support for together AI
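
The new token lookup falls through four sources in order. A standalone sketch of the same resolution chain, substituting `os.environ` for `get_secret` (an assumption made for illustration):

```python
import os

def resolve_together_key(api_key=None, module_level_key=None):
    # mirrors the fallback order in the diff:
    #   get_secret("TOGETHER_AI_TOKEN") or get_secret("TOGETHERAI_API_KEY")
    #       or api_key or litellm.togetherai_api_key
    return (
        os.environ.get("TOGETHER_AI_TOKEN")
        or os.environ.get("TOGETHERAI_API_KEY")
        or api_key
        or module_level_key
    )
```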

@@ -232,7 +232,7 @@ def test_completion_replicate_stability():
def test_completion_together_ai():
    model_name = "togethercomputer/llama-2-70b-chat"
    try:
-        response = completion(model=model_name, messages=messages, custom_llm_provider="together_ai")
+        response = completion(model=model_name, messages=messages)
        # Add any assertions here to check the response
        print(response)
    except Exception as e: