(feat) add XAI ChatCompletion Support (#6373)

* init commit for XAI * add full logic for xai chat completion * test_completion_xai * docs xAI * add xai/grok-beta * test_xai_chat_config_get_openai_compatible_provider_info * test_xai_chat_config_map_openai_params * add xai streaming test
2024-11-01 20:37:09 +05:30 · 2024-11-01 20:37:09 +05:30 · 5652c375b3
commit 5652c375b3
parent 9545b0e5cd
9 changed files with 400 additions and 0 deletions
--- a/docs/my-website/docs/providers/xai.md
+++ b/docs/my-website/docs/providers/xai.md
@ -0,0 +1,146 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 # XAI
 https://docs.x.ai/docs
 :::tip
 **We support ALL XAI models. Just add the `xai/` prefix, i.e. set `model=xai/<any-model-on-xai>`, when sending litellm requests**
 :::
 ## API Key
 ```python
 # env variable
 os.environ['XAI_API_KEY']
 ```
 ## Sample Usage
 ```python
 from litellm import completion
 import os
 os.environ['XAI_API_KEY'] = ""
 response = completion(
    model="xai/grok-beta",
    messages=[
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ],
    max_tokens=10,
    response_format={ "type": "json_object" },
    seed=123,
    stop=["\n\n"],
    temperature=0.2,
    top_p=0.9,
    tool_choice="auto",
    tools=[],
    user="user",
 )
 print(response)
 ```
 ## Sample Usage - Streaming
 ```python
 from litellm import completion
 import os
 os.environ['XAI_API_KEY'] = ""
 response = completion(
    model="xai/grok-beta",
    messages=[
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ],
    stream=True,
    max_tokens=10,
    response_format={ "type": "json_object" },
    seed=123,
    stop=["\n\n"],
    temperature=0.2,
    top_p=0.9,
    tool_choice="auto",
    tools=[],
    user="user",
 )
 for chunk in response:
    print(chunk)
 ```
 ## Usage with LiteLLM Proxy Server
 Here's how to call an XAI model with the LiteLLM Proxy Server
 1. Modify the config.yaml 
  ```yaml
  model_list:
    - model_name: my-model
      litellm_params:
        model: xai/<your-model-name>  # add xai/ prefix to route as XAI provider
        api_key: api-key                 # api key to send to your model
  ```
 2. Start the proxy 
  ```bash
  $ litellm --config /path/to/config.yaml
  ```
 3. Send Request to LiteLLM Proxy Server
  <Tabs>
  <TabItem value="openai" label="OpenAI Python v1.0.0+">
  ```python
  import openai
  client = openai.OpenAI(
      api_key="sk-1234",             # pass litellm proxy key, if you're using virtual keys
      base_url="http://0.0.0.0:4000" # litellm-proxy-base url
  )
  response = client.chat.completions.create(
      model="my-model",
      messages = [
          {
              "role": "user",
              "content": "what llm are you"
          }
      ],
  )
  print(response)
  ```
  </TabItem>
  <TabItem value="curl" label="curl">
  ```shell
  curl --location 'http://0.0.0.0:4000/chat/completions' \
      --header 'Authorization: Bearer sk-1234' \
      --header 'Content-Type: application/json' \
      --data '{
      "model": "my-model",
      "messages": [
          {
          "role": "user",
          "content": "what llm are you"
          }
      ]
  }'
  ```
  </TabItem>
  </Tabs>
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@ -155,6 +155,7 @@ const sidebars = {
        "providers/watsonx",
        "providers/predibase",
        "providers/nvidia_nim", 
        "providers/xai",
        "providers/cerebras", 
        "providers/volcano", 
        "providers/triton-inference-server",
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -490,6 +490,7 @@ openai_compatible_endpoints: List = [
    "app.empower.dev/api/v1",
    "inference.friendli.ai/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
 ]
 # this is maintained for Exception Mapping
@ -507,6 +508,7 @@ openai_compatible_providers: List = [
    "deepinfra",
    "perplexity",
    "xinference",
    "xai",
    "together_ai",
    "fireworks_ai",
    "empower",
@ -717,6 +719,7 @@ class LlmProviders(str, Enum):
    OPENAI = "openai"
    OPENAI_LIKE = "openai_like"  # embedding only
    JINA_AI = "jina_ai"
    XAI = "xai"
    CUSTOM_OPENAI = "custom_openai"
    TEXT_COMPLETION_OPENAI = "text-completion-openai"
    COHERE = "cohere"
@ -1021,6 +1024,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
    FireworksAIEmbeddingConfig,
 )
 from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
 from .llms.xai.chat.xai_transformation import XAIChatConfig
 from .llms.volcengine import VolcEngineConfig
 from .llms.text_completion_codestral import MistralTextCompletionConfig
 from .llms.AzureOpenAI.azure import (
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@ -480,6 +480,13 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
        ) = litellm.JinaAIEmbeddingConfig()._get_openai_compatible_provider_info(
            api_base, api_key
        )
    elif custom_llm_provider == "xai":
        (
            api_base,
            dynamic_api_key,
        ) = litellm.XAIChatConfig()._get_openai_compatible_provider_info(
            api_base, api_key
        )
    elif custom_llm_provider == "voyage":
        # voyage is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.voyageai.com/v1
        api_base = (
--- a/litellm/llms/xai/chat/xai_transformation.py
+++ b/litellm/llms/xai/chat/xai_transformation.py
@ -0,0 +1,56 @@
 import types
 from typing import Literal, Optional, Tuple, Union
 from litellm.secret_managers.main import get_secret_str
 from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig
 # Fallback xAI endpoint, used when neither an explicit api_base argument nor
 # the XAI_API_BASE secret supplies one.
 XAI_API_BASE = "https://api.x.ai/v1"
 class XAIChatConfig(OpenAIGPTConfig):
    """Chat-completion settings for the xAI provider, layered on OpenAIGPTConfig."""

    def _get_openai_compatible_provider_info(
        self, api_base: Optional[str], api_key: Optional[str]
    ) -> Tuple[Optional[str], Optional[str]]:
        """Resolve the endpoint and credential to use for an xAI request.

        Explicit arguments win. Otherwise the ``XAI_API_BASE`` / ``XAI_API_KEY``
        secrets are consulted, and the endpoint finally falls back to the
        module-level ``XAI_API_BASE`` constant.

        Returns:
            A ``(api_base, api_key)`` tuple; the key may be ``None`` when
            nothing supplies it.
        """
        resolved_base = api_base
        if not resolved_base:
            # Secret lookup happens only when the caller gave no endpoint.
            resolved_base = get_secret_str("XAI_API_BASE") or XAI_API_BASE  # type: ignore
        resolved_key = api_key if api_key else get_secret_str("XAI_API_KEY")
        return resolved_base, resolved_key

    def get_supported_openai_params(self, model: str) -> list:
        """Return the OpenAI parameter names this config forwards for ``model``.

        The list does not depend on ``model``; a new list is built per call.
        """
        supported = (
            "frequency_penalty",
            "logit_bias",
            "logprobs",
            "max_tokens",
            "messages",
            "model",
            "n",
            "presence_penalty",
            "response_format",
            "seed",
            "stop",
            "stream",
            "stream_options",
            "temperature",
            "tool_choice",
            "tools",
            "top_logprobs",
            "top_p",
            "user",
        )
        return list(supported)

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool = False,
    ) -> dict:
        """Copy supported OpenAI parameters into ``optional_params``.

        ``max_completion_tokens`` is renamed to ``max_tokens`` (whatever its
        value). Other supported parameters are copied only when their value is
        not ``None``; everything else is ignored. ``drop_params`` is accepted
        but not used here.

        Returns:
            The same ``optional_params`` dict, mutated in place.
        """
        allowed = self.get_supported_openai_params(model=model)
        for name, val in non_default_params.items():
            if name == "max_completion_tokens":
                optional_params["max_tokens"] = val
                continue
            if name in allowed and val is not None:
                optional_params[name] = val
        return optional_params
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -1502,6 +1502,17 @@
        "mode": "completion",
        "source": "https://docs.mistral.ai/capabilities/code_generation/"
    },
    "xai/grok-beta": {
        "max_tokens": 131072,
        "max_input_tokens": 131072,
        "max_output_tokens": 131072,
        "input_cost_per_token": 0.000005,
        "output_cost_per_token": 0.000015,
        "litellm_provider": "xai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "deepseek-coder": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2680,6 +2680,7 @@ def get_optional_params(  # noqa: PLR0915
            and custom_llm_provider != "groq"
            and custom_llm_provider != "nvidia_nim"
            and custom_llm_provider != "cerebras"
            and custom_llm_provider != "xai"
            and custom_llm_provider != "ai21_chat"
            and custom_llm_provider != "volcengine"
            and custom_llm_provider != "deepseek"
@ -3456,6 +3457,16 @@ def get_optional_params(  # noqa: PLR0915
            optional_params=optional_params,
            model=model,
        )
    elif custom_llm_provider == "xai":
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
        _check_valid_arg(supported_params=supported_params)
        optional_params = litellm.XAIChatConfig().map_openai_params(
            model=model,
            non_default_params=non_default_params,
            optional_params=optional_params,
        )
    elif custom_llm_provider == "ai21_chat":
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
@ -4184,6 +4195,8 @@ def get_supported_openai_params(  # noqa: PLR0915
            return litellm.nvidiaNimEmbeddingConfig.get_supported_openai_params()
    elif custom_llm_provider == "cerebras":
        return litellm.CerebrasConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "xai":
        return litellm.XAIChatConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "ai21_chat":
        return litellm.AI21ChatConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "volcengine":
@ -5344,6 +5357,11 @@ def validate_environment(  # noqa: PLR0915
                keys_in_environment = True
            else:
                missing_keys.append("CEREBRAS_API_KEY")
        elif custom_llm_provider == "xai":
            if "XAI_API_KEY" in os.environ:
                keys_in_environment = True
            else:
                missing_keys.append("XAI_API_KEY")
        elif custom_llm_provider == "ai21_chat":
            if "AI21_API_KEY" in os.environ:
                keys_in_environment = True
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -1502,6 +1502,17 @@
        "mode": "completion",
        "source": "https://docs.mistral.ai/capabilities/code_generation/"
    },
    "xai/grok-beta": {
        "max_tokens": 131072,
        "max_input_tokens": 131072,
        "max_output_tokens": 131072,
        "input_cost_per_token": 0.000005,
        "output_cost_per_token": 0.000015,
        "litellm_provider": "xai",
        "mode": "chat",
        "supports_function_calling": true,
        "supports_vision": true
    },
    "deepseek-coder": {
        "max_tokens": 4096,
        "max_input_tokens": 128000,
--- a/tests/llm_translation/test_xai.py
+++ b/tests/llm_translation/test_xai.py
@ -0,0 +1,146 @@
 import json
 import os
 import sys
 from datetime import datetime
 from unittest.mock import AsyncMock
 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import httpx
 import pytest
 from respx import MockRouter
 import litellm
 from litellm import Choices, Message, ModelResponse, EmbeddingResponse, Usage
 from litellm import completion
 from unittest.mock import patch
 from litellm.llms.xai.chat.xai_transformation import XAIChatConfig, XAI_API_BASE
 def test_xai_chat_config_get_openai_compatible_provider_info():
    """Check how XAIChatConfig resolves api_base / api_key.

    Covers three cases: nothing supplied, an explicit api_key, and values
    coming from the XAI_API_BASE / XAI_API_KEY environment variables.
    """
    config = XAIChatConfig()

    # patch.dict restores os.environ on exit, so removing XAI_API_BASE here
    # keeps the default-value assertions independent of the developer's shell.
    # NOTE(review): assumes get_secret_str reads os.environ here - confirm.
    with patch.dict("os.environ"):
        os.environ.pop("XAI_API_BASE", None)

        # Test with default values
        api_base, api_key = config._get_openai_compatible_provider_info(
            api_base=None, api_key=None
        )
        assert api_base == XAI_API_BASE
        assert api_key == os.environ.get("XAI_API_KEY")

        # Test with custom API key
        custom_api_key = "test_api_key"
        api_base, api_key = config._get_openai_compatible_provider_info(
            api_base=None, api_key=custom_api_key
        )
        assert api_base == XAI_API_BASE
        assert api_key == custom_api_key

    # Test with custom environment variables for api_base and api_key
    with patch.dict(
        "os.environ",
        {"XAI_API_BASE": "https://env.x.ai/v1", "XAI_API_KEY": "env_api_key"},
    ):
        api_base, api_key = config._get_openai_compatible_provider_info(None, None)
        assert api_base == "https://env.x.ai/v1"
        assert api_key == "env_api_key"
 def test_xai_chat_config_map_openai_params():
    """
    XAI is OpenAI compatible, with one rename:
    - max_completion_tokens -> max_tokens

    Supported parameters must pass through unchanged and unknown ones
    must be left out of the result.
    """
    # Parameters expected to come back exactly as they went in.
    passthrough = {
        "frequency_penalty": 0.5,
        "logit_bias": {"50256": -100},
        "logprobs": 5,
        "messages": [{"role": "user", "content": "Hello"}],
        "model": "xai/grok-beta",
        "n": 2,
        "presence_penalty": 0.2,
        "response_format": {"type": "json_object"},
        "seed": 42,
        "stop": ["END"],
        "stream": True,
        "stream_options": {},
        "temperature": 0.7,
        "tool_choice": "auto",
        "tools": [{"type": "function", "function": {"name": "get_weather"}}],
        "top_logprobs": 3,
        "top_p": 0.9,
        "user": "test_user",
    }
    non_default_params = {
        "max_completion_tokens": 100,
        **passthrough,
        "unsupported_param": "value",
    }

    result = XAIChatConfig().map_openai_params(
        non_default_params, {}, "xai/grok-beta"
    )

    # max_completion_tokens -> max_tokens
    assert result["max_tokens"] == 100
    # Every supported parameter is present with its original value
    for key, expected in passthrough.items():
        assert result[key] == expected
    assert result["stream"] is True
    # The unsupported parameter is not in the result
    assert "unsupported_param" not in result
@pytest.mark.parametrize("stream", [False, True])
def test_completion_xai(stream):
    """Run a chat completion against xai/grok-beta, streaming and non-streaming.

    Any exception raised along the way is reported through pytest.fail.
    """
    try:
        litellm.set_verbose = True
        response = completion(
            model="xai/grok-beta",
            messages=[
                {"role": "system", "content": "You're a good bot"},
                {"role": "user", "content": "Hey"},
            ],
            stream=stream,
        )
        print(response)

        if stream is not True:
            # Non-streaming: a single ModelResponse with message content.
            assert response is not None
            assert isinstance(response, litellm.ModelResponse)
            assert response.choices[0].message.content is not None
        else:
            # Streaming: every chunk is a ModelResponse with a streaming choice.
            for part in response:
                print(part)
                assert part is not None
                assert isinstance(part, litellm.ModelResponse)
                assert isinstance(part.choices[0], litellm.utils.StreamingChoices)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")