Merge pull request #5079 from BerriAI/litellm_add_pydantic_model_support

feat(utils.py): support passing response_format as pydantic model
Krish Dholakia 2024-08-07 14:43:05 -07:00 committed by GitHub
commit 2e434d56e3
10 changed files with 353 additions and 100 deletions


@@ -47,7 +47,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.34.0
pip install openai==1.40.0
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -165,7 +165,7 @@ jobs:
pip install "pytest==7.3.1"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
pip install openai
pip install "openai==1.40.0"
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"


@@ -69,13 +69,10 @@ To use Structured Outputs, simply specify
response_format: { "type": "json_schema", "json_schema": … , "strict": true }
```
Works for OpenAI models
:::info
Support for passing in a pydantic object to litellm sdk will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842)
:::
Works for:
- OpenAI models
- Google AI Studio - Gemini models
- Vertex AI models (Gemini + Anthropic)
<Tabs>
<TabItem value="sdk" label="SDK">
@@ -89,36 +86,15 @@ os.environ["OPENAI_API_KEY"] = ""
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
resp = completion(
model="gpt-4o-2024-08-06",
messages=messages,
response_format={
"type": "json_schema",
"json_schema": {
"name": "math_reasoning",
"schema": {
"type": "object",
"properties": {
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" }
},
"required": ["explanation", "output"],
"additionalProperties": False
}
},
"final_answer": { "type": "string" }
},
"required": ["steps", "final_answer"],
"additionalProperties": False
},
"strict": True
},
}
response_format=CalendarEvent
)
print("Received={}".format(resp))
@@ -229,15 +205,15 @@ curl -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
## Validate JSON Schema
:::info
Support for doing this in the openai 'json_schema' format will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842)
Not all Vertex AI models support passing a `json_schema` to them (e.g. `gemini-1.5-flash`). To solve this, LiteLLM supports client-side validation of the JSON schema.
:::
```
litellm.enable_json_schema_validation=True
```
If `litellm.enable_json_schema_validation=True` is set, LiteLLM will validate the json response using `jsonvalidator`.
For VertexAI models, LiteLLM supports passing the `response_schema` and validating the JSON output.
This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models.
[**See Code**](https://github.com/BerriAI/litellm/blob/671d8ac496b6229970c7f2a3bdedd6cb84f0746b/litellm/litellm_core_utils/json_validation_rule.py#L4)
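For illustration only (not part of this diff), client-side schema validation amounts to roughly the following sketch. It assumes the `jsonschema` package; LiteLLM's actual helper is the one linked above and may differ in details.

```python
# Minimal sketch of client-side JSON schema validation (assumes `jsonschema`).
# LiteLLM's actual helper is linked above and may differ.
import json
from jsonschema import ValidationError, validate

def validate_llm_response(schema: dict, response: str) -> None:
    try:
        validate(instance=json.loads(response), schema=schema)
    except (json.JSONDecodeError, ValidationError) as e:
        raise ValueError(f"LLM response failed schema validation: {e}") from e
```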
<Tabs>
@@ -245,33 +221,28 @@ This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models.
```python
# !gcloud auth application-default login - run this to add vertex credentials to your env
import litellm, os
from litellm import completion
from pydantic import BaseModel
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
}
messages=[
{"role": "system", "content": "Extract the event information."},
{"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
]
litellm.enable_json_schema_validation = True
litellm.set_verbose = True # see the raw request made by litellm
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
resp = completion(
model="vertex_ai_beta/gemini-1.5-pro",
model="gemini/gemini-1.5-pro",
messages=messages,
response_format={
"type": "json_object",
"response_schema": response_schema,
"enforce_validation": True, # client-side json schema validation
},
vertex_location="us-east5",
response_format=CalendarEvent,
)
print("Received={}".format(resp))
@@ -279,26 +250,63 @@ print("Received={}".format(resp))
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Create config.yaml
```yaml
model_list:
- model_name: "gemini-1.5-flash"
litellm_params:
model: "gemini/gemini-1.5-flash"
api_key: os.environ/GEMINI_API_KEY
litellm_settings:
enable_json_schema_validation: True
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_API_KEY" \
-d '{
"model": "vertex_ai_beta/gemini-1.5-pro",
"messages": [{"role": "user", "content": "List 5 cookie recipes"}]
"model": "gemini-1.5-flash",
"messages": [
{"role": "system", "content": "Extract the event information."},
{"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
],
"response_format": {
"type": "json_object",
"enforce_validation: true,
"response_schema": {
"type": "array",
"items": {
"type": "json_schema",
"json_schema": {
"name": "math_reasoning",
"schema": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" }
},
"required": ["explanation", "output"],
"additionalProperties": false
}
},
"final_answer": { "type": "string" }
},
"required": ["recipe_name"],
"required": ["steps", "final_answer"],
"additionalProperties": false
},
"strict": true
},
}
},


@@ -144,6 +144,7 @@ enable_preview_features: bool = False
return_response_headers: bool = (
False # get response headers from LLM Api providers - example x-remaining-requests,
)
enable_json_schema_validation: bool = False
##################
logging: bool = True
enable_caching_on_provider_specific_optional_params: bool = (


@@ -148,7 +148,12 @@ class VertexAIAnthropicConfig:
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "response_format" and "response_schema" in value:
if param == "response_format" and isinstance(value, dict):
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
"""
When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
- You usually want to provide a single tool
@@ -162,7 +167,7 @@ class VertexAIAnthropicConfig:
name="json_tool_call",
input_schema={
"type": "object",
"properties": {"values": value["response_schema"]}, # type: ignore
"properties": {"values": json_schema}, # type: ignore
},
)
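Taken together, the change above amounts to roughly this standalone sketch (not part of the diff): accept either the `response_schema` shape or the OpenAI-style `json_schema` shape, and wrap whichever schema is found into Anthropic's forced `json_tool_call` tool. A plain dict stands in for the `AnthropicMessagesTool` object used in the real code.

```python
from typing import Optional

def response_format_to_json_tool(value: dict) -> Optional[dict]:
    # Sketch of the mapping in the diff above; a plain dict stands in for
    # the AnthropicMessagesTool object used in the real code.
    json_schema: Optional[dict] = None
    if "response_schema" in value:
        json_schema = value["response_schema"]
    elif "json_schema" in value:
        json_schema = value["json_schema"]["schema"]
    if json_schema is None:
        return None
    return {
        "name": "json_tool_call",
        "input_schema": {
            "type": "object",
            "properties": {"values": json_schema},
        },
    }
```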


@@ -181,13 +181,17 @@ class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty
optional_params["stop_sequences"] = value
if param == "max_tokens":
optional_params["max_output_tokens"] = value
if param == "response_format" and value["type"] == "json_object": # type: ignore
if param == "response_format": # type: ignore
if value["type"] == "json_object": # type: ignore
optional_params["response_mime_type"] = "application/json"
elif value["type"] == "text": # type: ignore
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value: # type: ignore
optional_params["response_schema"] = value["response_schema"] # type: ignore
if value["type"] == "json_object": # type: ignore
optional_params["response_mime_type"] = "application/json"
elif value["type"] == "text": # type: ignore
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value: # type: ignore
optional_params["response_schema"] = value["response_schema"] # type: ignore
elif value["type"] == "json_schema": # type: ignore
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
if param == "tools" and isinstance(value, list):
gtool_func_declarations = []
for tool in value:
@@ -396,6 +400,9 @@ class VertexGeminiConfig:
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value:
optional_params["response_schema"] = value["response_schema"]
elif value["type"] == "json_schema": # type: ignore
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
if param == "frequency_penalty":
optional_params["frequency_penalty"] = value
if param == "presence_penalty":


@@ -31,6 +31,7 @@ from typing import (
Literal,
Mapping,
Optional,
Type,
Union,
)
@@ -608,7 +609,7 @@ def completion(
logit_bias: Optional[dict] = None,
user: Optional[str] = None,
# openai v1.0+ new params
response_format: Optional[dict] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
seed: Optional[int] = None,
tools: Optional[List] = None,
tool_choice: Optional[Union[str, dict]] = None,


@@ -11,4 +11,5 @@ model_list:
model: "gpt-4o"
litellm_settings:
enable_json_schema_validation: true
fallbacks: [{"gpt-3.5-turbo": ["gpt-4", "gpt-4o"]}]


@@ -1192,7 +1192,15 @@ def vertex_httpx_mock_post_valid_response(*args, **kwargs):
"role": "model",
"parts": [
{
"text": '[{"recipe_name": "Chocolate Chip Cookies"}, {"recipe_name": "Oatmeal Raisin Cookies"}, {"recipe_name": "Peanut Butter Cookies"}, {"recipe_name": "Sugar Cookies"}, {"recipe_name": "Snickerdoodles"}]\n'
"text": """{
"recipes": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Sugar Cookies"},
{"recipe_name": "Snickerdoodles"}
]
}"""
}
],
},
@@ -1253,13 +1261,15 @@ def vertex_httpx_mock_post_valid_response_anthropic(*args, **kwargs):
"id": "toolu_vrtx_01YMnYZrToPPfcmY2myP2gEB",
"name": "json_tool_call",
"input": {
"values": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Snickerdoodle Cookies"},
{"recipe_name": "Sugar Cookies"},
]
"values": {
"recipes": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Snickerdoodle Cookies"},
{"recipe_name": "Sugar Cookies"},
]
}
},
}
],
@@ -1377,16 +1387,19 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
from litellm.llms.custom_httpx.http_handler import HTTPHandler
response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
"type": "object",
"properties": {
"recipes": {
"type": "array",
"items": {
"type": "object",
"properties": {"recipe_name": {"type": "string"}},
"required": ["recipe_name"],
},
},
"required": ["recipe_name"],
}
},
"required": ["recipes"],
"additionalProperties": False,
}
client = HTTPHandler()
@@ -1448,6 +1461,105 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
)
@pytest.mark.parametrize(
"model, vertex_location, supports_response_schema",
[
("vertex_ai_beta/gemini-1.5-pro-001", "us-central1", True),
("gemini/gemini-1.5-pro", None, True),
("vertex_ai_beta/gemini-1.5-flash", "us-central1", False),
("vertex_ai/claude-3-5-sonnet@20240620", "us-east5", False),
],
)
@pytest.mark.parametrize(
"invalid_response",
[True, False],
)
@pytest.mark.parametrize(
"enforce_validation",
[True, False],
)
@pytest.mark.asyncio
async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
model,
supports_response_schema,
vertex_location,
invalid_response,
enforce_validation,
):
from typing import List
from pydantic import BaseModel
load_vertex_ai_credentials()
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.set_verbose = True
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
from litellm.llms.custom_httpx.http_handler import HTTPHandler
class Recipe(BaseModel):
recipe_name: str
class ResponseSchema(BaseModel):
recipes: List[Recipe]
client = HTTPHandler()
httpx_response = MagicMock()
if invalid_response is True:
if "claude" in model:
httpx_response.side_effect = (
vertex_httpx_mock_post_invalid_schema_response_anthropic
)
else:
httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response
else:
if "claude" in model:
httpx_response.side_effect = vertex_httpx_mock_post_valid_response_anthropic
else:
httpx_response.side_effect = vertex_httpx_mock_post_valid_response
with patch.object(client, "post", new=httpx_response) as mock_call:
print("SENDING CLIENT POST={}".format(client.post))
try:
resp = completion(
model=model,
messages=messages,
response_format=ResponseSchema,
vertex_location=vertex_location,
client=client,
)
print("Received={}".format(resp))
if invalid_response is True and enforce_validation is True:
pytest.fail("Expected this to fail")
except litellm.JSONSchemaValidationError as e:
if invalid_response is False:
pytest.fail("Expected this to pass. Got={}".format(e))
mock_call.assert_called_once()
if "claude" not in model:
print(mock_call.call_args.kwargs)
print(mock_call.call_args.kwargs["json"]["generationConfig"])
if supports_response_schema:
assert (
"response_schema"
in mock_call.call_args.kwargs["json"]["generationConfig"]
)
else:
assert (
"response_schema"
not in mock_call.call_args.kwargs["json"]["generationConfig"]
)
assert (
"Use this JSON schema:"
in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1][
"text"
]
)
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.asyncio
async def test_gemini_pro_httpx_custom_api_base(provider):


@@ -2130,6 +2130,43 @@ def test_completion_openai():
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_pydantic():
try:
litellm.set_verbose = True
from pydantic import BaseModel
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
print(f"api key: {os.environ['OPENAI_API_KEY']}")
litellm.api_key = os.environ["OPENAI_API_KEY"]
response = completion(
model="gpt-4o-2024-08-06",
messages=[{"role": "user", "content": "Hey"}],
max_tokens=10,
metadata={"hi": "bye"},
response_format=CalendarEvent,
)
print("This is the response object\n", response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
cost = completion_cost(completion_response=response)
print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
assert response_str == response_str_2
assert type(response_str) == str
assert len(response_str) > 1
litellm.api_key = None
except Timeout as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_organization():
try:
litellm.set_verbose = True
@@ -4062,7 +4099,7 @@ def test_completion_gemini(model):
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
pytest.fail(f"Error occurred:{e}")
# test_completion_gemini()


@@ -45,6 +45,8 @@ import requests
import tiktoken
from httpx import Proxy
from httpx._utils import get_environment_proxies
from openai.lib import _parsing, _pydantic
from openai.types.chat.completion_create_params import ResponseFormat
from pydantic import BaseModel
from tokenizers import Tokenizer
@@ -158,6 +160,7 @@ from typing import (
Literal,
Optional,
Tuple,
Type,
Union,
cast,
get_args,
@@ -629,8 +632,8 @@ def client(original_function):
call_type == CallTypes.completion.value
or call_type == CallTypes.acompletion.value
):
is_coroutine = check_coroutine(original_function)
if is_coroutine == True:
is_coroutine = check_coroutine(original_response)
if is_coroutine is True:
pass
else:
if isinstance(original_response, ModelResponse):
@@ -643,6 +646,49 @@ def client(original_function):
input=model_response, model=model
)
### JSON SCHEMA VALIDATION ###
if litellm.enable_json_schema_validation is True:
try:
if (
optional_params is not None
and "response_format" in optional_params
and optional_params["response_format"]
is not None
):
json_response_format: Optional[dict] = None
if (
isinstance(
optional_params["response_format"],
dict,
)
and optional_params[
"response_format"
].get("json_schema")
is not None
):
json_response_format = optional_params[
"response_format"
]
elif (
_parsing._completions.is_basemodel_type(
optional_params["response_format"]
)
):
json_response_format = (
type_to_response_format_param(
response_format=optional_params[
"response_format"
]
)
)
if json_response_format is not None:
litellm.litellm_core_utils.json_validation_rule.validate_schema(
schema=json_response_format[
"json_schema"
]["schema"],
response=model_response,
)
except TypeError:
pass
if (
optional_params is not None
and "response_format" in optional_params
@@ -2806,6 +2852,11 @@ def get_optional_params(
message=f"Function calling is not supported by {custom_llm_provider}.",
)
if "response_format" in non_default_params:
non_default_params["response_format"] = type_to_response_format_param(
response_format=non_default_params["response_format"]
)
if "tools" in non_default_params and isinstance(
non_default_params, list
): # fixes https://github.com/BerriAI/litellm/issues/4933
@@ -6104,6 +6155,36 @@ def _should_retry(status_code: int):
return False
def type_to_response_format_param(
response_format: Optional[Union[Type[BaseModel], dict]],
) -> Optional[dict]:
"""
Re-implementation of openai's 'type_to_response_format_param' function
Used for converting pydantic object to api schema.
"""
if response_format is None:
return None
if isinstance(response_format, dict):
return response_format
# type checkers don't narrow the negation of a `TypeGuard` as it isn't
# a safe default behaviour but we know that at this point the `response_format`
# can only be a `type`
if not _parsing._completions.is_basemodel_type(response_format):
raise TypeError(f"Unsupported response_format type - {response_format}")
return {
"type": "json_schema",
"json_schema": {
"schema": _pydantic.to_strict_json_schema(response_format),
"name": response_format.__name__,
"strict": True,
},
}
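A quick usage sketch for the new helper (assuming pydantic v2; the resulting dict mirrors the shape returned above):

```python
from pydantic import BaseModel

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

converted = type_to_response_format_param(response_format=CalendarEvent)
# converted == {
#     "type": "json_schema",
#     "json_schema": {
#         "schema": {...},        # strict JSON schema derived from CalendarEvent
#         "name": "CalendarEvent",
#         "strict": True,
#     },
# }

# dicts pass through unchanged, and None stays None
assert type_to_response_format_param(response_format=None) is None
```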
def _get_retry_after_from_exception_header(
response_headers: Optional[httpx.Headers] = None,
):