Merge pull request #4478 from BerriAI/litellm_support_response_schema_param_vertex_ai_old

feat(vertex_httpx.py): support the 'response_schema' param for older vertex ai models
2024-06-29 20:17:39 -07:00 · 2024-06-29 20:17:39 -07:00 · 58d0330cd7
commit 58d0330cd7
parent eac42bb473 25b1463920
14 changed files with 444 additions and 171 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -66,7 +66,7 @@ jobs:
            pip install "pydantic==2.7.1"
            pip install "diskcache==5.6.1"
            pip install "Pillow==10.3.0"
-            pip install "ijson==3.2.3"
+            pip install "jsonschema==4.22.0"
      - save_cache:
          paths:
            - ./venv
@ -128,7 +128,7 @@ jobs:
            pip install jinja2
            pip install tokenizers
            pip install openai
-            pip install ijson
+            pip install jsonschema
      - run:
          name: Run tests
          command: |
@ -183,7 +183,7 @@ jobs:
            pip install numpydoc
            pip install prisma            
            pip install fastapi            
-            pip install ijson            
+            pip install jsonschema   
            pip install "httpx==0.24.1"
            pip install "gunicorn==21.2.0"
            pip install "anyio==3.7.1"
--- a/litellm/init.py
+++ b/litellm/init.py
@ -749,6 +749,7 @@ from .utils import (
    create_pretrained_tokenizer,
    create_tokenizer,
    supports_function_calling,
    supports_response_schema,
    supports_parallel_function_calling,
    supports_vision,
    supports_system_messages,
@ -852,6 +853,7 @@ from .exceptions import (
    APIResponseValidationError,
    UnprocessableEntityError,
    InternalServerError,
    JSONSchemaValidationError,
    LITELLM_EXCEPTION_TYPES,
 )
 from .budget_manager import BudgetManager
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@ -551,7 +551,7 @@ class APIError(openai.APIError):  # type: ignore
        message,
        llm_provider,
        model,
-        request: httpx.Request,
+        request: Optional[httpx.Request] = None,
        litellm_debug_info: Optional[str] = None,
        max_retries: Optional[int] = None,
        num_retries: Optional[int] = None,
@ -563,6 +563,8 @@ class APIError(openai.APIError):  # type: ignore
        self.litellm_debug_info = litellm_debug_info
        self.max_retries = max_retries
        self.num_retries = num_retries
        if request is None:
            request = httpx.Request(method="POST", url="https://api.openai.com/v1")
        super().__init__(self.message, request=request, body=None)  # type: ignore
    def __str__(self):
@ -664,6 +666,22 @@ class OpenAIError(openai.OpenAIError):  # type: ignore
        self.llm_provider = "openai"
 class JSONSchemaValidationError(APIError):
    def __init__(
        self, model: str, llm_provider: str, raw_response: str, schema: str
    ) -> None:
        self.raw_response = raw_response
        self.schema = schema
        self.model = model
        message = "litellm.JSONSchemaValidationError: model={}, returned an invalid response={}, for schema={}.\nAccess raw response with `e.raw_response`".format(
            model, raw_response, schema
        )
        self.message = message
        super().__init__(
            model=model, message=message, llm_provider=llm_provider, status_code=500
        )
 LITELLM_EXCEPTION_TYPES = [
    AuthenticationError,
    NotFoundError,
@ -682,6 +700,7 @@ LITELLM_EXCEPTION_TYPES = [
    APIResponseValidationError,
    OpenAIError,
    InternalServerError,
    JSONSchemaValidationError,
 ]
--- a/litellm/litellm_core_utils/json_validation_rule.py
+++ b/litellm/litellm_core_utils/json_validation_rule.py
@ -0,0 +1,23 @@
 import json
 def validate_schema(schema: dict, response: str):
    """
    Validate if the returned json response follows the schema.
    Params:
    - schema - dict: JSON schema
    - response - str: Received json response as string.
    """
    from jsonschema import ValidationError, validate
    from litellm import JSONSchemaValidationError
    response_dict = json.loads(response)
    try:
        validate(response_dict, schema=schema)
    except ValidationError:
        raise JSONSchemaValidationError(
            model="", llm_provider="", raw_response=response, schema=json.dumps(schema)
        )
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@ -2033,6 +2033,50 @@ def function_call_prompt(messages: list, functions: list):
    return messages
 def response_schema_prompt(model: str, response_schema: dict) -> str:
    """
    Decides if a user-defined custom prompt or default needs to be used
    Returns the prompt str that's passed to the model as a user message
    """
    custom_prompt_details: Optional[dict] = None
    response_schema_as_message = [
        {"role": "user", "content": "{}".format(response_schema)}
    ]
    if f"{model}/response_schema_prompt" in litellm.custom_prompt_dict:
        custom_prompt_details = litellm.custom_prompt_dict[
            f"{model}/response_schema_prompt"
        ]  # allow user to define custom response schema prompt by model
    elif "response_schema_prompt" in litellm.custom_prompt_dict:
        custom_prompt_details = litellm.custom_prompt_dict["response_schema_prompt"]
    if custom_prompt_details is not None:
        return custom_prompt(
            role_dict=custom_prompt_details["roles"],
            initial_prompt_value=custom_prompt_details["initial_prompt_value"],
            final_prompt_value=custom_prompt_details["final_prompt_value"],
            messages=response_schema_as_message,
        )
    else:
        return default_response_schema_prompt(response_schema=response_schema)
 def default_response_schema_prompt(response_schema: dict) -> str:
    """
    Used if provider/model doesn't support 'response_schema' param.
    This is the default prompt. Allow user to override this with a custom_prompt.
    """
    prompt_str = """Use this JSON schema: 
    ```json 
    {}
    ```""".format(
        response_schema
    )
    return prompt_str
 # Custom prompt template
 def custom_prompt(
    role_dict: dict,
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@ -12,6 +12,7 @@ import requests  # type: ignore
 from pydantic import BaseModel
 import litellm
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.prompt_templates.factory import (
    convert_to_anthropic_image_obj,
@ -328,11 +329,14 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
    contents: List[ContentType] = []
    msg_i = 0
    try:
        while msg_i < len(messages):
            user_content: List[PartType] = []
            init_msg_i = msg_i
            ## MERGE CONSECUTIVE USER CONTENT ##
-        while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
+            while (
                msg_i < len(messages) and messages[msg_i]["role"] in user_message_types
            ):
                if isinstance(messages[msg_i]["content"], list):
                    _parts: List[PartType] = []
                    for element in messages[msg_i]["content"]:
@ -375,7 +379,9 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
                    "tool_calls", []
                ):  # support assistant tool invoke conversion
                    assistant_content.extend(
-                    convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
+                        convert_to_gemini_tool_call_invoke(
                            messages[msg_i]["tool_calls"]
                        )
                    )
                else:
                    assistant_text = (
@ -400,8 +406,9 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
                        messages[msg_i]
                    )
                )
        return contents
    except Exception as e:
        raise e
 def _get_client_cache_key(model: str, vertex_project: str, vertex_location: str):
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@ -12,7 +12,6 @@ from functools import partial
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 import httpx  # type: ignore
 import ijson
 import requests  # type: ignore
 import litellm
@ -21,7 +20,10 @@ import litellm.litellm_core_utils.litellm_logging
 from litellm import verbose_logger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-from litellm.llms.prompt_templates.factory import convert_url_to_base64
+from litellm.llms.prompt_templates.factory import (
    convert_url_to_base64,
    response_schema_prompt,
 )
 from litellm.llms.vertex_ai import _gemini_convert_messages_with_history
 from litellm.types.llms.openai import (
    ChatCompletionResponseMessage,
@ -1011,6 +1013,22 @@ class VertexLLM(BaseLLM):
            if len(system_prompt_indices) > 0:
                for idx in reversed(system_prompt_indices):
                    messages.pop(idx)
        # Checks for 'response_schema' support - if passed in
        if "response_schema" in optional_params:
            supports_response_schema = litellm.supports_response_schema(
                model=model, custom_llm_provider="vertex_ai"
            )
            if supports_response_schema is False:
                user_response_schema_message = response_schema_prompt(
                    model=model, response_schema=optional_params.get("response_schema")  # type: ignore
                )
                messages.append(
                    {"role": "user", "content": user_response_schema_message}
                )
                optional_params.pop("response_schema")
        try:
            content = _gemini_convert_messages_with_history(messages=messages)
            tools: Optional[Tools] = optional_params.pop("tools", None)
            tool_choice: Optional[ToolConfig] = optional_params.pop("tool_choice", None)
@ -1040,6 +1058,8 @@ class VertexLLM(BaseLLM):
                headers["Authorization"] = f"Bearer {auth_header}"
            if extra_headers is not None:
                headers.update(extra_headers)
        except Exception as e:
            raise e
        ## LOGGING
        logging_obj.pre_call(
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -1538,6 +1538,7 @@
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_tool_choice": true, 
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-pro-preview-0215": { 
@ -1563,6 +1564,7 @@
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_tool_choice": true, 
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-pro-preview-0409": {
@ -1587,6 +1589,7 @@
        "mode": "chat",
        "supports_function_calling": true,
        "supports_tool_choice": true,
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-flash": {
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@ -880,10 +880,141 @@ Using this JSON schema:
        mock_call.assert_called_once()
-@pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
+def vertex_httpx_mock_post_valid_response(*args, **kwargs):
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.headers = {"Content-Type": "application/json"}
    mock_response.json.return_value = {
        "candidates": [
            {
                "content": {
                    "role": "model",
                    "parts": [
                        {
                            "text": '[{"recipe_name": "Chocolate Chip Cookies"}, {"recipe_name": "Oatmeal Raisin Cookies"}, {"recipe_name": "Peanut Butter Cookies"}, {"recipe_name": "Sugar Cookies"}, {"recipe_name": "Snickerdoodles"}]\n'
                        }
                    ],
                },
                "finishReason": "STOP",
                "safetyRatings": [
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.09790669,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.11736965,
                    },
                    {
                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.1261379,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.08601588,
                    },
                    {
                        "category": "HARM_CATEGORY_HARASSMENT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.083441176,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.0355444,
                    },
                    {
                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.071981624,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.08108212,
                    },
                ],
            }
        ],
        "usageMetadata": {
            "promptTokenCount": 60,
            "candidatesTokenCount": 55,
            "totalTokenCount": 115,
        },
    }
    return mock_response
 def vertex_httpx_mock_post_invalid_schema_response(*args, **kwargs):
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.headers = {"Content-Type": "application/json"}
    mock_response.json.return_value = {
        "candidates": [
            {
                "content": {
                    "role": "model",
                    "parts": [
                        {"text": '[{"recipe_world": "Chocolate Chip Cookies"}]\n'}
                    ],
                },
                "finishReason": "STOP",
                "safetyRatings": [
                    {
                        "category": "HARM_CATEGORY_HATE_SPEECH",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.09790669,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.11736965,
                    },
                    {
                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.1261379,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.08601588,
                    },
                    {
                        "category": "HARM_CATEGORY_HARASSMENT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.083441176,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.0355444,
                    },
                    {
                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                        "probability": "NEGLIGIBLE",
                        "probabilityScore": 0.071981624,
                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
                        "severityScore": 0.08108212,
                    },
                ],
            }
        ],
        "usageMetadata": {
            "promptTokenCount": 60,
            "candidatesTokenCount": 55,
            "totalTokenCount": 115,
        },
    }
    return mock_response
@pytest.mark.parametrize(
    "model, supports_response_schema",
    [
        ("vertex_ai_beta/gemini-1.5-pro-001", True),
        ("vertex_ai_beta/gemini-1.5-flash", False),
    ],
 )
@pytest.mark.parametrize(
    "invalid_response",
    [True, False],
 )
@pytest.mark.parametrize(
    "enforce_validation",
    [True, False],
 )
@pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_args_sent_httpx(
    model, supports_response_schema, invalid_response, enforce_validation
 ):
    load_vertex_ai_credentials()
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    litellm.set_verbose = True
    messages = [{"role": "user", "content": "List 5 cookie recipes"}]
    from litellm.llms.custom_httpx.http_handler import HTTPHandler
@ -903,25 +1034,46 @@ async def test_gemini_pro_json_schema_httpx(provider):
    client = HTTPHandler()
-    with patch.object(client, "post", new=MagicMock()) as mock_call:
+    httpx_response = MagicMock()
    if invalid_response is True:
        httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response
    else:
        httpx_response.side_effect = vertex_httpx_mock_post_valid_response
    with patch.object(client, "post", new=httpx_response) as mock_call:
        try:
-            response = completion(
+            _ = completion(
-                model="vertex_ai_beta/gemini-1.5-pro-001",
+                model=model,
                messages=messages,
                response_format={
                    "type": "json_object",
                    "response_schema": response_schema,
                    "enforce_validation": enforce_validation,
                },
                client=client,
            )
-        except Exception as e:
+            if invalid_response is True and enforce_validation is True:
-            pass
+                pytest.fail("Expected this to fail")
        except litellm.JSONSchemaValidationError as e:
            if invalid_response is False:
                pytest.fail("Expected this to pass. Got={}".format(e))
        mock_call.assert_called_once()
        print(mock_call.call_args.kwargs)
        print(mock_call.call_args.kwargs["json"]["generationConfig"])
        if supports_response_schema:
            assert (
-            "response_schema" in mock_call.call_args.kwargs["json"]["generationConfig"]
+                "response_schema"
                in mock_call.call_args.kwargs["json"]["generationConfig"]
            )
        else:
            assert (
                "response_schema"
                not in mock_call.call_args.kwargs["json"]["generationConfig"]
            )
            assert (
                "Use this JSON schema:"
                in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1]["text"]
            )
@ -959,48 +1111,6 @@ async def test_gemini_pro_httpx_custom_api_base(provider):
        assert "hello" in mock_call.call_args.kwargs["headers"]
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
@pytest.mark.asyncio
 async def test_gemini_pro_httpx_custom_api_base_streaming_real_call(
    provider, sync_mode
 ):
    load_vertex_ai_credentials()
    import random
    litellm.set_verbose = True
    messages = [
        {
            "role": "user",
            "content": "Hey, how's it going?",
        }
    ]
    vertex_region = random.sample(["asia-southeast1", "us-central1"], k=1)[0]
    if sync_mode is True:
        response = completion(
            model="vertex_ai_beta/gemini-1.5-flash",
            messages=messages,
            api_base="https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash",
            stream=True,
            vertex_region=vertex_region,
        )
        for chunk in response:
            print(chunk)
    else:
        response = await litellm.acompletion(
            model="vertex_ai_beta/gemini-1.5-flash",
            messages=messages,
            api_base="https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash",
            stream=True,
            vertex_region=vertex_region,
        )
        async for chunk in response:
            print(chunk)
@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
@pytest.mark.parametrize("sync_mode", [True])
@pytest.mark.parametrize("provider", ["vertex_ai"])
--- a/litellm/tests/test_jwt.py
+++ b/litellm/tests/test_jwt.py
@ -61,7 +61,6 @@ async def test_token_single_public_key():
    import jwt
    jwt_handler = JWTHandler()
    backend_keys = {
        "keys": [
            {
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -48,6 +48,7 @@ from tokenizers import Tokenizer
 import litellm
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching import DualCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
@ -580,7 +581,7 @@ def client(original_function):
        else:
            return False
-    def post_call_processing(original_response, model):
+    def post_call_processing(original_response, model, optional_params: Optional[dict]):
        try:
            if original_response is None:
                pass
@ -595,11 +596,47 @@ def client(original_function):
                        pass
                    else:
                        if isinstance(original_response, ModelResponse):
-                            model_response = original_response.choices[
+                            model_response: Optional[str] = original_response.choices[
                                0
-                            ].message.content
+                            ].message.content  # type: ignore
                            if model_response is not None:
                                ### POST-CALL RULES ###
-                            rules_obj.post_call_rules(input=model_response, model=model)
+                                rules_obj.post_call_rules(
                                    input=model_response, model=model
                                )
                                ### JSON SCHEMA VALIDATION ###
                                if (
                                    optional_params is not None
                                    and "response_format" in optional_params
                                    and isinstance(
                                        optional_params["response_format"], dict
                                    )
                                    and "type" in optional_params["response_format"]
                                    and optional_params["response_format"]["type"]
                                    == "json_object"
                                    and "response_schema"
                                    in optional_params["response_format"]
                                    and isinstance(
                                        optional_params["response_format"][
                                            "response_schema"
                                        ],
                                        dict,
                                    )
                                    and "enforce_validation"
                                    in optional_params["response_format"]
                                    and optional_params["response_format"][
                                        "enforce_validation"
                                    ]
                                    is True
                                ):
                                    # schema given, json response expected, and validation enforced
                                    litellm.litellm_core_utils.json_validation_rule.validate_schema(
                                        schema=optional_params["response_format"][
                                            "response_schema"
                                        ],
                                        response=model_response,
                                    )
        except Exception as e:
            raise e
@ -868,7 +905,11 @@ def client(original_function):
                return result
            ### POST-CALL RULES ###
-            post_call_processing(original_response=result, model=model or None)
+            post_call_processing(
                original_response=result,
                model=model or None,
                optional_params=kwargs,
            )
            # [OPTIONAL] ADD TO CACHE
            if (
@ -1317,7 +1358,9 @@ def client(original_function):
                ).total_seconds() * 1000  # return response latency in ms like openai
            ### POST-CALL RULES ###
-            post_call_processing(original_response=result, model=model)
+            post_call_processing(
                original_response=result, model=model, optional_params=kwargs
            )
            # [OPTIONAL] ADD TO CACHE
            if (
@ -1880,8 +1923,7 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
    Returns:
    bool: True if the model supports response_schema, False otherwise.
-    Raises:
+    Does not raise error. Defaults to 'False'. Outputs logging.error.
    Exception: If the given model is not found in model_prices_and_context_window.json.
    """
    try:
        ## GET LLM PROVIDER ##
@ -1901,9 +1943,10 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
            return True
        return False
    except Exception:
-        raise Exception(
+        verbose_logger.error(
            f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}."
        )
        return False
 def supports_function_calling(model: str) -> bool:
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -1538,6 +1538,7 @@
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_tool_choice": true, 
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-pro-preview-0215": { 
@ -1563,6 +1564,7 @@
        "supports_system_messages": true,
        "supports_function_calling": true,
        "supports_tool_choice": true, 
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-pro-preview-0409": {
@ -1587,6 +1589,7 @@
        "mode": "chat",
        "supports_function_calling": true,
        "supports_tool_choice": true,
        "supports_response_schema": true, 
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
    "gemini-1.5-flash": {
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,7 +27,7 @@ jinja2 = "^3.1.2"
 aiohttp = "*"
 requests = "^2.31.0"
 pydantic = "^2.0.0"
-ijson = "*"
+jsonschema = "^4.22.0"
 uvicorn = {version = "^0.22.0", optional = true}
 gunicorn = {version = "^22.0.0", optional = true}
--- a/requirements.txt
+++ b/requirements.txt
@ -46,5 +46,5 @@ aiohttp==3.9.0 # for network calls
 aioboto3==12.3.0 # for async sagemaker calls
 tenacity==8.2.3  # for retrying requests, when litellm.num_retries set
 pydantic==2.7.1 # proxy + openai req.
-ijson==3.2.3 # for google ai studio streaming
+jsonschema==4.22.0 # validating json schema
 ####