Fix rerank integration test based on client side changes

2025-10-08 13:00:52 +00:00 · 2025-10-01 10:37:58 -07:00 · 2025-10-01 10:37:58 -07:00 · 6b4940806f
commit 6b4940806f
parent bb2eb33fc3
8 changed files with 27 additions and 276 deletions
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -13335,7 +13335,7 @@
        },
        {
            "name": "Inference",
-            "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
            "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
        },
        {
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -9990,13 +9990,16 @@ tags:
    description: ''
  - name: Inference
    description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:

      - LLM models: these models generate "raw" and "chat" (conversational) completions.

      - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
    x-displayName: >-
      Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@@ -8838,7 +8838,8 @@
                "type": "string",
                "enum": [
                    "llm",
-                    "embedding"
+                    "embedding",
+                    "rerank"
                ],
                "title": "ModelType",
                "description": "Enumeration of supported model types in Llama Stack."
@@ -17033,7 +17034,7 @@
                "properties": {
                    "model": {
                        "type": "string",
-                        "description": "The identifier of the reranking model to use."
+                        "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint."
                    },
                    "query": {
                        "oneOf": [
@@ -18456,7 +18457,7 @@
        },
        {
            "name": "Inference",
-            "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
            "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
        },
        {
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -6603,6 +6603,7 @@ components:
      enum:
        - llm
        - embedding
+        - rerank
      title: ModelType
      description: >-
        Enumeration of supported model types in Llama Stack.
@@ -12693,7 +12694,8 @@ components:
        model:
          type: string
          description: >-
-            The identifier of the reranking model to use.
+            The identifier of the reranking model to use. The model must be a reranking
+            model registered with Llama Stack and available via the /models endpoint.
        query:
          oneOf:
            - type: string
@@ -13774,13 +13776,16 @@ tags:
    description: ''
  - name: Inference
    description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:

      - LLM models: these models generate "raw" and "chat" (conversational) completions.

      - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
    x-displayName: >-
      Llama Stack Inference API for generating completions, chat completions, and
      embeddings.