diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index bae5188fa..ec782c331 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,9 +1,18 @@ name: Integration tests on: - pull_request: push: - branches: [main] + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/integration-tests.yml' # This workflow jobs: ollama: @@ -56,8 +65,7 @@ jobs: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | source .venv/bin/activate - # TODO: use "llama stack run" - nohup uv run python -m llama_stack.distribution.server.server --yaml-config ./llama_stack/templates/ollama/run.yaml > server.log 2>&1 & + nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index be4298a98..e6871bf99 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -40,6 +40,7 @@ jobs: matrix: template: ${{ fromJson(needs.generate-matrix.outputs.templates) }} image-type: [venv, container] + fail-fast: false # We want to run all jobs even if some fail steps: - name: Checkout repository @@ -67,7 +68,9 @@ jobs: - name: Run Llama Stack Build run: | - uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test + # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead + # LLAMA_STACK_DIR is set to the current directory so we are building from the source + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test - name: Print dependencies in the image if: matrix.image-type == 'venv' diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index c7a30e9b8..6d6e91f22 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -5,6 +5,14 @@ on: branches: [ main ] pull_request: branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/unit/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/unit-tests.yml' # This workflow workflow_dispatch: jobs: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 072fa21e2..e83e64672 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -77,7 +77,7 @@ repos: name: Distribution Template Codegen additional_dependencies: - uv==0.6.0 - entry: uv run --extra codegen python -m llama_stack.scripts.distro_codegen + entry: uv run --extra codegen ./scripts/distro_codegen.py language: python pass_filenames: false require_serial: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f188f259..e458fec0a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,7 +159,7 @@ LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama ### Updating Provider Configurations -If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `python llama_stack/scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. 
You should not change `docs/source/.../distributions/` files manually as they are auto-generated. +If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. ### Building the Documentation diff --git a/distributions/dependencies.json b/distributions/dependencies.json index d2ed12d3a..33b497a33 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -401,16 +401,13 @@ ], "nvidia": [ "aiosqlite", - "autoevals", "blobfile", "chardet", - "datasets", "faiss-cpu", "fastapi", "fire", "httpx", "matplotlib", - "mcp", "nltk", "numpy", "openai", diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 608237cfd..7df0c901e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -2233,6 +2233,67 @@ } }, "/v1/datasetio/iterrows/{dataset_id}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IterrowsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "DatasetIO" + ], + "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.", + "parameters": [ + { + "name": "dataset_id", + "in": "path", + "description": "The ID of the dataset to get the rows from.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "start_index", + "in": "query", + "description": "Index into dataset for the first row to get. Get all rows if None.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of rows to get.", + "required": false, + "schema": { + "type": "integer" + } + } + ] + } + }, + "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "get": { "responses": { "200": { @@ -6552,100 +6613,14 @@ "const": "factuality", "default": "factuality" }, - "factuality": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" - } - }, - "additionalProperties": false, - "required": [ - "type", - "factuality" - ], - "title": "FactualityGrader" - }, - "FaithfulnessGrader": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "faithfulness", - "default": "faithfulness" - }, - "faithfulness": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." 
- } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" - } - }, - "additionalProperties": false, - "required": [ - "type", - "faithfulness" - ], - "title": "FaithfulnessGrader" - }, - "Grader": { - "type": "object", - "properties": { - "identifier": { + "dataset_id": { "type": "string" }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "grader", - "default": "grader" - }, - "grader": { - "$ref": "#/components/schemas/GraderDefinition" - }, - "description": { - "type": "string" + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + } }, "metadata": { "type": "object", @@ -6679,98 +6654,163 @@ "provider_resource_id", "provider_id", "type", - "grader", + "dataset_id", + "scoring_functions", "metadata" ], - "title": "Grader" + "title": "Benchmark" }, - "GraderDefinition": { + "DataSource": { "oneOf": [ { - "$ref": "#/components/schemas/LlmGrader" + "$ref": "#/components/schemas/URIDataSource" }, { - "$ref": "#/components/schemas/RegexParserGrader" - }, - { - "$ref": "#/components/schemas/EqualityGrader" - }, - { - "$ref": "#/components/schemas/SubsetOfGrader" - }, - { - "$ref": "#/components/schemas/FactualityGrader" - }, - { - "$ref": "#/components/schemas/FaithfulnessGrader" + "$ref": "#/components/schemas/RowsDataSource" } ], "discriminator": { "propertyName": "type", "mapping": { - "llm": "#/components/schemas/LlmGrader", - "regex_parser": "#/components/schemas/RegexParserGrader", - "equality": "#/components/schemas/EqualityGrader", - "subset_of": "#/components/schemas/SubsetOfGrader", - "factuality": "#/components/schemas/FactualityGrader", - "faithfulness": "#/components/schemas/FaithfulnessGrader" + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" } } }, - "LlmGrader": { + "Grader": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "grader", + "default": "grader" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "title": "DatasetPurpose", + "description": "Purpose of the dataset. Each purpose has a required input data schema." 
+ }, + "source": { + "$ref": "#/components/schemas/DataSource" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_resource_id", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset" + }, + "RowsDataSource": { "type": "object", "properties": { "type": { "type": "string", - "const": "llm", - "default": "llm" + "const": "rows", + "default": "rows" }, - "llm": { - "type": "object", - "properties": { - "model": { - "type": "string" - }, - "prompt": { - "type": "string" - }, - "score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - } + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] } }, - "additionalProperties": false, - "required": [ - "model", - "prompt", - "score_regexes", - "aggregation_functions" - ], - "title": "LlmGraderParams" + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" } }, "additionalProperties": false, "required": [ "type", - "llm" + "rows" ], - "title": "LlmGrader" + "title": "RowsDataSource", + "description": "A dataset stored in rows." + }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." }, "RegexParserGrader": { "type": "object", @@ -6819,45 +6859,182 @@ ], "title": "RegexParserGrader" }, - "SubsetOfGrader": { + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding" + ], + "title": "ModelType" + }, + "AgentTurnInputType": { "type": "object", "properties": { "type": { "type": "string", - "const": "subset_of", - "default": "subset_of" - }, - "subset_of": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." 
- } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" + "const": "agent_turn_input", + "default": "agent_turn_input" } }, "additionalProperties": false, "required": [ - "type", - "subset_of" + "type" ], - "title": "SubsetOfGrader" + "title": "AgentTurnInputType" + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType" + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType" + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType" + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType" + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType" + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType" + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType" + }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } }, "Model": { "type": "object", @@ -6913,17 +7090,39 @@ "provider_id", "type", "metadata", - "model_type" + "return_type" ], - "title": "Model" + "title": "ScoringFn" }, - "ModelType": { - "type": "string", - "enum": [ - "llm", - "embedding" + "StringType": { + "type": "object", + "properties": { + "type": { + 
"type": "string", + "const": "string", + "default": "string" + } + }, + "additionalProperties": false, + "required": [ + "type" ], - "title": "ModelType" + "title": "StringType" + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType" }, "Shield": { "type": "object", @@ -8131,7 +8330,7 @@ }, "description": "The rows in the current page." }, - "next_index": { + "next_start_index": { "type": "integer", "description": "Index into dataset for the first row in the next page. None if there are no more rows." } @@ -9440,7 +9639,7 @@ }, "source": { "$ref": "#/components/schemas/DataSource", - "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" }, "metadata": { "type": "object", @@ -9478,50 +9677,6 @@ "purpose", "source" ], - "title": "RegisterDatasetRequest" - }, - "RegisterGraderRequest": { - "type": "object", - "properties": { - "grader": { - "$ref": "#/components/schemas/GraderDefinition", - "description": "The grader definition, E.g. - { \"type\": \"llm\", \"llm\": { \"model\": \"llama-405b\", \"prompt\": \"You are a judge. Score the answer based on the question. {question} {answer}\", } }" - }, - "grader_id": { - "type": "string", - "description": "(Optional) The ID of the grader. If not provided, a random ID will be generated." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Any additional metadata for this grader. - E.g. { \"description\": \"A grader that scores the answer based on the question.\", }" - } - }, - "additionalProperties": false, - "required": [ - "grader" - ], "title": "RegisterGraderRequest" }, "RegisterModelRequest": { @@ -10199,9 +10354,6 @@ { "name": "Files" }, - { - "name": "Graders" - }, { "name": "Inference", "description": "This API provides the raw interface to the underlying models. 
Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", @@ -10254,9 +10406,8 @@ "Benchmarks", "DatasetIO", "Datasets", - "Evaluation", + "Eval", "Files", - "Graders", "Inference", "Inspect", "Models", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 57bf76478..90b04b50a 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1507,6 +1507,50 @@ paths: $ref: '#/components/schemas/InvokeToolRequest' required: true /v1/datasetio/iterrows/{dataset_id}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/IterrowsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + description: >- + Get a paginated list of rows from a dataset. Uses cursor-based pagination. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to get the rows from. + required: true + schema: + type: string + - name: start_index + in: query + description: >- + Index into dataset for the first row to get. Get all rows if None. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of rows to get. + required: false + schema: + type: integer + /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: '200': @@ -4527,255 +4571,6 @@ components: title: URIDataSource description: >- A dataset that can be obtained from a URI. - EqualityGrader: - type: object - properties: - type: - type: string - const: equality - default: equality - equality: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - equality - title: EqualityGrader - FactualityGrader: - type: object - properties: - type: - type: string - const: factuality - default: factuality - factuality: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - factuality - title: FactualityGrader - FaithfulnessGrader: - type: object - properties: - type: - type: string - const: faithfulness - default: faithfulness - faithfulness: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. 
- additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - faithfulness - title: FaithfulnessGrader - Grader: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - const: grader - default: grader - grader: - $ref: '#/components/schemas/GraderDefinition' - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - identifier - - provider_resource_id - - provider_id - - type - - grader - - metadata - title: Grader - GraderDefinition: - oneOf: - - $ref: '#/components/schemas/LlmGrader' - - $ref: '#/components/schemas/RegexParserGrader' - - $ref: '#/components/schemas/EqualityGrader' - - $ref: '#/components/schemas/SubsetOfGrader' - - $ref: '#/components/schemas/FactualityGrader' - - $ref: '#/components/schemas/FaithfulnessGrader' - discriminator: - propertyName: type - mapping: - llm: '#/components/schemas/LlmGrader' - regex_parser: '#/components/schemas/RegexParserGrader' - equality: '#/components/schemas/EqualityGrader' - subset_of: '#/components/schemas/SubsetOfGrader' - factuality: '#/components/schemas/FactualityGrader' - faithfulness: '#/components/schemas/FaithfulnessGrader' - LlmGrader: - type: object - properties: - type: - type: string - const: llm - default: llm - llm: - type: object - properties: - model: - type: string - prompt: - type: string - score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - model - - prompt - - score_regexes - - aggregation_functions - title: LlmGraderParams - additionalProperties: false - required: - - type - - llm - title: LlmGrader - RegexParserGrader: - type: object - properties: - type: - type: string - const: regex_parser - default: regex_parser - regex_parser: - type: object - properties: - parsing_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - parsing_regexes - - aggregation_functions - title: RegexParserGraderParams - additionalProperties: false - required: - - type - - regex_parser - title: RegexParserGrader - SubsetOfGrader: - type: object - properties: - type: - type: string - const: subset_of - default: subset_of - subset_of: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. 
- additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - subset_of - title: SubsetOfGrader Model: type: object properties: @@ -4817,6 +4612,224 @@ components: - llm - embedding title: ModelType + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + additionalProperties: false + required: + - type + title: AgentTurnInputType + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + additionalProperties: false + required: + - type + title: ArrayType + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + additionalProperties: false + required: + - type + title: BooleanType + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + additionalProperties: false + required: + - type + title: ChatCompletionInputType + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + additionalProperties: false + required: + - type + title: CompletionInputType + JsonType: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. + EqualityGrader: + type: object + properties: + type: + type: string + const: equality + default: equality + equality: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. 
+ additionalProperties: false + required: + - aggregation_functions + title: BasicGraderParams + additionalProperties: false + required: + - type + title: ObjectType + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + const: scoring_function + default: scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_resource_id + - provider_id + - type + - grader + - metadata + - return_type + title: ScoringFn + StringType: + type: object + properties: + type: + type: string + const: string + default: string + additionalProperties: false + required: + - type + title: StringType + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + additionalProperties: false + required: + - type + title: UnionType Shield: type: object properties: @@ -5580,7 +5593,7 @@ components: - type: array - type: object description: The rows in the current page. - next_index: + next_start_index: type: integer description: >- Index into dataset for the first row in the next page. None if there are @@ -6461,12 +6474,14 @@ components: source: $ref: '#/components/schemas/DataSource' description: >- - The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" - } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri", - "uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": - "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", - "rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, ] } ] } + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. 
Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } metadata: type: object additionalProperties: @@ -6488,37 +6503,6 @@ components: - purpose - source title: RegisterDatasetRequest - RegisterGraderRequest: - type: object - properties: - grader: - $ref: '#/components/schemas/GraderDefinition' - description: >- - The grader definition, E.g. - { "type": "llm", "llm": { "model": "llama-405b", - "prompt": "You are a judge. Score the answer based on the question. {question} - {answer}", } } - grader_id: - type: string - description: >- - (Optional) The ID of the grader. If not provided, a random ID will be - generated. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Any additional metadata for this grader. - E.g. { "description": - "A grader that scores the answer based on the question.", } - additionalProperties: false - required: - - grader - title: RegisterGraderRequest RegisterModelRequest: type: object properties: @@ -6951,9 +6935,10 @@ tags: - name: Benchmarks - name: DatasetIO - name: Datasets - - name: Evaluation + - name: Eval + x-displayName: >- + Llama Stack Evaluation API for running evaluations on model and agent candidates. - name: Files - - name: Graders - name: Inference description: >- This API provides the raw interface to the underlying models. Two kinds of models @@ -6988,9 +6973,8 @@ x-tagGroups: - Benchmarks - DatasetIO - Datasets - - Evaluation + - Eval - Files - - Graders - Inference - Inspect - Models diff --git a/docs/source/contributing/new_api_provider.md b/docs/source/contributing/new_api_provider.md index a72f71319..c412a350b 100644 --- a/docs/source/contributing/new_api_provider.md +++ b/docs/source/contributing/new_api_provider.md @@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla - Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.) - Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally. - Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary. -- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`llama_stack/scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. 
+- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. Here are some example PRs to help you get started: diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index efa0a2d74..774d5ec1b 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -6,13 +6,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | +| datasetio | `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::nvidia` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| safety | `remote::nvidia` | +| scoring | `inline::basic` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `inline::rag-runtime` | | vector_io | `inline::faiss` | @@ -20,8 +20,10 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: -- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`) - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) +- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) +- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) +- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) ### Models diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md index 26b81cf92..0b84027f0 100644 --- a/docs/source/references/llama_stack_client_cli_reference.md +++ b/docs/source/references/llama_stack_client_cli_reference.md @@ -6,17 +6,32 @@ The `llama-stack-client` CLI allows you to query information about the distribut ### `llama-stack-client` ```bash -llama-stack-client -h +llama-stack-client +Usage: llama-stack-client [OPTIONS] COMMAND [ARGS]... -usage: llama-stack-client [-h] {models,memory_banks,shields} ... + Welcome to the LlamaStackClient CLI -Welcome to the LlamaStackClient CLI +Options: + --version Show the version and exit. + --endpoint TEXT Llama Stack distribution endpoint + --api-key TEXT Llama Stack distribution API key + --config TEXT Path to config file + --help Show this message and exit. -options: - -h, --help show this help message and exit - -subcommands: - {models,memory_banks,shields} +Commands: + configure Configure Llama Stack Client CLI. + datasets Manage datasets. + eval Run evaluation tasks. + eval_tasks Manage evaluation tasks. + inference Inference (chat). + inspect Inspect server configuration. + models Manage GenAI models. + post_training Post-training. 
+ providers Manage API providers. + scoring_functions Manage scoring functions. + shields Manage safety shield services. + toolgroups Manage available tool groups. + vector_dbs Manage vector databases. ``` ### `llama-stack-client configure` @@ -127,11 +142,11 @@ llama-stack-client vector_dbs list llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ] ``` -Options: -- `--provider-id`: Optional. Provider ID for the vector db -- `--provider-vector-db-id`: Optional. Provider's vector db ID -- `--embedding-model`: Optional. Embedding model to use. Default: "all-MiniLM-L6-v2" -- `--embedding-dimension`: Optional. Dimension of embeddings. Default: 384 +Optional arguments: +- `--provider-id`: Provider ID for the vector db +- `--provider-vector-db-id`: Provider's vector db ID +- `--embedding-model`: Embedding model to use. Default: "all-MiniLM-L6-v2" +- `--embedding-dimension`: Dimension of embeddings. Default: 384 ### `llama-stack-client vector_dbs unregister` ```bash @@ -157,11 +172,13 @@ llama-stack-client shields list llama-stack-client shields register --shield-id [--provider-id ] [--provider-shield-id ] [--params ] ``` -Options: -- `--shield-id`: Required. ID of the shield -- `--provider-id`: Optional. Provider ID for the shield -- `--provider-shield-id`: Optional. Provider's shield ID -- `--params`: Optional. JSON configuration parameters for the shield +Required arguments: +- `--shield-id`: ID of the shield + +Optional arguments: +- `--provider-id`: Provider ID for the shield +- `--provider-shield-id`: Provider's shield ID +- `--params`: JSON configuration parameters for the shield ## Eval Task Management @@ -175,13 +192,15 @@ llama-stack-client benchmarks list llama-stack-client benchmarks register --eval-task-id --dataset-id --scoring-functions [ ...] [--provider-id ] [--provider-eval-task-id ] [--metadata ] ``` -Options: -- `--eval-task-id`: Required. ID of the eval task -- `--dataset-id`: Required. ID of the dataset to evaluate -- `--scoring-functions`: Required. One or more scoring functions to use for evaluation -- `--provider-id`: Optional. Provider ID for the eval task -- `--provider-eval-task-id`: Optional. Provider's eval task ID -- `--metadata`: Optional. Metadata for the eval task in JSON format +Required arguments: +- `--eval-task-id`: ID of the eval task +- `--dataset-id`: ID of the dataset to evaluate +- `--scoring-functions`: One or more scoring functions to use for evaluation + +Optional arguments: +- `--provider-id`: Provider ID for the eval task +- `--provider-eval-task-id`: Provider's eval task ID +- `--metadata`: Metadata for the eval task in JSON format ## Eval execution ### `llama-stack-client eval run-benchmark` @@ -189,11 +208,13 @@ Options: llama-stack-client eval run-benchmark [ ...] --eval-task-config --output-dir [--num-examples ] [--visualize] ``` -Options: -- `--eval-task-config`: Required. Path to the eval task config file in JSON format -- `--output-dir`: Required. Path to the directory where evaluation results will be saved -- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging) -- `--visualize`: Optional flag. 
If set, visualizes evaluation results after completion +Required arguments: +- `--eval-task-config`: Path to the eval task config file in JSON format +- `--output-dir`: Path to the directory where evaluation results will be saved + +Optional arguments: +- `--num-examples`: Number of examples to evaluate (useful for debugging) +- `--visualize`: If set, visualizes evaluation results after completion Example benchmark_config.json: ```json @@ -214,11 +235,13 @@ Example benchmark_config.json: llama-stack-client eval run-scoring --eval-task-config --output-dir [--num-examples ] [--visualize] ``` -Options: -- `--eval-task-config`: Required. Path to the eval task config file in JSON format -- `--output-dir`: Required. Path to the directory where scoring results will be saved -- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging) -- `--visualize`: Optional flag. If set, visualizes scoring results after completion +Required arguments: +- `--eval-task-config`: Path to the eval task config file in JSON format +- `--output-dir`: Path to the directory where scoring results will be saved + +Optional arguments: +- `--num-examples`: Number of examples to evaluate (useful for debugging) +- `--visualize`: If set, visualizes scoring results after completion ## Tool Group Management @@ -230,11 +253,11 @@ llama-stack-client toolgroups list +---------------------------+------------------+------+---------------+ | identifier | provider_id | args | mcp_endpoint | +===========================+==================+======+===============+ -| builtin::code_interpreter | code-interpreter | None | None | +| builtin::code_interpreter | code-interpreter | None | None | +---------------------------+------------------+------+---------------+ -| builtin::rag | rag-runtime | None | None | +| builtin::rag | rag-runtime | None | None | +---------------------------+------------------+------+---------------+ -| builtin::websearch | tavily-search | None | None | +| builtin::websearch | tavily-search | None | None | +---------------------------+------------------+------+---------------+ ``` @@ -250,11 +273,11 @@ Shows detailed information about a specific toolgroup. If the toolgroup is not f llama-stack-client toolgroups register [--provider-id ] [--provider-toolgroup-id ] [--mcp-config ] [--args ] ``` -Options: -- `--provider-id`: Optional. Provider ID for the toolgroup -- `--provider-toolgroup-id`: Optional. Provider's toolgroup ID -- `--mcp-config`: Optional. JSON configuration for the MCP endpoint -- `--args`: Optional. JSON arguments for the toolgroup +Optional arguments: +- `--provider-id`: Provider ID for the toolgroup +- `--provider-toolgroup-id`: Provider's toolgroup ID +- `--mcp-config`: JSON configuration for the MCP endpoint +- `--args`: JSON arguments for the toolgroup ### `llama-stack-client toolgroups unregister` ```bash diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 6079e5b99..b1eaffa17 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -18,11 +18,11 @@ class IterrowsResponse(BaseModel): A paginated list of rows from a dataset. :param data: The rows in the current page. - :param next_index: Index into dataset for the first row in the next page. None if there are no more rows. + :param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows. 
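+
+    Example (hypothetical client-side pagination loop; `datasetio` is an
+    assumed client handle and `process` a user-defined row handler):
+
+        start = 0
+        while start is not None:
+            page = await datasetio.iterrows(dataset_id, start_index=start, limit=100)
+            for row in page.data:
+                process(row)
+            start = page.next_start_index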
""" data: List[Dict[str, Any]] - next_index: Optional[int] = None + next_start_index: Optional[int] = None class DatasetStore(Protocol): @@ -46,9 +46,11 @@ class DatasetIO(Protocol): :param dataset_id: The ID of the dataset to get the rows from. :param start_index: Index into dataset for the first row to get. Get all rows if None. - :param limit: The number of rows to get per page. + :param limit: The number of rows to get. """ ... @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... + async def append_rows( + self, dataset_id: str, rows: List[Dict[str, Any]] + ) -> None: ... diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index 1dd29ad8e..d033d0b70 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -163,7 +163,7 @@ class Datasets(Protocol): ], "answer": "John Doe" } - :param source: The data source of the dataset. Examples: + :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 3887bf4f9..d87e3bd0b 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -38,7 +38,7 @@ from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.resolver import InvalidProviderError from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type -from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty +from llama_stack.distribution.utils.exec import formulate_run_args, run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -213,7 +213,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: config = parse_and_maybe_upgrade_config(config_dict) run_args = formulate_run_args(args.image_type, args.image_name, config, args.template) run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))]) - run_with_pty(run_args) + run_command(run_args) def _generate_run_config( diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index e5686fb10..57a0b28cc 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -82,7 +82,7 @@ class StackRun(Subcommand): from llama_stack.distribution.configure import parse_and_maybe_upgrade_config from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR - from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty + from llama_stack.distribution.utils.exec import formulate_run_args, run_command config_file = Path(args.config) has_yaml_suffix = args.config.endswith(".yaml") @@ -136,4 +136,4 @@ class StackRun(Subcommand): if args.tls_keyfile and args.tls_certfile: run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) - run_with_pty(run_args) + run_command(run_args) diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 0e990d129..a8ee372da 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -6,7 +6,6 @@ import importlib.resources import logging -import sys from pathlib import Path from typing import 
Dict, List @@ -15,7 +14,7 @@ from termcolor import cprint from llama_stack.distribution.datatypes import BuildConfig, Provider from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.exec import run_command, run_with_pty +from llama_stack.distribution.utils.exec import run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -123,11 +122,7 @@ def build_image( if special_deps: args.append("#".join(special_deps)) - is_terminal = sys.stdin.isatty() - if is_terminal: - return_code = run_with_pty(args) - else: - return_code = run_command(args) + return_code = run_command(args) if return_code != 0: log.error( diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 077b396cb..a8346c3b6 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -43,7 +43,7 @@ RED='\033[0;31m' NC='\033[0m' # No Color CONTAINER_BINARY=${CONTAINER_BINARY:-docker} -CONTAINER_OPTS=${CONTAINER_OPTS:-} +CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain} TEMP_DIR=$(mktemp -d) @@ -253,8 +253,7 @@ $CONTAINER_BINARY build \ "${CLI_ARGS[@]}" \ -t "$image_tag" \ -f "$TEMP_DIR/Containerfile" \ - "." \ - --progress=plain + "." # clean up tmp/configs set +x diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py index fb2476767..cf9b0b975 100644 --- a/llama_stack/distribution/providers.py +++ b/llama_stack/distribution/providers.py @@ -8,10 +8,13 @@ from pydantic import BaseModel from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers +from llama_stack.log import get_logger from .datatypes import StackRunConfig from .stack import redact_sensitive_fields +logger = get_logger(name=__name__, category="core") + class ProviderImplConfig(BaseModel): run_config: StackRunConfig @@ -31,6 +34,10 @@ class ProviderImpl(Providers): async def initialize(self) -> None: pass + async def shutdown(self) -> None: + logger.debug("ProviderImpl.shutdown") + pass + async def list_providers(self) -> ListProvidersResponse: run_config = self.config.run_config safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump())) diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 86613dc9c..3bf3c81ce 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -4,13 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import errno import logging import os -import select import signal import subprocess -import sys from termcolor import cprint @@ -88,13 +85,6 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: return run_args -def run_with_pty(command): - if sys.platform.startswith("win"): - return _run_with_pty_win(command) - else: - return _run_with_pty_unix(command) - - def in_notebook(): try: from IPython import get_ipython @@ -108,19 +98,19 @@ def in_notebook(): return True -# run a command in a pseudo-terminal, with interrupt handling, -# useful when you want to run interactive things -def _run_with_pty_unix(command): - import pty - import termios +def run_command(command: list[str]) -> int: + """ + Run a command with interrupt handling and output capture. + Uses subprocess.run with direct stream piping for better performance. 
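+
+    In essence, a sketch of what the body below does:
+
+        result = subprocess.run(command, text=True, check=False)
+        return result.returncode
+
+    wrapped in save/restore of the original SIGINT handler.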
- master, slave = pty.openpty() + Args: + command (list): The command to run. - old_settings = termios.tcgetattr(sys.stdin) + Returns: + int: The return code of the command. + """ original_sigint = signal.getsignal(signal.SIGINT) - ctrl_c_pressed = False - process = None def sigint_handler(signum, frame): nonlocal ctrl_c_pressed @@ -131,106 +121,19 @@ def _run_with_pty_unix(command): # Set up the signal handler signal.signal(signal.SIGINT, sigint_handler) - new_settings = termios.tcgetattr(sys.stdin) - new_settings[3] = new_settings[3] & ~termios.ECHO # Disable echo - new_settings[3] = new_settings[3] & ~termios.ICANON # Disable canonical mode - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings) - - process = subprocess.Popen( + # Run the command with stdout/stderr piped directly to system streams + result = subprocess.run( command, - stdin=slave, - stdout=slave, - stderr=slave, - universal_newlines=True, - preexec_fn=os.setsid, + text=True, + check=False, ) - - # Close the slave file descriptor as it's now owned by the subprocess - os.close(slave) - - def handle_io(): - while not ctrl_c_pressed: - try: - rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1) - - if sys.stdin in rlist: - data = os.read(sys.stdin.fileno(), 1024) - if not data: - break - os.write(master, data) - - if master in rlist: - data = os.read(master, 1024) - if not data: - break - sys.stdout.buffer.write(data) - sys.stdout.flush() - - except KeyboardInterrupt: - # This will be raised when Ctrl+C is pressed - break - - if process.poll() is not None: - break - - handle_io() - except (EOFError, KeyboardInterrupt): - pass - except OSError as e: - if e.errno != errno.EIO: - raise - finally: - # Clean up - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - signal.signal(signal.SIGINT, original_sigint) - - os.close(master) - if process and process.poll() is None: - process.terminate() - process.wait() - - return process.returncode - - -# run a command in a pseudo-terminal in windows, with interrupt handling, -def _run_with_pty_win(command): - """ - Runs a command with interactive support using subprocess directly. 
- """ - try: - # For shell scripts on Windows, use appropriate shell - if isinstance(command, (list, tuple)): - if command[0].endswith(".sh"): - if os.path.exists("/usr/bin/bash"): # WSL - command = ["bash"] + command - else: - # Use cmd.exe with bash while preserving all arguments - command = ["cmd.exe", "/c", "bash"] + command - - process = subprocess.Popen( - command, - shell=True, - universal_newlines=True, - ) - - process.wait() - + return result.returncode + except subprocess.SubprocessError as e: + log.error(f"Subprocess error: {e}") + return 1 except Exception as e: - print(f"Error: {str(e)}") + log.exception(f"Unexpected error: {e}") return 1 finally: - if process and process.poll() is None: - process.terminate() - process.wait() - return process.returncode - - -def run_command(command): - try: - result = subprocess.run(command, capture_output=True, text=True, check=True) - print("Script Output\n", result.stdout) - return result.returncode - except subprocess.CalledProcessError as e: - print("Error running script:", e) - print("Error output:", e.stderr) - return e.returncode + # Restore the original signal handler + signal.signal(signal.SIGINT, original_sigint) diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py index 3b0d01edd..958c7d387 100644 --- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -44,7 +44,9 @@ class PandasDataframeDataset: elif self.dataset_def.source.type == "rows": self.df = pandas.DataFrame(self.dataset_def.source.rows) else: - raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}") + raise ValueError( + f"Unsupported dataset source type: {self.dataset_def.source.type}" + ) if self.df is None: raise ValueError(f"Failed to load dataset from {self.dataset_def.url}") @@ -108,7 +110,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): return IterrowsResponse( data=rows, - next_index=end if end < len(dataset_impl) else None, + next_start_index=end if end < len(dataset_impl) else None, ) async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: @@ -117,4 +119,6 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): dataset_impl.load() new_rows_df = pandas.DataFrame(rows) - dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True) + dataset_impl.df = pandas.concat( + [dataset_impl.df, new_rows_df], ignore_index=True + ) diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index 1364352e6..32c0b4e98 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -55,4 +55,13 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", ), ), + remote_provider_spec( + api=Api.safety, + adapter=AdapterSpec( + adapter_type="nvidia", + pip_packages=["requests"], + module="llama_stack.providers.remote.safety.nvidia", + config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", + ), + ), ] diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 41ce747f7..db6edbce3 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -86,7 +86,7 @@ class 
 
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(loaded_dataset) else None,
+            next_start_index=end if end < len(loaded_dataset) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -98,9 +98,13 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         new_dataset = hf_datasets.Dataset.from_list(rows)
 
         # Concatenate the new rows with existing dataset
-        updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
+        updated_dataset = hf_datasets.concatenate_datasets(
+            [loaded_dataset, new_dataset]
+        )
 
         if dataset_def.metadata.get("path", None):
             updated_dataset.push_to_hub(dataset_def.metadata["path"])
         else:
-            raise NotImplementedError("Uploading to URL-based datasets is not supported yet")
+            raise NotImplementedError(
+                "Uploading to URL-based datasets is not supported yet"
+            )
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 8f3a0d147..96b2d73d8 100644
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -12,6 +12,7 @@ from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseStreamChunk,
+    CompletionMessage,
     EmbeddingsResponse,
     EmbeddingTaskType,
     Inference,
@@ -160,12 +161,14 @@ class PassthroughInferenceAdapter(Inference):
         client = self._get_client()
         response = await client.inference.chat_completion(**json_params)
 
-        response = response.to_dict()
-
-        # temporary hack to remove the metrics from the response
-        response["metrics"] = []
-
-        return convert_to_pydantic(ChatCompletionResponse, response)
+        return ChatCompletionResponse(
+            completion_message=CompletionMessage(
+                content=response.completion_message.content.text,
+                stop_reason=response.completion_message.stop_reason,
+                tool_calls=response.completion_message.tool_calls,
+            ),
+            logprobs=response.logprobs,
+        )
 
     async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator:
         client = self._get_client()
diff --git a/llama_stack/providers/remote/safety/nvidia/__init__.py b/llama_stack/providers/remote/safety/nvidia/__init__.py
new file mode 100644
index 000000000..4677268c6
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from typing import Any
+
+from .config import NVIDIASafetyConfig
+
+
+async def get_adapter_impl(config: NVIDIASafetyConfig, _deps) -> Any:
+    from .nvidia import NVIDIASafetyAdapter
+
+    impl = NVIDIASafetyAdapter(config)
+    await impl.initialize()
+    return impl
diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py
new file mode 100644
index 000000000..3df80ed4f
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/config.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class NVIDIASafetyConfig(BaseModel):
+    """
+    Configuration for the NVIDIA Guardrail microservice endpoint.
+
+    Attributes:
+        guardrails_service_url (str): A base URL for accessing the NVIDIA guardrail endpoint, e.g. http://0.0.0.0:7331
+        config_id (str, optional): The ID of the guardrails configuration to use from the configuration store
+            (https://developer.nvidia.com/docs/nemo-microservices/guardrails/source/guides/configuration-store-guide.html)
+
+    """
+
+    guardrails_service_url: str = Field(
+        default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"),
+        description="The URL for accessing the guardrails service",
+    )
+    config_id: Optional[str] = Field(default="self-check", description="Config ID to use from the config store")
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
+            "config_id": "self-check",
+        }
diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py
new file mode 100644
index 000000000..6da2a8344
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -0,0 +1,154 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import logging
+from typing import Any, List, Optional
+
+import requests
+
+from llama_stack.apis.inference import Message
+from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
+from llama_stack.apis.shields import Shield
+from llama_stack.distribution.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+
+from .config import NVIDIASafetyConfig
+
+logger = logging.getLogger(__name__)
+
+
+class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
+    def __init__(self, config: NVIDIASafetyConfig) -> None:
+        """
+        Initialize the NVIDIASafetyAdapter with a given safety configuration.
+
+        Args:
+            config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID.
+        """
+        logger.info(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...")
+        self.config = config
+
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def register_shield(self, shield: Shield) -> None:
+        if not shield.provider_resource_id:
+            raise ValueError("Shield model not provided.")
+
+    async def run_shield(
+        self, shield_id: str, messages: List[Message], params: Optional[dict[str, Any]] = None
+    ) -> RunShieldResponse:
+        """
+        Run a safety shield check against the provided messages.
+
+        Args:
+            shield_id (str): The unique identifier for the shield to be used.
+            messages (List[Message]): A list of Message objects representing the conversation history.
+            params (Optional[dict[str, Any]]): Additional parameters for the shield check.
+
+        Returns:
+            RunShieldResponse: The response containing safety violation details if any.
+
+        Raises:
+            ValueError: If the shield with the provided shield_id is not found.
+ """ + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + self.shield = NeMoGuardrails(self.config, shield.shield_id) + return await self.shield.run(messages) + + +class NeMoGuardrails: + """ + A class that encapsulates NVIDIA's guardrails safety logic. + + Sends messages to the guardrails service and interprets the response to determine + if a safety violation has occurred. + """ + + def __init__( + self, + config: NVIDIASafetyConfig, + model: str, + threshold: float = 0.9, + temperature: float = 1.0, + ): + """ + Initialize a NeMoGuardrails instance with the provided parameters. + + Args: + config (NVIDIASafetyConfig): The safety configuration containing the config ID and guardrails URL. + model (str): The identifier or name of the model to be used for safety checks. + threshold (float, optional): The threshold for flagging violations. Defaults to 0.9. + temperature (float, optional): The temperature setting for the underlying model. Must be greater than 0. Defaults to 1.0. + + Raises: + ValueError: If temperature is less than or equal to 0. + AssertionError: If config_id is not provided in the configuration. + """ + self.config_id = config.config_id + self.model = model + assert self.config_id is not None, "Must provide config id" + if temperature <= 0: + raise ValueError("Temperature must be greater than 0") + + self.temperature = temperature + self.threshold = threshold + self.guardrails_service_url = config.guardrails_service_url + + async def run(self, messages: List[Message]) -> RunShieldResponse: + """ + Queries the /v1/guardrails/checks endpoint of the NeMo guardrails deployed API. + + Args: + messages (List[Message]): A list of Message objects to be checked for safety violations. + + Returns: + RunShieldResponse: If the response indicates a violation ("blocked" status), returns a + RunShieldResponse with a SafetyViolation; otherwise, returns a RunShieldResponse with violation set to None. + + Raises: + requests.HTTPError: If the POST request fails. + """ + headers = { + "Accept": "application/json", + } + request_data = { + "model": self.model, + "messages": convert_pydantic_to_json_value(messages), + "temperature": self.temperature, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, + "max_tokens": 160, + "stream": False, + "guardrails": { + "config_id": self.config_id, + }, + } + response = requests.post( + url=f"{self.guardrails_service_url}/v1/guardrail/checks", headers=headers, json=request_data + ) + response.raise_for_status() + if "Content-Type" in response.headers and response.headers["Content-Type"].startswith("application/json"): + response_json = response.json() + if response_json["status"] == "blocked": + user_message = "Sorry I cannot do this." + metadata = response_json["rails_status"] + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + return RunShieldResponse(violation=None) diff --git a/llama_stack/scripts/__init__.py b/llama_stack/scripts/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/scripts/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/llama_stack/scripts/run_tests.sh b/llama_stack/scripts/run_tests.sh
deleted file mode 100644
index 49229d1b1..000000000
--- a/llama_stack/scripts/run_tests.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
-
-set -euo pipefail
-set -x
-
-stack_dir=$(dirname $(dirname $THIS_DIR))
-PYTHONPATH=$stack_dir pytest -p no:warnings --asyncio-mode auto --tb=short
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index e9748721a..0c788ce86 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -1,13 +1,13 @@
 version: '2'
 distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference
+  description: Use NVIDIA NIM for running LLM inference and safety
   providers:
     inference:
     - remote::nvidia
     vector_io:
     - inline::faiss
     safety:
-    - inline::llama-guard
+    - remote::nvidia
     agents:
     - inline::meta-reference
     telemetry:
@@ -15,16 +15,9 @@ distribution_spec:
     eval:
     - inline::meta-reference
     datasetio:
-    - remote::huggingface
     - inline::localfs
     scoring:
     - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::code-interpreter
     - inline::rag-runtime
-    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index cc5e96333..308c0e2a6 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -6,9 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import Provider, ToolGroupInput
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
@@ -16,19 +17,13 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::nvidia"],
         "vector_io": ["inline::faiss"],
-        "safety": ["inline::llama-guard"],
+        "safety": ["remote::nvidia"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
+        "datasetio": ["inline::localfs"],
+        "scoring": ["inline::basic"],
+        "tool_runtime": ["inline::rag-runtime"],
     }
 
     inference_provider = Provider(
@@ -36,30 +31,35 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::nvidia",
         config=NVIDIAConfig.sample_run_config(),
     )
+    safety_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NVIDIASafetyConfig.sample_run_config(),
+    )
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="nvidia",
+    )
+    safety_model = ModelInput(
model_id="${env.SAFETY_MODEL}", + provider_id="nvidia", + ) available_models = { "nvidia": MODEL_ENTRIES, } default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), ToolGroupInput( toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", distro_type="remote_hosted", - description="Use NVIDIA NIM for running LLM inference", + description="Use NVIDIA NIM for running LLM inference and safety", container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, @@ -72,15 +72,34 @@ def get_distribution_template() -> DistributionTemplate: default_models=default_models, default_tool_groups=default_tool_groups, ), + "run-with-safety.yaml": RunConfigSettings( + provider_overrides={ + "inference": [ + inference_provider, + safety_provider, + ] + }, + default_models=[inference_model, safety_model], + default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")], + default_tool_groups=default_tool_groups, + ), }, run_config_env_vars={ - "LLAMASTACK_PORT": ( - "5001", - "Port for the Llama Stack distribution server", - ), "NVIDIA_API_KEY": ( "", "NVIDIA API Key", ), + "GUARDRAILS_SERVICE_URL": ( + "http://0.0.0.0:7331", + "URL for the NeMo Guardrails Service", + ), + "INFERENCE_MODEL": ( + "Llama3.1-8B-Instruct", + "Inference model", + ), + "SAFETY_MODEL": ( + "meta/llama-3.1-8b-instruct", + "Name of the model to use for safety", + ), }, ) diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml new file mode 100644 index 000000000..04da1bcda --- /dev/null +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -0,0 +1,101 @@ +version: '2' +image_name: nvidia +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: nvidia + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:} + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} + config_id: self-check + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + safety: + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} + config_id: self-check + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db + datasetio: + - 
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: nvidia
+  model_type: llm
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+  model_type: llm
+shields:
+- shield_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 213e22cb2..3abdd82a7 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -26,10 +26,11 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
   safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
+  - provider_id: nvidia
+    provider_type: remote::nvidia
     config:
-      excluded_categories: []
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -54,13 +55,6 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
@@ -72,33 +66,10 @@ providers:
   - provider_id: basic
     provider_type: inline::basic
     config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
@@ -227,11 +198,7 @@ datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
   port: 8321
diff --git a/pyproject.toml b/pyproject.toml
index 4a5befbd0..a006d69f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -269,6 +269,7 @@ exclude = [
     "^llama_stack/providers/remote/inference/together/",
     "^llama_stack/providers/remote/inference/vllm/",
     "^llama_stack/providers/remote/safety/bedrock/",
"^llama_stack/providers/remote/safety/nvidia/", "^llama_stack/providers/remote/safety/sample/", "^llama_stack/providers/remote/tool_runtime/bing_search/", "^llama_stack/providers/remote/tool_runtime/brave_search/", diff --git a/llama_stack/scripts/distro_codegen.py b/scripts/distro_codegen.py old mode 100644 new mode 100755 similarity index 98% rename from llama_stack/scripts/distro_codegen.py rename to scripts/distro_codegen.py index 92c82983e..e19ba8cb9 --- a/llama_stack/scripts/distro_codegen.py +++ b/scripts/distro_codegen.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -20,7 +21,7 @@ from llama_stack.distribution.build import ( get_provider_dependencies, ) -REPO_ROOT = Path(__file__).parent.parent.parent +REPO_ROOT = Path(__file__).parent.parent class ChangedPathTracker: diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py old mode 100644 new mode 100755 index 668146901..ac4053339 --- a/scripts/gen-changelog.py +++ b/scripts/gen-changelog.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # diff --git a/llama_stack/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py old mode 100644 new mode 100755 similarity index 92% rename from llama_stack/scripts/generate_prompt_format.py rename to scripts/generate_prompt_format.py index 338b23f3e..72b057992 --- a/llama_stack/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -18,7 +19,7 @@ import fire from llama_stack.models.llama.sku_list import resolve_model from llama_stack.providers.inline.inference.meta_reference.config import MetaReferenceInferenceConfig -from llama_stack.providers.inline.inference.meta_reference.generation import Llama +from llama_stack.providers.inline.inference.meta_reference.llama3.generation import Llama3 THIS_DIR = Path(__file__).parent.resolve() @@ -41,7 +42,7 @@ def run_main( llama_model = resolve_model(model_id) if not llama_model: raise ValueError(f"Model {model_id} not found") - generator = Llama.build( + generator = Llama3.build( config=config, model_id=model_id, llama_model=llama_model, diff --git a/llama_stack/scripts/run_client_sdk_tests.py b/scripts/run_client_sdk_tests.py old mode 100644 new mode 100755 similarity index 91% rename from llama_stack/scripts/run_client_sdk_tests.py rename to scripts/run_client_sdk_tests.py index e70d187aa..b93316c4f --- a/llama_stack/scripts/run_client_sdk_tests.py +++ b/scripts/run_client_sdk_tests.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -15,8 +16,7 @@ Script for running api on AsyncLlamaStackAsLibraryClient with templates Assuming directory structure: - llama-stack - - llama_stack - - scripts + - scripts - tests - api @@ -25,10 +25,10 @@ Example command: cd llama-stack EXPORT TOGETHER_API_KEY=<..> EXPORT FIREWORKS_API_KEY=<..> -python llama_stack/scripts/run_client_sdk_tests.py --templates together fireworks --report +./scripts/run_client_sdk_tests.py --templates together fireworks --report """ -REPO_ROOT = Path(__file__).parent.parent.parent +REPO_ROOT = Path(__file__).parent.parent CLIENT_SDK_TESTS_RELATIVE_PATH = "tests/api/"