diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 8268a0085..6de72cd60 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -11,6 +11,8 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' + pull_request: paths: - 'llama_stack/cli/stack/build.py' @@ -19,6 +21,7 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index affc426d6..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + 
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, @@ -11190,6 +11342,115 @@ ], "title": "InsertRequest" }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." 
+ }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, "InsertChunksRequest": { "type": "object", "properties": { @@ -11200,53 +11461,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" }, "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." }, @@ -14671,53 +14886,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database."
+ "$ref": "#/components/schemas/Chunk" } }, "scores": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 1e1293dc2..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": @@ -7867,6 +7974,107 @@ components: - vector_db_id - chunk_size_in_tokens title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not + expected to change after. Use `Chunk.metadata` for metadata that will + be used in the context during inference. InsertChunksRequest: type: object properties: @@ -7877,40 +8085,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later.
- additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' description: >- The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` @@ -10231,40 +10406,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' scores: type: array items: diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index dd73d93ea..1b50ee712 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -18,7 +18,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,7 +38,7 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -46,7 +46,7 @@ providers: metadata_store: namespace: null type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -85,7 +85,7 @@ providers: # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: - A _provider instance_ is identified with an (id, type, configuration) triplet. @@ -94,6 +94,95 @@ A few things to note: - The configuration dictionary is provider-specific. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. 
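+For example (an illustrative invocation; the URL is a placeholder for wherever your Ollama server runs):
+
+```bash
+llama stack run --env OLLAMA_URL=http://my-server:11434
+```
+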
+### Environment Variable Substitution + +Llama Stack supports environment variable substitution in configuration values using the +`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide +different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) +and follows similar patterns. + +#### Basic Syntax + +The basic syntax for environment variable substitution is: + +```yaml +config: + api_key: ${env.API_KEY} + url: ${env.SERVICE_URL} +``` + +If the environment variable is not set, the server will raise an error during startup. + +#### Default Values + +You can provide default values using the `:=` operator: + +```yaml +config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + port: ${env.PORT:=8321} + timeout: ${env.TIMEOUT:=60} +``` + +If the environment variable is not set, the corresponding default value will be used (here, `http://localhost:11434` for `OLLAMA_URL`). +Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set. + +#### Conditional Values + +You can use the `:+` operator to provide a value only when the environment variable is set: + +```yaml +config: + # Only include this field if ENVIRONMENT is set + environment: ${env.ENVIRONMENT:+production} +``` + +If the environment variable is set, the value after `:+` will be used. If it's not set, the field +will be omitted with a `None` value. +The empty form `${env.ENVIRONMENT:+}` is also supported: the field is omitted when the environment +variable is not set. This is useful for making a field optional and enabling it at runtime only when desired. + +#### Examples + +Here are some common patterns: + +```yaml +# Required environment variable (will error if not set) +api_key: ${env.OPENAI_API_KEY} + +# Optional with default +base_url: ${env.API_BASE_URL:=https://api.openai.com/v1} + +# Conditional field +debug_mode: ${env.DEBUG:+true} + +# Optional field that becomes None if not set +optional_token: ${env.OPTIONAL_TOKEN:+} +``` + +#### Runtime Override + +You can override environment variables at runtime when starting the server: + +```bash +# Override specific environment variables +llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com + +# Or set them in your shell +export API_KEY=sk-123 +export BASE_URL=https://custom-api.com +llama stack run --config run.yaml +``` + +#### Type Safety + +The environment variable substitution system is type-safe: + +- String values remain strings +- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None` +- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer) +- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean) + ## Resources Finally, let's look at the `models` section: ```yaml models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama provider_model_id: null ``` A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
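+As a concrete (illustrative) example of such an alias, reusing the names from the paragraph above:
+
+```yaml
+models:
+- metadata: {}
+  model_id: image_captioning_model
+  provider_id: ollama
+  provider_model_id: llama3.2:vision-11b
+```
+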
+If you need to conditionally register a model in the configuration, for example only when specific environment variables are set, you can use the special `__disabled__` string as the default value of an environment variable substitution, as shown below: + +```yaml +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL:=__disabled__} + provider_id: ollama + provider_model_id: ${env.INFERENCE_MODEL:=__disabled__} +``` + +The snippet above will only register this model if the environment variable `INFERENCE_MODEL` is set and non-empty. If the environment variable is not set, the model will not be registered at all. + ## Server Configuration The `server` section configures the HTTP server that serves the Llama Stack APIs: @@ -140,7 +241,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" - token: "${env.TOKEN:}" + token: "${env.TOKEN:+}" key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" @@ -384,12 +485,12 @@ providers: - provider_id: vllm-0 provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000} + url: ${env.VLLM_URL:=http://localhost:8000} # this vLLM server serves the llama-guard model (e.g., llama-guard:3b) - provider_id: vllm-1 provider_type: remote::vllm config: - url: ${env.SAFETY_VLLM_URL:http://localhost:8001} + url: ${env.SAFETY_VLLM_URL:=http://localhost:8001} ... models: - metadata: {} diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index fa7bacd8f..0a08bca03 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -15,10 +15,10 @@ data: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: @@ -30,10 +30,10 @@ data: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,34 +45,34 @@ data: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:llamastack} + user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:}
+ service_name: ${env.OTEL_SERVICE_NAME:+} - sinks: ${env.TELEMETRY_SINKS:console} + sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -82,19 +82,19 @@ data: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -106,11 +106,11 @@ data: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: - - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index 8e2773dd1..5ac08134c 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -12,25 +12,25 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: - url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,34 +42,34 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432}
- db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -79,19 +79,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -103,11 +103,11 @@ models: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: -- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} +- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") # TODO: add user_location diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index e3de3d5cb..8bf7a48d0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import Enum, StrEnum from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -class DatasetPurpose(str, Enum): +class DatasetPurpose(StrEnum): """ Purpose of the dataset. Each purpose has a required input data schema. diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 4dfeed448..a72dcd8d4 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Annotated, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod # OpenAI Files API Models -class OpenAIFilePurpose(str, Enum): +class OpenAIFilePurpose(StrEnum): """ Valid purpose values for OpenAI Files API. 
""" diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 3d90a92a0..36da97e62 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -22,7 +22,7 @@ class CommonModelFields(BaseModel): @json_schema_type -class ModelType(str, Enum): +class ModelType(StrEnum): llm = "llm" embedding = "embedding" diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index d6de0108c..2d4131315 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -8,6 +8,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -15,21 +16,80 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.strong_typing.schema import register_schema +@json_schema_type +class ChunkMetadata(BaseModel): + """ + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. + :param document_id: The ID of the document this chunk belongs to. + :param source: The source of the content, such as a URL, file path, or other identifier. + :param created_timestamp: An optional timestamp indicating when the chunk was created. + :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. + :param chunk_window: The window of the chunk, which can be used to group related chunks together. + :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. + :param chunk_embedding_model: The embedding model used to create the chunk's embedding. + :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk. + :param content_token_count: The number of tokens in the content of the chunk. + :param metadata_token_count: The number of tokens in the metadata of the chunk. + """ + + chunk_id: str | None = None + document_id: str | None = None + source: str | None = None + created_timestamp: int | None = None + updated_timestamp: int | None = None + chunk_window: str | None = None + chunk_tokenizer: str | None = None + chunk_embedding_model: str | None = None + chunk_embedding_dimension: int | None = None + content_token_count: int | None = None + metadata_token_count: int | None = None + + +@json_schema_type class Chunk(BaseModel): """ A chunk of content that can be inserted into a vector database. :param content: The content of the chunk, which can be interleaved text, images, or other types.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later. - :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information. + :param metadata: Metadata associated with the chunk that will be used in the model context during inference. + :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. + :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. """ content: InterleavedContent metadata: dict[str, Any] = Field(default_factory=dict) embedding: list[float] | None = None + # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id" + stored_chunk_id: str | None = Field(default=None, alias="chunk_id") + chunk_metadata: ChunkMetadata | None = None + + model_config = {"populate_by_name": True} + + def model_post_init(self, __context): + # Extract chunk_id from metadata if present + if self.metadata and "chunk_id" in self.metadata: + self.stored_chunk_id = self.metadata.pop("chunk_id") + + @property + def chunk_id(self) -> str: + """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set.""" + if self.stored_chunk_id: + return self.stored_chunk_id + + if "document_id" in self.metadata: + return generate_chunk_id(self.metadata["document_id"], str(self.content)) + + return generate_chunk_id(str(uuid.uuid4()), str(self.content)) @json_schema_type diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/distribution/access_control/datatypes.py index bc5ed6645..c833ed51b 100644 --- a/llama_stack/distribution/access_control/datatypes.py +++ b/llama_stack/distribution/access_control/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Self from pydantic import BaseModel, model_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator from .conditions import parse_conditions -class Action(str, Enum): +class Action(StrEnum): CREATE = "create" READ = "read" UPDATE = "update" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index abc3f0065..e07da001e 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from enum import Enum +from enum import StrEnum from pathlib import Path from typing import Annotated, Any @@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig -LLAMA_STACK_BUILD_CONFIG_VERSION = "2" -LLAMA_STACK_RUN_CONFIG_VERSION = "2" +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 RoutingKey = str | list[str] @@ -159,7 +159,7 @@ class LoggingConfig(BaseModel): ) -class AuthProviderType(str, Enum): +class AuthProviderType(StrEnum): """Supported authentication provider types.""" OAUTH2_TOKEN = "oauth2_token" @@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception): pass -class QuotaPeriod(str, Enum): +class QuotaPeriod(StrEnum): DAY = "day" @@ -229,7 +229,7 @@ class ServerConfig(BaseModel): class StackRunConfig(BaseModel): - version: str = LLAMA_STACK_RUN_CONFIG_VERSION + version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( ..., @@ -300,7 +300,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): - version: str = LLAMA_STACK_BUILD_CONFIG_VERSION + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 5a9708497..c86880669 100644 --- a/llama_stack/distribution/stack.py +++ 
b/llama_stack/distribution/stack.py
@@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
         method = getattr(impls[api], register_method)
         for obj in objects:
+            # In complex templates, like our starter template, we may have dynamic model ids
+            # given by environment variables. This allows those environment variables to have
+            # a default value of __disabled__ to skip registration of the model if not set.
+            if (
+                hasattr(obj, "provider_model_id")
+                and obj.provider_model_id is not None
+                and "__disabled__" in obj.provider_model_id
+            ):
+                continue
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
             # we use model_dump() to find all the attrs and then getattr to get the still typed value.
@@ -118,7 +127,12 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
+        super().__init__(
+            f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}. "
+            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
+            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
+            f"or ensure the environment variable is set."
+        )


 def replace_env_vars(config: Any, path: str = "") -> Any:
@@ -141,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return result

     elif isinstance(config, str):
-        # Updated pattern to support both default values (:) and conditional values (+)
-        pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}"
+        # Pattern supports bash-like syntax: := for a default value and :+ for a conditional value
+        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"

-        def get_env_var(match):
+        def get_env_var(match: re.Match):
             env_var = match.group(1)
-            operator = match.group(2)  # ':' for default, '+' for conditional
+            operator = match.group(2)  # '=' for default, '+' for conditional
             value_expr = match.group(3)

             env_value = os.environ.get(env_var)

-            if operator == ":":  # Default value syntax: ${env.FOO:default}
+            if operator == "=":  # Default value syntax: ${env.FOO:=default}
                 if not env_value:
-                    if value_expr is None:
+                    # value_expr is an empty string (not None) when the group does not match,
+                    # which means ${env.FOO:=} is an error
+                    if value_expr == "":
                         raise EnvVarError(env_var, path)
                     else:
                         value = value_expr
                 else:
                     value = env_value
-            elif operator == "+":  # Conditional value syntax: ${env.FOO+value_if_set}
+            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
                 if env_value:
                     value = value_expr
                 else:
@@ -174,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return os.path.expanduser(value)

         try:
-            return re.sub(pattern, get_env_var, config)
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None

     return config


+def _convert_string_to_proper_type(value: str) -> Any:
+    # This can be tricky depending on the declared type of the config field: with 'str | None' we
+    # are fine converting the empty string to None, but with plain 'str' we would need to keep the
+    # empty string. 'str | None' is the more common pattern, and provider configs should be typed this way.
+ # TODO: we could try to load the config class and see if the config has a field with type 'str | None' + # and then convert the empty string to None or not + if value == "": + return None + + lowered = value.lower() + if lowered == "true": + return True + elif lowered == "false": + return False + + try: + return int(value) + except ValueError: + pass + + try: + return float(value) + except ValueError: + pass + + return value + + def validate_env_pair(env_pair: str) -> tuple[str, str]: """Validate and split an environment variable key-value pair.""" try: diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index f9f094c3d..7f1ebed55 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -from enum import Enum +from enum import Enum, StrEnum from io import BytesIO from typing import Annotated, Any, Literal @@ -171,7 +171,7 @@ class GenerationResult(BaseModel): ignore_token: bool -class QuantizationMode(str, Enum): +class QuantizationMode(StrEnum): none = "none" fp8_mixed = "fp8_mixed" int4_mixed = "int4_mixed" diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 60b05545b..221ed9027 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
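# ---------------------------------------------------------------------------
# Editor's note: a self-contained sketch (not part of the patch) of the new
# bash-like substitution grammar that replace_env_vars() implements above.
# ${env.FOO:=default} falls back when FOO is unset or empty, ${env.FOO:+value}
# yields the value only when FOO is set, and a bare ${env.FOO} must be set.
# The real implementation then passes the result through
# _convert_string_to_proper_type(); that step is omitted here.
import os
import re

PATTERN = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"


def substitute(template: str) -> str:
    def repl(match: re.Match) -> str:
        env_var, operator, value_expr = match.group(1, 2, 3)
        env_value = os.environ.get(env_var)
        if operator == "=":  # ${env.FOO:=default}
            if env_value:
                return env_value
            if value_expr == "":  # ${env.FOO:=} is an error, as in the patch
                raise ValueError(f"{env_var} not set and no default given")
            return value_expr
        if operator == "+":  # ${env.FOO:+value_if_set}
            return value_expr if env_value else ""
        if env_value is None:  # bare ${env.FOO} must exist
            raise ValueError(f"{env_var} not set")
        return env_value

    return re.sub(PATTERN, repl, template)


os.environ.pop("OLLAMA_URL", None)
print(substitute("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434
os.environ["TOGETHER_API_KEY"] = "tok"
print(substitute("${env.TOGETHER_API_KEY:+configured}"))  # configured
# ---------------------------------------------------------------------------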
-from enum import Enum +from enum import StrEnum from typing import Any, Protocol from urllib.parse import urlparse @@ -225,7 +225,7 @@ def remote_provider_spec( ) -class HealthStatus(str, Enum): +class HealthStatus(StrEnum): OK = "OK" ERROR = "Error" NOT_IMPLEMENTED = "Not Implemented" diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -583,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 757a70742..6c767af8f 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", + "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", "metadata_store": SqliteSqlStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="files_metadata.db", diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 7bc961443..9556b026a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel): def sample_run_config( cls, model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", - quantization_type: str = 
"${env.QUANTIZATION_TYPE:bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", **kwargs, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index ce8743c74..660ef206b 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -44,10 +44,10 @@ class VLLMConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", - "max_tokens": "${env.MAX_TOKENS:4096}", - "max_model_len": "${env.MAX_MODEL_LEN:4096}", - "max_num_seqs": "${env.MAX_NUM_SEQS:4}", - "enforce_eager": "${env.ENFORCE_EAGER:False}", - "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", + "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}", + "max_tokens": "${env.MAX_TOKENS:=4096}", + "max_model_len": "${env.MAX_MODEL_LEN:=4096}", + "max_num_seqs": "${env.MAX_NUM_SEQS:=4}", + "enforce_eager": "${env.ENFORCE_EAGER:=False}", + "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}", } diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index 4a80f1e4f..f44d27f96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "openai_api_key": "${env.OPENAI_API_KEY:}", + "openai_api_key": "${env.OPENAI_API_KEY:+}", } diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git 
a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 93509040c..50dd8a788 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, Field, field_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR -class TelemetrySink(str, Enum): +class TelemetrySink(StrEnum): OTEL_TRACE = "otel_trace" OTEL_METRIC = "otel_metric" SQLITE = "sqlite" @@ -20,12 +20,12 @@ class TelemetrySink(str, Enum): class TelemetryConfig(BaseModel): - otel_trace_endpoint: str = Field( - default="http://localhost:4318/v1/traces", + otel_trace_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for traces", ) - otel_metric_endpoint: str = Field( - default="http://localhost:4318/v1/metrics", + otel_metric_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:\u200b}", - "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", + "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 0f6cf8619..98f5bf5a1 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider if TelemetrySink.OTEL_TRACE in self.config.sinks: + if self.config.otel_trace_endpoint is None: + raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled") span_exporter = OTLPSpanExporter( endpoint=self.config.otel_trace_endpoint, ) span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: + if self.config.otel_metric_endpoint is None: + raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled") metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( endpoint=self.config.otel_metric_endpoint, diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 7f4fe5dbd..6a7c7885c 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti chunks = [] for doc in documents: content = await content_from_doc(doc) + # TODO: we should add 
enrichment here as URLs won't be added to the metadata by default chunks.extend( make_overlapped_chunks( doc.document_id, @@ -157,8 +158,24 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ) break - metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]} - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset) + # Add useful keys from chunk_metadata to metadata and remove some from metadata + chunk_metadata_keys_to_include_from_context = [ + "chunk_id", + "document_id", + "source", + ] + metadata_keys_to_exclude_from_context = [ + "token_count", + "metadata_token_count", + ] + metadata_for_context = {} + for k in chunk_metadata_keys_to_include_from_context: + metadata_for_context[k] = getattr(chunk.chunk_metadata, k) + for k in metadata: + if k not in metadata_keys_to_exclude_from_context: + metadata_for_context[k] = metadata[k] + + text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context) picked.append(TextContentItem(text=text_content)) picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 283724b41..7cc91d918 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index cb806cb39..4c57f4aba 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index d832e56f5..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -5,20 +5,18 @@ # the root directory of this source tree. 
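# ---------------------------------------------------------------------------
# Editor's note: a runnable sketch (not part of the patch) of the context-
# metadata merge the RAG tool performs above: a whitelist of ChunkMetadata
# fields (chunk_id, document_id, source) is surfaced to the model, while the
# token-accounting keys stay out of the prompt. `SimpleNamespace` stands in
# for the real ChunkMetadata object.
from types import SimpleNamespace
from typing import Any


def build_context_metadata(chunk_metadata: Any, metadata: dict[str, Any]) -> dict[str, Any]:
    include_from_chunk_metadata = ["chunk_id", "document_id", "source"]
    exclude_from_context = ["token_count", "metadata_token_count"]
    out = {k: getattr(chunk_metadata, k) for k in include_from_chunk_metadata}
    out.update({k: v for k, v in metadata.items() if k not in exclude_from_context})
    return out


cm = SimpleNamespace(chunk_id="c-1", document_id="doc-9", source="https://example.com/a")
meta = {"token_count": 512, "metadata_token_count": 7, "author": "jdoe"}
print(build_context_metadata(cm, meta))
# {'chunk_id': 'c-1', 'document_id': 'doc-9', 'source': 'https://example.com/a', 'author': 'jdoe'}
# ---------------------------------------------------------------------------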
import asyncio -import hashlib import json import logging import sqlite3 import struct -import uuid from typing import Any import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -66,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( @@ -201,10 +199,7 @@ class SQLiteVecIndex(EmbeddingIndex): batch_embeddings = embeddings[i : i + batch_size] # Insert metadata - metadata_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json()) - for chunk in batch_chunks - ] + metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks] cur.executemany( f""" INSERT INTO {self.metadata_table} (id, chunk) @@ -218,7 +213,7 @@ class SQLiteVecIndex(EmbeddingIndex): embedding_data = [ ( ( - generate_chunk_id(chunk.metadata["document_id"], chunk.content), + chunk.chunk_id, serialize_vector(emb.tolist()), ) ) @@ -230,10 +225,7 @@ class SQLiteVecIndex(EmbeddingIndex): ) # Insert FTS content - fts_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content) - for chunk in batch_chunks - ] + fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) cur.executemany( f"DELETE FROM {self.fts_table} WHERE id = ?;", @@ -381,13 +373,12 @@ class SQLiteVecIndex(EmbeddingIndex): vector_response = await self.query_vector(embedding, k, score_threshold) keyword_response = await self.query_keyword(query_string, k, score_threshold) - # Convert responses to score dictionaries using generate_chunk_id + # Convert responses to score dictionaries using chunk_id vector_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score - for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) } keyword_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score + chunk.chunk_id: score for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) } @@ -408,13 +399,7 @@ class SQLiteVecIndex(EmbeddingIndex): filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] # Create a map of chunk_id to chunk for both responses - chunk_map = {} - for c in vector_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c - for c in keyword_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} # Use the map to look up chunks by their IDs chunks = [] @@ -757,9 +742,3 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not 
found") return await self.cache[vector_db_id].query_chunks(query, params) - - -def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 66f2e8bce..47be57eee 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama", "aiohttp"], + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", ), diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index e0a04be48..f0fe1e9f5 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.safety, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=["litellm"], + pip_packages=["litellm", "requests"], module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 7980d6a13..244b06842 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.scoring, provider_type="inline::basic", - pip_packages=[], + pip_packages=["requests"], module="llama_stack.providers.inline.scoring.basic", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", api_dependencies=[ diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py index e616ce25c..0f5ea22e9 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, 
"namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index 5c8f9ff76..7a1c04304 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}", } diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 4c449edc2..6369928bb 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", + "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", } diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/llama_stack/providers/remote/inference/ollama/__init__.py index 073c31cde..491339451 100644 --- a/llama_stack/providers/remote/inference/ollama/__init__.py +++ b/llama_stack/providers/remote/inference/ollama/__init__.py @@ -10,6 +10,6 @@ from .config import OllamaImplConfig async def get_adapter_impl(config: OllamaImplConfig, _deps): from .ollama import OllamaInferenceAdapter - impl = OllamaInferenceAdapter(config.url) + impl = OllamaInferenceAdapter(config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 0e4aef0e1..b2cc4d8a7 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL + raise_on_connect_error: bool = True @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) 
-> dict[str, Any]: - return {"url": url} + def sample_run_config( + cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + ) -> dict[str, Any]: + return { + "url": url, + "raise_on_connect_error": raise_on_connect_error, + } diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d51072fbf..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -9,7 +9,6 @@ import uuid from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -import httpx from ollama import AsyncClient # type: ignore[attr-defined] from openai import AsyncOpenAI @@ -33,6 +32,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -41,15 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( @@ -57,6 +54,7 @@ from llama_stack.providers.datatypes import ( HealthStatus, ModelsProtocolPrivate, ) +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -90,9 +88,10 @@ class OllamaInferenceAdapter( InferenceProvider, ModelsProtocolPrivate, ): - def __init__(self, url: str) -> None: + def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) - self.url = url + self.url = config.url + self.raise_on_connect_error = config.raise_on_connect_error @property def client(self) -> AsyncClient: @@ -103,8 +102,13 @@ class OllamaInferenceAdapter( return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama") async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.url}`...") - await self.health() + logger.debug(f"checking connectivity to Ollama at `{self.url}`...") + health_response = await self.health() + if health_response["status"] == HealthStatus.ERROR: + if self.raise_on_connect_error: + raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") + else: + logger.warning("Ollama Server is not running, start it using `ollama serve` in a 
separate terminal") async def health(self) -> HealthResponse: """ @@ -117,10 +121,8 @@ class OllamaInferenceAdapter( try: await self.client.ps() return HealthResponse(status=HealthStatus.OK) - except httpx.ConnectError as e: - raise RuntimeError( - "Ollama Server is not running, start it using `ollama serve` in a separate terminal" - ) from e + except Exception as e: + return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index e3913dc35..ff32a971c 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ b/llama_stack/providers/remote/inference/runpod/config.py @@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:}", - "api_token": "${env.RUNPOD_API_TOKEN:}", + "url": "${env.RUNPOD_URL:+}", + "api_token": "${env.RUNPOD_API_TOKEN:+}", } diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from 
collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 5c7f60519..121e2cae7 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:}", + "api_key": "${env.TOGETHER_API_KEY:+}", } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 99abddf51..e11efa7f0 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def validate_tls_verify(cls, v): if isinstance(v, str): - # Check if it's a boolean string - if v.lower() in ("true", "false"): - return v.lower() == "true" # Otherwise, treat it as a cert path cert_path = Path(v).expanduser().resolve() if not cert_path.exists(): @@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel): ): return { "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", - "api_token": "${env.VLLM_API_TOKEN:fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:true}", + 
"max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", } diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 3424be6b4..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator from typing import Any import httpx -from openai import AsyncOpenAI +from openai import APIConnectionError, AsyncOpenAI from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models @@ -461,7 +459,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): model = await self.register_helper.register_model(model) except ValueError: pass # Ignore statically unknown model, will check live listing - res = await client.models.list() + try: + res = await client.models.list() + except APIConnectionError as e: + raise ValueError( + f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL." 
+ ) from e available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 5eda9c5c0..9534eceeb 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:}", - "project_id": "${env.WATSONX_PROJECT_ID:}", + "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:+}", + "project_id": "${env.WATSONX_PROJECT_ID:+}", } diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index fa08b6e3f..ea6dff0b5 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index ddf5a3a0b..1c618f4f4 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", } diff --git 
a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 37ba21304..93b97a1b2 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "api_key": "${env.BRAVE_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index c9b18d30d..5bdd27807 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index aefc86bd6..b5b10e371 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", + "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}", } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 04b92a2e4..041e864ca 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel): @classmethod def sample_run_config( cls, - host: str = "${env.PGVECTOR_HOST:localhost}", - port: int = "${env.PGVECTOR_PORT:5432}", + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index e9d6eec22..09ea08fa0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -70,8 +70,8 @@ class QdrantIndex(EmbeddingIndex): ) points = [] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): - chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}" + for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): + chunk_id = chunk.chunk_id points.append( PointStruct( id=convert_id(chunk_id), diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + 
OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index ff95b12a7..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( @@ -1026,7 +1024,9 @@ def openai_messages_to_messages( return converted_messages -def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]): +def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None): + if content is None: + return "" if isinstance(content, str): return content elif isinstance(content, list): diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index e966e13ba..a45ff4ce8 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig): return { "type": "redis", "namespace": None, - "host": "${env.REDIS_HOST:localhost}", - "port": "${env.REDIS_PORT:6379}", + "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", } @@ -66,7 +66,7 @@ class 
SqliteKVStoreConfig(CommonConfig): return { "type": "sqlite", "namespace": None, - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig): return { "type": "postgres", "namespace": None, - "host": "${env.POSTGRES_HOST:localhost}", - "port": "${env.POSTGRES_PORT:5432}", - "db": "${env.POSTGRES_DB:llamastack}", - "user": "${env.POSTGRES_USER:llamastack}", - "password": "${env.POSTGRES_PASSWORD:llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", } @classmethod @@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig): return { "type": "mongodb", "namespace": None, - "host": "${env.MONGODB_HOST:localhost}", - "port": "${env.MONGODB_PORT:5432}", + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", "user": "${env.MONGODB_USER}", "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", } diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index a6e420feb..ab204a75a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -7,6 +7,7 @@ import base64 import io import logging import re +import time from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any @@ -23,12 +24,13 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id log = logging.getLogger(__name__) @@ -148,6 +150,7 @@ async def content_from_doc(doc: RAGDocument) -> str: def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: + default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER" tokenizer = Tokenizer.get_instance() tokens = tokenizer.encode(text, bos=False, 
eos=False) try: @@ -161,16 +164,32 @@ def make_overlapped_chunks( for i in range(0, len(tokens), window_len - overlap_len): toks = tokens[i : i + window_len] chunk = tokenizer.decode(toks) + chunk_id = generate_chunk_id(chunk, text) chunk_metadata = metadata.copy() + chunk_metadata["chunk_id"] = chunk_id chunk_metadata["document_id"] = document_id chunk_metadata["token_count"] = len(toks) chunk_metadata["metadata_token_count"] = len(metadata_tokens) + backend_chunk_metadata = ChunkMetadata( + chunk_id=chunk_id, + document_id=document_id, + source=metadata.get("source", None), + created_timestamp=metadata.get("created_timestamp", int(time.time())), + updated_timestamp=int(time.time()), + chunk_window=f"{i}-{i + len(toks)}", + chunk_tokenizer=default_tokenizer, + chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + content_token_count=len(toks), + metadata_token_count=len(metadata_tokens), + ) + # chunk is a string chunks.append( Chunk( content=chunk, metadata=chunk_metadata, + chunk_metadata=backend_chunk_metadata, ) ) @@ -237,6 +256,9 @@ class VectorDBWithIndex: for i, c in enumerate(chunks): if c.embedding is None: chunks_to_embed.append(c) + if c.chunk_metadata: + c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension else: _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index edc7672a3..d558a2a26 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): return cls( type="sqlite", - db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, ) @property @@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, **kwargs): return cls( type="postgres", - host="${env.POSTGRES_HOST:localhost}", - port="${env.POSTGRES_PORT:5432}", - db="${env.POSTGRES_DB:llamastack}", - user="${env.POSTGRES_USER:llamastack}", - password="${env.POSTGRES_PASSWORD:llamastack}", + host="${env.POSTGRES_HOST:=localhost}", + port="${env.POSTGRES_PORT:=5432}", + db="${env.POSTGRES_DB:=llamastack}", + user="${env.POSTGRES_USER:=llamastack}", + password="${env.POSTGRES_PASSWORD:=llamastack}", ) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..4ae68ee1d 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -180,7 +180,7 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/llama_stack/providers/utils/vector_io/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta 
Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py new file mode 100644 index 000000000..68cf11cad --- /dev/null +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import hashlib +import uuid + + +def generate_chunk_id(document_id: str, chunk_text: str) -> str: + """Generate a unique chunk ID using a hash of document ID and chunk text.""" + hash_input = f"{document_id}:{chunk_text}".encode() + return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 97a06f77a..1a2c883fa 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use AWS Bedrock for running LLM inference and safety providers: diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 8033b2086..61bc83f02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: bedrock apis: - agents @@ -22,7 +22,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock provider_type: remote::bedrock @@ -34,17 +34,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -52,7 +52,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -60,14 +60,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -78,17 +78,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -98,10 +98,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index f26f4ed9b..ecd0ac418 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Cerebras for running LLM inference providers: diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 490648302..9bd8fcc7c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: cerebras apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,10 +40,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -51,7 +51,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -59,14 +59,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -77,34 +77,34 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index 9f4fbbdda..c061d0793 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
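
The import rewrite in this hunk (repeated across the other template modules in this patch) flattens `from llama_stack.apis.models.models import ModelType` to `from llama_stack.apis.models import ModelType`. That only resolves if the package `__init__` re-exports the symbol; a minimal sketch of the pattern, assuming `Model` and `ModelType` are defined in the inner `models` module as the old import paths suggest:

```python
# llama_stack/apis/models/__init__.py (sketch; the real file may export more)
from .models import Model, ModelType

__all__ = ["Model", "ModelType"]
```

Callers then depend only on the package surface, so the inner `models` module can be reorganized later without another repo-wide import sweep.
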
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 92497b0bf..4b7de1c0c 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ci-tests apis: - agents @@ -24,7 +24,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -37,17 +37,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -55,7 +55,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -63,14 +63,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -81,17 +81,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -101,10 +101,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db inference_store: type: 
sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 513df16c1..ff8d58a08 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Dell's distribution of Llama Stack. TGI inference via Dell's custom container diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 22cf1fd24..7f1d0a8c0 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index aeca2fc26..310f3cc20 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -36,17 +36,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -54,7 +54,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -62,14 +62,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -80,27 +80,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: 
type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 53b47da41..eb08c1d43 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Fireworks.AI for running LLM inference providers: diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 302328486..6265f5cae 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -46,17 +46,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -64,7 +64,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -72,14 +72,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -90,30 +90,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -122,10 +122,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index a31ed732b..e10404e92 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: 
inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,30 +85,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -117,10 +117,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 819df22f0..7e50a899f 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Groq for running LLM inference providers: diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git 
a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 7f1912a6f..21c8f7e0f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: groq apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml 
index 8ede83694..9fca9ac22 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8b00f4ba5..2ae1d7685 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: 
inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 8a9cd5c49..3ec5ae9c1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic 
provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index d0752db9a..214245116 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
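
A brief aside on the `generate_chunk_id` helper introduced earlier in this diff (`llama_stack/providers/utils/vector_io/chunk_utils.py`): it derives a stable UUID from an MD5 hash of its two inputs, so re-ingesting identical content always produces the same chunk ID. A self-contained demonstration (the function body is copied verbatim from the diff; the sample inputs are invented):

```python
import hashlib
import uuid


def generate_chunk_id(document_id: str, chunk_text: str) -> str:
    """Generate a unique chunk ID using a hash of document ID and chunk text."""
    hash_input = f"{document_id}:{chunk_text}".encode()
    return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))


# Deterministic: the same input pair always yields the same UUID, so repeated
# ingestion cannot mint duplicate IDs; different inputs yield different ones.
a = generate_chunk_id("doc-1", "hello world")
b = generate_chunk_id("doc-1", "hello world")
c = generate_chunk_id("doc-2", "hello world")
assert a == b and a != c
```

Note that the call site added in `make_overlapped_chunks` is `generate_chunk_id(chunk, text)`, i.e. the chunk string lands in the `document_id` slot and the full document text in `chunk_text`; the resulting ID is still deterministic, since both arguments simply feed the same hash.
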
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index fec64c1df..3871b77e7 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d4a6286d7..0a5b59400 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 
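
The recurring change in these `run.yaml` hunks is the environment-variable placeholder syntax: `${env.VAR:default}` becomes `${env.VAR:=default}`, and the bare `${env.VAR:}` becomes `${env.VAR:+...}`, mirroring POSIX shell parameter expansion (`:=` substitutes a default when the variable is unset or empty; `:+` substitutes the given word only when it is set). A rough sketch of those semantics for illustration only; the stack's real resolver lives elsewhere and may handle edge cases differently:

```python
import os
import re

# Matches ${env.NAME}, ${env.NAME:=default}, and ${env.NAME:+alternate}.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([=+])([^}]*))?\}")


def resolve_env_placeholders(text: str) -> str:
    def substitute(match: re.Match) -> str:
        name, operator, word = match.group(1), match.group(2), match.group(3)
        value = os.environ.get(name)
        if operator == "=":   # ${env.X:=default}: default when unset or empty
            return value if value else word
        if operator == "+":   # ${env.X:+alt}: alt only when set and non-empty
            return word if value else ""
        if value is None:     # bare ${env.X}: required
            raise KeyError(f"environment variable {name} is not set")
        return value

    return _ENV_PATTERN.sub(substitute, text)


# With PGVECTOR_HOST unset, the default applies; with ENABLE_CHROMADB set,
# the conditional form yields its word.
os.environ.pop("PGVECTOR_HOST", None)
os.environ["ENABLE_CHROMADB"] = "1"
assert resolve_env_placeholders("${env.PGVECTOR_HOST:=localhost}") == "localhost"
assert resolve_env_placeholders("${env.ENABLE_CHROMADB:+chromadb}") == "chromadb"
assert resolve_env_placeholders("${env.BRAVE_SEARCH_API_KEY:+}") == ""
```

Under these shell-style rules, `${env.ENABLE_CHROMADB:+chromadb}` enables a provider only when `ENABLE_CHROMADB` is set, and `${env.BRAVE_SEARCH_API_KEY:+}` lets optional keys such as `api_key` resolve to empty rather than erroring when the variable is absent.
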
- provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml index 857e5f014..44a42594a 100644 --- a/llama_stack/templates/llama_api/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..7631781af 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -41,7 +41,7 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: ( "llama-openai-compat", LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), ), ] inference_providers = [] @@ -85,17 +85,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 2185eb4fc..b627ed2f1 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: llama_api apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::llama-openai-compat config: openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:} + api_key: ${env.LLAMA_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -24,19 +24,19 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} 
provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,17 +49,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -67,7 +67,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -75,14 +75,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -93,17 +93,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db models: - metadata: {} model_id: Llama-3.3-70B-Instruct diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 53ad411e3..2119eeddd 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Meta Reference for running LLM inference providers: diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index e65445a9e..6b15a1e01 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,10 +31,10 @@ providers: model: ${env.SAFETY_MODEL} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -42,7 +42,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -55,17 +55,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -73,7 +73,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -81,14 +81,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -99,17 +99,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,10 +119,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ef02f14d..1b44a0b3e 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 6bd8a0100..51685b2e3 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,4 +1,4 @@ -version: '2' 
+version: 2 distribution_spec: description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index eebfa1066..875fccc9d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,14 +16,14 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -31,13 +31,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -45,30 +45,30 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: localfs 
provider_type: inline::localfs @@ -76,14 +76,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -94,10 +94,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index be0e3f6d1..4477d5244 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,9 +16,9 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,13 +26,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,38 +40,38 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,10 +82,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..cbf4281a2 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Ollama server for running LLM inference providers: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 85d5c813b..5e906a12c 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +41,17 @@ providers: 
persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,15 +85,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -104,12 +105,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -120,13 +121,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - 
metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 2d10a99a4..d2b4e3978 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -56,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -64,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -82,15 +83,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -102,12 +103,12 @@ providers: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -118,13 +119,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 840f1e1db..5f82c5243 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running open benchmarks providers: diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..b4cfbdb52 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, @@ -120,17 +120,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 051ca6f8e..403b0fd3d 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: open-benchmark apis: - agents @@ -33,24 +33,24 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -63,17 +63,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -81,7 +81,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +89,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -107,17 +107,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -127,10 +127,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 46b99cb75..e2e041dbc 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Passthrough hosted llama-stack endpoint for LLM inference providers: diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 3168eeb9f..c5b047511 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null 
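
The same `:+` operator doubles as a feature flag for optional providers in the vector_io sections above and below: `provider_id: ${env.ENABLE_CHROMADB:+chromadb}` resolves to `chromadb` only when `ENABLE_CHROMADB` is exported and non-empty, and to an empty string otherwise, normalizing the earlier `${env.ENABLE_CHROMADB+chromadb}` spelling (no colon, which under shell semantics would fire even for an empty value). Continuing the illustrative `expand()` sketch above:

    import os

    # Hypothetical check of the :+ feature-flag behaviour, using the sketch above.
    os.environ.pop("ENABLE_CHROMADB", None)
    assert expand("${env.ENABLE_CHROMADB:+chromadb}") == ""          # unset: provider disabled

    os.environ["ENABLE_CHROMADB"] = "1"
    assert expand("${env.ENABLE_CHROMADB:+chromadb}") == "chromadb"  # set: provider enabled
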
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,22 +89,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 48abf8577..896b3c91e 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,22 +84,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 6416cd00f..645b59613 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..5d42b8901 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
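
The hunk below also carries the other change repeated across the Python templates: `ModelType` is now imported from the `llama_stack.apis.models` package root rather than the nested `models` module, which assumes (reasonably, since the diff applies cleanly) that the symbol is re-exported there:

    # Before: reach into the submodule
    # from llama_stack.apis.models.models import ModelType
    # After: import from the package root (assuming a re-export in its __init__)
    from llama_stack.apis.models import ModelType
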
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -50,9 +50,9 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), ] default_tool_groups = [ @@ -114,9 +114,9 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", + service_name="${env.OTEL_SERVICE_NAME:+}", + sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), ) ], diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 0e0d020b2..03b7a59fb 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: postgres-demo apis: - agents @@ -13,17 +13,17 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -35,35 +35,35 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} + otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: - 
provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -73,19 +73,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 16fe5d4fd..0298b01c7 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) vLLM server for running LLM inference providers: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 64f71087a..b297f1489 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,16 +16,16 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -36,7 +36,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,10 +49,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -60,7 +60,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -68,14 +68,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -86,24 +86,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -114,13 +114,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 353b9902d..6bd332cc9 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,9 +16,9 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + 
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -29,7 +29,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,10 +42,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -53,7 +53,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -61,14 +61,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -79,24 +79,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -107,13 +107,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index 14b1c8974..ba70f88c6 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use SambaNova for running LLM inference and safety providers: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 58d0d36e3..b96621b58 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: sambanova apis: - agents @@ -24,19 +24,19 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: sambanova provider_type: remote::sambanova @@ -50,27 +50,27 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -81,13 +81,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} model_id: sambanova/Meta-Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..428577697 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -73,17 +73,17 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..3b48dcf7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 960e96d01..f7c53170b 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: starter apis: - agents @@ -16,74 +16,82 @@ providers: - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY:} + api_key: ${env.OPENAI_API_KEY:+} - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} + api_key: ${env.FIREWORKS_API_KEY:+} - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY:} + api_key: ${env.ANTHROPIC_API_KEY:+} - provider_id: gemini 
provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY:} + api_key: ${env.GEMINI_API_KEY:+} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} + api_key: ${env.GROQ_API_KEY:+} - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} + api_key: ${env.SAMBANOVA_API_KEY:+} - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: sqlite-vec + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -96,17 +104,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type:
inline::meta-reference @@ -114,7 +122,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -122,14 +130,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -140,17 +148,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -160,10 +168,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -530,160 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/llama3.1:8b-instruct-fp16 + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-8B-Instruct - provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:8b - provider_id: ollama - provider_model_id: llama3.1:8b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-70B-Instruct - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b - provider_id: ollama - provider_model_id: llama3.1:70b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b - provider_id: ollama - provider_model_id: llama3.1:405b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b-instruct-fp16 - provider_id: 
ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-1B-Instruct - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b - provider_id: ollama - provider_model_id: llama3.2:1b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-3B-Instruct - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b - provider_id: ollama - provider_model_id: llama3.2:3b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:11b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:latest - provider_id: ollama - provider_model_id: llama3.2-vision:latest - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b - provider_id: ollama - provider_model_id: llama3.2-vision:90b - model_type: llm -- metadata: {} - model_id: ollama/llama3.3:70b - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.3-70B-Instruct - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:8b - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-8B - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:1b - provider_id: ollama - provider_model_id: llama-guard3:1b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-1B - provider_id: ollama - provider_model_id: llama-guard3:1b + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm:latest + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ollama/nomic-embed-text - provider_id: ollama - provider_model_id: nomic-embed-text + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -938,6 +801,11 @@ models: provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: 
vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} + provider_id: vllm + provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 2a44a0a37..df31fed84 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -16,6 +16,7 @@ from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplCo from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, ) @@ -36,9 +37,6 @@ from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.remote.inference.ollama.models import ( - MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES, -) from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, @@ -71,48 +69,67 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ( "openai", OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), + OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( "fireworks", FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( "together", TOGETHER_MODEL_ENTRIES, - TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( "ollama", - OLLAMA_MODEL_ENTRIES, - OllamaImplConfig.sample_run_config(), + [ + ProviderModelEntry( + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", + model_type=ModelType.llm, + ), + ProviderModelEntry( + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", + }, + ), + ], + OllamaImplConfig.sample_run_config( + url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False + ), ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, - AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"), + AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( "gemini", GEMINI_MODEL_ENTRIES, - GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"), + GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( "groq", GROQ_MODEL_ENTRIES, - GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), + GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( "sambanova", SAMBANOVA_MODEL_ENTRIES, - SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( "vllm", - [], + [ + ProviderModelEntry( + 
provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", + model_type=ModelType.llm, + ), + ], VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:http://localhost:8000/v1}", + url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), ] @@ -153,22 +170,27 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="sqlite-vec", + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] @@ -257,7 +279,27 @@ def get_distribution_template() -> DistributionTemplate: ), "VLLM_URL": ( "http://localhost:8000/v1", - "VLLM URL", + "vLLM URL", + ), + "VLLM_INFERENCE_MODEL": ( + "", + "Optional vLLM Inference Model to register on startup", + ), + "OLLAMA_URL": ( + "http://localhost:11434", + "Ollama URL", + ), + "OLLAMA_INFERENCE_MODEL": ( + "", + "Optional Ollama Inference Model to register on startup", + ), + "OLLAMA_EMBEDDING_MODEL": ( + "", + "Optional Ollama Embedding Model to register on startup", + ), + "OLLAMA_EMBEDDING_DIMENSION": ( + "384", + "Ollama Embedding Dimension", ), }, ) diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 361b0b680..3ac3968e8 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) TGI server for running LLM inference providers: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 22b7bcde6..63da62a03 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite 
namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index dd012323c..430494121 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,17 +83,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -103,10 +103,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 5ffeac873..518a843da 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Together.AI for running 
LLM inference providers: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index a24843416..7ae2a1d1a 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -110,13 +110,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: 
${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index c71f960bd..dc09aeac9 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,13 +105,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index d5ff0f1f4..147dca50d 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use a built-in vLLM engine for running LLM inference providers: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6878c22b2..104b3a239 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: vllm-gpu apis: - agents @@ -15,12 +15,12 @@ providers: - provider_id: vllm provider_type: inline::vllm config: - tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1} - max_tokens: ${env.MAX_TOKENS:4096} - max_model_len: ${env.MAX_MODEL_LEN:4096} - max_num_seqs: ${env.MAX_NUM_SEQS:4} - enforce_eager: ${env.ENFORCE_EAGER:False} - gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3} + tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} + max_tokens: ${env.MAX_TOKENS:=4096} + max_model_len: ${env.MAX_MODEL_LEN:=4096} + max_num_seqs: ${env.MAX_NUM_SEQS:=4} + enforce_eager: ${env.ENFORCE_EAGER:=False} + gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,7 +31,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -44,17 +44,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,7 +62,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -70,14 +70,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -88,17 +88,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
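Note on the substitution syntax migrated throughout these templates: the old ${env.VAR:default} form is replaced with bash-style parameter expansion, where ${env.VAR:=default} resolves to the environment value when VAR is set and non-empty and to the default otherwise, and ${env.VAR:+value} resolves to the literal value only when VAR is set and non-empty (so ${env.ENABLE_CHROMADB:+chromadb} leaves the provider_id empty, effectively disabling the provider, unless ENABLE_CHROMADB is exported). The sketch below is a minimal illustration of those semantics, not the project's actual resolver; the regex, the function name, and the example strings are assumptions made for the illustration.

    import os
    import re

    # Illustrative only: covers just the three forms that appear in these templates.
    _ENV_RE = re.compile(
        r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?:(?P<op>:=|:\+)(?P<value>[^}]*))?\}"
    )

    def resolve_env(template: str) -> str:
        """Expand ${env.VAR}, ${env.VAR:=default} and ${env.VAR:+value}, bash-style."""
        def substitute(match: re.Match) -> str:
            env_value = os.environ.get(match.group("name"))
            op = match.group("op")
            if op == ":=":  # default: keep the env value if set and non-empty
                return env_value if env_value else match.group("value")
            if op == ":+":  # alternate: emit the literal only if set and non-empty
                return match.group("value") if env_value else ""
            return env_value or ""  # bare reference: empty string when unset
        return _ENV_RE.sub(substitute, template)

    # With ENABLE_CHROMADB and POSTGRES_PORT unset:
    #   resolve_env("provider_id: ${env.ENABLE_CHROMADB:+chromadb}")  ->  "provider_id: "
    #   resolve_env("port: ${env.POSTGRES_PORT:=5432}")               ->  "port: 5432"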
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml index e68ace183..08ee2c5ce 100644 --- a/llama_stack/templates/watsonx/build.yaml +++ b/llama_stack/templates/watsonx/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use watsonx for running LLM inference providers: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d60a87906..678bf72ff 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: watsonx apis: - agents @@ -15,9 +15,9 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:} - project_id: ${env.WATSONX_PROJECT_ID:} + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:+} + project_id: ${env.WATSONX_PROJECT_ID:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic 
provider_type: inline::basic @@ -85,17 +85,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,10 +105,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db models: - metadata: {} model_id: meta-llama/llama-3-3-70b-instruct diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..99be1a80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", - "fire", + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.30.0,<1.0", "jinja2>=3.1.6", @@ -34,15 +34,18 @@ dependencies = [ "python-dotenv", "python-jose", "pydantic>=2", - "requests", "rich", - "setuptools", "starlette", "termcolor", "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk", # server + "opentelemetry-exporter-otlp-proto-http", # server + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store ] [project.optional-dependencies] @@ -67,7 +70,6 @@ dev = [ "types-requests", "types-setuptools", "pre-commit", - "uvicorn", "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. 
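For context on the dependency moves above: uvicorn, the OpenTelemetry SDK and OTLP/HTTP exporter, aiosqlite, and asyncpg become unconditional runtime dependencies (the server and its metadata stores need them at startup), so the hunks that follow drop them from the dev, unit, and test extras; requests leaves the core list and reappears only where it is still used directly, in the test and docs extras. A minimal sanity check, assuming a fresh environment with only the base llama-stack package installed, might look like:

    # Assumption: run inside a fresh `pip install llama-stack` environment.
    # The server-side dependencies promoted into the core list should all
    # import without installing any dev/test extras.
    import aiosqlite   # metadata store (SQLite)
    import asyncpg     # metadata store (Postgres)
    import uvicorn     # ASGI server

    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.trace import TracerProvider

    TracerProvider()  # instantiating confirms opentelemetry-sdk is usable, not merely importable
    print("server dependencies OK; uvicorn", uvicorn.__version__)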
@@ -80,7 +82,6 @@ unit = [ "mcp", "chardet", "qdrant-client", - "opentelemetry-exporter-otlp-proto-http", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", "blobfile", @@ -96,8 +97,6 @@ test = [ "aiohttp", "torch>=2.6.0", "torchvision>=0.21.0", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", "chardet", "pypdf", "mcp", @@ -106,6 +105,7 @@ test = [ "transformers", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", + "requests", ] docs = [ "sphinx-autobuild", @@ -122,6 +122,7 @@ docs = [ "tomli", "linkify", "sphinxcontrib.openapi", + "requests", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] @@ -204,6 +205,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/requirements.txt b/requirements.txt index 7e7aa38ce..2e016ef72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ aiohttp==3.11.13 # via llama-stack aiosignal==1.3.2 # via aiohttp +aiosqlite==0.21.0 + # via llama-stack annotated-types==0.7.0 # via pydantic anyio==4.8.0 @@ -14,6 +16,8 @@ anyio==4.8.0 # llama-stack-client # openai # starlette +asyncpg==0.30.0 + # via llama-stack attrs==25.1.0 # via # aiohttp @@ -27,11 +31,18 @@ certifi==2025.1.31 charset-normalizer==3.4.1 # via requests click==8.1.8 - # via llama-stack-client + # via + # llama-stack-client + # uvicorn colorama==0.4.6 ; sys_platform == 'win32' # via # click # tqdm +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distro==1.9.0 # via # llama-stack-client @@ -50,10 +61,13 @@ frozenlist==1.5.0 # aiosignal fsspec==2024.12.0 # via huggingface-hub +googleapis-common-protos==1.67.0 + # via opentelemetry-exporter-otlp-proto-http h11==0.16.0 # via # httpcore # llama-stack + # uvicorn hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub httpcore==1.0.9 @@ -71,6 +85,8 @@ idna==3.10 # httpx # requests # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api jinja2==3.1.6 # via llama-stack jiter==0.8.2 @@ -95,6 +111,25 @@ numpy==2.2.3 # via pandas openai==1.71.0 # via llama-stack +opentelemetry-api==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.30.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.30.0 + # via llama-stack +opentelemetry-proto==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.30.0 + # via + # llama-stack + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.51b0 + # via opentelemetry-sdk packaging==24.2 # via huggingface-hub pandas==2.2.3 @@ -109,6 +144,10 @@ propcache==0.3.0 # via # aiohttp # yarl +protobuf==5.29.3 + # via + # googleapis-common-protos + # opentelemetry-proto pyaml==25.1.0 # via llama-stack-client pyasn1==0.4.8 @@ -148,7 +187,7 @@ regex==2024.11.6 requests==2.32.4 # via # huggingface-hub - # llama-stack + # opentelemetry-exporter-otlp-proto-http # tiktoken rich==13.9.4 # via @@ -160,8 +199,6 @@ rpds-py==0.22.3 # referencing rsa==4.9 # via python-jose -setuptools==80.8.0 - # via llama-stack six==1.17.0 # via # ecdsa @@ -189,11 +226,13 @@ tqdm==4.67.1 # openai typing-extensions==4.12.2 # via + # 
aiosqlite # anyio # fastapi # huggingface-hub # llama-stack-client # openai + # opentelemetry-sdk # pydantic # pydantic-core # referencing @@ -201,7 +240,13 @@ tzdata==2025.1 # via pandas urllib3==2.3.0 # via requests +uvicorn==0.34.0 + # via llama-stack wcwidth==0.2.13 # via prompt-toolkit +wrapt==1.17.2 + # via deprecated yarl==1.18.3 # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 158f6800f..60cff7503 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,71 +1,101 @@ -version: '2' +version: 2 image_name: ollama apis: +- agents +- datasetio +- eval - inference +- safety +- scoring - telemetry - tool_runtime -- datasetio - vector_io + providers: inference: - - provider_id: custom_ollama - provider_type: remote::custom_ollama + - provider_id: ollama + provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + agents_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200b}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + metadata_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db datasetio: - provider_id: huggingface provider_type: remote::huggingface config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 26eac527b..b0b123c45 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -221,3 +221,56 @@ def test_list_response_input_items_with_limit_and_order(openai_client, client_wi assert hasattr(item, "type") assert item.type == "message" assert item.role in ["user", "assistant"] + + +@pytest.mark.skip(reason="Tool calling is not reliable.") +def test_function_call_output_response(openai_client, client_with_models, text_model_id): + """Test handling of function call outputs in responses.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + # First create a response that triggers a function call + response = client.responses.create( + model=text_model_id, + input=[ + { + "role": "user", + "content": "what's the weather in tokyo? You MUST call the `get_weather` function to find out.", + } + ], + tools=[ + { + "type": "function", + "name": "get_weather", + "description": "Get the weather in a given city", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city to get the weather for"}, + }, + }, + } + ], + stream=False, + ) + + # Verify we got a function call + assert response.output[0].type == "function_call" + call_id = response.output[0].call_id + + # Now send the function call output as a follow-up + response2 = client.responses.create( + model=text_model_id, + input=[{"type": "function_call_output", "call_id": call_id, "output": "sunny and warm"}], + previous_response_id=response.id, + stream=False, + ) + + # Verify the second response processed successfully + assert response2.id is not None + assert response2.output[0].type == "message" + assert ( + "sunny" in response2.output[0].content[0].text.lower() or "warm" in response2.output[0].content[0].text.lower() + ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git 
a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,8 +27,9 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+ assert first_call.kwargs["tools"] is not None + assert first_call.kwargs["temperature"] == 0.1 - openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") - openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( - tool_name="web_search", - kwargs={"query": "What is the capital of Ireland?"}, - ) + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + assert second_call.kwargs["messages"][-1].content == "Dublin" + assert second_call.kwargs["temperature"] == 0.1 - openai_responses_impl.responses_store.store_response_object.assert_called_once() + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) - # Check that we got the content from our mocked tool execution result - assert len(result.output) >= 1 - assert isinstance(result.output[1], OpenAIResponseMessage) - assert result.output[1].content[0].text == "Dublin" + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 1 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] @pytest.mark.asyncio diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 3bcd0613f..5eaca8a25 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,7 @@ import random import numpy as np import pytest -from llama_stack.apis.vector_io import Chunk +from 
llama_stack.apis.vector_io import Chunk, ChunkMetadata EMBEDDING_DIMENSION = 384 @@ -33,6 +33,20 @@ def sample_chunks(): for j in range(k) for i in range(n) ] + sample.extend( + [ + Chunk( + content=f"Sentence {i} from document {j + k}", + chunk_metadata=ChunkMetadata( + document_id=f"document-{j + k}", + chunk_id=f"document-{j}-chunk-{i}", + source=f"example source-{j + k}-{i}", + ), + ) + for j in range(k) + for i in range(n) + ] + ) return sample diff --git a/tests/unit/providers/vector_io/test_chunk_utils.py b/tests/unit/providers/vector_io/test_chunk_utils.py new file mode 100644 index 000000000..941928b6d --- /dev/null +++ b/tests/unit/providers/vector_io/test_chunk_utils.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id + +# This test is a unit test for the chunk_utils.py helpers. This should only contain +# tests which are specific to this file. More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_chunk_utils.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +def test_generate_chunk_id(): + chunks = [ + Chunk(content="test", metadata={"document_id": "doc-1"}), + Chunk(content="test ", metadata={"document_id": "doc-1"}), + Chunk(content="test 3", metadata={"document_id": "doc-1"}), + ] + + chunk_ids = sorted([chunk.chunk_id for chunk in chunks]) + assert chunk_ids == [ + "177a1368-f6a8-0c50-6e92-18677f2c3de3", + "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", + "f68df25d-d9aa-ab4d-5684-64a233add20d", + ] + + +def test_chunk_id(): + # Test with existing chunk ID + chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"}) + assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350" + + # Test with document ID in metadata + chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"}) + assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test") + + # Test chunks with ChunkMetadata + chunk_with_metadata = Chunk( + content="test", + metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}, + chunk_metadata=ChunkMetadata(document_id="document_1"), + ) + assert chunk_with_metadata.chunk_id == "chunk-id-1" + + # Test with no ID or document ID + chunk_without_id = Chunk(content="test") + generated_id = chunk_without_id.chunk_id + assert isinstance(generated_id, str) and len(generated_id) == 36 # Should be a valid UUID + + +def test_stored_chunk_id_alias(): + # Test with existing chunk ID alias + chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}) + assert chunk_with_alias.chunk_id == "chunk-id-1" + serialized_chunk = chunk_with_alias.model_dump() + assert serialized_chunk["stored_chunk_id"] == "chunk-id-1" + # showing chunk_id is not serialized (i.e., a computed field) + assert "chunk_id" not in serialized_chunk + assert chunk_with_alias.stored_chunk_id == "chunk-id-1" diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 607eccb24..6902c8850 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -81,7 +81,7 @@ __QUERY = 
"Sample query" @pytest.mark.asyncio -@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, vector_db_id, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 6424b9e86..bbac717c7 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -15,7 +15,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, - generate_chunk_id, ) # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain @@ -65,6 +64,14 @@ async def test_query_chunks_vector(sqlite_vec_index, sample_chunks, sample_embed assert len(response.chunks) == 2 +@pytest.mark.xfail(reason="Chunk Metadata not yet supported for SQLite-vec", strict=True) +async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks, sample_embeddings): + await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) + query_embedding = sample_embeddings[0] + response = await sqlite_vec_index.query_vector(query_embedding, k=2, score_threshold=0.0) + assert response.chunks[-1].chunk_metadata == sample_chunks[-1].chunk_metadata + + @pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -150,21 +157,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -def test_generate_chunk_id(): - chunks = [ - Chunk(content="test", metadata={"document_id": "doc-1"}), - Chunk(content="test ", metadata={"document_id": "doc-1"}), - Chunk(content="test 3", metadata={"document_id": "doc-1"}), - ] - - chunk_ids = sorted([generate_chunk_id(chunk.metadata["document_id"], chunk.content) for chunk in chunks]) - assert chunk_ids == [ - "177a1368-f6a8-0c50-6e92-18677f2c3de3", - "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", - "f68df25d-d9aa-ab4d-5684-64a233add20d", - ] - - @pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" @@ -339,7 +331,7 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks # Verify scores are in descending order assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) # Verify we get results from both the vector-similar document and keyword-matched document - doc_ids = {chunk.metadata["document_id"] for chunk in response.chunks} + doc_ids = {chunk.metadata.get("document_id") or chunk.chunk_metadata.document_id for chunk in response.chunks} assert "document-0" in doc_ids # From vector search assert "document-2" in doc_ids # From keyword search @@ -364,7 +356,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 1.0}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) # alpha=0.0 
(should behave like pure vector) response = await sqlite_vec_index.query_hybrid( @@ -389,7 +385,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 0.7}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) @pytest.mark.asyncio diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index b9fd8cca4..d2dd1783b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -4,10 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack.apis.vector_io import ( + Chunk, + ChunkMetadata, + QueryChunksResponse, +) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl @@ -17,3 +22,41 @@ class TestRagQuery: rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) + + @pytest.mark.asyncio + async def test_query_chunk_metadata_handling(self): + rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) + content = "test query content" + vector_db_ids = ["db1"] + + chunk_metadata = ChunkMetadata( + document_id="doc1", + chunk_id="chunk1", + source="test_source", + metadata_token_count=5, + ) + interleaved_content = MagicMock() + chunk = Chunk( + content=interleaved_content, + metadata={ + "key1": "value1", + "token_count": 10, + "metadata_token_count": 5, + # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert() + "document_id": "doc1", + }, + stored_chunk_id="chunk1", + chunk_metadata=chunk_metadata, + ) + + query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0]) + + rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response) + result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids) + + assert result is not None + expected_metadata_string = ( + "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}" + ) + assert expected_metadata_string in result.content[1].text + assert result.content is not None diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 7fcbbfde9..0fb7c395e 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -26,39 +26,44 @@ class TestReplaceEnvVars(unittest.TestCase): self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value") def test_default_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default") + self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default") def test_default_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value") + self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value") def test_default_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default") + 
self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default") + + def test_empty_var_no_default(self): + self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None) def test_conditional_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional") def test_conditional_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "") + self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None) def test_conditional_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None) def test_conditional_value_with_zero(self): - self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional") def test_mixed_syntax(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ") self.assertEqual( - replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional" + replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and " + ) + self.assertEqual( + replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional" ) def test_nested_structures(self): data = { - "key1": "${env.TEST_VAR:default}", - "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"], - "key3": {"nested": "${env.NOT_SET+conditional}"}, + "key1": "${env.TEST_VAR:=default}", + "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], + "key3": {"nested": "${env.NOT_SET:+conditional}"}, } - expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}} + expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} self.assertEqual(replace_env_vars(data), expected) diff --git a/uv.lock b/uv.lock index 42eece4e1..31e296642 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, ] +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload-time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload-time = "2024-10-20T00:29:29.391Z" }, + { url 
= "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload-time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload-time = "2024-10-20T00:29:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload-time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload-time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload-time = "2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload-time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "attrs" version = "25.1.0" @@ -1187,6 +1219,8 @@ version = "0.2.12" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, { name = "fastapi" }, { name = "fire" }, { name = "h11" }, @@ -1196,18 +1230,19 @@ dependencies = [ { name = "jsonschema" }, { name = "llama-stack-client" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn" }, ] [package.optional-dependencies] @@ -1238,11 +1273,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1264,9 +1299,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -1282,7 +1316,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -1292,6 +1325,8 @@ unit = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg" }, { name = "fastapi", specifier = ">=0.115.0,<1.0" }, { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, @@ -1302,6 +1337,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.12" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.12" }, { name = "openai", specifier = ">=1.66" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1309,14 +1346,13 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart", specifier = ">=0.0.20" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "streamlit", marker = "extra == 'ui'" }, { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] provides-extras = ["ui"] @@ -1340,11 +1376,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1366,9 +1402,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy" }, { name = 
"sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "torch", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, @@ -1383,7 +1418,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy" },