From 209a78b618f5e71b1ff384ba9877c815950ac8e1 Mon Sep 17 00:00:00 2001
From: Dennis Kennetz
Date: Mon, 10 Nov 2025 15:16:24 -0600
Subject: [PATCH] feat: add oci genai service as chat inference provider
 (#3876)

# What does this PR do?
Adds OCI GenAI PaaS models as a remote inference provider for the OpenAI chat completion endpoints.

## Test Plan
In an OCI tenancy with access to GenAI PaaS, perform the following steps:
1. Ensure you have IAM policies in place to use the service (see the docs included in this PR)
2. For local development, [set up the OCI CLI](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm) and configure it with your region, tenancy, and auth [here](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliconfigure.htm)
3. Once configured, go through llama-stack setup and run llama-stack (this uses config-file-based auth):
```bash
OCI_AUTH_TYPE=config_file \
OCI_CLI_PROFILE=CHICAGO \
OCI_REGION=us-chicago-1 \
OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..aaaaaaaa5...5a \
llama stack run oci
```
4. Hit the `models` endpoint to list models after the server is running:
```bash
curl http://localhost:8321/v1/models | jq
...
  {
    "identifier": "meta.llama-4-scout-17b-16e-instruct",
    "provider_resource_id": "ocid1.generativeaimodel.oc1.us-chicago-1.am...q",
    "provider_id": "oci",
    "type": "model",
    "metadata": {
      "display_name": "meta.llama-4-scout-17b-16e-instruct",
      "capabilities": [
        "CHAT"
      ],
      "oci_model_id": "ocid1.generativeaimodel.oc1.us-chicago-1.a...q"
    },
    "model_type": "llm"
  },
...
```
5. Use the "display_name" field to reference the model in a `/chat/completions` request:
```bash
# Streaming result
curl -X POST http://localhost:8321/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "meta.llama-4-scout-17b-16e-instruct",
  "stream": true,
  "temperature": 0.9,
  "messages": [
    {
      "role": "system",
      "content": "You are a funny comedian. You can be crass."
    },
    {
      "role": "user",
      "content": "Tell me a funny joke about programming."
    }
  ]
}'

# Non-streaming result
curl -X POST http://localhost:8321/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "meta.llama-4-scout-17b-16e-instruct",
  "stream": false,
  "temperature": 0.9,
  "messages": [
    {
      "role": "system",
      "content": "You are a funny comedian. You can be crass."
    },
    {
      "role": "user",
      "content": "Tell me a funny joke about programming."
    }
  ]
}'
```
6. Try out other models from the `/models` endpoint.
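Since the stack exposes OpenAI-compatible routes, steps 4-5 can equally be driven from Python with the `openai` client instead of curl. A minimal sketch: the base URL assumes the default port from step 3, and the `api_key` value is just a placeholder since auth is handled server-side by the OCI signer:

```python
from openai import OpenAI

# Point the client at the locally running llama-stack server.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="not-needed")

# Step 4 equivalent: list the registered OCI models.
for model in client.models.list():
    print(model.id)

# Step 5 equivalent: chat completion addressed by the model's display name.
resp = client.chat.completions.create(
    model="meta.llama-4-scout-17b-16e-instruct",
    temperature=0.9,
    messages=[
        {"role": "system", "content": "You are a funny comedian. You can be crass."},
        {"role": "user", "content": "Tell me a funny joke about programming."},
    ],
)
print(resp.choices[0].message.content)
```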
--- .../distributions/remote_hosted_distro/oci.md | 143 ++++++++++++++++++ docs/docs/providers/inference/remote_oci.mdx | 41 +++++ pyproject.toml | 1 + src/llama_stack/distributions/oci/__init__.py | 7 + src/llama_stack/distributions/oci/build.yaml | 35 +++++ .../distributions/oci/doc_template.md | 140 +++++++++++++++++ src/llama_stack/distributions/oci/oci.py | 108 +++++++++++++ src/llama_stack/distributions/oci/run.yaml | 136 +++++++++++++++++ .../providers/registry/inference.py | 14 ++ .../remote/inference/oci/__init__.py | 17 +++ .../providers/remote/inference/oci/auth.py | 79 ++++++++++ .../providers/remote/inference/oci/config.py | 75 +++++++++ .../providers/remote/inference/oci/oci.py | 140 +++++++++++++++++ .../inference/test_openai_completion.py | 1 + .../inference/test_openai_embeddings.py | 1 + 15 files changed, 938 insertions(+) create mode 100644 docs/docs/distributions/remote_hosted_distro/oci.md create mode 100644 docs/docs/providers/inference/remote_oci.mdx create mode 100644 src/llama_stack/distributions/oci/__init__.py create mode 100644 src/llama_stack/distributions/oci/build.yaml create mode 100644 src/llama_stack/distributions/oci/doc_template.md create mode 100644 src/llama_stack/distributions/oci/oci.py create mode 100644 src/llama_stack/distributions/oci/run.yaml create mode 100644 src/llama_stack/providers/remote/inference/oci/__init__.py create mode 100644 src/llama_stack/providers/remote/inference/oci/auth.py create mode 100644 src/llama_stack/providers/remote/inference/oci/config.py create mode 100644 src/llama_stack/providers/remote/inference/oci/oci.py diff --git a/docs/docs/distributions/remote_hosted_distro/oci.md b/docs/docs/distributions/remote_hosted_distro/oci.md new file mode 100644 index 000000000..b13cf5f73 --- /dev/null +++ b/docs/docs/distributions/remote_hosted_distro/oci.md @@ -0,0 +1,143 @@ +--- +orphan: true +--- + +# OCI Distribution + +The `llamastack/distribution-oci` distribution consists of the following provider configurations. + +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | +| files | `inline::localfs` | +| inference | `remote::oci` | +| safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | +| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | + + +### Environment Variables + +The following environment variables can be configured: + +- `OCI_AUTH_TYPE`: OCI authentication type (instance_principal or config_file) (default: `instance_principal`) +- `OCI_REGION`: OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1) (default: ``) +- `OCI_COMPARTMENT_OCID`: OCI compartment ID for the Generative AI service (default: ``) +- `OCI_CONFIG_FILE_PATH`: OCI config file path (required if OCI_AUTH_TYPE is config_file) (default: `~/.oci/config`) +- `OCI_CLI_PROFILE`: OCI CLI profile name to use from config file (default: `DEFAULT`) + + +## Prerequisites +### Oracle Cloud Infrastructure Setup + +Before using the OCI Generative AI distribution, ensure you have: + +1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/) +2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy +3. 
**Compartment**: Create or identify a compartment where you'll deploy Generative AI models
4. **Authentication**: Configure authentication using either:
   - **Instance Principal** (recommended for cloud-hosted deployments)
   - **API Key** (for on-premises or development environments)

### Authentication Methods

#### Instance Principal Authentication (Recommended)
Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments.

Requirements:
- Instance must be running in an Oracle Cloud Infrastructure compartment
- Instance must have appropriate IAM policies to access Generative AI services

#### API Key Authentication
For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file.

### Required IAM Policies

Ensure your OCI user or instance has the following policy statements:

```
Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
```

## Supported Services

### Inference: OCI Generative AI
Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:

- **Chat Completions**: Conversational AI with context awareness
- **Text Generation**: Complete prompts and generate text content

#### Available Models
OCI Generative AI provides access to models from Meta, Cohere, OpenAI, xAI (Grok), and others.

### Safety: Llama Guard
For content safety and moderation, this distribution uses Meta's Llama Guard model through the OCI Generative AI service to provide:
- Content filtering and moderation
- Policy compliance checking
- Harmful content detection

### Vector Storage: Multiple Options
The distribution supports several vector storage providers:
- **FAISS**: Local in-memory vector search
- **ChromaDB**: Distributed vector database
- **PGVector**: PostgreSQL with vector extensions

### Additional Services
- **Dataset I/O**: Local filesystem and Hugging Face integration
- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
- **Evaluation**: Meta reference evaluation framework

## Running Llama Stack with OCI

You can run the OCI distribution via Docker or a local virtual environment.

### Via venv

If you've set up your local development environment, you can run the stack from your local virtual environment:

```bash
OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
```

### Configuration Examples

#### Using Instance Principal (Recommended for Production)
```bash
export OCI_AUTH_TYPE=instance_principal
export OCI_REGION=us-chicago-1
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
```

#### Using API Key Authentication (Development)
```bash
export OCI_AUTH_TYPE=config_file
export OCI_CONFIG_FILE_PATH=~/.oci/config
export OCI_CLI_PROFILE=DEFAULT
export OCI_REGION=us-chicago-1
export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
```

## Regional Endpoints

OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration.
For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4. **Region Unavailable**: Verify the specified region supports Generative AI services
+
+### Getting Help
+
+For additional support:
+- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
+- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues)
diff --git a/docs/docs/providers/inference/remote_oci.mdx b/docs/docs/providers/inference/remote_oci.mdx
new file mode 100644
index 000000000..33a201a55
--- /dev/null
+++ b/docs/docs/providers/inference/remote_oci.mdx
@@ -0,0 +1,41 @@
+---
+description: |
+  Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
+  Provider documentation
+  https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
+sidebar_label: Remote - Oci
+title: remote::oci
+---
+
+# remote::oci
+
+## Description
+
+
+Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
+Provider documentation
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
+
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
+| `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
+| `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
+| `oci_compartment_id` | `<class 'str'>` | No | | OCI compartment ID for the Generative AI service |
+| `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
+| `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
+
+## Sample Configuration
+
+```yaml
+oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
+oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
+oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
+oci_region: ${env.OCI_REGION:=us-ashburn-1}
+oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
+```
diff --git a/pyproject.toml b/pyproject.toml
index 4ec83249c..653c6d613 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -298,6 +298,7 @@ exclude = [
     "^src/llama_stack/providers/remote/agents/sample/",
     "^src/llama_stack/providers/remote/datasetio/huggingface/",
     "^src/llama_stack/providers/remote/datasetio/nvidia/",
+    "^src/llama_stack/providers/remote/inference/oci/",
     "^src/llama_stack/providers/remote/inference/bedrock/",
     "^src/llama_stack/providers/remote/inference/nvidia/",
     "^src/llama_stack/providers/remote/inference/passthrough/",
diff --git a/src/llama_stack/distributions/oci/__init__.py b/src/llama_stack/distributions/oci/__init__.py
new file mode 100644
index 000000000..68c0efe44
--- /dev/null
+++ b/src/llama_stack/distributions/oci/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+ +from .oci import get_distribution_template # noqa: F401 diff --git a/src/llama_stack/distributions/oci/build.yaml b/src/llama_stack/distributions/oci/build.yaml new file mode 100644 index 000000000..7e082e1f6 --- /dev/null +++ b/src/llama_stack/distributions/oci/build.yaml @@ -0,0 +1,35 @@ +version: 2 +distribution_spec: + description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM + inference with scalable cloud services + providers: + inference: + - provider_type: remote::oci + vector_io: + - provider_type: inline::faiss + - provider_type: remote::chromadb + - provider_type: remote::pgvector + safety: + - provider_type: inline::llama-guard + agents: + - provider_type: inline::meta-reference + eval: + - provider_type: inline::meta-reference + datasetio: + - provider_type: remote::huggingface + - provider_type: inline::localfs + scoring: + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust + tool_runtime: + - provider_type: remote::brave-search + - provider_type: remote::tavily-search + - provider_type: inline::rag-runtime + - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs +image_type: venv +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/oci/doc_template.md b/src/llama_stack/distributions/oci/doc_template.md new file mode 100644 index 000000000..320530ccd --- /dev/null +++ b/src/llama_stack/distributions/oci/doc_template.md @@ -0,0 +1,140 @@ +--- +orphan: true +--- +# OCI Distribution + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. + +{{ providers_table }} + +{% if run_config_env_vars %} +### Environment Variables + +The following environment variables can be configured: + +{% for var, (default_value, description) in run_config_env_vars.items() %} +- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) +{% endfor %} +{% endif %} + +{% if default_models %} +### Models + +The following models are available by default: + +{% for model in default_models %} +- `{{ model.model_id }} {{ model.doc_string }}` +{% endfor %} +{% endif %} + +## Prerequisites +### Oracle Cloud Infrastructure Setup + +Before using the OCI Generative AI distribution, ensure you have: + +1. **Oracle Cloud Infrastructure Account**: Sign up at [Oracle Cloud Infrastructure](https://cloud.oracle.com/) +2. **Generative AI Service Access**: Enable the Generative AI service in your OCI tenancy +3. **Compartment**: Create or identify a compartment where you'll deploy Generative AI models +4. **Authentication**: Configure authentication using either: + - **Instance Principal** (recommended for cloud-hosted deployments) + - **API Key** (for on-premises or development environments) + +### Authentication Methods + +#### Instance Principal Authentication (Recommended) +Instance Principal authentication allows OCI resources to authenticate using the identity of the compute instance they're running on. This is the most secure method for production deployments. + +Requirements: +- Instance must be running in an Oracle Cloud Infrastructure compartment +- Instance must have appropriate IAM policies to access Generative AI services + +#### API Key Authentication +For development or on-premises deployments, follow [this doc](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm) to learn how to create your API signing key for your config file. 
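+
+Once the key is in place, you can sanity-check the profile with the `oci` Python SDK before starting the stack (a minimal sketch; the path and profile name below are this provider's defaults and may differ in your setup):
+
+```python
+import oci
+
+# Load the profile and verify that the required fields are present.
+config = oci.config.from_file("~/.oci/config", "DEFAULT")
+oci.config.validate_config(config)
+print(config["region"], config["tenancy"])
+```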
+
+### Required IAM Policies
+
+Ensure your OCI user or instance has the following policy statements:
+
+```
+Allow group <group-name> to use generative-ai-inference-endpoints in compartment <compartment-name>
+Allow group <group-name> to manage generative-ai-inference-endpoints in compartment <compartment-name>
+```
+
+## Supported Services
+
+### Inference: OCI Generative AI
+Oracle Cloud Infrastructure Generative AI provides access to high-performance AI models through OCI's Platform-as-a-Service offering. The service supports:
+
+- **Chat Completions**: Conversational AI with context awareness
+- **Text Generation**: Complete prompts and generate text content
+
+#### Available Models
+OCI Generative AI provides access to models from Meta, Cohere, OpenAI, xAI (Grok), and others.
+
+### Safety: Llama Guard
+For content safety and moderation, this distribution uses Meta's Llama Guard model through the OCI Generative AI service to provide:
+- Content filtering and moderation
+- Policy compliance checking
+- Harmful content detection
+
+### Vector Storage: Multiple Options
+The distribution supports several vector storage providers:
+- **FAISS**: Local in-memory vector search
+- **ChromaDB**: Distributed vector database
+- **PGVector**: PostgreSQL with vector extensions
+
+### Additional Services
+- **Dataset I/O**: Local filesystem and Hugging Face integration
+- **Tool Runtime**: Web search (Brave, Tavily) and RAG capabilities
+- **Evaluation**: Meta reference evaluation framework
+
+## Running Llama Stack with OCI
+
+You can run the OCI distribution via Docker or a local virtual environment.
+
+### Via venv
+
+If you've set up your local development environment, you can run the stack from your local virtual environment:
+
+```bash
+OCI_AUTH_TYPE=$OCI_AUTH_TYPE OCI_REGION=$OCI_REGION OCI_COMPARTMENT_OCID=$OCI_COMPARTMENT_OCID llama stack run --port 8321 oci
+```
+
+### Configuration Examples
+
+#### Using Instance Principal (Recommended for Production)
+```bash
+export OCI_AUTH_TYPE=instance_principal
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+#### Using API Key Authentication (Development)
+```bash
+export OCI_AUTH_TYPE=config_file
+export OCI_CONFIG_FILE_PATH=~/.oci/config
+export OCI_CLI_PROFILE=DEFAULT
+export OCI_REGION=us-chicago-1
+export OCI_COMPARTMENT_OCID=ocid1.compartment.oc1..your-compartment-id
+```
+
+## Regional Endpoints
+
+OCI Generative AI is available in multiple regions. The service automatically routes to the appropriate regional endpoint based on your configuration. For a full list of regional model availability, visit:
+
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm#regions
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Authentication Errors**: Verify your OCI credentials and IAM policies
+2. **Model Not Found**: Ensure the model OCID is correct and the model is available in your region
+3. **Permission Denied**: Check compartment permissions and Generative AI service access
+4.
**Region Unavailable**: Verify the specified region supports Generative AI services + +### Getting Help + +For additional support: +- [OCI Generative AI Documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm) +- [Llama Stack Issues](https://github.com/meta-llama/llama-stack/issues) \ No newline at end of file diff --git a/src/llama_stack/distributions/oci/oci.py b/src/llama_stack/distributions/oci/oci.py new file mode 100644 index 000000000..1f21840f1 --- /dev/null +++ b/src/llama_stack/distributions/oci/oci.py @@ -0,0 +1,108 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pathlib import Path + +from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.remote.inference.oci.config import OCIConfig + + +def get_distribution_template(name: str = "oci") -> DistributionTemplate: + providers = { + "inference": [BuildProvider(provider_type="remote::oci")], + "vector_io": [ + BuildProvider(provider_type="inline::faiss"), + BuildProvider(provider_type="remote::chromadb"), + BuildProvider(provider_type="remote::pgvector"), + ], + "safety": [BuildProvider(provider_type="inline::llama-guard")], + "agents": [BuildProvider(provider_type="inline::meta-reference")], + "eval": [BuildProvider(provider_type="inline::meta-reference")], + "datasetio": [ + BuildProvider(provider_type="remote::huggingface"), + BuildProvider(provider_type="inline::localfs"), + ], + "scoring": [ + BuildProvider(provider_type="inline::basic"), + BuildProvider(provider_type="inline::llm-as-judge"), + BuildProvider(provider_type="inline::braintrust"), + ], + "tool_runtime": [ + BuildProvider(provider_type="remote::brave-search"), + BuildProvider(provider_type="remote::tavily-search"), + BuildProvider(provider_type="inline::rag-runtime"), + BuildProvider(provider_type="remote::model-context-protocol"), + ], + "files": [BuildProvider(provider_type="inline::localfs")], + } + + inference_provider = Provider( + provider_id="oci", + provider_type="remote::oci", + config=OCIConfig.sample_run_config(), + ) + + vector_io_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + + files_provider = Provider( + provider_id="meta-reference-files", + provider_type="inline::localfs", + config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ] + + return DistributionTemplate( + name=name, + distro_type="remote_hosted", + description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider], + "vector_io": [vector_io_provider], + "files": [files_provider], + }, + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + 
"OCI_AUTH_TYPE": ( + "instance_principal", + "OCI authentication type (instance_principal or config_file)", + ), + "OCI_REGION": ( + "", + "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)", + ), + "OCI_COMPARTMENT_OCID": ( + "", + "OCI compartment ID for the Generative AI service", + ), + "OCI_CONFIG_FILE_PATH": ( + "~/.oci/config", + "OCI config file path (required if OCI_AUTH_TYPE is config_file)", + ), + "OCI_CLI_PROFILE": ( + "DEFAULT", + "OCI CLI profile name to use from config file", + ), + }, + ) diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml new file mode 100644 index 000000000..e385ec606 --- /dev/null +++ b/src/llama_stack/distributions/oci/run.yaml @@ -0,0 +1,136 @@ +version: 2 +image_name: oci +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: oci + provider_type: remote::oci + config: + oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal} + oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config} + oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT} + oci_region: ${env.OCI_REGION:=us-ashburn-1} + oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files} + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 1b70182fc..3cbfd408b 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -297,6 +297,20 @@ Available Models:
 Azure OpenAI inference provider for accessing GPT models and other Azure services.
 Provider documentation
 https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
+""",
+    ),
+    RemoteProviderSpec(
+        api=Api.inference,
+        provider_type="remote::oci",
+        adapter_type="oci",
+        pip_packages=["oci"],
+        module="llama_stack.providers.remote.inference.oci",
+        config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig",
+        provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator",
+        description="""
+Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
+Provider documentation
+https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
 """,
     ),
 ]
diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py
new file mode 100644
index 000000000..280a8c1d2
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/oci/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import InferenceProvider
+
+from .config import OCIConfig
+
+
+async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider:
+    from .oci import OCIInferenceAdapter
+
+    adapter = OCIInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter
diff --git a/src/llama_stack/providers/remote/inference/oci/auth.py b/src/llama_stack/providers/remote/inference/oci/auth.py
new file mode 100644
index 000000000..f64436eb5
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/oci/auth.py
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Generator
+from typing import Any, override
+
+import httpx
+import oci
+import requests
+from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
+
+# Alias for OCI signer instances; all concrete signers derive from AbstractBaseSigner.
+OciAuthSigner = oci.signer.AbstractBaseSigner
+
+
+class HttpxOciAuth(httpx.Auth):
+    """
+    Custom HTTPX authentication class that implements OCI request signing.
+
+    This class handles the authentication flow for HTTPX requests by signing them
+    using the OCI Signer, which adds the necessary authentication headers for
+    OCI API calls.
+
+    Attributes:
+        signer (oci.signer.AbstractBaseSigner): The OCI signer instance used for request signing
+    """
+
+    def __init__(self, signer: OciAuthSigner):
+        self.signer = signer
+
+    @override
+    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
+        # Read the request content to handle streaming requests properly
+        try:
+            content = request.content
+        except httpx.RequestNotRead:
+            # For streaming requests, we need to read the content first
+            content = request.read()
+
+        req = requests.Request(
+            method=request.method,
+            url=str(request.url),
+            headers=dict(request.headers),
+            data=content,
+        )
+        prepared_request = req.prepare()
+
+        # Sign the request using the OCI Signer
+        self.signer.do_request_sign(prepared_request)  # type: ignore
+
+        # Update the original HTTPX request with the signed headers
+        request.headers.update(prepared_request.headers)
+
+        yield request
+
+
+class OciInstancePrincipalAuth(HttpxOciAuth):
+    def __init__(self, **kwargs: Any):
+        self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs)
+
+
+class OciUserPrincipalAuth(HttpxOciAuth):
+    def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE):
+        config = oci.config.from_file(config_file, profile_name)
+        oci.config.validate_config(config)  # type: ignore
+        key_content = ""
+        with open(config["key_file"]) as f:
+            key_content = f.read()
+
+        self.signer = oci.signer.Signer(
+            tenancy=config["tenancy"],
+            user=config["user"],
+            fingerprint=config["fingerprint"],
+            private_key_file_location=config.get("key_file"),
+            pass_phrase=config.get("pass_phrase"),
+            private_key_content=key_content,
+        )
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
new file mode 100644
index 000000000..9747b08ea
--- /dev/null
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+ +import os +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.schema_utils import json_schema_type + + +class OCIProviderDataValidator(BaseModel): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + ) + oci_region: str = Field( + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str | None = Field( + default="~/.oci/config", + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str | None = Field( + default="DEFAULT", + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + +@json_schema_type +class OCIConfig(RemoteInferenceProviderConfig): + oci_auth_type: str = Field( + description="OCI authentication type (must be one of: instance_principal, config_file)", + default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"), + ) + oci_region: str = Field( + default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"), + description="OCI region (e.g., us-ashburn-1)", + ) + oci_compartment_id: str = Field( + default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""), + description="OCI compartment ID for the Generative AI service", + ) + oci_config_file_path: str = Field( + default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"), + description="OCI config file path (required if oci_auth_type is config_file)", + ) + oci_config_profile: str = Field( + default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"), + description="OCI config profile (required if oci_auth_type is config_file)", + ) + + @classmethod + def sample_run_config( + cls, + oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}", + oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}", + oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}", + oci_region: str = "${env.OCI_REGION:=us-ashburn-1}", + oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}", + **kwargs, + ) -> dict[str, Any]: + return { + "oci_auth_type": oci_auth_type, + "oci_config_file_path": oci_config_file_path, + "oci_config_profile": oci_config_profile, + "oci_region": oci_region, + "oci_compartment_id": oci_compartment_id, + } diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py new file mode 100644 index 000000000..253dcf2b6 --- /dev/null +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+
+from collections.abc import Iterable
+from typing import Any
+
+import httpx
+import oci
+from oci.generative_ai.generative_ai_client import GenerativeAiClient
+from oci.generative_ai.models import ModelCollection
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.apis.inference.inference import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
+from llama_stack.apis.models import ModelType
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
+from llama_stack.providers.remote.inference.oci.config import OCIConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+logger = get_logger(name=__name__, category="inference::oci")
+
+OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
+OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
+VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
+DEFAULT_OCI_REGION = "us-ashburn-1"
+
+MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
+
+
+class OCIInferenceAdapter(OpenAIMixin):
+    config: OCIConfig
+
+    async def initialize(self) -> None:
+        """Initialize and validate OCI configuration."""
+        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
+            raise ValueError(
+                f"Invalid OCI authentication type: {self.config.oci_auth_type}. "
+                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
+            )
+
+        if not self.config.oci_compartment_id:
+            raise ValueError("OCI_COMPARTMENT_OCID is required. Set the env variable or provide it in the config.")
+
+    def get_base_url(self) -> str:
+        region = self.config.oci_region or DEFAULT_OCI_REGION
+        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"
+
+    def get_api_key(self) -> str | None:
+        # OCI doesn't use API keys, it uses request signing
+        return ""
+
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.
+        """
+        auth = self._get_auth()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        return {
+            "http_client": DefaultAsyncHttpxClient(
+                auth=auth,
+                headers={
+                    "CompartmentId": compartment_id,
+                },
+            ),
+        }
+
+    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+        return None
+
+    def _get_oci_config(self) -> dict:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return {"region": self.config.oci_region}
+        # initialize() has already validated the auth type, so config_file is the only other case.
+        config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
+        if not config.get("region"):
+            raise ValueError(
+                "Region not specified in config. Please specify in config or with OCI_REGION env variable."
+            )
+
+        return config
+
+    def _get_auth(self) -> httpx.Auth:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return OciInstancePrincipalAuth()
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            return OciUserPrincipalAuth(
+                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
+            )
+        else:
+            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        List available models from OCI Generative AI service.
+ """ + oci_config = self._get_oci_config() + oci_signer = self._get_oci_signer() + compartment_id = self.config.oci_compartment_id or "" + + if oci_signer is None: + client = GenerativeAiClient(config=oci_config) + else: + client = GenerativeAiClient(config=oci_config, signer=oci_signer) + + models: ModelCollection = client.list_models( + compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE" + ).data + + seen_models = set() + model_ids = [] + for model in models.items: + if model.time_deprecated or model.time_on_demand_retired: + continue + + if "CHAT" not in model.capabilities or "FINE_TUNE" in model.capabilities: + continue + + # Use display_name + model_type as the key to avoid conflicts + model_key = (model.display_name, ModelType.llm) + if model_key in seen_models: + continue + + seen_models.add(model_key) + model_ids.append(model.display_name) + + return model_ids + + async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse: + # The constructed url is a mask that hits OCI's "chat" action, which is not supported for embeddings. + raise NotImplementedError("OCI Provider does not (currently) support embeddings") diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 1568ffbe2..4ce2850b4 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -54,6 +54,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id) # {"error":{"message":"Unknown request URL: GET /openai/v1/completions. Please check the URL for typos, # or see the docs at https://console.groq.com/docs/","type":"invalid_request_error","code":"unknown_url"}} "remote::groq", + "remote::oci", "remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404 "remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported "remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index 704775716..fe8070162 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -138,6 +138,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id): "remote::runpod", "remote::sambanova", "remote::tgi", + "remote::oci", ): pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")