feat: Adding OCI Embeddings (#4300)

# What does this PR do?
Enables usage of OCI embedding models.

## Test Plan
Testing the embedding model:

```shell
OCI_COMPARTMENT_OCID="" OCI_REGION="us-chicago-1" OCI_AUTH_TYPE=config_file \
  pytest -sv tests/integration/inference/test_openai_embeddings.py \
  --stack-config oci \
  --embedding-model oci/openai.text-embedding-3-small \
  --inference-mode live
```

Testing the chat model:

```shell
OCI_COMPARTMENT_OCID="" OCI_REGION="us-chicago-1" OCI_AUTH_TYPE=config_file \
  pytest -sv tests/integration/inference/ \
  --stack-config oci \
  --text-model oci/openai.gpt-4.1-nano-2025-04-14 \
  --inference-mode live
```

Testing embeddings via curl:

```shell
curl -X POST http://localhost:8321/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{
    "model": "oci/openai.text-embedding-3-small",
    "input": ["First text", "Second text"],
    "encoding_format": "float"
  }'
```

Response:

```json
{"object":"list","data":[{"object":"embedding","embedding":[-0.017190756...0.025272394],"index":1}],"model":"oci/openai.text-embedding-3-small","usage":{"prompt_tokens":4,"total_tokens":4}}
```
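For reference, the request body from the curl example can be built programmatically. This is a minimal sketch; `build_request` is an illustrative helper (not part of the PR), and only the model name and fields come from the test plan above.

```python
import json


def build_request(texts: list[str], model: str = "oci/openai.text-embedding-3-small") -> str:
    """Serialize an OpenAI-compatible embeddings request body, matching the
    JSON sent to http://localhost:8321/v1/embeddings in the curl example."""
    return json.dumps({"model": model, "input": texts, "encoding_format": "float"})


body = build_request(["First text", "Second text"])
print(body)
```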

---------

Co-authored-by: Omar Abdelwahab <omaryashraf10@gmail.com>
Robert Riley (OCI) 2025-12-08 15:05:39 -06:00 committed by GitHub
parent d82a2cd6f8
commit 6ad5fb5577
2 changed files with 32 additions and 11 deletions


```diff
@@ -18,11 +18,7 @@ from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack_api import (
-    ModelType,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
+from llama_stack_api import Model, ModelType

 logger = get_logger(name=__name__, category="inference::oci")
@@ -37,6 +33,8 @@ MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS"
 class OCIInferenceAdapter(OpenAIMixin):
     config: OCIConfig

+    embedding_models: list[str] = []
+
     async def initialize(self) -> None:
         """Initialize and validate OCI configuration."""
         if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
@@ -113,7 +111,9 @@ class OCIInferenceAdapter(OpenAIMixin):
         client = GenerativeAiClient(config=oci_config, signer=oci_signer)

         models: ModelCollection = client.list_models(
-            compartment_id=compartment_id, capability=MODEL_CAPABILITIES, lifecycle_state="ACTIVE"
+            compartment_id=compartment_id,
+            # capability=MODEL_CAPABILITIES,
+            lifecycle_state="ACTIVE",
         ).data

         seen_models = set()
@@ -122,7 +122,7 @@ class OCIInferenceAdapter(OpenAIMixin):
             if model.time_deprecated or model.time_on_demand_retired:
                 continue

-            if "CHAT" not in model.capabilities or "FINE_TUNE" in model.capabilities:
+            if "UNKNOWN_ENUM_VALUE" in model.capabilities or "FINE_TUNE" in model.capabilities:
                 continue

             # Use display_name + model_type as the key to avoid conflicts
@@ -133,8 +133,30 @@ class OCIInferenceAdapter(OpenAIMixin):
             seen_models.add(model_key)
             model_ids.append(model.display_name)

+            if "TEXT_EMBEDDINGS" in model.capabilities:
+                self.embedding_models.append(model.display_name)
+
         return model_ids

-    async def openai_embeddings(self, params: OpenAIEmbeddingsRequestWithExtraBody) -> OpenAIEmbeddingsResponse:
-        # The constructed url is a mask that hits OCI's "chat" action, which is not supported for embeddings.
-        raise NotImplementedError("OCI Provider does not (currently) support embeddings")
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier.
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if identifier in self.embedding_models:
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
```


```diff
@@ -138,7 +138,6 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):
         "remote::runpod",
         "remote::sambanova",
         "remote::tgi",
-        "remote::oci",
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI embeddings.")
```
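The effect of that test change is a plain membership check: `remote::oci` is no longer in the set of providers whose models skip the OpenAI embeddings test. A sketch, with the tuple abbreviated to the entries visible in the hunk:

```python
# Providers that don't support OpenAI-compatible embeddings; "remote::oci" was
# removed from this tuple by the PR. Abbreviated to the entries in the hunk.
UNSUPPORTED_EMBEDDING_PROVIDERS = (
    "remote::runpod",
    "remote::sambanova",
    "remote::tgi",
)


def should_skip_embeddings_test(provider_type: str) -> bool:
    """True when the OpenAI embeddings test should be skipped for a provider."""
    return provider_type in UNSUPPORTED_EMBEDDING_PROVIDERS


print(should_skip_embeddings_test("remote::oci"))  # OCI is no longer skipped
```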