resolve conflict

Edward Ma 2024-12-05 10:22:03 -08:00
commit 99004b6df8
91 changed files with 3734 additions and 730 deletions


@@ -3,7 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Optional
from typing import Any, Dict, List, Optional

from llama_stack.apis.datasetio import *  # noqa: F403
@@ -64,6 +64,11 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
        )
        self.dataset_infos[dataset_def.identifier] = dataset_def

    async def unregister_dataset(self, dataset_id: str) -> None:
        key = f"{DATASETS_PREFIX}{dataset_id}"
        await self.kvstore.delete(key=key)
        del self.dataset_infos[dataset_id]

    async def get_rows_paginated(
        self,
        dataset_id: str,
@@ -95,3 +100,22 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
            total_count=len(rows),
            next_page_token=str(end),
        )

    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
        dataset_def = self.dataset_infos[dataset_id]
        loaded_dataset = load_hf_dataset(dataset_def)

        # Convert rows to HF Dataset format
        new_dataset = hf_datasets.Dataset.from_list(rows)

        # Concatenate the new rows with existing dataset
        updated_dataset = hf_datasets.concatenate_datasets(
            [loaded_dataset, new_dataset]
        )

        if dataset_def.metadata.get("path", None):
            updated_dataset.push_to_hub(dataset_def.metadata["path"])
        else:
            raise NotImplementedError(
                "Uploading to URL-based datasets is not supported yet"
            )
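For orientation, a hedged sketch of how the new unregister_dataset and append_rows methods might be exercised; the dataset id, row shape, and impl handle are placeholders, not part of this commit:

# Hypothetical usage sketch, assuming "my-eval-set" was registered with
# metadata {"path": "my-org/my-dataset"} pointing at a Hugging Face Hub repo:
rows = [{"input_query": "What is 2 + 2?", "expected_answer": "4"}]
await impl.append_rows(dataset_id="my-eval-set", rows=rows)
# append_rows loads the existing HF dataset, concatenates the new rows,
# and pushes the combined dataset back to the Hub repo named in "path".

await impl.unregister_dataset("my-eval-set")
# removes the definition from the kvstore and the in-memory registry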


@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import CerebrasImplConfig


async def get_adapter_impl(config: CerebrasImplConfig, _deps):
    from .cerebras import CerebrasInferenceAdapter

    assert isinstance(
        config, CerebrasImplConfig
    ), f"Unexpected config type: {type(config)}"

    impl = CerebrasInferenceAdapter(config)

    await impl.initialize()

    return impl
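A brief sketch of how the stack would typically await this factory when resolving the provider; the config value here is illustrative:

# Hypothetical wiring sketch (not part of this commit):
config = CerebrasImplConfig(api_key="csk-placeholder")
impl = await get_adapter_impl(config, _deps={})
# impl is a fully initialized CerebrasInferenceAdapter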


@@ -0,0 +1,191 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import AsyncGenerator

from cerebras.cloud.sdk import AsyncCerebras
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import Message
from llama_models.llama3.api.tokenizer import Tokenizer

from llama_stack.apis.inference import *  # noqa: F403
from llama_models.datatypes import CoreModelId

from llama_stack.providers.utils.inference.model_registry import (
    build_model_alias,
    ModelRegistryHelper,
)
from llama_stack.providers.utils.inference.openai_compat import (
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
    process_completion_response,
    process_completion_stream_response,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    completion_request_to_prompt,
)

from .config import CerebrasImplConfig
model_aliases = [
    build_model_alias(
        "llama3.1-8b",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_model_alias(
        "llama3.1-70b",
        CoreModelId.llama3_1_70b_instruct.value,
    ),
]
class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
    def __init__(self, config: CerebrasImplConfig) -> None:
        ModelRegistryHelper.__init__(
            self,
            model_aliases=model_aliases,
        )
        self.config = config
        self.formatter = ChatFormat(Tokenizer.get_instance())

        self.client = AsyncCerebras(
            base_url=self.config.base_url, api_key=self.config.api_key
        )

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass
    async def completion(
        self,
        model_id: str,
        content: InterleavedTextMedia,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        model = await self.model_store.get_model(model_id)
        request = CompletionRequest(
            model=model.provider_resource_id,
            content=content,
            sampling_params=sampling_params,
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
        )
        if stream:
            return self._stream_completion(request)
        else:
            return await self._nonstream_completion(request)

    async def _nonstream_completion(
        self, request: CompletionRequest
    ) -> CompletionResponse:
        params = self._get_params(request)

        r = await self.client.completions.create(**params)

        return process_completion_response(r, self.formatter)

    async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = self._get_params(request)

        stream = await self.client.completions.create(**params)

        async for chunk in process_completion_stream_response(stream, self.formatter):
            yield chunk
    async def chat_completion(
        self,
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> AsyncGenerator:
        model = await self.model_store.get_model(model_id)
        request = ChatCompletionRequest(
            model=model.provider_resource_id,
            messages=messages,
            sampling_params=sampling_params,
            tools=tools or [],
            tool_choice=tool_choice,
            tool_prompt_format=tool_prompt_format,
            response_format=response_format,
            stream=stream,
            logprobs=logprobs,
        )
        if stream:
            return self._stream_chat_completion(request)
        else:
            return await self._nonstream_chat_completion(request)
    async def _nonstream_chat_completion(
        self, request: ChatCompletionRequest
    ) -> ChatCompletionResponse:
        params = self._get_params(request)

        r = await self.client.completions.create(**params)

        return process_chat_completion_response(r, self.formatter)

    async def _stream_chat_completion(
        self, request: ChatCompletionRequest
    ) -> AsyncGenerator:
        params = self._get_params(request)

        stream = await self.client.completions.create(**params)

        async for chunk in process_chat_completion_stream_response(
            stream, self.formatter
        ):
            yield chunk
    def _get_params(
        self, request: Union[ChatCompletionRequest, CompletionRequest]
    ) -> dict:
        if request.sampling_params and request.sampling_params.top_k:
            raise ValueError("`top_k` not supported by Cerebras")

        prompt = ""
        if type(request) == ChatCompletionRequest:
            prompt = chat_completion_request_to_prompt(
                request, self.get_llama_model(request.model), self.formatter
            )
        elif type(request) == CompletionRequest:
            prompt = completion_request_to_prompt(request, self.formatter)
        else:
            raise ValueError(f"Unknown request type {type(request)}")

        return {
            "model": request.model,
            "prompt": prompt,
            "stream": request.stream,
            **get_sampling_options(request.sampling_params),
        }
    async def embeddings(
        self,
        model_id: str,
        contents: List[InterleavedTextMedia],
    ) -> EmbeddingsResponse:
        raise NotImplementedError()
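For orientation, a minimal sketch of how this adapter might be driven directly, outside the stack's routing layer; the key is a placeholder, and the sketch assumes the surrounding stack has attached a model_store that can resolve the "llama3.1-8b" alias registered above:

# Hypothetical usage sketch (not part of this commit):
import asyncio

async def demo() -> None:
    config = CerebrasImplConfig(api_key="csk-placeholder")
    adapter = CerebrasInferenceAdapter(config)
    await adapter.initialize()

    response = await adapter.chat_completion(
        model_id="llama3.1-8b",
        messages=[UserMessage(content="Say hello in one word.")],
        stream=False,
    )
    print(response.completion_message.content)

asyncio.run(demo())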


@@ -0,0 +1,32 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
from typing import Any, Dict, Optional

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field

DEFAULT_BASE_URL = "https://api.cerebras.ai"


@json_schema_type
class CerebrasImplConfig(BaseModel):
    base_url: str = Field(
        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
        description="Base URL for the Cerebras API",
    )
    api_key: Optional[str] = Field(
        default=os.environ.get("CEREBRAS_API_KEY"),
        description="Cerebras API Key",
    )

    @classmethod
    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
        return {
            "base_url": DEFAULT_BASE_URL,
            "api_key": "${env.CEREBRAS_API_KEY}",
        }
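A short sketch of how these defaults behave, assuming neither environment variable is set when the module is imported:

# Hypothetical sketch (not part of this commit):
cfg = CerebrasImplConfig()
assert cfg.base_url == DEFAULT_BASE_URL   # "https://api.cerebras.ai"
assert cfg.api_key is None                # no CEREBRAS_API_KEY in env

# sample_run_config defers the key to run-config env substitution instead:
print(CerebrasImplConfig.sample_run_config())
# {'base_url': 'https://api.cerebras.ai', 'api_key': '${env.CEREBRAS_API_KEY}'}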


@@ -180,7 +180,6 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator:
        params = await self._get_params(request)
        r = await self.client.generate(**params)
        assert isinstance(r, dict)

        choice = OpenAICompatCompletionChoice(
            finish_reason=r["done_reason"] if r["done"] else None,


@@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@@ -1,15 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import OpenTelemetryConfig


async def get_adapter_impl(config: OpenTelemetryConfig, _deps):
    from .opentelemetry import OpenTelemetryAdapter

    impl = OpenTelemetryAdapter(config)
    await impl.initialize()
    return impl


@@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict

from pydantic import BaseModel, Field


class OpenTelemetryConfig(BaseModel):
    otel_endpoint: str = Field(
        default="http://localhost:4318/v1/traces",
        description="The OpenTelemetry collector endpoint URL",
    )
    service_name: str = Field(
        default="llama-stack",
        description="The service name to use for telemetry",
    )

    @classmethod
    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
        return {
            "otel_endpoint": "${env.OTEL_ENDPOINT:http://localhost:4318/v1/traces}",
            "service_name": "${env.OTEL_SERVICE_NAME:llama-stack}",
        }
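The `${env.VAR:default}` placeholders above are resolved by the stack's run-config loader; as a rough illustration of that substitution pattern, a minimal regex-based resolver (an assumption for illustration, not the stack's actual implementation):

# Hypothetical sketch of "${env.NAME:default}" resolution:
import os
import re

def resolve_env_placeholders(value: str) -> str:
    # Replace "${env.NAME:default}" (and bare "${env.NAME}") with the value
    # of NAME from the environment, falling back to the default when given.
    pattern = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::([^}]*))?\}")
    return pattern.sub(
        lambda m: os.environ.get(m.group(1), m.group(2) or ""), value
    )

print(resolve_env_placeholders("${env.OTEL_ENDPOINT:http://localhost:4318/v1/traces}"))
# -> the env value if OTEL_ENDPOINT is set, otherwise the default shown above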


@@ -1,208 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import threading

from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.semconv.resource import ResourceAttributes

from llama_stack.apis.telemetry import *  # noqa: F403

from .config import OpenTelemetryConfig

_GLOBAL_STORAGE = {
    "active_spans": {},
    "counters": {},
    "gauges": {},
    "up_down_counters": {},
}
_global_lock = threading.Lock()


def string_to_trace_id(s: str) -> int:
    # Convert the string to bytes and then to an integer
    return int.from_bytes(s.encode(), byteorder="big", signed=False)


def string_to_span_id(s: str) -> int:
    # Use only the first 8 bytes (64 bits) for span ID
    return int.from_bytes(s.encode()[:8], byteorder="big", signed=False)


def is_tracing_enabled(tracer):
    with tracer.start_as_current_span("check_tracing") as span:
        return span.is_recording()
class OpenTelemetryAdapter(Telemetry):
    def __init__(self, config: OpenTelemetryConfig):
        self.config = config

        resource = Resource.create(
            {
                ResourceAttributes.SERVICE_NAME: self.config.service_name,
            }
        )

        provider = TracerProvider(resource=resource)
        trace.set_tracer_provider(provider)
        otlp_exporter = OTLPSpanExporter(
            endpoint=self.config.otel_endpoint,
        )
        span_processor = BatchSpanProcessor(otlp_exporter)
        trace.get_tracer_provider().add_span_processor(span_processor)

        # Set up metrics
        metric_reader = PeriodicExportingMetricReader(
            OTLPMetricExporter(
                endpoint=self.config.otel_endpoint,
            )
        )
        metric_provider = MeterProvider(
            resource=resource, metric_readers=[metric_reader]
        )
        metrics.set_meter_provider(metric_provider)
        self.meter = metrics.get_meter(__name__)

        self._lock = _global_lock

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        trace.get_tracer_provider().force_flush()
        trace.get_tracer_provider().shutdown()
        metrics.get_meter_provider().shutdown()
    async def log_event(self, event: Event) -> None:
        if isinstance(event, UnstructuredLogEvent):
            self._log_unstructured(event)
        elif isinstance(event, MetricEvent):
            self._log_metric(event)
        elif isinstance(event, StructuredLogEvent):
            self._log_structured(event)

    def _log_unstructured(self, event: UnstructuredLogEvent) -> None:
        with self._lock:
            # Use global storage instead of instance storage
            span_id = string_to_span_id(event.span_id)
            span = _GLOBAL_STORAGE["active_spans"].get(span_id)

            if span:
                timestamp_ns = int(event.timestamp.timestamp() * 1e9)
                span.add_event(
                    name=event.type,
                    attributes={
                        "message": event.message,
                        "severity": event.severity.value,
                        **event.attributes,
                    },
                    timestamp=timestamp_ns,
                )
            else:
                print(
                    f"Warning: No active span found for span_id {span_id}. Dropping event: {event}"
                )

    def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
        if name not in _GLOBAL_STORAGE["counters"]:
            _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
                name=name,
                unit=unit,
                description=f"Counter for {name}",
            )
        return _GLOBAL_STORAGE["counters"][name]

    def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
        if name not in _GLOBAL_STORAGE["gauges"]:
            _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
                name=name,
                unit=unit,
                description=f"Gauge for {name}",
            )
        return _GLOBAL_STORAGE["gauges"][name]

    def _log_metric(self, event: MetricEvent) -> None:
        if isinstance(event.value, int):
            counter = self._get_or_create_counter(event.metric, event.unit)
            counter.add(event.value, attributes=event.attributes)
        elif isinstance(event.value, float):
            up_down_counter = self._get_or_create_up_down_counter(
                event.metric, event.unit
            )
            up_down_counter.add(event.value, attributes=event.attributes)

    def _get_or_create_up_down_counter(
        self, name: str, unit: str
    ) -> metrics.UpDownCounter:
        if name not in _GLOBAL_STORAGE["up_down_counters"]:
            _GLOBAL_STORAGE["up_down_counters"][name] = (
                self.meter.create_up_down_counter(
                    name=name,
                    unit=unit,
                    description=f"UpDownCounter for {name}",
                )
            )
        return _GLOBAL_STORAGE["up_down_counters"][name]
    def _log_structured(self, event: StructuredLogEvent) -> None:
        with self._lock:
            span_id = string_to_span_id(event.span_id)
            trace_id = string_to_trace_id(event.trace_id)
            tracer = trace.get_tracer(__name__)

            if isinstance(event.payload, SpanStartPayload):
                # Check if span already exists to prevent duplicates
                if span_id in _GLOBAL_STORAGE["active_spans"]:
                    return

                parent_span = None
                if event.payload.parent_span_id:
                    parent_span_id = string_to_span_id(event.payload.parent_span_id)
                    parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)

                # Create a new trace context with the trace_id
                context = trace.Context(trace_id=trace_id)
                if parent_span:
                    context = trace.set_span_in_context(parent_span, context)

                span = tracer.start_span(
                    name=event.payload.name,
                    context=context,
                    attributes=event.attributes or {},
                    start_time=int(event.timestamp.timestamp() * 1e9),
                )
                _GLOBAL_STORAGE["active_spans"][span_id] = span

                # Set as current span using context manager
                with trace.use_span(span, end_on_exit=False):
                    pass  # Let the span continue beyond this block

            elif isinstance(event.payload, SpanEndPayload):
                span = _GLOBAL_STORAGE["active_spans"].get(span_id)
                if span:
                    if event.attributes:
                        span.set_attributes(event.attributes)

                    status = (
                        trace.Status(status_code=trace.StatusCode.OK)
                        if event.payload.status == SpanStatus.OK
                        else trace.Status(status_code=trace.StatusCode.ERROR)
                    )
                    span.set_status(status)
                    span.end(end_time=int(event.timestamp.timestamp() * 1e9))

                    # Remove from active spans
                    _GLOBAL_STORAGE["active_spans"].pop(span_id, None)

    async def get_trace(self, trace_id: str) -> Trace:
        raise NotImplementedError("Trace retrieval not implemented yet")
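As a side note, the string_to_trace_id / string_to_span_id helpers in this (now removed) adapter derive OTLP-compatible integer ids from the stack's string ids; a quick illustration of their behavior:

# Illustration of the id-conversion helpers (hex shown for clarity):
span_id = string_to_span_id("abcd1234efgh")   # only the first 8 bytes are used
trace_id = string_to_trace_id("abcd1234")
print(hex(span_id))   # 0x6162636431323334 -> the bytes of "abcd1234"
print(hex(trace_id))  # same bytes here; trace ids use the full string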


@@ -1,17 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from .config import SampleConfig


async def get_adapter_impl(config: SampleConfig, _deps) -> Any:
    from .sample import SampleTelemetryImpl

    impl = SampleTelemetryImpl(config)
    await impl.initialize()
    return impl


@@ -1,12 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pydantic import BaseModel


class SampleConfig(BaseModel):
    host: str = "localhost"
    port: int = 9999


@@ -1,18 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .config import SampleConfig

from llama_stack.apis.telemetry import *  # noqa: F403


class SampleTelemetryImpl(Telemetry):
    def __init__(self, config: SampleConfig):
        self.config = config

    async def initialize(self):
        pass