mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-25 17:22:02 +00:00
Merge branch 'main' into vectordb_name
This commit is contained in:
commit
74b0ab69ed
161 changed files with 1844 additions and 11065 deletions
|
|
@ -26,8 +26,8 @@ class CerebrasImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"base_url": DEFAULT_BASE_URL,
|
||||
"api_key": "${env.CEREBRAS_API_KEY}",
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,13 +13,9 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
|
|||
|
||||
class OllamaImplConfig(BaseModel):
|
||||
url: str = DEFAULT_OLLAMA_URL
|
||||
raise_on_connect_error: bool = True
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
|
||||
) -> dict[str, Any]:
|
||||
def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": url,
|
||||
"raise_on_connect_error": raise_on_connect_error,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ MODEL_ENTRIES = [
|
|||
CoreModelId.llama_guard_3_1b.value,
|
||||
),
|
||||
ProviderModelEntry(
|
||||
provider_model_id="all-minilm:latest",
|
||||
provider_model_id="all-minilm:l6-v2",
|
||||
aliases=["all-minilm"],
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
|
|
|
|||
|
|
@ -94,7 +94,6 @@ class OllamaInferenceAdapter(
|
|||
def __init__(self, config: OllamaImplConfig) -> None:
|
||||
self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
|
||||
self.url = config.url
|
||||
self.raise_on_connect_error = config.raise_on_connect_error
|
||||
|
||||
@property
|
||||
def client(self) -> AsyncClient:
|
||||
|
|
@ -108,10 +107,7 @@ class OllamaInferenceAdapter(
|
|||
logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
|
||||
health_response = await self.health()
|
||||
if health_response["status"] == HealthStatus.ERROR:
|
||||
if self.raise_on_connect_error:
|
||||
raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
|
||||
else:
|
||||
logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
|
||||
raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
|
||||
|
||||
async def health(self) -> HealthResponse:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -24,8 +24,10 @@ class PassthroughImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(
|
||||
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "${env.PASSTHROUGH_URL}",
|
||||
"api_key": "${env.PASSTHROUGH_API_KEY}",
|
||||
"url": url,
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,5 +26,5 @@ class RunpodImplConfig(BaseModel):
|
|||
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "${env.RUNPOD_URL:=}",
|
||||
"api_token": "${env.RUNPOD_API_TOKEN:=}",
|
||||
"api_token": "${env.RUNPOD_API_TOKEN}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,11 @@ class TGIImplConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs):
|
||||
def sample_run_config(
|
||||
cls,
|
||||
url: str = "${env.TGI_URL}",
|
||||
**kwargs,
|
||||
):
|
||||
return {
|
||||
"url": url,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -327,7 +327,6 @@ class InferenceEndpointAdapter(_HfAdapter):
|
|||
# Get the inference endpoint details
|
||||
api = HfApi(token=config.api_token.get_secret_value())
|
||||
endpoint = api.get_inference_endpoint(config.endpoint_name)
|
||||
|
||||
# Wait for the endpoint to be ready (if not already)
|
||||
endpoint.wait(timeout=60)
|
||||
|
||||
|
|
|
|||
|
|
@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
|
|||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.together.xyz/v1",
|
||||
"api_key": "${env.TOGETHER_API_KEY:=}",
|
||||
"api_key": "${env.TOGETHER_API_KEY}",
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue