	llama stack distributions / templates / docker refactor (#266)
* docker compose ollama
* comment
* update compose file
* readme for distributions
* readme
* move distribution folders
* move distribution/templates to distributions/
* rename
* kill distribution/templates
* readme
* readme
* build/developer cookbook/new api provider
* developer cookbook
* readme
* readme
* [bugfix] fix case for agent when memory bank registered without specifying provider_id (#264)
  * fix case where memory bank is registered without provider_id
  * memory test
  * agents unit test
* Add an option to not use elastic agents for meta-reference inference (#269)
* Allow overriding checkpoint_dir via config
* Small rename
* Make all methods `async def` again; add completion() for meta-reference (#270)

  PR #201 made several changes while trying to fix issues with the stream=False branches of the inference and agents APIs. As part of this, it made a change that was slightly gratuitous: turning chat_completion() and its brethren into "def" instead of "async def". The rationale was that this let callers (within llama-stack) write:

  ```
  async for chunk in api.chat_completion(params)
  ```

  However, it caused unnecessary confusion for several folks. Given that clients (e.g., llama-stack-apps) use the SDK methods (which are completely isolated) anyway, this choice was not ideal. Let's revert back so the call now looks like (see the illustrative sketch after this commit message):

  ```
  async for chunk in await api.chat_completion(params)
  ```

  Bonus: added a completion() implementation for the meta-reference provider. Technically this should have been another PR :)

* Improve an important error message
* update ollama for llama-guard3
* Add vLLM inference provider for OpenAI-compatible vLLM server (#178)

  This PR adds a vLLM inference provider for OpenAI-compatible vLLM servers.

* Create .readthedocs.yaml

  Trying out readthedocs

* Update event_logger.py (#275)

  Fix a spelling error.

* vllm
* build templates
* delete templates
* tmp add back build to avoid merge conflicts
* vllm
* vllm

---------

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Co-authored-by: Ashwin Bharambe <ashwin@meta.com>
Co-authored-by: Yuan Tang <terrytangyuan@gmail.com>
Co-authored-by: raghotham <rsm@meta.com>
Co-authored-by: nehal-a2z <nehal@coderabbit.ai>
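To make the restored calling convention concrete, here is a minimal, self-contained sketch. The `ToyInferenceApi` class and string chunks are stand-ins invented for illustration, not the actual llama-stack types; the point is only that an `async def chat_completion()` returning an async iterator requires awaiting the call before iterating.

```
import asyncio
from typing import AsyncIterator


async def _stream() -> AsyncIterator[str]:
    # Toy chunk source standing in for a real streaming inference response.
    for piece in ["Hello", ", ", "world"]:
        yield piece


class ToyInferenceApi:
    # Hypothetical stand-in for an inference API; not llama-stack code.
    async def chat_completion(self, params: dict) -> AsyncIterator[str]:
        # Because this is an `async def` that returns the iterator, callers
        # must write `await api.chat_completion(...)` and then `async for`.
        return _stream()


async def main() -> None:
    api = ToyInferenceApi()
    async for chunk in await api.chat_completion({"stream": True}):
        print(chunk, end="")
    print()


asyncio.run(main())
```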
parent c995219731
commit 23210e8679

32 changed files with 850 additions and 335 deletions
@@ -1,10 +0,0 @@
name: local-bedrock-conda-example
distribution_spec:
  description: Use Amazon Bedrock APIs.
  providers:
    inference: remote::bedrock
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,15 +0,0 @@
name: local-cpu
distribution_spec:
  description: remote inference + local safety/agents/memory
  docker_image: null
  providers:
    inference:
    - remote::ollama
    - remote::tgi
    - remote::together
    - remote::fireworks
    safety: meta-reference
    agents: meta-reference
    memory: meta-reference
    telemetry: meta-reference
image_type: docker

@@ -1,10 +0,0 @@
name: local-databricks
distribution_spec:
  description: Use Databricks for running LLM inference
  providers:
    inference: remote::databricks
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,10 +0,0 @@
name: local-fireworks
distribution_spec:
  description: Use Fireworks.ai for running LLM inference
  providers:
    inference: remote::fireworks
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,10 +0,0 @@
name: local-gpu
distribution_spec:
  description: Use code from `llama_stack` itself to serve all llama stack APIs
  providers:
    inference: meta-reference
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: docker

@@ -1,10 +0,0 @@
name: local-hf-endpoint
distribution_spec:
  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
  providers:
    inference: remote::hf::endpoint
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,10 +0,0 @@
name: local-hf-serverless
distribution_spec:
  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
  providers:
    inference: remote::hf::serverless
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,10 +0,0 @@
name: local-ollama
distribution_spec:
  description: Like local, but use ollama for running LLM inference
  providers:
    inference: remote::ollama
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,10 +0,0 @@
name: local-tgi
distribution_spec:
  description: Like local, but use a TGI server for running LLM inference.
  providers:
    inference: remote::tgi
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda

@@ -1,11 +0,0 @@
name: local-tgi-chroma
distribution_spec:
  description: remote tgi inference + chromadb memory
  docker_image: null
  providers:
    inference: remote::tgi
    safety: meta-reference
    agents: meta-reference
    memory: remote::chromadb
    telemetry: meta-reference
image_type: docker

@@ -1,10 +0,0 @@
name: local-together
distribution_spec:
  description: Use Together.ai for running LLM inference
  providers:
    inference: remote::together
    memory: meta-reference
    safety: remote::together
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
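All of the removed build templates above share the same shape: a `name`, a `distribution_spec` mapping each API to a provider, and an `image_type`. As a rough, hypothetical sketch (assuming PyYAML is installed; this loader is not part of llama-stack), such a template can be inspected like this:

```
import yaml  # assumes PyYAML is available

# One of the removed templates, reproduced inline for illustration.
BUILD_TEMPLATE = """
name: local-ollama
distribution_spec:
  description: Like local, but use ollama for running LLM inference
  providers:
    inference: remote::ollama
    memory: meta-reference
    safety: meta-reference
    agents: meta-reference
    telemetry: meta-reference
image_type: conda
"""

spec = yaml.safe_load(BUILD_TEMPLATE)
print("name:", spec["name"])
print("image type:", spec["image_type"])
for api, provider in spec["distribution_spec"]["providers"].items():
    # e.g. "inference -> remote::ollama"
    print(f"{api} -> {provider}")
```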
@@ -1,50 +0,0 @@
version: '2'
built_at: '2024-10-08T17:40:45.325529'
image_name: local
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: meta-reference
    provider_type: meta-reference
    config:
      model: Llama3.1-8B-Instruct
      quantization: null
      torch_seed: null
      max_seq_len: 4096
      max_batch_size: 1
  safety:
  - provider_id: meta-reference
    provider_type: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-1B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  memory:
  - provider_id: meta-reference
    provider_type: meta-reference
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: ~/.llama/runtime/kvstore.db
  telemetry:
  - provider_id: meta-reference
    provider_type: meta-reference
    config: {}

@@ -1,46 +0,0 @@
version: '2'
built_at: '2024-10-08T17:40:45.325529'
image_name: local
docker_image: null
conda_env: local
apis:
- shields
- agents
- models
- memory
- memory_banks
- inference
- safety
providers:
  inference:
  - provider_id: tgi0
    provider_type: remote::tgi
    config:
      url: http://127.0.0.1:5009
  safety:
  - provider_id: meta-reference
    provider_type: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-1B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  memory:
  - provider_id: meta-reference
    provider_type: meta-reference
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        db_path: ~/.llama/runtime/kvstore.db
  telemetry:
  - provider_id: meta-reference
    provider_type: meta-reference
    config: {}
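Both removed run configurations follow one layout: top-level metadata, a list of served `apis`, and a `providers` map where each API has a list of entries with `provider_id`, `provider_type`, and `config`. A hedged sketch of walking that structure (again assuming PyYAML, and a hypothetical `run.yaml` saved from one of the configs above):

```
import yaml  # assumes PyYAML is available; illustrative only, not llama-stack code

# "run.yaml" is a hypothetical local copy of one of the run configs shown above.
with open("run.yaml") as f:
    run_config = yaml.safe_load(f)

print("serving apis:", ", ".join(run_config["apis"]))
for api, providers in run_config["providers"].items():
    for entry in providers:
        print(f"{api}: provider_id={entry['provider_id']} "
              f"provider_type={entry['provider_type']}")
```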