kebbbnnn 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								0f66ae0f61 
								
							 
						 
						
							
							
								
								Add function for stopping inference ( #224 )  
							
							
							
						 
						
							2024-10-09 10:50:19 -04:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6b094b72d3 
								
							 
						 
						
							
							
								
								Update cli_reference.md  
							
							
							
						 
						
							2024-10-08 15:32:06 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								ce70d21f65 
								
							 
						 
						
							
							
								
								Add files via upload  
							
							
							
						 
						
							2024-10-08 15:29:19 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Dalton Flanagan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2d4f7d8acf 
								
							 
						 
						
							
							
								
								Create SECURITY.md  
							
							
							
						 
						
							2024-10-08 13:30:40 -04:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Yuan Tang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								48d0d2001e 
								
							 
						 
						
							
							
								
								Add classifiers in setup.py ( #217 )  
							
							... 
							
							
							
							* Add classifiers in setup.py
* Update setup.py
* Update setup.py 
							
						 
						
							2024-10-08 06:55:16 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								4d5f7459aa 
								
							 
						 
						
							
							
								
								[bugfix] Fix logprobs on meta-reference impl ( #213 )  
							
							... 
							
							
							
							* fix log probs
* add back LogProbsConfig
* error handling
* bugfix 
							
						 
						
							2024-10-07 19:42:39 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Yuan Tang 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								e4ae09d090 
								
							 
						 
						
							
							
								
								Add .idea to .gitignore ( #216 )  
							
							... 
							
							
							
							Signed-off-by: Yuan Tang <terrytangyuan@gmail.com> 
							
						 
						
							2024-10-07 19:38:43 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								16ba0fa06f 
								
							 
						 
						
							
							
								
								Update README.md  
							
							
							
						 
						
							2024-10-07 11:24:27 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Russell Bryant 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								996efa9b42 
								
							 
						 
						
							
							
								
								README.md: Add vLLM to providers table ( #207 )  
							
							... 
							
							
							
							Signed-off-by: Russell Bryant <russell.bryant@gmail.com> 
							
						 
						
							2024-10-07 10:26:52 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								2366e18873 
								
							 
						 
						
							
							
								
								refactor docs ( #209 )  
							
							
							
						 
						
							2024-10-07 10:21:26 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Mindaugas 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								53d440e952 
								
							 
						 
						
							
							
								
								Fix ValueError in case chunks are empty ( #206 )  
							
							
							
						 
						
							2024-10-07 08:55:06 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Russell Bryant 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								a4e775c465 
								
							 
						 
						
							
							
								
								download: improve help text ( #204 )  
							
							
							
						 
						
							2024-10-07 08:40:04 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								4263764493 
								
							 
						 
						
							
							
								
								Fix adapter_id -> adapter_type for Weaviate  
							
							
							
						 
						
							2024-10-07 06:46:32 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Zain Hasan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f4f7618120 
								
							 
						 
						
							
							
								
								add Weaviate memory adapter ( #95 )  
							
							
							
						 
						
							2024-10-06 22:21:50 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
							
							
								
							
							
								27587f32bc 
								
							 
						 
						
							
							
								
								fix db path  
							
							
							
						 
						
							2024-10-06 11:46:08 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
							
							
								
							
							
								cfe3ad33b3 
								
							 
						 
						
							
							
								
								fix db path  
							
							
							
						 
						
							2024-10-06 11:45:35 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Prithu Dasgupta 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								7abab7604b 
								
							 
						 
						
							
							
								
								add databricks provider ( #83 )  
							
							... 
							
							
							
							* add databricks provider
* update provider and test 
							
						 
						
							2024-10-05 23:35:54 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Russell Bryant 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								f73e247ba1 
								
							 
						 
						
							
							
								
								Inline vLLM inference provider ( #181 )  
							
							... 
							
							
							
							This is just like `local` using `meta-reference` for everything except
it uses `vllm` for inference.
Docker works, but so far, `conda` is a bit easier to use with the vllm
provider. The default container base image does not include all the
necessary libraries for all vllm features. More cuda dependencies are
necessary.
I started changing this base image used in this template, but it also
required changes to the Dockerfile, so it was getting too involved to
include in the first PR.
Working so far:
* `python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream True`
* `python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream False`
Example:
```
$ python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream False
User>hello world, write me a 2 sentence poem about the moon
Assistant>
The moon glows bright in the midnight sky
A beacon of light,
```
I have only tested these models:
* `Llama3.1-8B-Instruct` - across 4 GPUs (tensor_parallel_size = 4)
* `Llama3.2-1B-Instruct` - on a single GPU (tensor_parallel_size = 1) 
							
						 
						
							2024-10-05 23:34:16 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								29138a5167 
								
							 
						 
						
							
							
								
								Update getting_started.md  
							
							
							
						 
						
							2024-10-05 12:28:02 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								6d4013ac99 
								
							 
						 
						
							
							
								
								Update getting_started.md  
							
							
							
						 
						
							2024-10-05 12:14:59 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Mindaugas 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								9d16129603 
								
							 
						 
						
							
							
								
								Add 'url' property to Redis KV config ( #192 )  
							
							
							
						 
						
							2024-10-05 11:26:26 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								bfb0e92034 
								
							 
						 
						
							
							
								
								Bump version to 0.0.40  
							
							
							
						 
						
							2024-10-04 09:33:43 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								dc75aab547 
								
							 
						 
						
							
							
								
								Add setuptools dependency  
							
							
							
						 
						
							2024-10-04 09:30:54 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Dalton Flanagan 
								
							 
						 
						
							
							
							
							
								
							
							
								441052b0fd 
								
							 
						 
						
							
							
								
								avoid jq since non-standard on macOS  
							
							
							
						 
						
							2024-10-04 10:11:43 -04:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Dalton Flanagan 
								
							 
						 
						
							
							
							
							
								
							
							
								9bf2e354ae 
								
							 
						 
						
							
							
								
								CLI now requires jq  
							
							
							
						 
						
							2024-10-04 10:05:59 -04:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									raghotham 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								00ed9a410b 
								
							 
						 
						
							
							
								
								Update getting_started.md  
							
							... 
							
							
							
							update discord invite link 
							
						 
						
							2024-10-03 23:28:43 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									AshleyT3 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								734f59d3b8 
								
							 
						 
						
							
							
								
								Check that the model is found before use. ( #182 )  
							
							
							
						 
						
							2024-10-03 23:24:47 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								f913b57397 
								
							 
						 
						
							
							
								
								fix fp8 imports  
							
							
							
						 
						
							2024-10-03 14:40:21 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								8d41e6caa9 
								
							 
						 
						
							
							
								
								Bump version to 0.0.39  
							
							
							
						 
						
							2024-10-03 11:31:03 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								7f49315822 
								
							 
						 
						
							
							
								
								Kill a derpy import  
							
							
							
						 
						
							2024-10-03 11:25:58 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								62d266f018 
								
							 
						 
						
							
							
								
								[CLI] avoid configure twice ( #171 )  
							
							... 
							
							
							
							* avoid configure twice
* cleanup tmp config
* update output msg
* address comment
* update msg
* script update 
							
						 
						
							2024-10-03 11:20:54 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Russell Bryant 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								06db9213b1 
								
							 
						 
						
							
							
								
								inference: Add model option to client ( #170 )  
							
							... 
							
							
							
							I was running this client for testing purposes and being able to
specify which model to use is a convenient addition. This change makes
that possible. 
							
						 
						
							2024-10-03 11:18:57 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								210b71b0ba 
								
							 
						 
						
							
							
								
								fix prompt guard ( #177 )  
							
							... 
							
							
							
							Several other fixes to configure. Add support for 1b/3b models in ollama. 
							
						 
						
							2024-10-03 11:07:53 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								b9b1e8b08b 
								
							 
						 
						
							
							
								
								[bugfix] conda path lookup ( #179 )  
							
							... 
							
							
							
							* fix conda lookup
* comments 
							
						 
						
							2024-10-03 10:45:16 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									raghotham 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								d74501f75c 
								
							 
						 
						
							
							
								
								Update README.md  
							
							... 
							
							
							
							Added pypi package version 
							
						 
						
							2024-10-03 10:21:16 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								c02a90e4c8 
								
							 
						 
						
							
							
								
								Bump version to 0.0.38  
							
							
							
						 
						
							2024-10-03 05:42:47 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								e9f6150588 
								
							 
						 
						
							
							
								
								A bit cleanup to avoid breakages  
							
							
							
						 
						
							2024-10-02 21:31:09 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								988a9cada3 
								
							 
						 
						
							
							
								
								Don't ask for Api.inspect in stack build  
							
							
							
						 
						
							2024-10-02 21:10:56 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								19ce6bf009 
								
							 
						 
						
							
							
								
								Don't validate prompt-guard anymore  
							
							
							
						 
						
							2024-10-02 20:43:57 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Xi Yan 
								
							 
						 
						
							
							
							
							
								
							
							
								703ab9385f 
								
							 
						 
						
							
							
								
								fix routing table key list  
							
							
							
						 
						
							2024-10-02 18:23:31 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								8d049000e3 
								
							 
						 
						
							
							
								
								Add an introspection "Api.inspect" API  
							
							
							
						 
						
							2024-10-02 15:41:14 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Adrian Cole 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								01d93be948 
								
							 
						 
						
							
							
								
								Adds markdown-link-check and fixes a broken link ( #165 )  
							
							... 
							
							
							
							Signed-off-by: Adrian Cole <adrian.cole@elastic.co>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com> 
							
						 
						
							2024-10-02 14:26:20 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								fe4aabd690 
								
							 
						 
						
							
							
								
								provider_id => provider_type, adapter_id => adapter_type  
							
							
							
						 
						
							2024-10-02 14:05:59 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								df68db644b 
								
							 
						 
						
							
							
								
								Refactoring distribution/distribution.py  
							
							... 
							
							
							
							This file was becoming too large and unclear what it housed. Split it
into pieces. 
							
						 
						
							2024-10-02 14:03:02 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								546f05bd3f 
								
							 
						 
						
							
							
								
								No automatic pager  
							
							
							
						 
						
							2024-10-02 12:26:09 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Russell Bryant 
								
							 
						 
						
							
							
								
								
							
							
							
								
							
							
								204eb6d810 
								
							 
						 
						
							
							
								
								docker: Check for selinux before using --security-opt ( #167 )  
							
							... 
							
							
							
							Before using `--security-opt label=disable`, check that SELinux is
enabled. Otherwise, the option is not relevant.
This fixes errors on Mac.
Closes  #166 
Signed-off-by: Russell Bryant <rbryant@redhat.com> 
							
						 
						
							2024-10-02 10:37:41 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								9b93ee2c2b 
								
							 
						 
						
							
							
								
								Bump version to 0.0.37  
							
							
							
						 
						
							2024-10-02 10:15:08 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								227b69e6e6 
								
							 
						 
						
							
							
								
								Fix sample memory impl  
							
							
							
						 
						
							2024-10-02 10:13:09 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								335dea849a 
								
							 
						 
						
							
							
								
								fix sample impls  
							
							
							
						 
						
							2024-10-02 10:10:31 -07:00 
							
								 
							
						 
					 
				
					
						
							
								
								
									Ashwin Bharambe 
								
							 
						 
						
							
							
							
							
								
							
							
								bf0d111c53 
								
							 
						 
						
							
							
								
								Fix build script  
							
							
							
						 
						
							2024-10-02 10:04:23 -07:00