forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_embedding_caching_updates
commit 817a3d29b7

12 changed files with 320 additions and 242 deletions
@@ -7,10 +7,17 @@ import TabItem from '@theme/TabItem';
 
 Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, LangFuse, DynamoDB, s3 Bucket
 
+- [Async Custom Callbacks](#custom-callback-class-async)
+- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
+- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
+- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
+- [Logging to Sentry](#logging-proxy-inputoutput---sentry)
+- [Logging to Traceloop (OpenTelemetry)](#opentelemetry---traceloop)
+
 ## Custom Callback Class [Async]
 Use this when you want to run custom callbacks in `python`
 
-### Step 1 - Create your custom `litellm` callback class
+#### Step 1 - Create your custom `litellm` callback class
 We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)**
 
 Define your custom callback class in a python file.
@@ -112,7 +119,7 @@ proxy_handler_instance = MyCustomHandler()
 # need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
 ```
 
-### Step 2 - Pass your custom callback class in `config.yaml`
+#### Step 2 - Pass your custom callback class in `config.yaml`
 We pass the custom callback class defined in **Step1** to the config.yaml.
 Set `callbacks` to `python_filename.logger_instance_name`
 
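To make the docs hunks above easier to follow, here is a hedged sketch of the kind of callback class Step 1 describes. The class name, the file name `custom_callbacks.py`, and the hook methods are illustrative, based on litellm's `CustomLogger` interface, and are not lines from this commit:

```python
# custom_callbacks.py -- illustrative sketch of a litellm custom callback
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("On Success")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # kwargs carries the request payload, litellm_params, metadata, etc.
        print("On Async Success")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print("On Async Failure")


# Step 2 points config.yaml at this instance: callbacks: custom_callbacks.proxy_handler_instance
proxy_handler_instance = MyCustomHandler()
```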
@@ -134,7 +141,7 @@ litellm_settings:
 
 ```
 
-### Step 3 - Start proxy + test request
+#### Step 3 - Start proxy + test request
 ```shell
 litellm --config proxy_config.yaml
 ```
@@ -167,7 +174,7 @@ On Success
     Proxy Metadata: {'user_api_key': None, 'headers': Headers({'host': '0.0.0.0:8000', 'user-agent': 'curl/7.88.1', 'accept': '*/*', 'authorization': 'Bearer sk-1234', 'content-length': '199', 'content-type': 'application/x-www-form-urlencoded'}), 'model_group': 'gpt-3.5-turbo', 'deployment': 'gpt-3.5-turbo-ModelID-gpt-3.5-turbo'}
 ```
 
-### Logging Proxy Request Object, Header, Url
+#### Logging Proxy Request Object, Header, Url
 
 Here's how you can access the `url`, `headers`, `request body` sent to the proxy for each request
 
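The hunk above retitles the docs section on reading the request URL, headers, and body inside a callback. A hedged sketch of that access pattern follows; the `proxy_server_request` key is my assumption about where the proxy stashes the inbound request, not something these diff lines show:

```python
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        litellm_params = kwargs.get("litellm_params", {})
        # assumption: the proxy attaches the original HTTP request under this key
        proxy_server_request = litellm_params.get("proxy_server_request", {}) or {}
        print("url:", proxy_server_request.get("url"))
        print("headers:", proxy_server_request.get("headers"))
        print("body:", proxy_server_request.get("body"))
```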
@@ -211,7 +218,7 @@ class MyCustomHandler(CustomLogger):
 
 ```
 
-### Logging `model_info` set in config.yaml
+#### Logging `model_info` set in config.yaml
 
 Here is how to log the `model_info` set in your proxy `config.yaml`. Information on setting `model_info` on [config.yaml](https://docs.litellm.ai/docs/proxy/configs)
 
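Similarly, the `model_info` section covers surfacing per-deployment metadata inside the callback. A hedged sketch; the `litellm_params -> model_info` path is an assumption drawn from the docs being edited, not from these diff lines:

```python
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        litellm_params = kwargs.get("litellm_params", {})
        # assumption: model_info from config.yaml is forwarded alongside the call
        model_info = litellm_params.get("model_info", {})
        print("model_info:", model_info)
```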
@@ -428,176 +435,6 @@ print(response)
 </TabItem>
 </Tabs>
 
 
-## OpenTelemetry - Traceloop
-
-Traceloop allows you to log LLM Input/Output in the OpenTelemetry format
-
-We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop
-
-**Step 1** Install traceloop-sdk and set Traceloop API key
-
-```shell
-pip install traceloop-sdk -U
-```
-
-Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog)
-, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector)
-
-**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
-```yaml
-model_list:
- - model_name: gpt-3.5-turbo
-   litellm_params:
-     model: gpt-3.5-turbo
-litellm_settings:
-  success_callback: ["traceloop"]
-```
-
-**Step 3**: Start the proxy, make a test request
-
-Start proxy
-```shell
-litellm --config config.yaml --debug
-```
-
-Test Request
-```
-curl --location 'http://0.0.0.0:8000/chat/completions' \
-    --header 'Content-Type: application/json' \
-    --data ' {
-    "model": "gpt-3.5-turbo",
-    "messages": [
-        {
-        "role": "user",
-        "content": "what llm are you"
-        }
-    ]
-    }'
-```
-
-
-
-<!-- ### Step 1 Start OpenTelemetry Collecter Docker Container
-This container sends logs to your selected destination
-
-#### Install OpenTelemetry Collecter Docker Image
-```shell
-docker pull otel/opentelemetry-collector:0.90.0
-docker run -p 127.0.0.1:4317:4317 -p 127.0.0.1:55679:55679 otel/opentelemetry-collector:0.90.0
-```
-
-#### Set Destination paths on OpenTelemetry Collecter
-
-Here's the OpenTelemetry yaml config to use with Elastic Search
-```yaml
-receivers:
-  otlp:
-    protocols:
-      grpc:
-        endpoint: 0.0.0.0:4317
-
-processors:
-  batch:
-    timeout: 1s
-    send_batch_size: 1024
-
-exporters:
-  logging:
-    loglevel: debug
-  otlphttp/elastic:
-    endpoint: "<your elastic endpoint>"
-    headers:
-      Authorization: "Bearer <elastic api key>"
-
-service:
-  pipelines:
-    metrics:
-      receivers: [otlp]
-      exporters: [logging, otlphttp/elastic]
-    traces:
-      receivers: [otlp]
-      exporters: [logging, otlphttp/elastic]
-    logs:
-      receivers: [otlp]
-      exporters: [logging,otlphttp/elastic]
-```
-
-#### Start the OpenTelemetry container with config
-Run the following command to start your docker container. We pass `otel_config.yaml` from the previous step
-
-```shell
-docker run -p 4317:4317 \
-    -v $(pwd)/otel_config.yaml:/etc/otel-collector-config.yaml \
-    otel/opentelemetry-collector:latest \
-    --config=/etc/otel-collector-config.yaml
-```
-
-### Step 2 Configure LiteLLM proxy to log on OpenTelemetry
-
-#### Pip install opentelemetry
-```shell
-pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp -U
-```
-
-#### Set (OpenTelemetry) `otel=True` on the proxy `config.yaml`
-**Example config.yaml**
-
-```yaml
-model_list:
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: azure/gpt-turbo-small-eu
-      api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
-      api_key:
-      rpm: 6      # Rate limit for this deployment: in requests per minute (rpm)
-
-general_settings:
-  otel: True      # set OpenTelemetry=True, on litellm Proxy
-
-```
-
-#### Set OTEL collector endpoint
-LiteLLM will read the `OTEL_ENDPOINT` environment variable to send data to your OTEL collector
-
-```python
-os.environ['OTEL_ENDPOINT'] # defauls to 127.0.0.1:4317 if not provided
-```
-
-#### Start LiteLLM Proxy
-```shell
-litellm -config config.yaml
-```
-
-#### Run a test request to Proxy
-```shell
-curl --location 'http://0.0.0.0:8000/chat/completions' \
-    --header 'Authorization: Bearer sk-1244' \
-    --data ' {
-    "model": "gpt-3.5-turbo",
-    "messages": [
-        {
-        "role": "user",
-        "content": "request from LiteLLM testing"
-        }
-    ]
-    }'
-```
-
-
-#### Test & View Logs on OpenTelemetry Collecter
-On successfull logging you should be able to see this log on your `OpenTelemetry Collecter` Docker Container
-```shell
-Events:
-
-```
-
-### View Log on Elastic Search
-Here's the log view on Elastic Search. You can see the request `input`, `output` and `headers`
-
-<Image img={require('../../img/elastic_otel.png')} /> -->
-
-
 ## Logging Proxy Input/Output - s3 Buckets
 
 We will use the `--config` to set
@@ -815,3 +652,52 @@ Test Request
 ```
 litellm --test
 ```
+
+## Logging Proxy Input/Output Traceloop (OpenTelemetry)
+
+Traceloop allows you to log LLM Input/Output in the OpenTelemetry format
+
+We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop
+
+**Step 1** Install traceloop-sdk and set Traceloop API key
+
+```shell
+pip install traceloop-sdk -U
+```
+
+Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog)
+, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector)
+
+**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+   litellm_params:
+     model: gpt-3.5-turbo
+litellm_settings:
+  success_callback: ["traceloop"]
+```
+
+**Step 3**: Start the proxy, make a test request
+
+Start proxy
+```shell
+litellm --config config.yaml --debug
+```
+
+Test Request
+```
+curl --location 'http://0.0.0.0:8000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --data ' {
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ]
+    }'
+```
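The moved docs section enables Traceloop through the proxy's config.yaml. For comparison, a hedged sketch of the same wiring in plain Python via the SDK; the `TRACELOOP_API_KEY` variable name is my assumption about how the traceloop-sdk reads its key:

```python
import os
import litellm

os.environ["TRACELOOP_API_KEY"] = "..."   # assumed env var name for the Traceloop key
litellm.success_callback = ["traceloop"]  # log every successful LLM call to Traceloop

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
```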
@@ -93,6 +93,7 @@ class S3Logger:
             messages = kwargs.get("messages")
             optional_params = kwargs.get("optional_params", {})
             call_type = kwargs.get("call_type", "litellm.completion")
+            cache_hit = kwargs.get("cache_hit", False)
             usage = response_obj["usage"]
             id = response_obj.get("id", str(uuid.uuid4()))
 
@@ -100,6 +101,7 @@ class S3Logger:
             payload = {
                 "id": id,
                 "call_type": call_type,
+                "cache_hit": cache_hit,
                 "startTime": start_time,
                 "endTime": end_time,
                 "model": kwargs.get("model", ""),
@@ -118,7 +120,10 @@ class S3Logger:
                 except:
                     # non blocking if it can't cast to a str
                     pass
-            s3_object_key = payload["id"]
+
+            s3_object_key = (
+                payload["id"] + "-time=" + str(start_time)
+            )  # we need the s3 key to include the time, so we log cache hits too
 
             import json
 
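The S3Logger hunks add `cache_hit` to the logged payload and append the request start time to the object key, so a cached replay that reuses a response id no longer overwrites the original log. A small illustration of the resulting key scheme (the payload values below are made up):

```python
import datetime

payload = {"id": "chatcmpl-abc123", "cache_hit": True}   # made-up payload values
start_time = datetime.datetime(2024, 1, 13, 10, 0, 0)

s3_object_key = (
    payload["id"] + "-time=" + str(start_time)
)  # -> "chatcmpl-abc123-time=2024-01-13 10:00:00"
print(s3_object_key)
```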
@@ -2,7 +2,7 @@ import json, copy, types
 import os
 from enum import Enum
 import time
-from typing import Callable, Optional, Any
+from typing import Callable, Optional, Any, Union
 import litellm
 from litellm.utils import ModelResponse, get_secret, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -714,7 +714,7 @@ def _embedding_func_single(
 
 def embedding(
     model: str,
-    input: list,
+    input: Union[list, str],
     api_key: Optional[str] = None,
     logging_obj=None,
    model_response=None,
@@ -737,18 +737,28 @@ def embedding(
         aws_region_name=aws_region_name,
         aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
     )
-    ## Embedding Call
-    embeddings = [
-        _embedding_func_single(
-            model,
-            i,
-            optional_params=optional_params,
-            client=client,
-            logging_obj=logging_obj,
-        )
-        for i in input
-    ]  # [TODO]: make these parallel calls
+    if type(input) == str:
+        embeddings = [
+            _embedding_func_single(
+                model,
+                input,
+                optional_params=optional_params,
+                client=client,
+                logging_obj=logging_obj,
+            )
+        ]
+    else:
+        ## Embedding Call
+        embeddings = [
+            _embedding_func_single(
+                model,
+                i,
+                optional_params=optional_params,
+                client=client,
+                logging_obj=logging_obj,
+            )
+            for i in input
+        ]  # [TODO]: make these parallel calls
 
     ## Populate OpenAI compliant dictionary
     embedding_response = []
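With `input: Union[list, str]`, the Bedrock embedding path now accepts a bare string as well as a list. A hedged usage sketch (AWS credentials and region are assumed to be configured in the environment):

```python
import litellm

# Single-string input: wrapped into a one-element embedding call by the new branch.
single = litellm.embedding(
    model="bedrock/amazon.titan-embed-text-v1",
    input="good morning from litellm",
)

# List input: unchanged behaviour, one embedding per item.
batch = litellm.embedding(
    model="bedrock/amazon.titan-embed-text-v1",
    input=["good morning from litellm", "lets test a second string for good measure"],
)
print(len(single.data), len(batch.data))
```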
@@ -202,6 +202,7 @@ async def acompletion(
         - If `stream` is True, the function returns an async generator that yields completion lines.
     """
     loop = asyncio.get_event_loop()
+    custom_llm_provider = None
     # Adjusted to use explicit arguments instead of *args and **kwargs
     completion_kwargs = {
         "model": model,
@@ -243,6 +244,9 @@ async def acompletion(
     ctx = contextvars.copy_context()
     func_with_context = partial(ctx.run, func)
 
+    _, custom_llm_provider, _, _ = get_llm_provider(
+        model=model, api_base=kwargs.get("api_base", None)
+    )
     if (
         custom_llm_provider == "openai"
         or custom_llm_provider == "azure"
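`acompletion` now resolves the provider up front so it can decide whether to await a natively async path. A hedged sketch of the `get_llm_provider` call used above; the example model names are mine, and the 4-tuple unpacking mirrors the hunk:

```python
from litellm.utils import get_llm_provider

model, custom_llm_provider, api_key, api_base = get_llm_provider(model="gpt-3.5-turbo")
print(custom_llm_provider)   # -> "openai"

_, provider, _, _ = get_llm_provider(model="azure/my-deployment", api_base=None)
print(provider)              # -> "azure"
```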
@@ -6,7 +6,6 @@ from datetime import datetime
 import importlib
 from dotenv import load_dotenv
 
-
 sys.path.append(os.getcwd())
 
 config_filename = "litellm.secrets"
@@ -349,6 +348,38 @@ def run_server(
             raise ImportError(
                 "Uvicorn, gunicorn needs to be imported. Run - `pip 'litellm[proxy]'`"
             )
 
+        if config is not None:
+            """
+            Allow user to pass in db url via config
+
+            read from there and save it to os.env['DATABASE_URL']
+            """
+            try:
+                import yaml
+            except:
+                raise ImportError(
+                    "yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"
+                )
+
+            if os.path.exists(config):
+                with open(config, "r") as config_file:
+                    config = yaml.safe_load(config_file)
+            general_settings = config.get("general_settings", {})
+            database_url = general_settings.get("database_url", None)
+            if database_url and database_url.startswith("os.environ/"):
+                original_dir = os.getcwd()
+                # set the working directory to where this script is
+                sys.path.insert(
+                    0, os.path.abspath("../..")
+                )  # Adds the parent directory to the system path - for litellm local dev
+                import litellm
+
+                database_url = litellm.get_secret(database_url)
+                os.chdir(original_dir)
+            if database_url is not None and isinstance(database_url, str):
+                os.environ["DATABASE_URL"] = database_url
+
         if os.getenv("DATABASE_URL", None) is not None:
             # run prisma db push, before starting server
             # Save the current working directory
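The new proxy_cli block lets `general_settings.database_url` in config.yaml point at an environment variable via the `os.environ/` prefix. A toy stand-in for that resolution step; the helper below mirrors, but is not, `litellm.get_secret`:

```python
import os


def resolve_database_url(database_url: str) -> str:
    """Toy stand-in for litellm.get_secret: resolve 'os.environ/NAME' references."""
    prefix = "os.environ/"
    if database_url.startswith(prefix):
        return os.environ[database_url[len(prefix):]]  # KeyError if the variable is unset
    return database_url


os.environ["MY_DB_URL"] = "postgresql://user:pass@localhost:5432/litellm"  # example value
print(resolve_database_url("os.environ/MY_DB_URL"))
```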
@@ -1363,6 +1363,12 @@ class Router:
                     api_version=api_version,
                     timeout=timeout,
                     max_retries=max_retries,
+                    http_client=httpx.AsyncClient(
+                        transport=AsyncCustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1378,6 +1384,12 @@ class Router:
                     api_version=api_version,
                     timeout=timeout,
                     max_retries=max_retries,
+                    http_client=httpx.Client(
+                        transport=CustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1393,6 +1405,12 @@ class Router:
                     api_version=api_version,
                     timeout=stream_timeout,
                     max_retries=max_retries,
+                    http_client=httpx.AsyncClient(
+                        transport=AsyncCustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1408,6 +1426,12 @@ class Router:
                     api_version=api_version,
                     timeout=stream_timeout,
                     max_retries=max_retries,
+                    http_client=httpx.Client(
+                        transport=CustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1471,9 +1495,10 @@ class Router:
                     timeout=stream_timeout,
                     max_retries=max_retries,
                     http_client=httpx.AsyncClient(
+                        transport=AsyncCustomHTTPTransport(),
                         limits=httpx.Limits(
                             max_connections=1000, max_keepalive_connections=100
-                        )
+                        ),
                     ),
                 )
                 self.cache.set_cache(
@@ -1491,9 +1516,10 @@ class Router:
                     timeout=stream_timeout,
                     max_retries=max_retries,
                     http_client=httpx.Client(
+                        transport=CustomHTTPTransport(),
                         limits=httpx.Limits(
                             max_connections=1000, max_keepalive_connections=100
-                        )
+                        ),
                     ),
                 )
                 self.cache.set_cache(
@@ -1513,6 +1539,12 @@ class Router:
                     base_url=api_base,
                     timeout=timeout,
                     max_retries=max_retries,
+                    http_client=httpx.AsyncClient(
+                        transport=AsyncCustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1527,6 +1559,12 @@ class Router:
                     base_url=api_base,
                     timeout=timeout,
                     max_retries=max_retries,
+                    http_client=httpx.Client(
+                        transport=CustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1542,6 +1580,12 @@ class Router:
                     base_url=api_base,
                     timeout=stream_timeout,
                     max_retries=max_retries,
+                    http_client=httpx.AsyncClient(
+                        transport=AsyncCustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
@@ -1557,6 +1601,12 @@ class Router:
                     base_url=api_base,
                     timeout=stream_timeout,
                     max_retries=max_retries,
+                    http_client=httpx.Client(
+                        transport=CustomHTTPTransport(),
+                        limits=httpx.Limits(
+                            max_connections=1000, max_keepalive_connections=100
+                        ),
+                    ),  # type: ignore
                 )
                 self.cache.set_cache(
                     key=cache_key,
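Every client the Router caches now gets an explicit httpx client with a transport and connection limits. A hedged standalone sketch of that construction follows; the commit passes litellm's `AsyncCustomHTTPTransport`, while a stock httpx transport is used here so the snippet needs no litellm-internal imports:

```python
import httpx
import openai

client = openai.AsyncAzureOpenAI(
    api_key="sk-...",                                   # placeholder
    azure_endpoint="https://example.openai.azure.com",  # placeholder
    api_version="2023-07-01-preview",
    timeout=600.0,
    max_retries=2,
    http_client=httpx.AsyncClient(
        transport=httpx.AsyncHTTPTransport(),           # the commit uses AsyncCustomHTTPTransport here
        limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100),
    ),
)
```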
@@ -186,13 +186,16 @@ def test_cohere_embedding3():
 
 def test_bedrock_embedding_titan():
     try:
+        # this tests if we support str input for bedrock embedding
         litellm.set_verbose = True
+        litellm.enable_cache()
+        import time
+
+        current_time = str(time.time())
+        # DO NOT MAKE THE INPUT A LIST in this test
         response = embedding(
-            model="amazon.titan-embed-text-v1",
-            input=[
-                "good morning from litellm, attempting to embed data",
-                "lets test a second string for good measure",
-            ],
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
         )
         print(f"response:", response)
         assert isinstance(
@@ -202,11 +205,28 @@ def test_bedrock_embedding_titan():
         assert all(
             isinstance(x, float) for x in response["data"][0]["embedding"]
         ), "Expected response to be a list of floats"
+
+        # this also tests if we can return a cache response for this scenario
+        import time
+
+        start_time = time.time()
+
+        response = embedding(
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
+        )
+        print(response)
+
+        end_time = time.time()
+        print(f"Embedding 2 response time: {end_time - start_time} seconds")
+
+        assert end_time - start_time < 0.1
+        litellm.disable_cache()
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
 
-# test_bedrock_embedding_titan()
+test_bedrock_embedding_titan()
 
 
 def test_bedrock_embedding_cohere():
@@ -280,7 +300,7 @@ def test_aembedding():
         pytest.fail(f"Error occurred: {e}")
 
 
-test_aembedding()
+# test_aembedding()
 
 
 def test_aembedding_azure():
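The reworked Titan test turns on litellm's cache and asserts that the second, identical embedding call returns in under 0.1 s. A hedged miniature of the same pattern outside pytest (Bedrock model access and AWS credentials assumed):

```python
import time
import litellm
from litellm import embedding

litellm.enable_cache()   # local cache, as the test enables it

text = f"good morning from litellm {time.time()}"
embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)  # first call hits Bedrock

start = time.time()
embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)  # should be served from cache
print(f"cached call took {time.time() - start:.4f}s")              # the test expects < 0.1s

litellm.disable_cache()
```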
@@ -6,29 +6,34 @@ sys.path.insert(
 import litellm
 from dotenv import load_dotenv
 
 
 def generate_text():
     try:
+        litellm.set_verbose = True
         messages = [
             {
                 "role": "user",
                 "content": [
-                    {
-                        "type": "text",
-                        "text": "What is this image?"
-                    },
+                    {"type": "text", "text": "What is this image?"},
                     {
                         "type": "image_url",
                         "image_url": {
                             "url": "https://avatars.githubusercontent.com/u/17561003?v=4"
-                        }
-                    }
-                ]
+                        },
+                    },
+                ],
             }
         ]
-        response = litellm.completion(model="gemini/gemini-pro-vision", messages=messages, stop="Hello world")
+        response = litellm.completion(
+            model="gemini/gemini-pro-vision",
+            messages=messages,
+            stop="Hello world",
+            num_retries=3,
+        )
         print(response)
         assert isinstance(response.choices[0].message.content, str) == True
     except Exception as exception:
         raise Exception("An error occurred during text generation:", exception)
 
-generate_text()
+
+# generate_text()
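Besides the black-style reformatting, the Gemini vision test now passes `num_retries=3`, litellm's built-in retry knob. A hedged minimal usage sketch (API key assumed configured):

```python
import litellm

# num_retries asks litellm to retry transient failures before raising.
response = litellm.completion(
    model="gemini/gemini-pro-vision",
    messages=[{"role": "user", "content": "what llm are you"}],
    num_retries=3,
)
print(response.choices[0].message.content)
```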
@@ -20,8 +20,10 @@ def test_s3_logging():
     # since we are modifying stdout, and pytests runs tests in parallel
     # on circle ci - we only test litellm.acompletion()
     try:
-        # pre
         # redirect stdout to log_file
+        litellm.cache = litellm.Cache(
+            type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
+        )
+
         litellm.success_callback = ["s3"]
         litellm.s3_callback_params = {
@@ -35,10 +37,14 @@ def test_s3_logging():
 
         expected_keys = []
 
+        import time
+
+        curr_time = str(time.time())
+
         async def _test():
             return await litellm.acompletion(
                 model="gpt-3.5-turbo",
-                messages=[{"role": "user", "content": "This is a test"}],
+                messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
                 max_tokens=10,
                 temperature=0.7,
                 user="ishaan-2",
@@ -48,30 +54,18 @@ def test_s3_logging():
         print(f"response: {response}")
         expected_keys.append(response.id)
 
-        # # streaming + async
-        # async def _test2():
-        #     response = await litellm.acompletion(
-        #         model="gpt-3.5-turbo",
-        #         messages=[{"role": "user", "content": "what llm are u"}],
-        #         max_tokens=10,
-        #         temperature=0.7,
-        #         user="ishaan-2",
-        #         stream=True,
-        #     )
-        #     async for chunk in response:
-        #         pass
-
-        # asyncio.run(_test2())
-
-        # aembedding()
-        # async def _test3():
-        #     return await litellm.aembedding(
-        #         model="text-embedding-ada-002", input=["hi"], user="ishaan-2"
-        #     )
-
-        # response = asyncio.run(_test3())
-        # expected_keys.append(response.id)
-        # time.sleep(1)
+        async def _test():
+            return await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": f"This is a test {curr_time}"}],
+                max_tokens=10,
+                temperature=0.7,
+                user="ishaan-2",
+            )
+
+        response = asyncio.run(_test())
+        expected_keys.append(response.id)
+        print(f"response: {response}")
 
         import boto3
 
@@ -86,10 +80,33 @@ def test_s3_logging():
         )
         # Get the keys of the most recent objects
         most_recent_keys = [obj["Key"] for obj in objects]
+        print(most_recent_keys)
+        # for each key, get the part before "-" as the key. Do it safely
+        cleaned_keys = []
+        for key in most_recent_keys:
+            split_key = key.split("-time=")
+            cleaned_keys.append(split_key[0])
         print("\n most recent keys", most_recent_keys)
+        print("\n cleaned keys", cleaned_keys)
         print("\n Expected keys: ", expected_keys)
+        matches = 0
         for key in expected_keys:
-            assert key in most_recent_keys
+            assert key in cleaned_keys
+
+            if key in cleaned_keys:
+                matches += 1
+                # remove the match key
+                cleaned_keys.remove(key)
+        # this asserts we log, the first request + the 2nd cached request
+        print("we had two matches ! passed ", matches)
+        assert matches == 2
+        try:
+            # cleanup s3 bucket in test
+            for key in most_recent_keys:
+                s3.delete_object(Bucket=bucket_name, Key=key)
+        except:
+            # don't let cleanup fail a test
+            pass
     except Exception as e:
         pytest.fail(f"An exception occurred - {e}")
     finally:
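Because cached replays are now logged under their own `-time=` suffixed keys, the test recovers response ids by splitting each key and expects exactly two matches per id. A small standalone illustration (the key values below are made up):

```python
# Made-up keys following the new "<response_id>-time=<start_time>" scheme.
most_recent_keys = [
    "chatcmpl-abc123-time=2024-01-13 10:00:00",
    "chatcmpl-abc123-time=2024-01-13 10:00:01",  # cached replay of the same response id
]

cleaned_keys = [key.split("-time=")[0] for key in most_recent_keys]
assert cleaned_keys.count("chatcmpl-abc123") == 2  # one log object per request, even on a cache hit
print(cleaned_keys)
```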
@@ -1975,6 +1975,8 @@ def client(original_function):
 
     @wraps(original_function)
     def wrapper(*args, **kwargs):
+        # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print
+        print_args_passed_to_litellm(original_function, args, kwargs)
         start_time = datetime.datetime.now()
         result = None
         logging_obj = kwargs.get("litellm_logging_obj", None)
@@ -2175,6 +2177,7 @@ def client(original_function):
 
     @wraps(original_function)
     async def wrapper_async(*args, **kwargs):
+        print_args_passed_to_litellm(original_function, args, kwargs)
         start_time = datetime.datetime.now()
         result = None
         logging_obj = kwargs.get("litellm_logging_obj", None)
@@ -2991,7 +2994,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
             response=httpx.Response(
                 status_code=404,
                 content=error_str,
-                request=httpx.request(method="cost_per_token", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"),  # type: ignore
             ),
             llm_provider="",
         )
@@ -4318,7 +4321,7 @@ def get_llm_provider(
             response=httpx.Response(
                 status_code=400,
                 content=error_str,
-                request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
             ),
             llm_provider="",
         )
@@ -4333,7 +4336,7 @@ def get_llm_provider(
             response=httpx.Response(
                 status_code=400,
                 content=error_str,
-                request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
+                request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
             ),
             llm_provider="",
         )
@@ -8427,3 +8430,49 @@ def transform_logprobs(hf_response):
         transformed_logprobs = token_info
 
     return transformed_logprobs
+
+
+def print_args_passed_to_litellm(original_function, args, kwargs):
+    try:
+        # we've already printed this for acompletion, don't print for completion
+        if (
+            "acompletion" in kwargs
+            and kwargs["acompletion"] == True
+            and original_function.__name__ == "completion"
+        ):
+            return
+        elif (
+            "aembedding" in kwargs
+            and kwargs["aembedding"] == True
+            and original_function.__name__ == "embedding"
+        ):
+            return
+        elif (
+            "aimg_generation" in kwargs
+            and kwargs["aimg_generation"] == True
+            and original_function.__name__ == "img_generation"
+        ):
+            return
+
+        args_str = ", ".join(map(repr, args))
+        kwargs_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items())
+        print_verbose("\n")  # new line before
+        print_verbose("\033[92mRequest to litellm:\033[0m")
+        if args and kwargs:
+            print_verbose(
+                f"\033[92mlitellm.{original_function.__name__}({args_str}, {kwargs_str})\033[0m"
+            )
+        elif args:
+            print_verbose(
+                f"\033[92mlitellm.{original_function.__name__}({args_str})\033[0m"
+            )
+        elif kwargs:
+            print_verbose(
+                f"\033[92mlitellm.{original_function.__name__}({kwargs_str})\033[0m"
+            )
+        else:
+            print_verbose(f"\033[92mlitellm.{original_function.__name__}()\033[0m")
+        print_verbose("\n")  # new line after
+    except:
+        # This should always be non blocking
+        pass
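One small fix in the hunks above is easy to miss: `httpx.request(...)` performs an HTTP call, while `httpx.Request(...)` only builds a request object, which is what the synthetic error responses need. A short illustration:

```python
import httpx

# httpx.Request only constructs a request object; nothing is sent over the network.
req = httpx.Request(method="completion", url="https://github.com/BerriAI/litellm")

# Attach it to a hand-built response, as the corrected error paths do.
resp = httpx.Response(status_code=400, content=b"unable to map model", request=req)
print(resp.request.url)
```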
poetry.lock (generated), 4 changes

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -2689,4 +2689,4 @@ proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.9.7 || >3.9.7"
-content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab"
+content-hash = "f4d60cb3f552af0d2a4e4ef5c6f55696fd6e546b75ff7b4ec362c3549a63c92a"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.17.0"
+version = "1.17.2"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -16,6 +16,7 @@ tokenizers = "*"
 click = "*"
 jinja2 = "^3.1.2"
 aiohttp = "*"
+requests = "^2.31.0"
 
 uvicorn = {version = "^0.22.0", optional = true}
 gunicorn = {version = "^21.2.0", optional = true}
@@ -60,7 +61,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.17.0"
+version = "1.17.2"
 version_files = [
     "pyproject.toml:^version"
 ]