From ce426f8b07f133bd3b817a41c13aeeef509d946b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 11:44:20 +0530 Subject: [PATCH 01/19] (fix) s3 log cache hits --- litellm/integrations/s3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index e7f607b41..db40ae832 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -118,7 +118,10 @@ class S3Logger: except: # non blocking if it can't cast to a str pass - s3_object_key = payload["id"] + + s3_object_key = ( + payload["id"] + "-time=" + str(start_time) + ) # we need the s3 key to include the time, so we log cache hits too import json From cc78e003bf9cf8f1677694c070c6f4119aabe067 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 11:44:48 +0530 Subject: [PATCH 02/19] (test) s3 log cache hits --- litellm/tests/test_s3_logs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_s3_logs.py b/litellm/tests/test_s3_logs.py index 2a919d127..ee040928b 100644 --- a/litellm/tests/test_s3_logs.py +++ b/litellm/tests/test_s3_logs.py @@ -20,8 +20,10 @@ def test_s3_logging(): # since we are modifying stdout, and pytests runs tests in parallel # on circle ci - we only test litellm.acompletion() try: - # pre # redirect stdout to log_file + litellm.cache = litellm.Cache( + type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" + ) litellm.success_callback = ["s3"] litellm.s3_callback_params = { @@ -35,10 +37,14 @@ def test_s3_logging(): expected_keys = [] + import time + + curr_time = str(time.time()) + async def _test(): return await litellm.acompletion( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "This is a test"}], + messages=[{"role": "user", "content": f"This is a test {curr_time}"}], max_tokens=10, temperature=0.7, user="ishaan-2", @@ -48,6 +54,19 @@ def test_s3_logging(): print(f"response: {response}") expected_keys.append(response.id) + async def _test(): + return await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": f"This is a test {curr_time}"}], + max_tokens=10, + temperature=0.7, + user="ishaan-2", + ) + + response = asyncio.run(_test()) + expected_keys.append(response.id) + print(f"response: {response}") + # # streaming + async # async def _test2(): # response = await litellm.acompletion( @@ -86,10 +105,17 @@ def test_s3_logging(): ) # Get the keys of the most recent objects most_recent_keys = [obj["Key"] for obj in objects] + print(most_recent_keys) + # for each key, get the part before "-" as the key. 
Do it safely + cleaned_keys = [] + for key in most_recent_keys: + split_key = key.split("-time=") + cleaned_keys.append(split_key[0]) print("\n most recent keys", most_recent_keys) + print("\n cleaned keys", cleaned_keys) print("\n Expected keys: ", expected_keys) for key in expected_keys: - assert key in most_recent_keys + assert key in cleaned_keys except Exception as e: pytest.fail(f"An exception occurred - {e}") finally: From 40c740089474b74baf8bdb68a31e4aa3dc00753a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 11 Jan 2024 12:51:29 +0530 Subject: [PATCH 03/19] fix(router.py): bump httpx pool limits --- litellm/router.py | 54 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index f63555509..d5b42343c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -1363,6 +1363,12 @@ class Router: api_version=api_version, timeout=timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1378,6 +1384,12 @@ class Router: api_version=api_version, timeout=timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1393,6 +1405,12 @@ class Router: api_version=api_version, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1408,6 +1426,12 @@ class Router: api_version=api_version, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1471,9 +1495,10 @@ class Router: timeout=stream_timeout, max_retries=max_retries, http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), limits=httpx.Limits( max_connections=1000, max_keepalive_connections=100 - ) + ), ), ) self.cache.set_cache( @@ -1491,9 +1516,10 @@ class Router: timeout=stream_timeout, max_retries=max_retries, http_client=httpx.Client( + transport=CustomHTTPTransport(), limits=httpx.Limits( max_connections=1000, max_keepalive_connections=100 - ) + ), ), ) self.cache.set_cache( @@ -1513,6 +1539,12 @@ class Router: base_url=api_base, timeout=timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1527,6 +1559,12 @@ class Router: base_url=api_base, timeout=timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1542,6 +1580,12 @@ class Router: base_url=api_base, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: 
ignore ) self.cache.set_cache( key=cache_key, @@ -1557,6 +1601,12 @@ class Router: base_url=api_base, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, From f89385eed8dbf63242aacbd5ed936d4dbe84d1b7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 14:22:37 +0530 Subject: [PATCH 04/19] (fix) acompletion kwargs type hints --- litellm/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 2b53c3a5f..8342ab4d5 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -202,6 +202,7 @@ async def acompletion( - If `stream` is True, the function returns an async generator that yields completion lines. """ loop = asyncio.get_event_loop() + custom_llm_provider = None # Adjusted to use explicit arguments instead of *args and **kwargs completion_kwargs = { "model": model, @@ -241,7 +242,7 @@ async def acompletion( func_with_context = partial(ctx.run, func) _, custom_llm_provider, _, _ = get_llm_provider( - model=model, api_base=completion_kwargs.get("base_url", None) + model=model, api_base=kwargs.get("api_base", None) ) if ( From 4a1541c4859ce4eac0ec3ca0686f5d6db667f6bd Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 14:39:08 +0530 Subject: [PATCH 05/19] (fix) retry gemini-pro-vision 3 times --- litellm/tests/test_google_ai_studio_gemini.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/litellm/tests/test_google_ai_studio_gemini.py b/litellm/tests/test_google_ai_studio_gemini.py index 7cebd2537..5012717d3 100644 --- a/litellm/tests/test_google_ai_studio_gemini.py +++ b/litellm/tests/test_google_ai_studio_gemini.py @@ -6,29 +6,34 @@ sys.path.insert( import litellm from dotenv import load_dotenv + def generate_text(): try: + litellm.set_verbose = True messages = [ { "role": "user", "content": [ - { - "type": "text", - "text": "What is this image?" 
- }, + {"type": "text", "text": "What is this image?"}, { "type": "image_url", "image_url": { "url": "https://avatars.githubusercontent.com/u/17561003?v=4" - } - } - ] + }, + }, + ], } ] - response = litellm.completion(model="gemini/gemini-pro-vision", messages=messages, stop="Hello world") + response = litellm.completion( + model="gemini/gemini-pro-vision", + messages=messages, + stop="Hello world", + num_retries=3, + ) print(response) assert isinstance(response.choices[0].message.content, str) == True except Exception as exception: raise Exception("An error occurred during text generation:", exception) -generate_text() + +# generate_text() From c46a3709192e4b9e364df3c06a1e0cd6f6b4243f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 15:37:03 +0530 Subject: [PATCH 06/19] (docs) logging proxy input / output --- docs/my-website/docs/proxy/logging.md | 236 +++++++------------------- 1 file changed, 61 insertions(+), 175 deletions(-) diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index 5aa78f73d..8bf0fcee2 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -7,10 +7,17 @@ import TabItem from '@theme/TabItem'; Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, LangFuse, DynamoDB, s3 Bucket +- [Async Custom Callbacks](#custom-callback-class-async) +- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse) +- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets) +- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb) +- [Logging to Sentry](#logging-proxy-inputoutput---sentry) +- [Logging to Traceloop (OpenTelemetry)](#opentelemetry---traceloop) + ## Custom Callback Class [Async] Use this when you want to run custom callbacks in `python` -### Step 1 - Create your custom `litellm` callback class +#### Step 1 - Create your custom `litellm` callback class We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)** Define your custom callback class in a python file. @@ -112,7 +119,7 @@ proxy_handler_instance = MyCustomHandler() # need to set litellm.callbacks = [proxy_handler_instance] # on the proxy ``` -### Step 2 - Pass your custom callback class in `config.yaml` +#### Step 2 - Pass your custom callback class in `config.yaml` We pass the custom callback class defined in **Step1** to the config.yaml. Set `callbacks` to `python_filename.logger_instance_name` @@ -134,7 +141,7 @@ litellm_settings: ``` -### Step 3 - Start proxy + test request +#### Step 3 - Start proxy + test request ```shell litellm --config proxy_config.yaml ``` @@ -167,7 +174,7 @@ On Success Proxy Metadata: {'user_api_key': None, 'headers': Headers({'host': '0.0.0.0:8000', 'user-agent': 'curl/7.88.1', 'accept': '*/*', 'authorization': 'Bearer sk-1234', 'content-length': '199', 'content-type': 'application/x-www-form-urlencoded'}), 'model_group': 'gpt-3.5-turbo', 'deployment': 'gpt-3.5-turbo-ModelID-gpt-3.5-turbo'} ``` -### Logging Proxy Request Object, Header, Url +#### Logging Proxy Request Object, Header, Url Here's how you can access the `url`, `headers`, `request body` sent to the proxy for each request @@ -211,7 +218,7 @@ class MyCustomHandler(CustomLogger): ``` -### Logging `model_info` set in config.yaml +#### Logging `model_info` set in config.yaml Here is how to log the `model_info` set in your proxy `config.yaml`. 
Information on setting `model_info` on [config.yaml](https://docs.litellm.ai/docs/proxy/configs) @@ -428,176 +435,6 @@ print(response) - -## OpenTelemetry - Traceloop - -Traceloop allows you to log LLM Input/Output in the OpenTelemetry format - -We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop - -**Step 1** Install traceloop-sdk and set Traceloop API key - -```shell -pip install traceloop-sdk -U -``` - -Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog) -, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector) - -**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` -```yaml -model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: gpt-3.5-turbo -litellm_settings: - success_callback: ["traceloop"] -``` - -**Step 3**: Start the proxy, make a test request - -Start proxy -```shell -litellm --config config.yaml --debug -``` - -Test Request -``` -curl --location 'http://0.0.0.0:8000/chat/completions' \ - --header 'Content-Type: application/json' \ - --data ' { - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "user", - "content": "what llm are you" - } - ] - }' -``` - - - - - - ## Logging Proxy Input/Output - s3 Buckets We will use the `--config` to set @@ -815,3 +652,52 @@ Test Request ``` litellm --test ``` + +## Logging Proxy Input/Output Traceloop (OpenTelemetry) + +Traceloop allows you to log LLM Input/Output in the OpenTelemetry format + +We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop + +**Step 1** Install traceloop-sdk and set Traceloop API key + +```shell +pip install traceloop-sdk -U +``` + +Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. 
Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog) +, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector) + +**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo +litellm_settings: + success_callback: ["traceloop"] +``` + +**Step 3**: Start the proxy, make a test request + +Start proxy +```shell +litellm --config config.yaml --debug +``` + +Test Request +``` +curl --location 'http://0.0.0.0:8000/chat/completions' \ + --header 'Content-Type: application/json' \ + --data ' { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] + }' +``` + + From 1d9dad4af4a16535bb0b0aa242e79c9e6232b2d6 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 15:57:54 +0530 Subject: [PATCH 07/19] (feat) s3 logging - log cache hits --- litellm/integrations/s3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index db40ae832..0187d13d6 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -93,6 +93,7 @@ class S3Logger: messages = kwargs.get("messages") optional_params = kwargs.get("optional_params", {}) call_type = kwargs.get("call_type", "litellm.completion") + cache_hit = kwargs.get("cache_hit", False) usage = response_obj["usage"] id = response_obj.get("id", str(uuid.uuid4())) @@ -100,6 +101,7 @@ class S3Logger: payload = { "id": id, "call_type": call_type, + "cache_hit": cache_hit, "startTime": start_time, "endTime": end_time, "model": kwargs.get("model", ""), From bb8eac0597fb20f3d370678ab188f4f5b98cdb54 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 16:57:51 +0530 Subject: [PATCH 08/19] (test) improve s3 logging test --- litellm/tests/test_s3_logs.py | 41 ++++++++++++++--------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/litellm/tests/test_s3_logs.py b/litellm/tests/test_s3_logs.py index ee040928b..a5347fb08 100644 --- a/litellm/tests/test_s3_logs.py +++ b/litellm/tests/test_s3_logs.py @@ -67,31 +67,6 @@ def test_s3_logging(): expected_keys.append(response.id) print(f"response: {response}") - # # streaming + async - # async def _test2(): - # response = await litellm.acompletion( - # model="gpt-3.5-turbo", - # messages=[{"role": "user", "content": "what llm are u"}], - # max_tokens=10, - # temperature=0.7, - # user="ishaan-2", - # stream=True, - # ) - # async for chunk in response: - # pass - - # asyncio.run(_test2()) - - # aembedding() - # async def _test3(): - # return await litellm.aembedding( - # model="text-embedding-ada-002", input=["hi"], user="ishaan-2" - # ) - - # response = asyncio.run(_test3()) - # expected_keys.append(response.id) - # time.sleep(1) - import boto3 s3 = boto3.client("s3") @@ -114,8 +89,24 @@ def test_s3_logging(): print("\n most recent keys", most_recent_keys) print("\n cleaned 
keys", cleaned_keys) print("\n Expected keys: ", expected_keys) + matches = 0 for key in expected_keys: assert key in cleaned_keys + + if key in cleaned_keys: + matches += 1 + # remove the match key + cleaned_keys.remove(key) + # this asserts we log, the first request + the 2nd cached request + print("we had two matches ! passed ", matches) + assert matches == 2 + try: + # cleanup s3 bucket in test + for key in most_recent_keys: + s3.delete_object(Bucket=bucket_name, Key=key) + except: + # don't let cleanup fail a test + pass except Exception as e: pytest.fail(f"An exception occurred - {e}") finally: From 1e80c1fd005acdf8af74826f0e25c6eef0c677d8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 17:17:16 +0530 Subject: [PATCH 09/19] =?UTF-8?q?bump:=20version=201.17.0=20=E2=86=92=201.?= =?UTF-8?q?17.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af738d509..ab7b630db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.17.0" +version = "1.17.1" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" @@ -60,7 +60,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.17.0" +version = "1.17.1" version_files = [ "pyproject.toml:^version" ] From f297a4d174f824ced9a1de5c4348e554d2399c11 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 17:56:27 +0530 Subject: [PATCH 10/19] (feat) show args passed to litellm.completion, acompletion on call --- litellm/utils.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/litellm/utils.py b/litellm/utils.py index fcf6e9dea..8f97ace43 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1975,6 +1975,8 @@ def client(original_function): @wraps(original_function) def wrapper(*args, **kwargs): + # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print + print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None logging_obj = kwargs.get("litellm_logging_obj", None) @@ -2170,6 +2172,7 @@ def client(original_function): @wraps(original_function) async def wrapper_async(*args, **kwargs): + print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None logging_obj = kwargs.get("litellm_logging_obj", None) @@ -8275,3 +8278,29 @@ def transform_logprobs(hf_response): transformed_logprobs = token_info return transformed_logprobs + + +def print_args_passed_to_litellm(original_function, args, kwargs): + try: + args_str = ", ".join(map(repr, args)) + kwargs_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items()) + print_verbose("\n") # new line before + print_verbose("\033[92mRequest to litellm:\033[0m") + if args and kwargs: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({args_str}, {kwargs_str})\033[0m" + ) + elif args: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({args_str})\033[0m" + ) + elif kwargs: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({kwargs_str})\033[0m" + ) + else: + print_verbose(f"\033[92mlitellm.{original_function.__name__}()\033[0m") + print_verbose("\n") # new line after + except: + # This should always be non blocking + 
pass From 1fb3547e48f16d95fc52e45dbab87cbb3b959a23 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 18:13:08 +0530 Subject: [PATCH 11/19] (feat) improve litellm verbose logs --- litellm/utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/litellm/utils.py b/litellm/utils.py index 8f97ace43..8af329195 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8282,6 +8282,26 @@ def transform_logprobs(hf_response): def print_args_passed_to_litellm(original_function, args, kwargs): try: + # we've already printed this for acompletion, don't print for completion + if ( + "acompletion" in kwargs + and kwargs["acompletion"] == True + and original_function.__name__ == "completion" + ): + return + elif ( + "aembedding" in kwargs + and kwargs["aembedding"] == True + and original_function.__name__ == "embedding" + ): + return + elif ( + "aimg_generation" in kwargs + and kwargs["aimg_generation"] == True + and original_function.__name__ == "img_generation" + ): + return + args_str = ", ".join(map(repr, args)) kwargs_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items()) print_verbose("\n") # new line before From 43533812a7a3872b79b1183d2db259441b6494a7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 11 Jan 2024 19:19:29 +0530 Subject: [PATCH 12/19] fix(proxy_cli.py): read db url from config, not just environment --- litellm/main.py | 4 ++-- litellm/proxy/proxy_cli.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 8342ab4d5..70264b312 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -268,10 +268,10 @@ async def acompletion( elif asyncio.iscoroutine(init_response): response = await init_response else: - response = init_response + response = init_response # type: ignore else: # Call the synchronous function using run_in_executor - response = await loop.run_in_executor(None, func_with_context) + response = await loop.run_in_executor(None, func_with_context) # type: ignore # if kwargs.get("stream", False): # return an async generator # return _async_streaming( # response=response, diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index ade65ec96..c06ba7d32 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -6,7 +6,6 @@ from datetime import datetime import importlib from dotenv import load_dotenv - sys.path.append(os.getcwd()) config_filename = "litellm.secrets" @@ -349,6 +348,38 @@ def run_server( raise ImportError( "Uvicorn, gunicorn needs to be imported. Run - `pip 'litellm[proxy]'`" ) + + if config is not None: + """ + Allow user to pass in db url via config + + read from there and save it to os.env['DATABASE_URL'] + """ + try: + import yaml + except: + raise ImportError( + "yaml needs to be imported. 
Run - `pip install 'litellm[proxy]'`" + ) + + if os.path.exists(config): + with open(config, "r") as config_file: + config = yaml.safe_load(config_file) + general_settings = config.get("general_settings", {}) + database_url = general_settings.get("database_url", None) + if database_url and database_url.startswith("os.environ/"): + original_dir = os.getcwd() + # set the working directory to where this script is + sys.path.insert( + 0, os.path.abspath("../..") + ) # Adds the parent directory to the system path - for litellm local dev + import litellm + + database_url = litellm.get_secret(database_url) + os.chdir(original_dir) + if database_url is not None and isinstance(database_url, str): + os.environ["DATABASE_URL"] = database_url + if os.getenv("DATABASE_URL", None) is not None: # run prisma db push, before starting server # Save the current working directory From d14099f9b48ab6fbde1f67b2914b5cf4aeb4979d Mon Sep 17 00:00:00 2001 From: David Leen Date: Thu, 11 Jan 2024 16:20:50 +0100 Subject: [PATCH 13/19] Add explicit dependency on requests library --- poetry.lock | 4 ++-- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 24673701a..496815f9a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -2689,4 +2689,4 @@ proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.9.7 || >3.9.7" -content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab" +content-hash = "f4d60cb3f552af0d2a4e4ef5c6f55696fd6e546b75ff7b4ec362c3549a63c92a" diff --git a/pyproject.toml b/pyproject.toml index ab7b630db..08b66e27e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ tokenizers = "*" click = "*" jinja2 = "^3.1.2" aiohttp = "*" +requests = "^2.31.0" uvicorn = {version = "^0.22.0", optional = true} gunicorn = {version = "^21.2.0", optional = true} From 6b87c13b9d5c904c9c66d9fce87fbd835b566ab0 Mon Sep 17 00:00:00 2001 From: David Leen Date: Thu, 11 Jan 2024 16:22:26 +0100 Subject: [PATCH 14/19] (fix) create httpx.Request instead of httpx.request fixes #1420 --- litellm/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index fcf6e9dea..420147629 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2844,7 +2844,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0): response=httpx.Response( status_code=404, content=error_str, - request=httpx.request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore + request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) @@ -4171,7 +4171,7 @@ def get_llm_provider( response=httpx.Response( status_code=400, content=error_str, - request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) @@ -4186,7 +4186,7 @@ def get_llm_provider( response=httpx.Response( status_code=400, content=error_str, - request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + 
request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) From a876748bf57f045061e547c965e4a32c983502ad Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 22:56:18 +0530 Subject: [PATCH 15/19] v0 --- litellm/llms/bedrock.py | 4 ++-- litellm/tests/test_embedding.py | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 99550280a..dd9c86200 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -2,7 +2,7 @@ import json, copy, types import os from enum import Enum import time -from typing import Callable, Optional, Any +from typing import Callable, Optional, Any, Union import litellm from litellm.utils import ModelResponse, get_secret, Usage from .prompt_templates.factory import prompt_factory, custom_prompt @@ -714,7 +714,7 @@ def _embedding_func_single( def embedding( model: str, - input: list, + input: Union[list, str], api_key: Optional[str] = None, logging_obj=None, model_response=None, diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 954a53e2a..50dd6ee06 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -189,10 +189,7 @@ def test_bedrock_embedding_titan(): litellm.set_verbose = True response = embedding( model="amazon.titan-embed-text-v1", - input=[ - "good morning from litellm, attempting to embed data", - "lets test a second string for good measure", - ], + input="good morning from litellm, attempting to embed data", ) print(f"response:", response) assert isinstance( @@ -206,7 +203,7 @@ def test_bedrock_embedding_titan(): pytest.fail(f"Error occurred: {e}") -# test_bedrock_embedding_titan() +test_bedrock_embedding_titan() def test_bedrock_embedding_cohere(): From a9d812eb8da1bdbc31f7082bc470df56a76d5142 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 23:02:12 +0530 Subject: [PATCH 16/19] (fix) bedrock - embedding - support str input --- litellm/llms/bedrock.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index dd9c86200..617964a74 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -737,18 +737,28 @@ def embedding( aws_region_name=aws_region_name, aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint, ) - - ## Embedding Call - embeddings = [ - _embedding_func_single( - model, - i, - optional_params=optional_params, - client=client, - logging_obj=logging_obj, - ) - for i in input - ] # [TODO]: make these parallel calls + if type(input) == str: + embeddings = [ + _embedding_func_single( + model, + input, + optional_params=optional_params, + client=client, + logging_obj=logging_obj, + ) + ] + else: + ## Embedding Call + embeddings = [ + _embedding_func_single( + model, + i, + optional_params=optional_params, + client=client, + logging_obj=logging_obj, + ) + for i in input + ] # [TODO]: make these parallel calls ## Populate OpenAI compliant dictionary embedding_response = [] From 276d11946ee760e8c2f30e408c12ef6fe1462dcb Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 23:04:41 +0530 Subject: [PATCH 17/19] (test) bedrock - embedding with strings --- litellm/tests/test_embedding.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 50dd6ee06..28cafe02c 100644 --- 
a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -186,10 +186,12 @@ def test_cohere_embedding3():
 
 def test_bedrock_embedding_titan():
     try:
-        litellm.set_verbose = True
+        # this tests if we support str input for bedrock embedding
+        litellm.set_verbose = False
+        # DO NOT MAKE THE INPUT A LIST in this test
         response = embedding(
             model="amazon.titan-embed-text-v1",
-            input="good morning from litellm, attempting to embed data",
+            input="good morning from litellm, attempting to embed data",  # input should always be a string in this test
         )
         print(f"response:", response)
         assert isinstance(
@@ -199,6 +201,8 @@ def test_bedrock_embedding_titan():
         assert all(
             isinstance(x, float) for x in response["data"][0]["embedding"]
         ), "Expected response to be a list of floats"
+        print("Response Usage", response.usage)
+        assert response.usage.prompt_tokens == 11
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
@@ -277,7 +281,7 @@ def test_aembedding():
         pytest.fail(f"Error occurred: {e}")
 
 
-test_aembedding()
+# test_aembedding()
 
 
 def test_aembedding_azure():

From b7567865deb00ae68a3e951b258a53eabb513870 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 11 Jan 2024 23:12:57 +0530
Subject: [PATCH 18/19] (test) caching for bedrock/embedding str inputs

---
 litellm/tests/test_embedding.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 28cafe02c..6505d432d 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -187,11 +187,15 @@ def test_cohere_embedding3():
 def test_bedrock_embedding_titan():
     try:
         # this tests if we support str input for bedrock embedding
-        litellm.set_verbose = False
+        litellm.set_verbose = True
+        litellm.enable_cache()
+        import time
+
+        current_time = str(time.time())
         # DO NOT MAKE THE INPUT A LIST in this test
         response = embedding(
-            model="amazon.titan-embed-text-v1",
-            input="good morning from litellm, attempting to embed data",  # input should always be a string in this test
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
         )
         print(f"response:", response)
         assert isinstance(
@@ -201,8 +205,23 @@ def test_bedrock_embedding_titan():
         assert all(
             isinstance(x, float) for x in response["data"][0]["embedding"]
         ), "Expected response to be a list of floats"
-        print("Response Usage", response.usage)
-        assert response.usage.prompt_tokens == 11
+
+        # this also tests if we can return a cache response for this scenario
+        import time
+
+        start_time = time.time()
+
+        response = embedding(
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
+        )
+        print(response)
+
+        end_time = time.time()
+        print(f"Embedding 2 response time: {end_time - start_time} seconds")
+
+        assert end_time - start_time < 0.1
+        litellm.disable_cache()
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 

From 0a762695419e2187f9f82318c9e3a5194f65ce42 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 11 Jan 2024 23:25:02 +0530
Subject: [PATCH 19/19] =?UTF-8?q?bump:=20version=201.17.1=20=E2=86=92=201.?=
 =?UTF-8?q?17.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml
b/pyproject.toml
index 08b66e27e..f6a305ca2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.17.1"
+version = "1.17.2"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -61,7 +61,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.17.1"
+version = "1.17.2"
 version_files = [
     "pyproject.toml:^version"
 ]
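The two test patches above exercise the new behavior end to end: `litellm.embedding` now accepts a plain string for `bedrock/amazon.titan-embed-text-v1`, and a second call with the same input is answered from the cache. A minimal usage sketch in that spirit (it assumes valid AWS Bedrock credentials, uses the default in-memory cache that `litellm.enable_cache()` sets up, and the prompt text is only a placeholder):

```python
import time

import litellm
from litellm import embedding

# Turn on litellm's default in-memory cache (the tests above enable/disable it the same way).
litellm.enable_cache()

text = f"good morning from litellm, attempting to embed data {time.time()}"

# First call goes to Bedrock; a plain string input (not a list) is now supported.
first = embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)
print("embedding length:", len(first["data"][0]["embedding"]))

# Second identical call should be answered from the cache, so it returns almost instantly.
start = time.time()
second = embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)
print(f"cached call took {time.time() - start:.4f} seconds")

litellm.disable_cache()
```

Timing the second call, as the test does, is a quick way to confirm the cache is actually being hit.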