diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md
index f6f987590..a8fe541fc 100644
--- a/docs/my-website/docs/providers/openai.md
+++ b/docs/my-website/docs/providers/openai.md
@@ -163,6 +163,7 @@ os.environ["OPENAI_API_BASE"] = "openaiai-api-base" # OPTIONAL
 
 | Model Name | Function Call |
 |-----------------------|-----------------------------------------------------------------|
+| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
 | gpt-4-turbo-preview | `response = completion(model="gpt-4-0125-preview", messages=messages)` |
 | gpt-4-0125-preview | `response = completion(model="gpt-4-0125-preview", messages=messages)` |
 | gpt-4-1106-preview | `response = completion(model="gpt-4-1106-preview", messages=messages)` |
@@ -185,6 +186,7 @@ These also support the `OPENAI_API_BASE` environment variable, which can be used
 ## OpenAI Vision Models
 | Model Name | Function Call |
 |-----------------------|-----------------------------------------------------------------|
+| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
 | gpt-4-vision-preview | `response = completion(model="gpt-4-vision-preview", messages=messages)` |
 
 #### Usage
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index d5f10262f..6fb12539f 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -25,21 +25,21 @@ class PrometheusLogger:
             self.litellm_requests_metric = Counter(
                 name="litellm_requests_metric",
                 documentation="Total number of LLM calls to litellm",
-                labelnames=["end_user", "key", "model", "team"],
+                labelnames=["end_user", "hashed_api_key", "model", "team"],
             )
 
             # Counter for spend
             self.litellm_spend_metric = Counter(
                 "litellm_spend_metric",
                 "Total spend on LLM requests",
-                labelnames=["end_user", "key", "model", "team"],
+                labelnames=["end_user", "hashed_api_key", "model", "team"],
             )
 
             # Counter for total_output_tokens
             self.litellm_tokens_metric = Counter(
                 "litellm_total_tokens",
                 "Total number of input + output tokens from LLM requests",
-                labelnames=["end_user", "key", "model", "team"],
+                labelnames=["end_user", "hashed_api_key", "model", "team"],
             )
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
@@ -75,6 +75,15 @@ class PrometheusLogger:
                 f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
             )
 
+            if (
+                user_api_key is not None
+                and isinstance(user_api_key, str)
+                and user_api_key.startswith("sk-")
+            ):
+                from litellm.proxy.utils import hash_token
+
+                user_api_key = hash_token(user_api_key)
+
             self.litellm_requests_metric.labels(
                 end_user_id, user_api_key, model, user_api_team
             ).inc()
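The prometheus.py change renames the `key` label to `hashed_api_key` and hashes any raw proxy key (`sk-...`) before it is attached as a label value, so plaintext keys never reach the metrics backend. Below is a minimal sketch of that step, assuming `litellm.proxy.utils.hash_token` is a plain SHA-256 hex digest (the actual implementation lives in the proxy utils):

```python
import hashlib


def hash_token(token: str) -> str:
    # Assumed behavior of litellm.proxy.utils.hash_token: a one-way
    # SHA-256 hex digest, stable for a given key, so the label can
    # still be used to group metrics per key without exposing it.
    return hashlib.sha256(token.encode()).hexdigest()


user_api_key = "sk-1234"  # hypothetical proxy virtual key
if (
    user_api_key is not None
    and isinstance(user_api_key, str)
    and user_api_key.startswith("sk-")
):
    user_api_key = hash_token(user_api_key)

print(user_api_key)  # 64-char hex digest, safe to expose as a metric label
```

Note that any existing dashboards or PromQL queries that reference the old `key` label will need to be updated to `hashed_api_key`.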
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 176902e1a..69feef63c 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -349,8 +349,17 @@ def completion(
         print_verbose(
             f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
         )
+        if vertex_credentials is not None and isinstance(vertex_credentials, str):
+            import google.oauth2.service_account
 
-        creds, _ = google.auth.default(quota_project_id=vertex_project)
+            json_obj = json.loads(vertex_credentials)
+
+            creds = google.oauth2.service_account.Credentials.from_service_account_info(
+                json_obj,
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
+            )
+        else:
+            creds, _ = google.auth.default(quota_project_id=vertex_project)
         print_verbose(
             f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
         )
@@ -1171,6 +1180,7 @@ def embedding(
     encoding=None,
     vertex_project=None,
     vertex_location=None,
+    vertex_credentials=None,
     aembedding=False,
     print_verbose=None,
 ):
@@ -1191,7 +1201,17 @@ def embedding(
         print_verbose(
             f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
         )
-        creds, _ = google.auth.default(quota_project_id=vertex_project)
+        if vertex_credentials is not None and isinstance(vertex_credentials, str):
+            import google.oauth2.service_account
+
+            json_obj = json.loads(vertex_credentials)
+
+            creds = google.oauth2.service_account.Credentials.from_service_account_info(
+                json_obj,
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
+            )
+        else:
+            creds, _ = google.auth.default(quota_project_id=vertex_project)
         print_verbose(
             f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
         )
diff --git a/litellm/main.py b/litellm/main.py
index 4c4a9540e..593fc7eae 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1710,6 +1710,7 @@ def completion(
                 encoding=encoding,
                 vertex_location=vertex_ai_location,
                 vertex_project=vertex_ai_project,
+                vertex_credentials=vertex_credentials,
                 logging_obj=logging,
                 acompletion=acompletion,
             )
@@ -2807,6 +2808,11 @@ def embedding(
             or litellm.vertex_location
             or get_secret("VERTEXAI_LOCATION")
         )
+        vertex_credentials = (
+            optional_params.pop("vertex_credentials", None)
+            or optional_params.pop("vertex_ai_credentials", None)
+            or get_secret("VERTEXAI_CREDENTIALS")
+        )
 
         response = vertex_ai.embedding(
             model=model,
@@ -2817,6 +2823,7 @@ def embedding(
             model_response=EmbeddingResponse(),
             vertex_project=vertex_ai_project,
             vertex_location=vertex_ai_location,
+            vertex_credentials=vertex_credentials,
             aembedding=aembedding,
             print_verbose=print_verbose,
         )
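With the vertex_ai.py and main.py changes above, Vertex AI service-account credentials can be supplied per request as a JSON string (or via the `VERTEXAI_CREDENTIALS` secret) instead of relying solely on `GOOGLE_APPLICATION_CREDENTIALS` / application-default credentials, and the embedding path now accepts the same parameter as completion. A hedged sketch of the call pattern — the key-file path, project ID, location, and model strings are placeholders:

```python
import json

from litellm import completion, embedding

# Hypothetical path; any source of the raw service-account JSON works,
# since vertex_credentials is parsed with json.loads() on the other end.
with open("service_account.json") as f:
    vertex_credentials = f.read()

response = completion(
    model="vertex_ai/gemini-pro",
    messages=[{"role": "user", "content": "Hello from Vertex AI"}],
    vertex_project="my-project-id",          # placeholder
    vertex_location="us-central1",           # placeholder
    vertex_credentials=vertex_credentials,   # JSON *string*, not a dict
)

# Embeddings now take the same parameter (this is what the main.py hunk
# wires through); when it is omitted, the code falls back to
# google.auth.default() as before.
emb = embedding(
    model="vertex_ai/textembedding-gecko",
    input=["good morning from litellm"],
    vertex_project="my-project-id",
    vertex_location="us-central1",
    vertex_credentials=vertex_credentials,
)
```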
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 3ace04f70..bf5adb430 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -75,7 +75,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -86,7 +87,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -1268,8 +1270,21 @@
         "litellm_provider": "gemini",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini/gemini-1.5-pro-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://ai.google.dev/models/gemini"
+    },
     "gemini/gemini-pro-vision": {
         "max_tokens": 2048,
         "max_input_tokens": 30720,
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
index b361a73b4..a2853cd76 100644
--- a/litellm/proxy/_experimental/out/404.html
+++ b/litellm/proxy/_experimental/out/404.html
@@ -1 +1 @@
-
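The pricing-map entries added above — the `supports_vision` flags on the GPT-4 Turbo and Gemini models, and the new `gemini/gemini-1.5-pro-latest` block — are readable at runtime through the model cost map. A small sketch, assuming this litellm version exposes the entries verbatim via `litellm.get_model_info`:

```python
import litellm

# get_model_info looks a model up in the model cost map and returns its entry.
info = litellm.get_model_info("gpt-4-turbo")
print(info.get("supports_vision"))                     # True after this change
print(info.get("supports_parallel_function_calling"))  # True

# The new Google AI Studio entry is keyed by its full "gemini/..." string.
gemini_info = litellm.get_model_info("gemini/gemini-1.5-pro-latest")
print(gemini_info.get("max_input_tokens"))  # 1048576 (1M-token context window)
```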