diff --git a/.circleci/config.yml b/.circleci/config.yml index 26d12b5b5..40d498d6e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,8 +44,12 @@ jobs: pip install "logfire==0.29.0" pip install numpydoc pip install traceloop-sdk==0.21.1 + pip install opentelemetry-api==1.25.0 + pip install opentelemetry-sdk==1.25.0 + pip install opentelemetry-exporter-otlp==1.25.0 pip install openai - pip install prisma + pip install prisma + pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" pip install fastapi pip install "gunicorn==21.2.0" @@ -62,6 +66,7 @@ jobs: pip install "pydantic==2.7.1" pip install "diskcache==5.6.1" pip install "Pillow==10.3.0" + pip install "jsonschema==4.22.0" - save_cache: paths: - ./venv @@ -97,7 +102,7 @@ jobs: command: | pwd ls - python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5 + python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5 -k "not test_python_38.py" no_output_timeout: 120m # Store test results @@ -123,6 +128,7 @@ jobs: pip install jinja2 pip install tokenizers pip install openai + pip install jsonschema - run: name: Run tests command: | @@ -177,6 +183,7 @@ jobs: pip install numpydoc pip install prisma pip install fastapi + pip install jsonschema pip install "httpx==0.24.1" pip install "gunicorn==21.2.0" pip install "anyio==3.7.1" @@ -199,11 +206,13 @@ jobs: -e REDIS_PORT=$REDIS_PORT \ -e AZURE_FRANCE_API_KEY=$AZURE_FRANCE_API_KEY \ -e AZURE_EUROPE_API_KEY=$AZURE_EUROPE_API_KEY \ + -e MISTRAL_API_KEY=$MISTRAL_API_KEY \ -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_REGION_NAME=$AWS_REGION_NAME \ -e AUTO_INFER_REGION=True \ -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LITELLM_LICENSE=$LITELLM_LICENSE \ -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \ -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \ -e LANGFUSE_PROJECT1_SECRET=$LANGFUSE_PROJECT1_SECRET \ @@ -341,4 +350,4 @@ workflows: filters: branches: only: - - main + - main \ No newline at end of file diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 000000000..58e7cfe10 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/ghcr_deploy.yml b/.github/workflows/ghcr_deploy.yml index 58cda02c3..51e24f856 100644 --- a/.github/workflows/ghcr_deploy.yml +++ b/.github/workflows/ghcr_deploy.yml @@ -25,6 +25,11 @@ jobs: if: github.repository == 'BerriAI/litellm' runs-on: ubuntu-latest steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.commit_hash }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -41,12 +46,14 @@ jobs: name: Build and push uses: docker/build-push-action@v5 with: + context: . push: true tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }} - name: Build and push litellm-database image uses: docker/build-push-action@v5 with: + context: . push: true file: Dockerfile.database tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }} @@ -54,6 +61,7 @@ jobs: name: Build and push litellm-spend-logs image uses: docker/build-push-action@v5 with: + context: . 
push: true file: ./litellm-js/spend-logs/Dockerfile tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }} @@ -68,6 +76,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.commit_hash }} # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -92,7 +102,7 @@ jobs: - name: Build and push Docker image uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8 with: - context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}} + context: . push: true tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest' labels: ${{ steps.meta.outputs.labels }} @@ -106,6 +116,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.commit_hash }} - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -128,7 +140,7 @@ jobs: - name: Build and push Database Docker image uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 with: - context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}} + context: . file: Dockerfile.database push: true tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }} @@ -143,6 +155,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.commit_hash }} - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -165,7 +179,7 @@ jobs: - name: Build and push Database Docker image uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 with: - context: https://github.com/BerriAI/litellm.git#${{ github.event.inputs.commit_hash}} + context: . 
file: ./litellm-js/spend-logs/Dockerfile push: true tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }} @@ -176,6 +190,8 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.commit_hash }} - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -273,7 +289,8 @@ jobs: repo: context.repo.repo, release_id: process.env.RELEASE_ID, }); - return response.data.body; + const formattedBody = JSON.stringify(response.data.body).slice(1, -1); + return formattedBody; } catch (error) { core.setFailed(error.message); } @@ -286,14 +303,15 @@ jobs: RELEASE_NOTES: ${{ steps.release-notes.outputs.result }} run: | curl -H "Content-Type: application/json" -X POST -d '{ - "content": "New LiteLLM release ${{ env.RELEASE_TAG }}", + "content": "New LiteLLM release '"${RELEASE_TAG}"'", "username": "Release Changelog", "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png", "embeds": [ { - "title": "Changelog for LiteLLM ${{ env.RELEASE_TAG }}", - "description": "${{ env.RELEASE_NOTES }}", + "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'", + "description": "'"${RELEASE_NOTES}"'", "color": 2105893 } ] }' $WEBHOOK_URL + diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..23e4a06da --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,34 @@ +name: Publish Dev Release to PyPI + +on: + workflow_dispatch: + +jobs: + publish-dev-release: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 # Adjust the Python version as needed + + - name: Install dependencies + run: pip install toml twine + + - name: Read version from pyproject.toml + id: read-version + run: | + version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])') + printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV + + - name: Check if version exists on PyPI + id: check-version + run: | + set -e + if twine check --repository-url https://pypi.org/simple/ "litellm==$LITELLM_VERSION" >/dev/null 2>&1; then + echo "Version $LITELLM_VERSION already exists on PyPI. Skipping publish." 
+ diff --git a/.gitignore b/.gitignore index b75a92309..8a9095b84 100644 --- a/.gitignore +++ b/.gitignore @@ -55,4 +55,10 @@ litellm/proxy/_super_secret_config.yaml litellm/proxy/_super_secret_config.yaml litellm/proxy/myenv/bin/activate litellm/proxy/myenv/bin/Activate.ps1 -myenv/* \ No newline at end of file +myenv/* +litellm/proxy/_experimental/out/404/index.html +litellm/proxy/_experimental/out/model_hub/index.html +litellm/proxy/_experimental/out/onboarding/index.html +litellm/tests/log.txt +litellm/tests/langfuse.log +litellm/tests/langfuse.log diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc41d85f1..a33473b72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,19 @@ repos: +- repo: local + hooks: + - id: mypy + name: mypy + entry: python3 -m mypy --ignore-missing-imports + language: system + types: [python] + files: ^litellm/ + - id: isort + name: isort + entry: isort + language: system + types: [python] + files: litellm/.*\.py + exclude: ^litellm/__init__.py$ - repo: https://github.com/psf/black rev: 24.2.0 hooks: @@ -7,20 +22,23 @@ repos: rev: 7.0.0 # The version of flake8 to use hooks: - id: flake8 - exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/proxy/tests/ + exclude: ^litellm/tests/|^litellm/proxy/tests/ additional_dependencies: [flake8-print] files: litellm/.*\.py +- repo: https://github.com/python-poetry/poetry + rev: 1.8.0 + hooks: + - id: poetry-check - repo: local hooks: - id: check-files-match name: Check if files match entry: python3 ci_cd/check_files_match.py language: system -- repo: local - hooks: - - id: mypy - name: mypy - entry: python3 -m mypy --ignore-missing-imports - language: system - types: [python] - files: ^litellm/ \ No newline at end of file + # - id: check-file-length + # name: Check file length + # entry: python check_file_length.py + # args: ["10000"] # set your desired maximum number of lines + # language: python + # files: litellm/.*\.py + # exclude: ^litellm/tests/ \ No newline at end of file diff --git a/README.md b/README.md index 2dafcf863..7df894ea1 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature # Usage ([**Docs**](https://docs.litellm.ai/docs/)) > [!IMPORTANT] -> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration) +> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration) +> LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required. 
Open In Colab @@ -147,6 +148,7 @@ The proxy provides: ## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/) + ## Quick Start Proxy - CLI ```shell @@ -179,6 +181,24 @@ print(response) ## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys)) +Connect the proxy with a Postgres DB to create proxy keys + +```bash +# Get the code +git clone https://github.com/BerriAI/litellm + +# Go to folder +cd litellm + +# Add the master key +echo 'LITELLM_MASTER_KEY="sk-1234"' > .env +source .env + +# Start +docker-compose up +``` + + UI on `/ui` on your proxy server ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033) @@ -206,38 +226,39 @@ curl 'http://0.0.0.0:4000/key/generate' \ ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers)) | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) | -| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- | -| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [google - vertex_ai [Gemini]](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | -| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | -| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | -| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | -| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | +|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | 
✅ | ✅ | ✅ | ✅ | ✅ | +| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | | +| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | | +| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | | +| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | | | [empower](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | ✅ | -| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | -| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | -| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | -| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | -| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | -| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | -| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | -| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | -| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | -| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | -| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | -| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | -| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | -| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | -| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ -| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | -| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | +| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | | +| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | | +| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | | +| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | | +| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | | +| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | | +| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | | +| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | | +| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | | +| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | | +| 
[perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | | +| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | | +| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | | +| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | | +| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | +| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | +| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | [**Read the Docs**](https://docs.litellm.ai/docs/) diff --git a/check_file_length.py b/check_file_length.py new file mode 100644 index 000000000..f23b79add --- /dev/null +++ b/check_file_length.py @@ -0,0 +1,28 @@ +import sys + + +def check_file_length(max_lines, filenames): + bad_files = [] + for filename in filenames: + with open(filename, "r") as file: + lines = file.readlines() + if len(lines) > max_lines: + bad_files.append((filename, len(lines))) + return bad_files + + +if __name__ == "__main__": + max_lines = int(sys.argv[1]) + filenames = sys.argv[2:] + + bad_files = check_file_length(max_lines, filenames) + if bad_files: + bad_files.sort( + key=lambda x: x[1], reverse=True + ) # Sort files by length in descending order + for filename, length in bad_files: + print(f"{filename}: {length} lines") + + sys.exit(1) + else: + sys.exit(0) diff --git a/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/grafana_dashboard.json b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/grafana_dashboard.json new file mode 100644 index 000000000..17fef1ffd --- /dev/null +++ b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/grafana_dashboard.json @@ -0,0 +1,594 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2039, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(litellm_self_latency_bucket{self=\"self\"}[1m])) by (le))", + "legendFormat": "Time to first token", + "range": true, + "refId": "A" + } + ], + "title": "Time to first token (latency)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "7e4b0627fd32efdd2313c846325575808aadcf2839f0fde90723aab9ab73c78f" + }, + "properties": [ + { + "id": "displayName", + "value": "Translata" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "sum(increase(litellm_spend_metric_total[30d])) by (hashed_api_key)", + "legendFormat": "{{team}}", + "range": true, + "refId": "A" + } + ], + "title": "Spend by team", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "sum by (model) (increase(litellm_requests_metric_total[5m]))", + "legendFormat": "{{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests by model", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 25 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.17", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "sum(increase(litellm_llm_api_failed_requests_metric_total[1h]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Faild Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 3, + "y": 25 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "sum(increase(litellm_spend_metric_total[30d])) by (model)", + "legendFormat": "{{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Spend", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 25 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "rMzWaBvIk" + }, + "editorMode": "code", + "expr": "sum(increase(litellm_total_tokens_total[5m])) by (model)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tokens", + "type": "timeseries" + } + ], + "refresh": "1m", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "LLM Proxy", + "uid": "rgRrHxESz", + "version": 15, + "weekStart": "" + } \ No newline at end of file diff --git a/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/readme.md b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/readme.md new file mode 100644 index 000000000..1f193aba7 --- /dev/null +++ b/cookbook/litellm_proxy_server/grafana_dashboard/dashboard_1/readme.md @@ -0,0 +1,6 @@ +## This folder contains the `json` for creating the following Grafana Dashboard + +### Pre-Requisites +- Setup LiteLLM Proxy Prometheus Metrics https://docs.litellm.ai/docs/proxy/prometheus + +![1716623265684](https://github.com/BerriAI/litellm/assets/29436595/0e12c57e-4a2d-4850-bd4f-e4294f87a814) diff --git a/cookbook/litellm_proxy_server/grafana_dashboard/readme.md b/cookbook/litellm_proxy_server/grafana_dashboard/readme.md new file mode 100644 index 000000000..fae1d792d --- /dev/null +++ b/cookbook/litellm_proxy_server/grafana_dashboard/readme.md @@ -0,0 +1,6 @@ +## Contains example Grafana Dashboard made for LiteLLM Proxy Server + +This folder contains the `json` for creating Grafana Dashboards + +### Pre-Requisites +- Setup LiteLLM Proxy Prometheus Metrics https://docs.litellm.ai/docs/proxy/prometheus \ No newline at end of file diff --git a/cookbook/proxy-server/readme.md b/cookbook/litellm_proxy_server/readme.md similarity index 100% rename from cookbook/proxy-server/readme.md rename to cookbook/litellm_proxy_server/readme.md diff --git a/cookbook/misc/add_new_models.py b/cookbook/misc/add_new_models.py new file mode 100644 index 000000000..c9b5a91e3 --- /dev/null +++ b/cookbook/misc/add_new_models.py @@ -0,0 +1,72 @@ +import requests +import json + + +def get_initial_config(): + proxy_base_url = input("Enter your proxy base URL (e.g., http://localhost:4000): ") + master_key = input("Enter your LITELLM_MASTER_KEY ") + return proxy_base_url, master_key + + +def get_user_input(): + model_name = input( + "Enter model_name (this is the 'model' passed in /chat/completions requests):" + ) + model = input("litellm_params: Enter model eg. 
'azure/': ") + tpm = int(input("litellm_params: Enter tpm (tokens per minute): ")) + rpm = int(input("litellm_params: Enter rpm (requests per minute): ")) + api_key = input("litellm_params: Enter api_key: ") + api_base = input("litellm_params: Enter api_base: ") + api_version = input("litellm_params: Enter api_version: ") + timeout = int(input("litellm_params: Enter timeout (0 for default): ")) + stream_timeout = int( + input("litellm_params: Enter stream_timeout (0 for default): ") + ) + max_retries = int(input("litellm_params: Enter max_retries (0 for default): ")) + + return { + "model_name": model_name, + "litellm_params": { + "model": model, + "tpm": tpm, + "rpm": rpm, + "api_key": api_key, + "api_base": api_base, + "api_version": api_version, + "timeout": timeout, + "stream_timeout": stream_timeout, + "max_retries": max_retries, + }, + } + + +def make_request(proxy_base_url, master_key, data): + url = f"{proxy_base_url}/model/new" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {master_key}", + } + + response = requests.post(url, headers=headers, json=data) + + print(f"Status Code: {response.status_code}") + print(f"Response from adding model: {response.text}") + + +def main(): + proxy_base_url, master_key = get_initial_config() + + while True: + print("Adding new Model to your proxy server...") + data = get_user_input() + make_request(proxy_base_url, master_key, data) + + add_another = input("Do you want to add another model? (yes/no): ").lower() + if add_another != "yes": + break + + print("Script finished.") + + +if __name__ == "__main__": + main() diff --git a/deploy/charts/litellm-helm/Chart.yaml b/deploy/charts/litellm-helm/Chart.yaml index 7f68acf88..fcd2e83cc 100644 --- a/deploy/charts/litellm-helm/Chart.yaml +++ b/deploy/charts/litellm-helm/Chart.yaml @@ -18,13 +18,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.2.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: v1.35.38 +appVersion: v1.41.8 dependencies: - name: "postgresql" diff --git a/deploy/charts/litellm-helm/index.yaml b/deploy/charts/litellm-helm/index.yaml new file mode 100644 index 000000000..5c6b75454 --- /dev/null +++ b/deploy/charts/litellm-helm/index.yaml @@ -0,0 +1,88 @@ +apiVersion: v1 +entries: + postgresql: + - annotations: + category: Database + images: | + - name: os-shell + image: docker.io/bitnami/os-shell:12-debian-12-r16 + - name: postgres-exporter + image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14 + - name: postgresql + image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6 + licenses: Apache-2.0 + apiVersion: v2 + appVersion: 16.2.0 + created: "2024-07-08T11:05:19.312515+08:00" + dependencies: + - name: common + repository: oci://registry-1.docker.io/bitnamicharts + tags: + - bitnami-common + version: 2.x.x + description: PostgreSQL (Postgres) is an open source object-relational database + known for reliability and data integrity. ACID-compliant, it supports foreign + keys, joins, views, triggers and stored procedures. 
+ digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167 + home: https://bitnami.com + icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png + keywords: + - postgresql + - postgres + - database + - sql + - replication + - cluster + maintainers: + - name: VMware, Inc. + url: https://github.com/bitnami/charts + name: postgresql + sources: + - https://github.com/bitnami/charts/tree/main/bitnami/postgresql + urls: + - https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz + version: 14.3.1 + redis: + - annotations: + category: Database + images: | + - name: kubectl + image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3 + - name: os-shell + image: docker.io/bitnami/os-shell:12-debian-12-r16 + - name: redis + image: docker.io/bitnami/redis:7.2.4-debian-12-r9 + - name: redis-exporter + image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4 + - name: redis-sentinel + image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7 + licenses: Apache-2.0 + apiVersion: v2 + appVersion: 7.2.4 + created: "2024-07-08T11:05:19.317065+08:00" + dependencies: + - name: common + repository: oci://registry-1.docker.io/bitnamicharts + tags: + - bitnami-common + version: 2.x.x + description: Redis(R) is an open source, advanced key-value store. It is often + referred to as a data structure server since keys can contain strings, hashes, + lists, sets and sorted sets. + digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984 + home: https://bitnami.com + icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png + keywords: + - redis + - keyvalue + - database + maintainers: + - name: VMware, Inc. + url: https://github.com/bitnami/charts + name: redis + sources: + - https://github.com/bitnami/charts/tree/main/bitnami/redis + urls: + - https://berriai.github.io/litellm/charts/redis-18.19.1.tgz + version: 18.19.1 +generated: "2024-07-08T11:05:19.308028+08:00" diff --git a/docker-compose.yml b/docker-compose.yml index 05439b1df..6c1f5f57b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,16 +1,29 @@ -version: "3.9" +version: "3.11" services: litellm: build: context: . args: target: runtime - image: ghcr.io/berriai/litellm:main-latest + image: ghcr.io/berriai/litellm:main-stable ports: - "4000:4000" # Map the container port to the host, change the host port if necessary - volumes: - - ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file - # You can change the port or number of workers as per your requirements or pass any new supported CLI augument. 
Make sure the port passed here matches with the container port defined above in `ports` value - command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ] + environment: + DATABASE_URL: "postgresql://postgres:example@db:5432/postgres" + STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI + env_file: + - .env # Load local .env file + + + db: + image: postgres + restart: always + environment: + POSTGRES_PASSWORD: example + healthcheck: + test: ["CMD-SHELL", "pg_isready"] + interval: 1s + timeout: 5s + retries: 10 # ...rest of your docker-compose config if any \ No newline at end of file diff --git a/docs/my-website/docs/assistants.md b/docs/my-website/docs/assistants.md new file mode 100644 index 000000000..1af780500 --- /dev/null +++ b/docs/my-website/docs/assistants.md @@ -0,0 +1,238 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Assistants API + +Covers Threads, Messages, Assistants. + +LiteLLM currently covers: +- Get Assistants +- Create Thread +- Get Thread +- Add Messages +- Get Messages +- Run Thread + +## Quick Start + +Call an existing Assistant. + +- Get the Assistant + +- Create a Thread when a user starts a conversation. + +- Add Messages to the Thread as the user asks questions. + +- Run the Assistant on the Thread to generate a response by calling the model and the tools. + + + + +**Get the Assistant** + +```python +from litellm import get_assistants, aget_assistants +import os + +# setup env +os.environ["OPENAI_API_KEY"] = "sk-.." + +assistants = get_assistants(custom_llm_provider="openai") + +### ASYNC USAGE ### +# assistants = await aget_assistants(custom_llm_provider="openai") +``` + +**Create a Thread** + +```python +from litellm import create_thread, acreate_thread +import os + +os.environ["OPENAI_API_KEY"] = "sk-.." + +new_thread = create_thread( + custom_llm_provider="openai", + messages=[{"role": "user", "content": "Hey, how's it going?"}], # type: ignore + ) + +### ASYNC USAGE ### +# new_thread = await acreate_thread(custom_llm_provider="openai",messages=[{"role": "user", "content": "Hey, how's it going?"}]) +``` + +**Add Messages to the Thread** + +```python +from litellm import create_thread, get_thread, aget_thread, add_message, a_add_message +import os + +os.environ["OPENAI_API_KEY"] = "sk-.." + +## CREATE A THREAD +_new_thread = create_thread( + custom_llm_provider="openai", + messages=[{"role": "user", "content": "Hey, how's it going?"}], # type: ignore + ) + +## OR retrieve existing thread +received_thread = get_thread( + custom_llm_provider="openai", + thread_id=_new_thread.id, + ) + +### ASYNC USAGE ### +# received_thread = await aget_thread(custom_llm_provider="openai", thread_id=_new_thread.id,) + +## ADD MESSAGE TO THREAD +message = {"role": "user", "content": "Hey, how's it going?"} +added_message = add_message( + thread_id=_new_thread.id, custom_llm_provider="openai", **message + ) + +### ASYNC USAGE ### +# added_message = await a_add_message(thread_id=_new_thread.id, custom_llm_provider="openai", **message) +``` + +**Run the Assistant on the Thread** + +```python +from litellm import get_assistants, create_thread, add_message, run_thread, arun_thread +import os + +os.environ["OPENAI_API_KEY"] = "sk-.." 
+assistants = get_assistants(custom_llm_provider="openai") + +## get the first assistant ### +assistant_id = assistants.data[0].id + +## GET A THREAD +_new_thread = create_thread( + custom_llm_provider="openai", + messages=[{"role": "user", "content": "Hey, how's it going?"}], # type: ignore + ) + +## ADD MESSAGE +message = {"role": "user", "content": "Hey, how's it going?"} +added_message = add_message( + thread_id=_new_thread.id, custom_llm_provider="openai", **message + ) + +## 🚨 RUN THREAD +response = run_thread( + custom_llm_provider="openai", thread_id=thread_id, assistant_id=assistant_id + ) + +### ASYNC USAGE ### +# response = await arun_thread(custom_llm_provider="openai", thread_id=thread_id, assistant_id=assistant_id) + +print(f"run_thread: {run_thread}") +``` + + + +```yaml +assistant_settings: + custom_llm_provider: azure + litellm_params: + api_key: os.environ/AZURE_API_KEY + api_base: os.environ/AZURE_API_BASE + api_version: os.environ/AZURE_API_VERSION +``` + +```bash +$ litellm --config /path/to/config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + +**Get the Assistant** + +```bash +curl "http://0.0.0.0:4000/v1/assistants?order=desc&limit=20" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" +``` + +**Create a Thread** + +```bash +curl http://0.0.0.0:4000/v1/threads \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '' +``` + +**Get a Thread** + +```bash +curl http://0.0.0.0:4000/v1/threads/{thread_id} \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" +``` + +**Add Messages to the Thread** + +```bash +curl http://0.0.0.0:4000/v1/threads/{thread_id}/messages \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "role": "user", + "content": "How does AI work? Explain it in simple terms." + }' +``` + +**Run the Assistant on the Thread** + +```bash +curl http://0.0.0.0:4000/v1/threads/thread_abc123/runs \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "assistant_id": "asst_abc123" + }' +``` + + + + +## Streaming + + + + +```python +from litellm import run_thread_stream +import os + +os.environ["OPENAI_API_KEY"] = "sk-.." + +message = {"role": "user", "content": "Hey, how's it going?"} + +data = {"custom_llm_provider": "openai", "thread_id": _new_thread.id, "assistant_id": assistant_id, **message} + +run = run_thread_stream(**data) +with run as run: + assert isinstance(run, AssistantEventHandler) + for chunk in run: + print(f"chunk: {chunk}") + run.until_done() +``` + + + + +```bash +curl -X POST 'http://0.0.0.0:4000/threads/{thread_id}/runs' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-D '{ + "assistant_id": "asst_6xVZQFFy1Kw87NbnYeNebxTf", + "stream": true +}' +``` + + + + +## [👉 Proxy API Reference](https://litellm-api.up.railway.app/#/assistants) diff --git a/docs/my-website/docs/batches.md b/docs/my-website/docs/batches.md new file mode 100644 index 000000000..51f3bb5ca --- /dev/null +++ b/docs/my-website/docs/batches.md @@ -0,0 +1,124 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Batches API + +Covers Batches, Files + + +## Quick Start + +Call an existing Assistant. + +- Create File for Batch Completion + +- Create Batch Request + +- Retrieve the Specific Batch and File Content + + + + + +**Create File for Batch Completion** + +```python +from litellm +import os + +os.environ["OPENAI_API_KEY"] = "sk-.." 
+ +file_name = "openai_batch_completions.jsonl" +_current_dir = os.path.dirname(os.path.abspath(__file__)) +file_path = os.path.join(_current_dir, file_name) +file_obj = await litellm.acreate_file( + file=open(file_path, "rb"), + purpose="batch", + custom_llm_provider="openai", +) +print("Response from creating file=", file_obj) +``` + +**Create Batch Request** + +```python +from litellm +import os + +create_batch_response = await litellm.acreate_batch( + completion_window="24h", + endpoint="/v1/chat/completions", + input_file_id=batch_input_file_id, + custom_llm_provider="openai", + metadata={"key1": "value1", "key2": "value2"}, +) + +print("response from litellm.create_batch=", create_batch_response) +``` + +**Retrieve the Specific Batch and File Content** + +```python + +retrieved_batch = await litellm.aretrieve_batch( + batch_id=create_batch_response.id, custom_llm_provider="openai" +) +print("retrieved batch=", retrieved_batch) +# just assert that we retrieved a non None batch + +assert retrieved_batch.id == create_batch_response.id + +# try to get file content for our original file + +file_content = await litellm.afile_content( + file_id=batch_input_file_id, custom_llm_provider="openai" +) + +print("file content = ", file_content) +``` + + + + +```bash +$ export OPENAI_API_KEY="sk-..." + +$ litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +**Create File for Batch Completion** + +```shell +curl https://api.openai.com/v1/files \ + -H "Authorization: Bearer sk-1234" \ + -F purpose="batch" \ + -F file="@mydata.jsonl" +``` + +**Create Batch Request** + +```bash +curl http://localhost:4000/v1/batches \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "input_file_id": "file-abc123", + "endpoint": "/v1/chat/completions", + "completion_window": "24h" + }' +``` + +**Retrieve the Specific Batch** + +```bash +curl http://localhost:4000/v1/batches/batch_abc123 \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ +``` + + + + +## [👉 Proxy API Reference](https://litellm-api.up.railway.app/#/batch) diff --git a/docs/my-website/docs/caching/all_caches.md b/docs/my-website/docs/caching/all_caches.md index eb309f9b8..1b8bbd8e0 100644 --- a/docs/my-website/docs/caching/all_caches.md +++ b/docs/my-website/docs/caching/all_caches.md @@ -212,6 +212,94 @@ If you run the code two times, response1 will use the cache from the first run t + + +## Switch Cache On / Off Per LiteLLM Call + +LiteLLM supports 4 cache-controls: + +- `no-cache`: *Optional(bool)* When `True`, Will not return a cached response, but instead call the actual endpoint. +- `no-store`: *Optional(bool)* When `True`, Will not cache the response. +- `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds). +- `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds). + +[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218) + + + +Example usage `no-cache` - When `True`, Will not return a cached response + +```python +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": "hello who are you" + } + ], + cache={"no-cache": True}, + ) +``` + + + + + +Example usage `no-store` - When `True`, Will not cache the response. 
+ +```python +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": "hello who are you" + } + ], + cache={"no-store": True}, + ) +``` + + + + +Example usage `ttl` - cache the response for 10 seconds + +```python +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": "hello who are you" + } + ], + cache={"ttl": 10}, + ) +``` + + + + +Example usage `s-maxage` - Will only accept cached responses for 60 seconds + +```python +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + { + "role": "user", + "content": "hello who are you" + } + ], + cache={"s-maxage": 60}, + ) +``` + + + + ## Cache Context Manager - Enable, Disable, Update Cache diff --git a/docs/my-website/docs/completion/drop_params.md b/docs/my-website/docs/completion/drop_params.md new file mode 100644 index 000000000..e79a88e14 --- /dev/null +++ b/docs/my-website/docs/completion/drop_params.md @@ -0,0 +1,110 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Drop Unsupported Params + +Drop unsupported OpenAI params by your LLM Provider. + +## Quick Start + +```python +import litellm +import os + +# set keys +os.environ["COHERE_API_KEY"] = "co-.." + +litellm.drop_params = True # 👈 KEY CHANGE + +response = litellm.completion( + model="command-r", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + response_format={"key": "value"}, + ) +``` + + +LiteLLM maps all supported openai params by provider + model (e.g. function calling is supported by anthropic on bedrock but not titan). + +See `litellm.get_supported_openai_params("command-r")` [**Code**](https://github.com/BerriAI/litellm/blob/main/litellm/utils.py#L3584) + +If a provider/model doesn't support a particular param, you can drop it. + +## OpenAI Proxy Usage + +```yaml +litellm_settings: + drop_params: true +``` + +## Pass drop_params in `completion(..)` + +Just drop_params when calling specific models + + + + +```python +import litellm +import os + +# set keys +os.environ["COHERE_API_KEY"] = "co-.." + +response = litellm.completion( + model="command-r", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + response_format={"key": "value"}, + drop_params=True + ) +``` + + + +```yaml +- litellm_params: + api_base: my-base + model: openai/my-model + drop_params: true # 👈 KEY CHANGE + model_name: my-model +``` + + + +## Specify params to drop + +To drop specific params when calling a provider (E.g. 'logit_bias' for vllm) + +Use `additional_drop_params` + + + + +```python +import litellm +import os + +# set keys +os.environ["COHERE_API_KEY"] = "co-.." + +response = litellm.completion( + model="command-r", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + response_format={"key": "value"}, + additional_drop_params=["response_format"] + ) +``` + + + +```yaml +- litellm_params: + api_base: my-base + model: openai/my-model + additional_drop_params: ["response_format"] # 👈 KEY CHANGE + model_name: my-model +``` + + + +**additional_drop_params**: List or null - Is a list of openai params you want to drop when making a call to the model. 
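For example, here is a minimal sketch (assuming `COHERE_API_KEY` is set, and treating the printed list as illustrative) that uses the `litellm.get_supported_openai_params(...)` helper referenced above to see which OpenAI params LiteLLM translates for `command-r`, then relies on `drop_params=True` so anything unsupported is stripped from the call instead of raising an error:

```python
import os
import litellm

os.environ["COHERE_API_KEY"] = "co-.."  # assumed credential for this sketch

# Inspect which OpenAI params LiteLLM translates for this model
supported = litellm.get_supported_openai_params("command-r")
print(supported)  # e.g. ["temperature", "max_tokens", "stream", ...]

# Any openai param not in that list is dropped instead of raising an error
response = litellm.completion(
    model="command-r",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    response_format={"key": "value"},  # unsupported for command-r -> dropped
    drop_params=True,
)
```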
\ No newline at end of file diff --git a/docs/my-website/docs/completion/function_call.md b/docs/my-website/docs/completion/function_call.md index 5daccf723..514e8cda1 100644 --- a/docs/my-website/docs/completion/function_call.md +++ b/docs/my-website/docs/completion/function_call.md @@ -502,10 +502,10 @@ response = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=f print(response) ``` -## Function calling for Non-OpenAI LLMs +## Function calling for Models w/out function-calling support ### Adding Function to prompt -For Non OpenAI LLMs LiteLLM allows you to add the function to the prompt set: `litellm.add_function_to_prompt = True` +For Models/providers without function calling support, LiteLLM allows you to add the function to the prompt set: `litellm.add_function_to_prompt = True` #### Usage ```python diff --git a/docs/my-website/docs/completion/input.md b/docs/my-website/docs/completion/input.md index e844c541c..5e2bd6079 100644 --- a/docs/my-website/docs/completion/input.md +++ b/docs/my-website/docs/completion/input.md @@ -39,38 +39,38 @@ This is a list of openai params we translate across providers. Use `litellm.get_supported_openai_params()` for an updated list of params for each model + provider -| Provider | temperature | max_tokens | top_p | stream | stop | n | presence_penalty | frequency_penalty | functions | function_call | logit_bias | user | response_format | seed | tools | tool_choice | logprobs | top_logprobs | extra_headers | -|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|--| -|Anthropic| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |✅ | ✅ | ✅ | ✅ | ✅ | | | ✅ -|OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ | -|Azure OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | | | ✅ | +| Provider | temperature | max_tokens | top_p | stream | stream_options | stop | n | presence_penalty | frequency_penalty | functions | function_call | logit_bias | user | response_format | seed | tools | tool_choice | logprobs | top_logprobs | extra_headers | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +|Anthropic| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |✅ | ✅ | ✅ | ✅ | ✅ | | | ✅ | +|OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ | +|Azure OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ |✅ | ✅ | | | ✅ | |Replicate | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | -|Anyscale | ✅ | ✅ | ✅ | ✅ | +|Anyscale | ✅ | ✅ | ✅ | ✅ | ✅ | |Cohere| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | -|Huggingface| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | +|Huggingface| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | |Openrouter| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | ✅ | | | | | |AI21| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | -|VertexAI| ✅ | ✅ | | ✅ | | | | | | | | | | | ✅ | | | +|VertexAI| ✅ | ✅ | | ✅ | ✅ | | | | | | | | | ✅ | ✅ | | | |Bedrock| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | | ✅ (for anthropic) | | -|Sagemaker| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | +|Sagemaker| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | |TogetherAI| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | ✅ | -|AlephAlpha| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | -|Palm| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | -|NLP Cloud| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | -|Petals| ✅ | ✅ | | ✅ | | | | | | | +|AlephAlpha| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | +|Palm| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | +|NLP Cloud| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | +|Petals| ✅ | ✅ | | ✅ | ✅ | | | | | | |Ollama| ✅ | ✅ | ✅ | ✅ | ✅ | | | ✅ | | | | | ✅ | | | |Databricks| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | | | -|ClarifAI| 
✅ | ✅ | | | | | | | | | | | | | | - +|ClarifAI| ✅ | ✅ | |✅ | ✅ | | | | | | | | | | | :::note By default, LiteLLM raises an exception if the openai param being passed in isn't supported. -To drop the param instead, set `litellm.drop_params = True`. +To drop the param instead, set `litellm.drop_params = True` or `completion(..drop_params=True)`. -**For function calling:** +This **ONLY DROPS UNSUPPORTED OPENAI PARAMS**. + +LiteLLM assumes any non-openai param is provider specific and passes it in as a kwarg in the request body -Add to prompt for non-openai models, set: `litellm.add_function_to_prompt = True`. ::: ## Input Params @@ -97,6 +97,7 @@ def completion( seed: Optional[int] = None, tools: Optional[List] = None, tool_choice: Optional[str] = None, + parallel_tool_calls: Optional[bool] = None, logprobs: Optional[bool] = None, top_logprobs: Optional[int] = None, deployment_id=None, @@ -166,10 +167,12 @@ def completion( - `function`: *object* - Required. -- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {"type: "function", "function": {"name": "my_function"}} forces the model to call that function. +- `tool_choice`: *string or object (optional)* - Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via `{"type: "function", "function": {"name": "my_function"}}` forces the model to call that function. - `none` is the default when no functions are present. `auto` is the default if functions are present. +- `parallel_tool_calls`: *boolean (optional)* - Whether to enable parallel function calling during tool use.. OpenAI default is true. + - `frequency_penalty`: *number or null (optional)* - It is used to penalize new tokens based on their frequency in the text so far. - `logit_bias`: *map (optional)* - Used to modify the probability of specific tokens appearing in the completion. diff --git a/docs/my-website/docs/completion/reliable_completions.md b/docs/my-website/docs/completion/reliable_completions.md index 2656f9a4f..94102e194 100644 --- a/docs/my-website/docs/completion/reliable_completions.md +++ b/docs/my-website/docs/completion/reliable_completions.md @@ -31,9 +31,15 @@ response = completion( ) ``` -## Fallbacks +## Fallbacks (SDK) -### Context Window Fallbacks +:::info + +[See how to do on PROXY](../proxy/reliability.md) + +::: + +### Context Window Fallbacks (SDK) ```python from litellm import completion @@ -43,7 +49,7 @@ messages = [{"content": "how does a court case get to the Supreme Court?" 
* 500, completion(model="gpt-3.5-turbo", messages=messages, context_window_fallback_dict=ctx_window_fallback_dict) ``` -### Fallbacks - Switch Models/API Keys/API Bases +### Fallbacks - Switch Models/API Keys/API Bases (SDK) LLM APIs can be unstable, completion() with fallbacks ensures you'll always get a response from your calls @@ -69,7 +75,7 @@ response = completion(model="azure/gpt-4", messages=messages, api_key=api_key, [Check out this section for implementation details](#fallbacks-1) -## Implementation Details +## Implementation Details (SDK) ### Fallbacks #### Output from calls diff --git a/docs/my-website/docs/completion/token_usage.md b/docs/my-website/docs/completion/token_usage.md index 807ccfd91..0bec6b3f9 100644 --- a/docs/my-website/docs/completion/token_usage.md +++ b/docs/my-website/docs/completion/token_usage.md @@ -1,7 +1,21 @@ # Completion Token Usage & Cost By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/)) -However, we also expose some helper functions + **[NEW]** an API to calculate token usage across providers: +LiteLLM returns `response_cost` in all calls. + +```python +from litellm import completion + +response = completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + mock_response="Hello world", + ) + +print(response._hidden_params["response_cost"]) +``` + +LiteLLM also exposes some helper functions: - `encode`: This encodes the text passed in, using the model-specific tokenizer. [**Jump to code**](#1-encode) @@ -23,7 +37,7 @@ However, we also expose some helper functions + **[NEW]** an API to calculate to - `api.litellm.ai`: Live token + price count across [all supported models](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). [**Jump to code**](#10-apilitellmai) -📣 This is a community maintained list. Contributions are welcome! ❤️ +📣 [This is a community maintained list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json). Contributions are welcome! ❤️ ## Example Usage diff --git a/docs/my-website/docs/completion/vision.md b/docs/my-website/docs/completion/vision.md index ea04b1e1e..69af03c98 100644 --- a/docs/my-website/docs/completion/vision.md +++ b/docs/my-website/docs/completion/vision.md @@ -39,7 +39,7 @@ Use `litellm.supports_vision(model="")` -> returns `True` if model supports `vis ```python assert litellm.supports_vision(model="gpt-4-vision-preview") == True -assert litellm.supports_vision(model="gemini-1.0-pro-visionn") == True +assert litellm.supports_vision(model="gemini-1.0-pro-vision") == True assert litellm.supports_vision(model="gpt-3.5-turbo") == False ``` diff --git a/docs/my-website/docs/debugging/hosted_debugging.md b/docs/my-website/docs/debugging/hosted_debugging.md index 5c98ac6f5..e69de29bb 100644 --- a/docs/my-website/docs/debugging/hosted_debugging.md +++ b/docs/my-website/docs/debugging/hosted_debugging.md @@ -1,90 +0,0 @@ -import Image from '@theme/IdealImage'; -import QueryParamReader from '../../src/components/queryParamReader.js' - -# [Beta] Monitor Logs in Production - -:::note - -This is in beta. Expect frequent updates, as we improve based on your feedback. - -::: - -LiteLLM provides an integration to let you monitor logs in production. - -👉 Jump to our sample LiteLLM Dashboard: https://admin.litellm.ai/ - - -Dashboard - -## Debug your first logs - - Open In Colab - - - -### 1.
Get your LiteLLM Token - -Go to [admin.litellm.ai](https://admin.litellm.ai/) and copy the code snippet with your unique token - -Usage - -### 2. Set up your environment - -**Add it to your .env** - -```python -import os - -os.env["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token - -``` - -**Turn on LiteLLM Client** -```python -import litellm -litellm.client = True -``` - -### 3. Make a normal `completion()` call -```python -import litellm -from litellm import completion -import os - -# set env variables -os.environ["LITELLM_TOKEN"] = "e24c4c06-d027-4c30-9e78-18bc3a50aebb" # replace with your unique token -os.environ["OPENAI_API_KEY"] = "openai key" - -litellm.use_client = True # enable logging dashboard -messages = [{ "content": "Hello, how are you?","role": "user"}] - -# openai call -response = completion(model="gpt-3.5-turbo", messages=messages) -``` - -Your `completion()` call print with a link to your session dashboard (https://admin.litellm.ai/) - -In the above case it would be: [`admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb`](https://admin.litellm.ai/e24c4c06-d027-4c30-9e78-18bc3a50aebb) - -Click on your personal dashboard link. Here's how you can find it 👇 - -Dashboard - -[👋 Tell us if you need better privacy controls](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version?month=2023-08) - -### 3. Review request log - -Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider. - - - - -Ah! So we can see that this request was made to a **Baseten** (see litellm_params > custom_llm_provider) for a model with ID - **7qQNLDB** (see model). The message sent was - `"Hey, how's it going?"` and the response received was - `"As an AI language model, I don't have feelings or emotions, but I can assist you with your queries. How can I assist you today?"` - -Dashboard Log Row - -:::info - -🎉 Congratulations! You've successfully debugger your first log! - -::: \ No newline at end of file diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md index 3dc4cb0e2..7035b25ce 100644 --- a/docs/my-website/docs/enterprise.md +++ b/docs/my-website/docs/enterprise.md @@ -2,38 +2,62 @@ For companies that need SSO, user management and professional support for LiteLLM Proxy :::info - +Interested in Enterprise? Schedule a meeting with us here 👉 [Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) ::: -This covers: -- ✅ **Features under the [LiteLLM Commercial License (Content Mod, Custom Tags, etc.)](https://docs.litellm.ai/docs/proxy/enterprise)** -- ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui) -- ✅ [**JWT-Auth**](../docs/proxy/token_auth.md) -- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai) -- ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints) -- ✅ **Feature Prioritization** -- ✅ **Custom Integrations** -- ✅ **Professional Support - Dedicated discord + slack** -- ✅ **Custom SLAs** - - -## [COMING SOON] AWS Marketplace Support - Deploy managed LiteLLM Proxy within your VPC. Includes all enterprise features. 
+[**View AWS Marketplace Listing**](https://aws.amazon.com/marketplace/pp/prodview-gdm3gswgjhgjo?sr=0-1&ref_=beagle&applicationId=AWSMPContessa) + [**Get early access**](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +This covers: +- **Enterprise Features** + - **Security** + - ✅ [SSO for Admin UI](./proxy/ui#✨-enterprise-features) + - ✅ [Audit Logs with retention policy](./proxy/enterprise#audit-logs) + - ✅ [JWT-Auth](../docs/proxy/token_auth.md) + - ✅ [Control available public, private routes](./proxy/enterprise#control-available-public-private-routes) + - ✅ [[BETA] AWS Key Manager v2 - Key Decryption](./proxy/enterprise#beta-aws-key-manager---key-decryption) + - ✅ Track Request IP Address + - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints) + - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests) + - **Spend Tracking** + - ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags) + - ✅ [API Endpoints to get Spend Reports per Team, API Key, Customer](./proxy/cost_tracking.md#✨-enterprise-api-endpoints-to-get-spend) + - **Advanced Metrics** + - ✅ [`x-ratelimit-remaining-requests`, `x-ratelimit-remaining-tokens` for LLM APIs on Prometheus](./proxy/prometheus#✨-enterprise-llm-remaining-requests-and-remaining-tokens) + - **Guardrails, PII Masking, Content Moderation** + - ✅ [Content Moderation with LLM Guard, LlamaGuard, Secret Detection, Google Text Moderations](./proxy/enterprise#content-moderation) + - ✅ [Prompt Injection Detection (with LakeraAI API)](./proxy/enterprise#prompt-injection-detection---lakeraai) + - ✅ Reject calls from Blocked User list + - ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors) + - **Custom Branding** + - ✅ [Custom Branding + Routes on Swagger Docs](./proxy/enterprise#swagger-docs---custom-routes--branding) + - ✅ [Public Model Hub](../docs/proxy/enterprise.md#public-model-hub) + - ✅ [Custom Email Branding](../docs/proxy/email.md#customizing-email-branding) +- ✅ **Feature Prioritization** +- ✅ **Custom Integrations** +- ✅ **Professional Support - Dedicated discord + slack** + + + ## Frequently Asked Questions ### What topics does Professional support cover and what SLAs do you offer? Professional Support can assist with LLM/Provider integrations, deployment, upgrade management, and LLM Provider troubleshooting. We can’t solve your own infrastructure-related issues but we will guide you to fix them. -We offer custom SLAs based on your needs and the severity of the issue. The standard SLA is 6 hours for Sev0-Sev1 severity and 24h for Sev2-Sev3 between 7am – 7pm PT (Monday through Saturday). +- 1 hour for Sev0 issues +- 6 hours for Sev1 +- 24h for Sev2-Sev3 between 7am – 7pm PT (Monday through Saturday) + +**We can offer custom SLAs** based on your needs and the severity of the issue ### What’s the cost of the Self-Managed Enterprise edition? 
diff --git a/docs/my-website/docs/image_generation.md b/docs/my-website/docs/image_generation.md index 7bb4d2c99..10b5b5e68 100644 --- a/docs/my-website/docs/image_generation.md +++ b/docs/my-website/docs/image_generation.md @@ -51,7 +51,7 @@ print(f"response: {response}") - `api_base`: *string (optional)* - The api endpoint you want to call the model with -- `api_version`: *string (optional)* - (Azure-specific) the api version for the call +- `api_version`: *string (optional)* - (Azure-specific) the api version for the call; required for dall-e-3 on Azure - `api_key`: *string (optional)* - The API key to authenticate and authorize requests. If not provided, the default API key is used. @@ -166,4 +166,4 @@ response = litellm.image_generation( vertex_ai_location="us-central1", ) print(f"response: {response}") -``` \ No newline at end of file +``` diff --git a/docs/my-website/docs/observability/athina_integration.md b/docs/my-website/docs/observability/athina_integration.md index 62c889751..cd1442f35 100644 --- a/docs/my-website/docs/observability/athina_integration.md +++ b/docs/my-website/docs/observability/athina_integration.md @@ -2,6 +2,15 @@ import Image from '@theme/IdealImage'; # Athina + +:::tip + +This integration is community maintained. Please open an issue if you run into a bug: +https://github.com/BerriAI/litellm + +::: + + [Athina](https://athina.ai/) is an evaluation framework and production monitoring platform for your LLM-powered app. Athina is designed to enhance the performance and reliability of AI applications through real-time monitoring, granular analytics, and plug-and-play evaluations. diff --git a/docs/my-website/docs/observability/custom_callback.md b/docs/my-website/docs/observability/custom_callback.md index 316822227..373b4a96c 100644 --- a/docs/my-website/docs/observability/custom_callback.md +++ b/docs/my-website/docs/observability/custom_callback.md @@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger): print(f"On Async Success") async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): - print(f"On Async Success") + print(f"On Async Failure") customHandler = MyCustomHandler() diff --git a/docs/my-website/docs/observability/greenscale_integration.md b/docs/my-website/docs/observability/greenscale_integration.md index 0dd673226..49eadc645 100644 --- a/docs/my-website/docs/observability/greenscale_integration.md +++ b/docs/my-website/docs/observability/greenscale_integration.md @@ -1,5 +1,14 @@ # Greenscale - Track LLM Spend and Responsible Usage + +:::tip + +This integration is community maintained. Please open an issue if you run into a bug: +https://github.com/BerriAI/litellm + +::: + + [Greenscale](https://greenscale.ai/) is a production monitoring platform for your LLM-powered app that provides you granular key insights into your GenAI spending and responsible usage. Greenscale only captures metadata to minimize the exposure risk of personally identifiable information (PII).
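To make the setup concrete before the Getting Started steps below, here is a minimal sketch of wiring LiteLLM up to Greenscale through its logging callback. Treat the `"greenscale"` callback string, the `GREENSCALE_API_KEY` / `GREENSCALE_ENDPOINT` env vars, and the `greenscale_*` metadata keys as assumptions to verify against the Getting Started section that follows.

```python
import os
import litellm
from litellm import completion

# Assumed env vars for the Greenscale integration (verify in the Getting Started section below)
os.environ["GREENSCALE_API_KEY"] = "your-greenscale-api-key"
os.environ["GREENSCALE_ENDPOINT"] = "your-greenscale-ingestion-endpoint"
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

# Register Greenscale as a success callback so completed calls are logged
litellm.success_callback = ["greenscale"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello from LiteLLM"}],
    metadata={
        # Hypothetical tags used to group spend in the Greenscale dashboard
        "greenscale_project": "acme-chatbot",
        "greenscale_application": "support-widget",
    },
)
print(response)
```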
## Getting Started diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md index de89ba8da..f7fd330c3 100644 --- a/docs/my-website/docs/observability/helicone_integration.md +++ b/docs/my-website/docs/observability/helicone_integration.md @@ -1,4 +1,13 @@ # Helicone Tutorial + +:::tip + +This integration is community maintained. Please open an issue if you run into a bug: +https://github.com/BerriAI/litellm + +::: + + [Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage. ## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM) diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md index 6dd5377ea..9703d38a0 100644 --- a/docs/my-website/docs/observability/langfuse_integration.md +++ b/docs/my-website/docs/observability/langfuse_integration.md @@ -1,6 +1,6 @@ import Image from '@theme/IdealImage'; -# Langfuse - Logging LLM Input/Output +# 🔥 Langfuse - Logging LLM Input/Output LangFuse is open Source Observability & Analytics for LLM Apps Detailed production traces and a granular view on quality, cost and latency @@ -122,10 +122,12 @@ response = completion( metadata={ "generation_name": "ishaan-test-generation", # set langfuse Generation Name "generation_id": "gen-id22", # set langfuse Generation ID + "parent_observation_id": "obs-id9", # set langfuse Parent Observation ID "version": "test-generation-version" # set langfuse Generation Version "trace_user_id": "user-id2", # set langfuse Trace User ID "session_id": "session-1", # set langfuse Session ID "tags": ["tag1", "tag2"], # set langfuse Tags + "trace_name": "new-trace-name", # set langfuse Trace Name "trace_id": "trace-id22", # set langfuse Trace ID "trace_metadata": {"key": "value"}, # set langfuse Trace Metadata "trace_version": "test-trace-version", # set langfuse Trace Version (if not set, defaults to Generation Version) @@ -144,6 +146,27 @@ print(response) ``` +You can also pass `metadata` as part of the request header with a `langfuse_*` prefix: + +```shell +curl --location --request POST 'http://0.0.0.0:4000/chat/completions' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-1234' \ + --header 'langfuse_trace_id: trace-id2' \ + --header 'langfuse_trace_user_id: user-id2' \ + --header 'langfuse_trace_metadata: {"key":"value"}' \ + --data '{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] +}' +``` + + ### Trace & Generation Parameters #### Trace Specific Parameters @@ -170,9 +193,10 @@ The following parameters can be updated on a continuation of a trace by passing #### Generation Specific Parameters -* `generation_id` - Identifier for the generation, auto-generated by default -* `generation_name` - Identifier for the generation, auto-generated by default -* `prompt` - Langfuse prompt object used for the generation, defaults to None +* `generation_id` - Identifier for the generation, auto-generated by default +* `generation_name` - Name of the generation, auto-generated by default +* `parent_observation_id` - Identifier for the parent observation, defaults to `None` +* `prompt` - Langfuse prompt object used for the generation, defaults to `None` Any other key value pairs passed into the metadata not listed in the above spec for a
`litellm` completion will be added as a metadata key value pair for the generation. diff --git a/docs/my-website/docs/observability/langsmith_integration.md b/docs/my-website/docs/observability/langsmith_integration.md index b115866d5..c038abd82 100644 --- a/docs/my-website/docs/observability/langsmith_integration.md +++ b/docs/my-website/docs/observability/langsmith_integration.md @@ -1,6 +1,16 @@ import Image from '@theme/IdealImage'; # Langsmith - Logging LLM Input/Output + + +:::tip + +This integration is community maintained. Please open an issue if you run into a bug: +https://github.com/BerriAI/litellm + +::: + + An all-in-one developer platform for every step of the application lifecycle https://smith.langchain.com/ diff --git a/docs/my-website/docs/observability/logfire_integration.md b/docs/my-website/docs/observability/logfire_integration.md index c1f425f42..a2d406f9c 100644 --- a/docs/my-website/docs/observability/logfire_integration.md +++ b/docs/my-website/docs/observability/logfire_integration.md @@ -1,6 +1,6 @@ import Image from '@theme/IdealImage'; -# Logfire - Logging LLM Input/Output +# 🔥 Logfire - Logging LLM Input/Output Logfire is open Source Observability & Analytics for LLM Apps Detailed production traces and a granular view on quality, cost and latency @@ -14,10 +14,14 @@ join our [discord](https://discord.gg/wuPM9dRgDw) ## Pre-Requisites -Ensure you have run `pip install logfire` for this integration +Ensure you have installed the following packages to use this integration: ```shell -pip install logfire litellm +pip install litellm + +pip install opentelemetry-api==1.25.0 +pip install opentelemetry-sdk==1.25.0 +pip install opentelemetry-exporter-otlp==1.25.0 ``` ## Quick Start @@ -25,8 +29,7 @@ pip install logfire litellm Get your Logfire token from [Logfire](https://logfire.pydantic.dev/) ```python -litellm.success_callback = ["logfire"] -litellm.failure_callback = ["logfire"] # logs errors to logfire +litellm.callbacks = ["logfire"] ``` ```python diff --git a/docs/my-website/docs/observability/lunary_integration.md b/docs/my-website/docs/observability/lunary_integration.md index 9b8e90df7..56e74132f 100644 --- a/docs/my-website/docs/observability/lunary_integration.md +++ b/docs/my-website/docs/observability/lunary_integration.md @@ -1,5 +1,13 @@ # Lunary - Logging and tracing LLM input/output +:::tip + +This integration is community maintained. Please open an issue if you run into a bug: +https://github.com/BerriAI/litellm + +::: + + [Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools for AI developers.
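Like the Langfuse and Logfire integrations above, Lunary is enabled through LiteLLM's callback lists. The snippet below is a minimal sketch; the `"lunary"` callback string and the `LUNARY_PUBLIC_KEY` env var are assumptions based on the integration docs, so confirm them against the setup steps on this page.

```python
import os
import litellm
from litellm import completion

# Assumed env var for the Lunary integration (confirm in the setup steps of this page)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

# Log both successful and failed calls to Lunary
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi, what can you do?"}],
)
print(response)
```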