diff --git a/litellm/main.py b/litellm/main.py
index f85b2d3aa6..bb7da6352e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -431,7 +431,37 @@ def completion(
       generator = get_ollama_response_stream(endpoint, model, prompt)
       # assume all responses are streamed
       return generator
-    else: 
+    elif custom_llm_provider == "petals":
+      # Petals provider: load tokenizer + distributed model, generate, then fill model_response.
+      install_and_import("transformers")
+      from transformers import AutoTokenizer
+      from petals import AutoDistributedModelForCausalLM
+
+      tokenizer = AutoTokenizer.from_pretrained(model)
+      # Keep `model` bound to the model name: it is reused below for logging and for
+      # the "model" field of the response, so the loaded object gets its own name.
+      petals_model = AutoDistributedModelForCausalLM.from_pretrained(model)
+      # Embeddings & prompts are on your device, transformer blocks are distributed across the Internet
+
+      # NOTE(review): `prompt` is not assigned in this branch -- confirm it is bound earlier in completion().
+      inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
+
+      outputs = petals_model.generate(
+            inputs=inputs,
+            temperature=1.0
+        )
+
+      completion_response = tokenizer.decode(outputs[0])
+
+      ## LOGGING
+      logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn)
+
+      ## RESPONSE OBJECT
+      model_response["choices"][0]["message"]["content"] = completion_response
+      model_response["created"] = time.time()
+      model_response["model"] = model
+      response = model_response
+    else:
       ## LOGGING
       logging(model=model, input=messages, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn)
       args = locals()
diff --git a/pyproject.toml b/pyproject.toml
index 1d0ed98bed..b0ea10b80a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.392"
+version = "0.1.393"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"