diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index 45b1a9d9f..666602b2d 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -39,7 +39,6 @@ def completion(
 
     tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, add_bos_token=False)
     model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
-    model_obj = model_obj.cuda()
 
     prompt = ""
     for message in messages:
@@ -64,7 +63,7 @@ def completion(
         )
     
     ## COMPLETION CALL
-    inputs = tokenizer(prompt, return_tensors="pt")["input_ids"].cuda()
+    inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
     
     # optional params: max_new_tokens=1,temperature=0.9, top_p=0.6
     outputs = model_obj.generate(inputs, **optional_params)
diff --git a/pyproject.toml b/pyproject.toml
index 857a46ad0..0422b52fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.714"
+version = "0.1.715"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"