# speechtoimage/imageserver/main.py
from multiprocessing import context
from httplib2 import Response
import torch
import uuid
import os
from diffusers import StableDiffusionPipeline
from dotenv import load_dotenv
from os import getenv
from fastapi import FastAPI, Response
from pydantic import BaseModel
import io
# Load environment variables (notably TOKEN, the Hugging Face access token)
# from a local .env file before the pipeline is constructed.
load_dotenv()
# Stable Diffusion v1.4 in half precision. Downloading the weights requires a
# valid Hugging Face token; fp16 weights require a CUDA-capable GPU.
# NOTE(review): `revision="fp16"` and `use_auth_token=` are deprecated in newer
# diffusers releases (replaced by `variant="fp16"` / `token=`) — confirm the
# pinned diffusers version before upgrading.
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=getenv("TOKEN"))
pipe.to("cuda")
class Text(BaseModel):
    """Request body for the image endpoint.

    Attributes (via pydantic):
        text: the prompt to render with Stable Diffusion.
    """

    # Prompt string forwarded verbatim to the diffusion pipeline.
    text: str
# ASGI application instance (serve with uvicorn/hypercorn).
app = FastAPI()
# Registered for both POST and GET on "/". POST is the correct verb for an
# endpoint that takes a JSON request body (RFC 9110 discourages bodies on GET,
# and many HTTP clients/proxies cannot send them); GET is kept so existing
# callers keep working.
@app.post(
    "/",
    responses={200: {"content": {"image/png": {}}}},
    response_class=Response,
)
@app.get(
    "/",
    responses={200: {"content": {"image/png": {}}}},
    response_class=Response,
)
async def root(text: Text):
    """Generate an image for ``text.text`` and return it as PNG bytes.

    Args:
        text: request body whose ``text`` field is the diffusion prompt.

    Returns:
        A raw ``Response`` with ``media_type="image/png"`` containing the
        PNG-encoded image produced by the module-level ``pipe``.
    """
    prompt = text.text
    print(prompt)  # minimal request logging
    # pipe(...) returns a pipeline output; .images[0] is a PIL.Image.
    image = pipe(prompt).images[0]
    # PIL's Image.save expects a file-like object, so encode into an
    # in-memory buffer and hand the raw bytes to the Response.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    # media_type sets the Content-Type of the response sent to the client.
    return Response(content=png_buffer.getvalue(), media_type="image/png")