# speechtoimage/imageserver/main.py
from multiprocessing import context
from httplib2 import Response
import torch
import uuid
import os
from diffusers import StableDiffusionPipeline
from dotenv import load_dotenv
from os import getenv
from fastapi import FastAPI, Response
from pydantic import BaseModel
import io
# Load environment variables (notably TOKEN, the Hugging Face access token)
# from a local .env file before the pipeline is constructed.
load_dotenv()
# Stable Diffusion v1.4 in half precision. Downloading the weights requires a
# valid Hugging Face token; fp16 weights require a CUDA-capable GPU.
# NOTE(review): `revision="fp16"` and `use_auth_token=` are deprecated in newer
# diffusers releases (replaced by `variant="fp16"` / `token=`) — confirm the
# pinned diffusers version before upgrading.
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=getenv("TOKEN"))
pipe.to("cuda")
class Text(BaseModel):
    """Request body for the image endpoint.

    Attributes (via pydantic):
        text: the prompt to render with Stable Diffusion.
    """

    # Prompt string forwarded verbatim to the diffusion pipeline.
    text: str
# ASGI application instance (serve with uvicorn/hypercorn).
app = FastAPI()
# Registered for both POST and GET on "/". POST is the correct verb for an
# endpoint that takes a JSON request body (RFC 9110 discourages bodies on GET,
# and many HTTP clients/proxies cannot send them); GET is kept so existing
# callers keep working.
@app.post(
    "/",
    responses={200: {"content": {"image/png": {}}}},
    response_class=Response,
)
@app.get(
    "/",
    responses={200: {"content": {"image/png": {}}}},
    response_class=Response,
)
async def root(text: Text):
    """Generate an image for ``text.text`` and return it as PNG bytes.

    Args:
        text: request body whose ``text`` field is the diffusion prompt.

    Returns:
        A raw ``Response`` with ``media_type="image/png"`` containing the
        PNG-encoded image produced by the module-level ``pipe``.
    """
    prompt = text.text
    print(prompt)  # minimal request logging
    # pipe(...) returns a pipeline output; .images[0] is a PIL.Image.
    image = pipe(prompt).images[0]
    # PIL's Image.save expects a file-like object, so encode into an
    # in-memory buffer and hand the raw bytes to the Response.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    # media_type sets the Content-Type of the response sent to the client.
    return Response(content=png_buffer.getvalue(), media_type="image/png")