Compare commits

...

13 Commits

Author SHA1 Message Date
018163fae8 Save images locally. Add manual seed 2022-11-01 14:00:35 +13:00
2f91297ff9 Expose ports, Add volumes 2022-11-01 13:59:54 +13:00
80097d995d Don't use gpui 2022-11-01 13:59:10 +13:00
76813d86ec Ignore images folder 2022-11-01 13:55:21 +13:00
294ff694cb Use gpu for vosk 2022-10-25 22:15:01 +13:00
3a63a37777 Add httplib2 2022-10-24 18:25:57 +13:00
f76f62e53d Add error handling 2022-10-24 18:25:33 +13:00
39f104288b Add headings. Add docker-compose commands 2022-10-21 04:59:20 +00:00
34e7a96e48 Merge pull request 'pi' (#1) from pi into master
Reviewed-on: jimmy/speechtoimage#1
2022-10-21 04:55:51 +00:00
abc34b8d37 Ignore image.png 2022-10-21 17:43:35 +13:00
1571d31d31 Working with fastapi 2022-10-21 17:42:59 +13:00
f03d90f378 Add fastapi. Cache models 2022-10-21 17:42:28 +13:00
5968ea305a Add vosk server. Expose port 2022-10-21 17:41:33 +13:00
5 changed files with 74 additions and 12 deletions

4
.gitignore vendored
View File

@@ -1 +1,3 @@
.env
.env
image.png
images/

View File

@@ -1,5 +1,7 @@
# Speech to Image
## Pi Setup
https://learn.adafruit.com/adafruit-i2s-mems-microphone-breakout/raspberry-pi-wiring-test
https://github.com/alphacep/vosk-server
@@ -7,8 +9,9 @@ https://github.com/alphacep/vosk-server
``` sudo apt install python3-pip git python3-pyaudio vlc```
``` sudo pip3 install websockets sounddevice```
```sudo docker-compose up -d vosk```
## Image Server
https://huggingface.co/CompVis/stable-diffusion-v1-4
@@ -23,4 +26,6 @@ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
```
sudo apt-get update
sudo apt-get install -y nvidia-docker2
```
```
```sudo docker-compose up -d --build imageserver```

View File

@@ -5,6 +5,19 @@ services:
image: imageserver
build: imageserver
runtime: nvidia
command: python3 /main.py
ports:
- 8000:8000
env_file:
- .env
- .env
volumes:
- ./images:/images
- ./notebooks:/notebooks
- ./models:/root/.cache/huggingface/diffusers/
ports:
- 8888:8888
- 8000:8000
vosk:
image: alphacep/kaldi-en
ports:
- 2700:2700

View File

@@ -1,8 +1,14 @@
FROM nvidia/cuda:11.6.0-base-ubuntu20.04
RUN apt-get update && apt-get install python3 python3-pip -y
RUN pip3 install --upgrade diffusers transformers scipy python-dotenv cuda-python && \
RUN pip3 install --upgrade diffusers transformers scipy python-dotenv cuda-python fastapi uvicorn httplib2 && \
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 && \
pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 torchaudio==0.11.0+cu115 -f https://download.pytorch.org/whl/torch_stable.html
COPY main.py /main.py
WORKDIR /app
COPY main.py /app/main.py
VOLUME /root/.cache/huggingface/diffusers/
CMD [ "uvicorn", "main:app", "--host", "0.0.0.0" ]

View File

@@ -1,19 +1,55 @@
from multiprocessing import context
from httplib2 import Response
import torch
import uuid
import os
from diffusers import StableDiffusionPipeline
from dotenv import load_dotenv
from os import getenv
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
import io
from PIL.PngImagePlugin import PngInfo
load_dotenv()
# get your token at https://huggingface.co/settings/tokens
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=getenv("TOKEN"))
pipe.to("cuda")
prompt = "metal buttons are often soldiers who just got out of high school or a couple of years graduated from college easy as an air conditioned box about radar the patriot radar known as the a n n e e e pi this is an extremely powerful radar unit so powerful that they actually"
class Text(BaseModel):
    """Pydantic model carrying a single string field, ``text``.

    NOTE(review): no route visible in this section takes this model as a
    body; the ``/`` endpoint reads ``text`` as a plain query parameter.
    """

    text: str
for _ in range(10):
image = pipe(prompt)["sample"][0]
image.save(f"{uuid.uuid4()}.png".replace(" ", "_"))
app = FastAPI()
@app.get(
    "/",
    responses={
        200: {
            "content": {"image/png": {}}
        }
    },
    response_class=Response,
)
def root(text: str):
    """Generate an image for ``text`` and return it as a PNG response.

    The prompt is ``text`` with every ``+`` replaced by a space. The
    generated image is saved under ``/images/`` with a random UUID file
    name and the prompt embedded as a PNG ``text`` chunk, then returned
    in the response body.

    Args:
        text: Prompt passed as a query parameter.

    Returns:
        A ``Response`` whose body is the PNG-encoded image
        (``media_type="image/png"``).

    Raises:
        HTTPException: status 202 with detail ``"Busy"`` when the
            pipeline raises ``RuntimeError``; status 504 for any other
            exception raised during generation.
    """
    prompt = text.replace('+', ' ')
    print(prompt)
    try:
        # The seeded generator only takes effect if it is handed to the
        # pipeline call; previously it was created and then discarded.
        generator = torch.Generator("cuda").manual_seed(1024)
        resp = pipe(prompt, generator=generator)
        print(resp)
        image = resp.images[0]
    except RuntimeError as e:
        # NOTE(review): 202 is kept because existing clients may key off
        # it; presumably this signals a busy/exhausted GPU -- confirm.
        raise HTTPException(status_code=202, detail="Busy") from e
    except Exception as e:
        # Catch Exception rather than using a bare except so that
        # SystemExit / KeyboardInterrupt still propagate.
        raise HTTPException(status_code=504) from e

    # Keep a local copy with the prompt stored in the PNG metadata.
    metadata = PngInfo()
    metadata.add_text("text", prompt)
    image.save(f'/images/{uuid.uuid4()}.png', pnginfo=metadata)

    # Encode the image in memory for the HTTP response body.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    return Response(content=png_buffer.getvalue(), media_type="image/png")