Compare commits

...

3 Commits

Author SHA1 Message Date
Jimmy e39c77807c Cast to float 2022-11-17 19:37:16 +13:00
Jimmy 5742eaf275 Add local vosk 2022-11-17 19:36:41 +13:00
Jimmy f27a46bf32 Update README 2022-11-17 19:34:42 +13:00
3 changed files with 133 additions and 6 deletions

View File

@ -2,12 +2,15 @@
## Pi Setup
https://forums.raspberrypi.com/viewtopic.php?t=330358
https://learn.adafruit.com/adafruit-i2s-mems-microphone-breakout/raspberry-pi-wiring-test
https://github.com/alphacep/vosk-server
``` sudo apt install python3-pip git python3-pyaudio vlc```
``` sudo pip3 install websockets sounddevice```
```sudo apt install python3-pip git python3-pyaudio vlc```
```sudo pip3 install sounddevice aiohttp aiofiles python-vlc termcolor vosk websockets```
```sudo docker-compose up -d vosk```
@ -28,4 +31,4 @@ sudo apt-get update
sudo apt-get install -y nvidia-docker2
```
```sudo docker-compose up -d --build imageserver```
```sudo docker-compose up -d --build imageserver```

124
pi/local.py Normal file
View File

@ -0,0 +1,124 @@
#!/usr/bin/env python3
import argparse
import queue
import sys
import json
import asyncio
import sounddevice as sd
from termcolor import colored
import aiohttp
import aiofiles
import vlc
from time import sleep
from vosk import Model, KaldiRecognizer
# Queue that hands raw audio blocks from callback() (producer) to the
# recognition loop in main() (consumer).
q = queue.Queue()
def int_or_str(text):
    """Argument-parsing helper: return *text* as an int when it parses as one.

    Any value that ``int()`` rejects with ``ValueError`` is handed back
    unchanged, so a device can be given either as a numeric ID or as a name.
    """
    try:
        parsed = int(text)
    except ValueError:
        parsed = text
    return parsed
def callback(indata, frames, time, status):
    """Audio-stream callback, invoked (from a separate thread) per audio block.

    Reports any stream status on stderr and queues a bytes copy of the block
    on the module-level queue ``q``.
    """
    if status:
        print(status, file=sys.stderr)
    block = bytes(indata)
    q.put(block)
# Base URL of the image-generation server that is queried with the recognised text.
IMAGE_SERVER_URL = "http://192.168.1.95:8000"
# File the downloaded image is written to and that the VLC player displays.
IMAGE_PATH = "image.png"


def _confidence_colour(conf):
    """Return the terminal colour used for a word with confidence *conf*."""
    if conf > 0.7:
        return "green"
    if conf > 0.5:
        return "yellow"
    return "red"


def _print_words(words):
    """Print each word coloured by its confidence, followed by the mean confidence."""
    total = 0.0
    for word in words:
        conf = float(word["conf"])  # convert once, reuse for sum and colour
        total += conf
        print(colored(word["word"], _confidence_colour(conf)), end=" ")
    if words:  # avoid ZeroDivisionError on an empty word list
        print(total / len(words))


async def _fetch_image(text):
    """Request an image for *text* and store it at IMAGE_PATH.

    Returns True when the server answered with HTTP 200 and the file was
    written, False otherwise.
    """
    async with aiohttp.ClientSession() as session:
        # Let aiohttp build the query string so the text is properly encoded.
        async with session.get(IMAGE_SERVER_URL, params={"text": text}) as resp:
            print(resp.status)
            if resp.status != 200:
                return False
            payload = await resp.read()
    async with aiofiles.open(IMAGE_PATH, mode="wb") as image_file:
        await image_file.write(payload)
    return True


async def main():
    """Transcribe microphone audio with a local Vosk model and show generated images.

    Command-line options: ``-l/--list-devices`` prints the audio devices and
    exits; ``-f/--filename`` additionally dumps the raw audio to a file;
    ``-d/--device`` selects the input device; ``-r/--samplerate`` overrides
    the device's default sampling rate.

    For every finished utterance the recognised words are printed coloured by
    confidence. When the utterance has more than two words, the text is sent
    to the image server and the returned image is displayed through VLC.

    Exits through ``parser.exit`` (``SystemExit``) on Ctrl+C or on any other
    exception raised while recording.
    """
    # Parse arguments first so --list-devices / --help do not need VLC.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument(
        "-l", "--list-devices", action="store_true",
        help="show list of audio devices and exit")
    args, remaining = parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        parser.exit(0)
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[parser])
    parser.add_argument(
        "-f", "--filename", type=str, metavar="FILENAME",
        help="audio file to store recording to")
    parser.add_argument(
        "-d", "--device", type=int_or_str,
        help="input device (numeric ID or substring)")
    parser.add_argument(
        "-r", "--samplerate", type=int, help="sampling rate")
    args = parser.parse_args(remaining)

    vlc_instance = vlc.Instance()
    player = vlc_instance.media_player_new()
    player.set_media(vlc_instance.media_new(IMAGE_PATH))

    dump_fn = None
    try:
        if args.samplerate is None:
            device_info = sd.query_devices(args.device, "input")
            # sounddevice reports the default rate as a float; use an int.
            args.samplerate = int(device_info["default_samplerate"])
        model = Model(lang="en-us")
        if args.filename:
            dump_fn = open(args.filename, "wb")
        with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000,
                               device=args.device, dtype="int16",
                               channels=1, callback=callback):
            print("#" * 80)
            print("Press Ctrl+C to stop the recording")
            print("#" * 80)
            rec = KaldiRecognizer(model, args.samplerate)
            # Per-word entries ("result" with "conf") are only produced
            # when word output is enabled on the recognizer.
            rec.SetWords(True)
            while True:
                # Blocking get/sleep are acceptable here: this coroutine is
                # the only task on the event loop.
                data = q.get()
                if rec.AcceptWaveform(data):
                    # Result() must be read exactly once per utterance; a
                    # second call returns an empty result.
                    result_json = rec.Result()
                    print(result_json)
                    j = json.loads(result_json)
                    if "text" in j and "result" in j:
                        words = j["result"]
                        _print_words(words)
                        print("Generating Image")
                        if len(words) > 2 and await _fetch_image(j["text"]):
                            print("Image generated")
                            # Restart playback so VLC reloads the new file,
                            # then pause to keep the still image on screen.
                            player.stop()
                            player.play()
                            sleep(1)
                            player.pause()
                if dump_fn is not None:
                    dump_fn.write(data)
    except KeyboardInterrupt:
        print("\nDone")
        parser.exit(0)
    except Exception as e:
        parser.exit(type(e).__name__ + ": " + str(e))
    finally:
        # Runs even when parser.exit() raises SystemExit above.
        if dump_fn is not None:
            dump_fn.close()
# Script entry point: run the async main() to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())

View File

@ -43,10 +43,10 @@ async def run_test():
if "text" in j and "result" in j:
n = 0
for word in j["result"]:
n += int(word["conf"])
if int(word["conf"]) > 0.7:
n += float(word["conf"])
if float(word["conf"]) > 0.7:
print(colored(word["word"], "green"), end=" ")
elif int(word["conf"]) > 0.7:
elif float(word["conf"]) > 0.5:
print(colored(word["word"], "yellow"), end=" ")
else:
print(colored(word["word"], "red"), end=" ")