speechtoimage/pi/test_microphone.py

104 lines
3.9 KiB
Python
Raw Permalink Normal View History

2022-10-17 09:36:48 +00:00
#!/usr/bin/env python3
import json
import asyncio
import websockets
import logging
import sounddevice as sd
import argparse
2022-11-01 00:22:04 +00:00
from termcolor import colored
import aiohttp
import aiofiles
import vlc
from time import sleep
2022-10-17 09:36:48 +00:00
def int_or_str(text):
    """Convert *text* to an int when possible, otherwise return it unchanged.

    Used as an argparse ``type=`` so an option accepts either a numeric ID
    or an arbitrary string.
    """
    try:
        number = int(text)
    except ValueError:
        # Not an integer literal: hand the original string back as-is.
        return text
    return number
def callback(indata, frames, time, status):
    """Hand one captured audio block over to the asyncio consumer.

    This is called (from a separate thread) for each audio block, so the
    block is scheduled onto the event loop with ``call_soon_threadsafe``
    rather than being put on the queue directly.

    Args:
        indata: Buffer with the captured audio; copied into ``bytes``.
        frames: Number of frames in the block (unused).
        time: Timing information for the block (unused).
        status: Status flags reported by the stream; logged when truthy so
            problems such as dropped input are visible instead of ignored.
    """
    if status:
        logging.warning("Audio input status: %s", status)
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))
async def run_test():
    """Stream microphone audio to the ASR server and display generated images.

    Opens the input device described by the module-level ``args``, sends each
    queued audio block over the websocket at ``args.uri`` and reads one reply
    per block. A reply containing both ``"text"`` and ``"result"`` is printed
    word by word, coloured by confidence (green > 0.7, yellow > 0.5, red
    otherwise), followed by the mean confidence. When the reply has more than
    two words, its text is sent to the image service; a 200 response is saved
    as ``image.png`` and shown through the VLC player.
    """
    vlc_instance = vlc.Instance()
    player = vlc_instance.media_player_new()
    media = vlc_instance.media_new("image.png")
    player.set_media(media)

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000,
                           device=args.device, dtype='int16',
                           channels=1, callback=callback) as device:
        async with websockets.connect(args.uri) as websocket:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))

            while True:
                data = await audio_queue.get()
                await websocket.send(data)
                result = await websocket.recv()
                j = json.loads(result)
                if "text" not in j or "result" not in j:
                    continue

                words = j["result"]
                if not words:
                    # Nothing to print or average; avoids dividing by zero.
                    continue

                n = 0
                for word in words:
                    conf = float(word["conf"])
                    n += conf
                    if conf > 0.7:
                        colour = "green"
                    elif conf > 0.5:
                        colour = "yellow"
                    else:
                        colour = "red"
                    print(colored(word["word"], colour), end=" ")
                print(n / len(words))

                print("Generating Image")
                if len(words) > 2:
                    async with aiohttp.ClientSession() as session:
                        # Let aiohttp build the query string so characters
                        # such as '&', '#' or '+' in the text are escaped.
                        async with session.get('http://192.168.1.95:8000',
                                               params={"text": j["text"]}) as resp:
                            print(resp.status)
                            if resp.status == 200:
                                # Context manager closes the file even if
                                # reading or writing the body fails.
                                async with aiofiles.open('image.png', mode='wb') as f:
                                    await f.write(await resp.read())
                                print("Image generated")
                                player.stop()
                                player.play()
                                # Non-blocking pause: time.sleep() here would
                                # stall the event loop and the websocket.
                                await asyncio.sleep(1)
                                player.pause()

            # NOTE: not reached while the loop above has no exit condition;
            # kept so the end-of-stream handshake is in place if one is added.
            await websocket.send('{"eof" : 1}')
            print(await websocket.recv())
2022-11-01 00:22:04 +00:00
async def main():
    """Parse command-line options, set up shared state and run the client.

    Assigns the module-level ``args``, ``loop`` and ``audio_queue`` names
    before awaiting ``run_test``.
    """
    global args, loop, audio_queue

    # First pass: only -l is recognised, so devices can be listed and the
    # program can exit before any other option is parsed.
    list_parser = argparse.ArgumentParser(add_help=False)
    list_parser.add_argument(
        '-l', '--list-devices',
        action='store_true',
        help='show list of audio devices and exit',
    )
    args, remaining = list_parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        list_parser.exit(0)

    # Second pass: the full parser inherits -l through ``parents``.
    full_parser = argparse.ArgumentParser(
        description="ASR Server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[list_parser],
    )
    full_parser.add_argument(
        '-u', '--uri',
        type=str,
        metavar='URL',
        help='Server URL',
        default='ws://192.168.1.95:2700',
    )
    full_parser.add_argument(
        '-d', '--device',
        type=int_or_str,
        help='input device (numeric ID or substring)',
    )
    full_parser.add_argument(
        '-r', '--samplerate',
        type=int,
        help='sampling rate',
        default=16000,
    )
    args = full_parser.parse_args(remaining)

    loop = asyncio.get_running_loop()
    audio_queue = asyncio.Queue()
    logging.basicConfig(level=logging.INFO)
    await run_test()
if __name__ == "__main__":
    # Start the event loop only when the file is executed as a script.
    asyncio.run(main())