speechtoimage/pi/test_microphone.py

104 lines
3.9 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import asyncio
import websockets
import logging
import sounddevice as sd
import argparse
from termcolor import colored
import aiohttp
import aiofiles
import vlc
from time import sleep
def int_or_str(text):
    """Helper function for argument parsing.

    Used as an argparse ``type=`` converter: returns ``int(text)`` when the
    text is a valid integer literal, otherwise returns ``text`` unchanged so
    the caller can treat it as a name/substring.
    """
    try:
        return int(text)
    except ValueError:
        return text
def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Copies the captured block to ``bytes`` and hands it to the asyncio side
    by scheduling ``audio_queue.put_nowait`` on the event loop stored in the
    module-level ``loop``. ``frames`` and ``time`` are part of the callback
    signature but are not used here.
    """
    # A non-empty status means the audio backend reported a problem for this
    # block; surface it instead of dropping it silently.
    if status:
        logging.warning("Audio input status: %s", status)
    # asyncio.Queue is not thread-safe, so the put must run on the loop thread.
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))
def _confidence_color(conf):
    """Return the terminal colour used to print a word with confidence ``conf``."""
    if conf > 0.7:
        return "green"
    if conf > 0.5:
        return "yellow"
    return "red"


async def run_test(image_url='http://192.168.1.95:8000'):
    """Stream microphone audio to the recognizer and display generated images.

    Reads raw audio blocks from the module-level ``audio_queue`` (filled by
    ``callback``), forwards each block to the websocket at ``args.uri`` and
    parses every reply as JSON. When a reply carries both ``"text"`` and
    ``"result"``, each word is printed coloured by its confidence, followed
    by the mean confidence. If the utterance has more than two words, the
    text is sent to ``image_url`` as the ``text`` query parameter; a 200
    response body is written to ``image.png`` and shown with the VLC player.

    Args:
        image_url: Base URL of the image-generation HTTP service.
    """
    vlc_instance = vlc.Instance()
    player = vlc_instance.media_player_new()
    media = vlc_instance.media_new("image.png")
    player.set_media(media)

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000,
                           device=args.device, dtype='int16',
                           channels=1, callback=callback) as device:
        async with websockets.connect(args.uri) as websocket:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))

            # One HTTP session for the whole run instead of one per utterance.
            async with aiohttp.ClientSession() as session:
                while True:
                    data = await audio_queue.get()
                    await websocket.send(data)
                    j = json.loads(await websocket.recv())

                    # Only replies carrying both keys are processed; others are skipped.
                    if "text" not in j or "result" not in j:
                        continue
                    words = j["result"]
                    if not words:
                        # Nothing to average or display; avoids dividing by zero.
                        continue

                    total = 0.0
                    for word in words:
                        conf = float(word["conf"])
                        total += conf
                        print(colored(word["word"], _confidence_color(conf)), end=" ")
                    print(total / len(words))
                    print("Generating Image")

                    if len(words) > 2:
                        # Let the HTTP client encode the query string so that
                        # characters such as '&', '#' or non-ASCII text are
                        # transmitted correctly.
                        async with session.get(image_url, params={"text": j["text"]}) as resp:
                            print(resp.status)
                            if resp.status == 200:
                                async with aiofiles.open('image.png', mode='wb') as f:
                                    await f.write(await resp.read())
                                print("Image generated")
                                # Restart playback so the player picks up the
                                # rewritten file, then pause on it.
                                player.stop()
                                player.play()
                                # Non-blocking wait: keeps the event loop free
                                # to accept queued audio while VLC starts.
                                await asyncio.sleep(1)
                                player.pause()

            # NOTE: unreachable while the loop above has no exit condition;
            # kept so the end-of-stream handshake is in place if one is added.
            await websocket.send('{"eof" : 1}')
            print(await websocket.recv())
async def main():
    """Parse the command line, set up the shared globals and run the client.

    Publishes three module-level names that ``callback`` and ``run_test``
    read: ``args`` (the parsed options), ``loop`` (the running event loop)
    and ``audio_queue`` (the asyncio queue fed by the audio callback).
    """
    global args
    global loop
    global audio_queue

    # First pass: a help-less parser that only knows -l/--list-devices, so the
    # device list can be printed without the other options being processed.
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-l', '--list-devices', action='store_true',
                        help='show list of audio devices and exit')
    args, remaining = parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        parser.exit(0)

    # Second pass: the full parser inherits -l from the first one and handles
    # whatever arguments the first pass did not consume.
    parser = argparse.ArgumentParser(description="ASR Server",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     parents=[parser])
    parser.add_argument('-u', '--uri', type=str, metavar='URL',
                        help='Server URL', default='ws://192.168.1.95:2700')
    parser.add_argument('-d', '--device', type=int_or_str,
                        help='input device (numeric ID or substring)')
    parser.add_argument('-r', '--samplerate', type=int, help='sampling rate', default=16000)
    args = parser.parse_args(remaining)

    # The audio callback runs on another thread and needs the loop and queue
    # to hand blocks over to the coroutine side.
    loop = asyncio.get_running_loop()
    audio_queue = asyncio.Queue()

    logging.basicConfig(level=logging.INFO)
    await run_test()
if __name__ == '__main__':
    # The streaming loop never ends on its own, so Ctrl-C is the normal way
    # to stop the script; exit quietly instead of dumping a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nInterrupted")