#!/usr/bin/env python3
"""Stream microphone audio to a websocket speech-to-text server.

Captures 16-bit mono audio with ``sounddevice``, forwards every audio block
to the server given by ``--uri`` and prints each reply that carries both a
``"text"`` and a ``"result"`` field, followed by the mean word confidence.

Introduced by the patch "Add speechtotext script" as ``test_microphone.py``.
"""

import argparse
import asyncio
import json
import logging
import os
import sys

import sounddevice as sd
import websockets


def int_or_str(text):
    """Helper function for argument parsing.

    Return *text* converted to ``int`` when it parses as one, otherwise
    return it unchanged, so ``--device`` accepts a numeric ID or a name.
    """
    try:
        return int(text)
    except ValueError:
        return text


def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Copies the block to ``bytes`` and hands it to the asyncio queue through
    ``call_soon_threadsafe``, because this function does not run on the
    event-loop thread.
    """
    if status:
        # Surface stream problems reported by the audio backend instead of
        # dropping them silently.
        logging.warning("audio stream status: %s", status)
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))


def _mean_confidence(words):
    """Return the mean of the ``"conf"`` values in *words*, or None if empty."""
    if not words:
        return None
    # float(), not int(): truncating each value first would turn fractional
    # confidences (e.g. 0.97) into 0 and make the average meaningless.
    return sum(float(word["conf"]) for word in words) / len(words)


def _report(message):
    """Print the text and mean word confidence of one decoded server reply.

    Replies lacking either the ``"text"`` or the ``"result"`` key are ignored.
    """
    if "text" in message and "result" in message:
        print(message["text"])
        confidence = _mean_confidence(message["result"])
        if confidence is not None:
            print(confidence)


async def run_test():
    """Send microphone audio to the server and print what it recognises.

    Reads the module-level ``args`` and ``audio_queue`` set up by ``main()``.
    Runs until the task is cancelled or an error occurs; on cancellation the
    end-of-stream marker is sent and the server's last reply is printed.
    """
    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000,
                           device=args.device, dtype='int16',
                           channels=1, callback=callback) as device:

        async with websockets.connect(args.uri) as websocket:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))

            try:
                while True:
                    data = await audio_queue.get()
                    await websocket.send(data)
                    _report(json.loads(await websocket.recv()))
            except asyncio.CancelledError:
                # The loop above never exits on its own, so this is the only
                # point at which the end-of-stream handshake can run.
                await websocket.send('{"eof" : 1}')
                print(await websocket.recv())
                raise


async def main():
    """Parse the command line, set up shared state and run the test.

    Arguments are parsed in two stages so that ``--list-devices`` can print
    the available audio devices and exit before the other options are
    defined.
    """
    global args
    global loop
    global audio_queue

    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-l', '--list-devices', action='store_true',
                        help='show list of audio devices and exit')
    args, remaining = parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        parser.exit(0)
    parser = argparse.ArgumentParser(description="ASR Server",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     parents=[parser])
    parser.add_argument('-u', '--uri', type=str, metavar='URL',
                        help='Server URL', default='ws://localhost:2700')
    parser.add_argument('-d', '--device', type=int_or_str,
                        help='input device (numeric ID or substring)')
    parser.add_argument('-r', '--samplerate', type=int, help='sampling rate', default=16000)
    args = parser.parse_args(remaining)
    # callback() needs the running loop and the queue to pass audio from
    # the audio thread to run_test().
    loop = asyncio.get_running_loop()
    audio_queue = asyncio.Queue()

    logging.basicConfig(level=logging.INFO)
    await run_test()


if __name__ == '__main__':
    asyncio.run(main())