Add speech-to-text script

2022-10-17 10:36:48 +01:00 · 2022-10-17 10:36:48 +01:00 · 0cb068107b
parent 962a44f6e9
commit 0cb068107b
1 changed files with 74 additions and 0 deletions
--- a/test_microphone.py
+++ b/test_microphone.py
@@ -0,0 +1,74 @@
 #!/usr/bin/env python3
 import json
 import os
 import sys
 import asyncio
 import websockets
 import logging
 import sounddevice as sd
 import argparse
 def int_or_str(text):
    """Argument-parsing helper: convert *text* to ``int`` when possible.

    Returns the integer value if ``int(text)`` succeeds; if it raises
    ``ValueError`` the original *text* is handed back untouched.
    """
    try:
        number = int(text)
    except ValueError:
        # Not a numeric literal: keep the raw value.
        return text
    return number
 def callback(indata, frames, time, status):
    """Hand one captured audio block over to the asyncio side.

    This is called (from a separate thread) for each audio block, so the
    block is not put on the queue directly; the put is scheduled on the
    event loop with ``loop.call_soon_threadsafe``.

    Args:
        indata: Buffer holding the captured audio; it is copied into a
            ``bytes`` object before being queued.
        frames: Unused.
        time: Unused.
        status: Status flags reported for this block; logged as a warning
            when truthy instead of being dropped silently.
    """
    if status:
        # A truthy status means the backend flagged a problem with this
        # block (presumably input overflow/underflow - see sounddevice docs).
        logging.warning("audio input status: %s", status)
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))
 def _average_confidence(words):
    """Return the mean ``conf`` value of *words*, or ``None`` when it is empty.

    The values are converted with ``float`` so fractional confidences
    contribute to the mean instead of being truncated to whole numbers.
    """
    if not words:
        return None
    return sum(float(word["conf"]) for word in words) / len(words)


 async def run_test():
    """Stream audio blocks to the server and print what it recognizes.

    Opens a mono 16-bit raw input stream on ``args.device`` and a websocket
    to ``args.uri``, announces the stream's sample rate, then repeatedly
    sends one queued audio block and waits for one reply. Replies carrying
    both ``"text"`` and ``"result"`` are printed, followed by the mean word
    confidence when the result list is not empty.

    The loop has no exit condition of its own. When the task is cancelled,
    the end-of-stream marker is sent, the next message from the server is
    printed, and the cancellation is re-raised.
    """
    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000, device=args.device, dtype='int16',
                           channels=1, callback=callback) as device:
        async with websockets.connect(args.uri) as websocket:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))
            try:
                while True:
                    data = await audio_queue.get()
                    await websocket.send(data)
                    # Lock-step exchange: one reply is awaited per block sent
                    # (assumes the server answers every audio message).
                    result = await websocket.recv()
                    j = json.loads(result)
                    if "text" in j and "result" in j:
                        print(j["text"])
                        confidence = _average_confidence(j["result"])
                        # Skip the average for an empty word list rather than
                        # dividing by zero.
                        if confidence is not None:
                            print(confidence)
            except asyncio.CancelledError:
                # Cancellation (e.g. Ctrl-C under asyncio.run) is the orderly
                # way out of the loop: signal end of stream, show what the
                # server sends back, then let the cancellation propagate.
                await websocket.send('{"eof" : 1}')
                print(await websocket.recv())
                raise
 async def main():
    """Parse the command line, set up the shared globals and run the test.

    Parsing happens in two passes: a help-less parser first looks only for
    ``--list-devices`` (printing the device list and exiting when given),
    then a full parser built on top of it handles the remaining arguments.
    The parsed options, the running event loop and a fresh audio queue are
    stored in the module globals ``args``, ``loop`` and ``audio_queue``
    before ``run_test`` is awaited.
    """
    global args, loop, audio_queue

    # Pass 1: only the device-listing switch; everything else is left over.
    listing_parser = argparse.ArgumentParser(add_help=False)
    listing_parser.add_argument(
        '-l', '--list-devices',
        action='store_true',
        help='show list of audio devices and exit',
    )
    args, leftover = listing_parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        listing_parser.exit(0)

    # Pass 2: the full option set, inheriting the switch declared above.
    cli = argparse.ArgumentParser(
        description="ASR Server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[listing_parser],
    )
    option_table = (
        (('-u', '--uri'),
         dict(type=str, metavar='URL', help='Server URL', default='ws://localhost:2700')),
        (('-d', '--device'),
         dict(type=int_or_str, help='input device (numeric ID or substring)')),
        (('-r', '--samplerate'),
         dict(type=int, help='sampling rate', default=16000)),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    args = cli.parse_args(leftover)

    loop = asyncio.get_running_loop()
    audio_queue = asyncio.Queue()
    logging.basicConfig(level=logging.INFO)
    await run_test()
 # Script entry point: when executed directly, run main() on a new event loop.
 if __name__ == '__main__':
    asyncio.run(main())